├── models
    ├── __init__.py
    ├── mask_rcnn
    │   └── __init__.py
    ├── faster_rcnn
    │   ├── __init__.py
    │   ├── init.py
    │   ├── vgg_adver_expansion_cluster.py
    │   └── test_module.py
    ├── head.py
    └── losses.py
├── utils
    ├── __init__.py
    ├── distributed_utils.py
    ├── log_helper.py
    ├── lr_helper.py
    ├── load_helper.py
    ├── coco_eval.py
    ├── anchor_helper.py
    ├── bbox_helper.py
    ├── cal_mAP.py
    └── visualize_helper.py
├── datasets
    ├── __init__.py
    ├── pycocotools
    │   ├── __init__.py
    │   ├── Makefile
    │   ├── setup.py
    │   ├── common
    │   │   ├── maskApi.h
    │   │   └── gason.h
    │   └── mask.py
    ├── target_dataset.py
    ├── example_loader.py
    ├── coco_loader.py
    └── example_dataset.py
├── functions
    ├── __init__.py
    ├── predict_bbox.py
    ├── rpn_proposal.py
    ├── proposal_assign.py
    └── anchor_target.py
├── extensions
    ├── _nms
    │   ├── __init__.py
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── nms
    │   │   │   └── __init__.py
    │   ├── src
    │   │   ├── nms_cuda.h
    │   │   ├── nms.h
    │   │   ├── cuda
    │   │   │   ├── nms_kernel.h
    │   │   │   └── nms_kernel.cu
    │   │   ├── nms_cuda.c
    │   │   └── nms.c
    │   ├── build.sh
    │   ├── build.py
    │   └── pth_nms.py
    ├── _roi_align
    │   ├── __init__.py
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── roi_align
    │   │   │   └── __init__.py
    │   ├── functions
    │   │   ├── __init__.py
    │   │   └── roi_align.py
    │   ├── modules
    │   │   ├── __init__.py
    │   │   └── roi_align.py
    │   ├── build.sh
    │   ├── src
    │   │   ├── roi_align_cuda.h
    │   │   ├── roi_align_kernel.h
    │   │   ├── roi_align_cuda.c
    │   │   └── roi_align_kernel.cu
    │   └── build.py
    ├── _bbox_helper
    │   ├── __init__.py
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── bbox_helper
    │   │   │   └── __init__.py
    │   ├── src
    │   │   ├── bbox_helper.h
    │   │   ├── bbox_helper_cuda.h
    │   │   ├── bbox_helper.c
    │   │   ├── cuda
    │   │   │   ├── iou_overlap_kernel.h
    │   │   │   └── iou_overlap_kernel.cu
    │   │   └── bbox_helper_cuda.c
    │   ├── build.sh
    │   ├── bbox_helper.py
    │   └── build.py
    ├── _roi_pooling
    │   ├── __init__.py
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── roi_pooling
    │   │   │   └── __init__.py
    │   ├── modules
    │   │   ├── __init__.py
    │   │   ├── roi_pool.py
    │   │   └── roi_pool_py.py
    │   ├── functions
    │   │   ├── __init__.py
    │   │   └── roi_pool.py
    │   ├── src
    │   │   ├── roi_pooling.h
    │   │   ├── roi_pooling_cuda.h
    │   │   ├── roi_pooling_kernel.h
    │   │   ├── roi_pooling_cuda.c
    │   │   └── roi_pooling.c
    │   ├── build.sh
    │   └── build.py
    ├── _focal_loss
    │   ├── _ext
    │   │   ├── __init__.py
    │   │   └── focal_loss
    │   │   │   └── __init__.py
    │   ├── build.sh
    │   ├── src
    │   │   ├── cuda
    │   │   │   ├── focal_loss_sigmoid_kernel.h
    │   │   │   ├── focal_loss_softmax_kernel.h
    │   │   │   ├── focal_loss_sigmoid_kernel.cu
    │   │   │   └── focal_loss_softmax_kernel.cu
    │   │   ├── focal_loss_cuda.h
    │   │   └── focal_loss_cuda.c
    │   ├── build.py
    │   └── focal_loss.py
    ├── _cython_bbox
    │   ├── build.sh
    │   ├── setup.py
    │   ├── cython_bbox.pyx
    │   └── cython_nms.pyx
    ├── __init__.py
    └── build_all.sh
├── img
    └── pipeline4.png
├── examples
    └── faster-rcnn
    │   └── cityscapes
    │       └── vgg
    │           ├── eval_single.sh
    │           ├── 2cluster.sh
    │           ├── 4cluster.sh
    │           ├── 8cluster.sh
    │           ├── eval.sh
    │           └── config_512.json
└── README.md


/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_nms/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/mask_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_nms/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/models/faster_rcnn/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/_ext/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/modules/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/functions/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/datasets/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 | 


--------------------------------------------------------------------------------
/img/pipeline4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/xinge008/SCDA/HEAD/img/pipeline4.png


--------------------------------------------------------------------------------
/extensions/_cython_bbox/build.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python setup.py build_ext --inplace
3 | 


--------------------------------------------------------------------------------
/extensions/_nms/src/nms_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_nms(THLongTensor * keep_out, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh);


--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper.h:
--------------------------------------------------------------------------------
1 | int cpu_iou_overlaps(THFloatTensor * bboxes1, THFloatTensor * bboxes2, THFloatTensor * output);
2 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper_cuda.h:
--------------------------------------------------------------------------------
1 | int gpu_iou_overlaps(THCudaTensor * bboxes1, THCudaTensor * bboxes2, THCudaTensor * output);
2 | 


--------------------------------------------------------------------------------
/extensions/_nms/src/nms.h:
--------------------------------------------------------------------------------
1 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh);


--------------------------------------------------------------------------------
/extensions/_nms/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build the NMS extension in two steps:
#   1. compile the CUDA kernel into an object file with nvcc,
#   2. run build.py, which compiles the C wrappers and links that object.
#
# Abort on the first failing command: without this a failed `cd` or nvcc run
# was still followed by build.py, which could then link a stale or missing
# nms_kernel.cu.o and hide the real error.
set -e

cd src/cuda
echo "Compiling nms kernels by nvcc..."
# -arch=sm_50 selects the target GPU architecture; change it for other GPUs.
nvcc -c -o nms_kernel.cu.o nms_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
cd ../../
python build.py
6 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build the bbox_helper extension in two steps:
#   1. compile the IoU-overlap CUDA kernel into an object file with nvcc,
#   2. run build.py, which compiles the C wrappers and links that object.
#
# Abort on the first failing command so build.py never runs after a failed
# `cd` or nvcc step (it would link a stale or missing object file).
set -e

cd src/cuda
# The message used to say "nms" (copy-paste from the NMS build script).
echo "Compiling iou_overlap kernels by nvcc..."
# -arch=sm_50 selects the target GPU architecture; change it for other GPUs.
nvcc -c -o iou_overlap_kernel.cu.o iou_overlap_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
cd ../../
python build.py
6 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output);


--------------------------------------------------------------------------------
/extensions/__init__.py:
--------------------------------------------------------------------------------
1 | from extensions._nms.pth_nms import pth_nms as nms
2 | # from extensions._psroi_pooling.psroi_pool import PSRoIPool
3 | from extensions._roi_pooling.modules.roi_pool import _RoIPooling as RoIPool
4 | # from extensions._deformable_convolution.deformable_conv import *


--------------------------------------------------------------------------------
/extensions/_roi_align/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_align_kernel.cu.o roi_align_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
 8 | 
 9 | cd ../
10 | python build.py
11 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/build.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CUDA_PATH=/usr/local/cuda/
 4 | 
 5 | cd src
 6 | echo "Compiling my_lib kernels by nvcc..."
 7 | nvcc -c -o roi_pooling.cu.o roi_pooling_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
 8 | 
 9 | cd ../
10 | python build.py
11 | 
12 | 


--------------------------------------------------------------------------------
/extensions/build_all.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | for file in ./*
 3 | do
 4 |     if test -d $file && test -f $file/build.sh
 5 |     then
 6 |         cd $file
 7 |         echo building $file
 8 |         bash build.sh
 9 |         if [ $? != 0 ]; then
10 |             exit
11 |         fi
12 |         cd ..
13 |     fi
14 | done
15 | 


--------------------------------------------------------------------------------
/datasets/pycocotools/Makefile:
--------------------------------------------------------------------------------
 1 | all:
 2 |     # install pycocotools locally
 3 | 	python setup.py build_ext --inplace
 4 | 	rm -rf build
 5 | 
 6 | install:
 7 | 	# install pycocotools to the Python site-packages
 8 | 	python setup.py build_ext install
 9 | 	rm -rf build
10 | clean:
11 | 	rm _mask.c _mask.cpython-36m-x86_64-linux-gnu.so
12 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/build.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Build the focal_loss extension in two steps:
#   1. compile the sigmoid and softmax CUDA kernels into object files,
#   2. run build.py, which compiles the C wrapper and links both objects.
#
# Abort on the first failing command so build.py never runs after a failed
# `cd` or nvcc step (it would link stale or missing object files).
set -e

cd src/cuda
echo "Compiling focal_loss kernels by nvcc..."
# -arch=sm_50 selects the target GPU architecture; change it for other GPUs.
nvcc -c -o focal_loss_sigmoid_kernel.cu.o focal_loss_sigmoid_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
nvcc -c -o focal_loss_softmax_kernel.cu.o focal_loss_softmax_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_50
cd ../../
python build.py
7 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper.c:
--------------------------------------------------------------------------------
 1 | #include <TH/TH.h>
 2 | #include <math.h>
 3 | 
 4 | int cpu_iou_overlaps(THFloatTensor * bboxes1, THFloatTensor * bboxes2, THFloatTensor * output){
 5 | 
 6 |     float * bboxes1_flat = THFloatTensor_data(bboxes1);
 7 |     float * bboxes2_flat = THFloatTensor_data(bboxes2);
 8 | 
 9 |     // TO BE IMPLEMENTED
10 | }
11 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
2 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output);
3 | 
4 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
5 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad);
6 | 


--------------------------------------------------------------------------------
/extensions/_nms/src/cuda/nms_kernel.h:
--------------------------------------------------------------------------------
#ifndef _NMS_KERNEL
#define _NMS_KERNEL

// Declarations for the NMS kernel entry point.
// The extern "C" wrapper keeps the symbol names unmangled when this header
// is included from a C++ translation unit.
#ifdef __cplusplus
extern "C" {
#endif

// Integer division of m by n, rounded up when there is a positive remainder.
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long (sizeof * 8).
// NOTE(review): presumably one thread per bit of a mask word -- confirm
// against nms_kernel.cu.
int const threadsPerBlock = sizeof(unsigned long long) * 8;

// Runs NMS over boxes_num boxes using nms_overlap_thresh as the threshold.
// NOTE(review): the _dev suffix suggests both pointers must reference device
// memory; the layout of boxes_dev and mask_dev is defined by the
// implementation, which is not visible from this header -- confirm there.
void _nms(int boxes_num, float * boxes_dev,
          unsigned long long * mask_dev, float nms_overlap_thresh);

#ifdef __cplusplus
}
#endif

#endif
19 | 
20 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling_cuda.h:
--------------------------------------------------------------------------------
1 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
2 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax);
3 | 
4 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
5 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax);


--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/cuda/iou_overlap_kernel.h:
--------------------------------------------------------------------------------
#ifndef _IOU_OVERLAP_KERNEL
#define _IOU_OVERLAP_KERNEL

// Declaration of the box-overlap kernel launcher.
// The extern "C" wrapper keeps the symbol name unmangled when this header
// is included from a C++ translation unit.
#ifdef __cplusplus
extern "C" {
#endif

// Computes overlaps between two sets of boxes and writes them to top_data;
// the work is issued on the given CUDA stream.
//   bboxes1_data / bboxes2_data : the two input box buffers
//   size_bbox                   : NOTE(review): presumably the number of
//                                 floats per box -- confirm in the .cu file
//   num_bbox1 / num_bbox2       : number of boxes in each input
// NOTE(review): the layout of top_data and the meaning of the int return
// value are defined by the implementation, which is not visible here.
int IOUOverlap(
    const float* bboxes1_data, const float* bboxes2_data, 
    const int size_bbox,
    const int num_bbox1,
    const int num_bbox2,
    float* top_data, 
    cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
21 | 
22 | 


--------------------------------------------------------------------------------
/extensions/_nms/_ext/nms/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._nms import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/_ext/roi_align/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_align import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/_ext/bbox_helper/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._bbox_helper import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/_ext/focal_loss/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._focal_loss import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/_ext/roi_pooling/__init__.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from torch.utils.ffi import _wrap_function
 3 | from ._roi_pooling import lib as _lib, ffi as _ffi
 4 | 
 5 | __all__ = []
 6 | def _import_symbols(locals):
 7 |     for symbol in dir(_lib):
 8 |         fn = getattr(_lib, symbol)
 9 |         if callable(fn):
10 |             locals[symbol] = _wrap_function(fn, _ffi)
11 |         else:
12 |             locals[symbol] = fn
13 |         __all__.append(symbol)
14 | 
15 | _import_symbols(locals())
16 | 


--------------------------------------------------------------------------------
/models/faster_rcnn/init.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time    : 18-4-19
 3 | # @Author  : Xinge
 4 | import torch.nn.init as init
 5 | import numpy as np
 6 | 
 7 | 
 8 | def gaussian_weights_init(m):
 9 |     classname = m.__class__.__name__
10 |     if classname.find('Conv') != -1 and classname.find('Conv') == 0:
11 |         # print m.__class__.__name__
12 |         m.weight.data.normal_(0.0, 0.02)
13 | 
def xavier_weights_init(m):
    """Xavier-initialise a convolution module in place.

    For every module whose class name contains ``'Conv'`` the weight is
    drawn from a Xavier/Glorot uniform distribution with gain ``sqrt(2)``
    and the bias, when present, is set to the constant ``0.1``. Other
    modules are left unchanged. The function takes a single module, so it
    can be handed to ``nn.Module.apply``.

    Compared with the previous version this no longer fails on layers built
    with ``bias=False`` (where ``m.bias`` is ``None``) or on ``*Conv*``-named
    containers without a ``weight``.

    Args:
        m: the module to (possibly) re-initialise, modified in place.
    """
    classname = m.__class__.__name__
    if 'Conv' not in classname:
        return

    # Prefer the in-place, non-deprecated initialisers; fall back to the old
    # names on PyTorch releases that predate them.
    xavier_uniform_ = getattr(init, 'xavier_uniform_', None) or init.xavier_uniform
    constant_ = getattr(init, 'constant_', None) or init.constant

    weight = getattr(m, 'weight', None)
    if weight is not None:
        xavier_uniform_(weight, gain=float(np.sqrt(2)))

    bias = getattr(m, 'bias', None)
    if bias is not None:
        constant_(bias, 0.1)
19 | 
20 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/bbox_helper.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from extensions._bbox_helper._ext import bbox_helper
 3 | import numpy as np
 4 | 
 5 | def overlap(bboxes1, bboxes2):
 6 |     # bboxes1, bboxes2 has to be a tensor
 7 |     # bboxes1  [N, 4]: x1, y1, x2, y2
 8 |     # bboxes2  [M, 4]: x1, y1, x2, y2
 9 |     bboxes1 = torch.from_numpy(bboxes1[:, :4]).float().cuda().contiguous()
10 |     bboxes2 = torch.from_numpy(bboxes2[:, :4]).float().cuda().contiguous()
11 | 
12 |     output = torch.cuda.FloatTensor(bboxes1.shape[0], bboxes2.shape[0])
13 |     bbox_helper.gpu_iou_overlaps(bboxes1, bboxes2, output)
14 | 
15 |     return output.cpu().numpy()
16 | 
17 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/modules/roi_pool.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from ..functions.roi_pool import RoIPoolFunction
 3 | 
 4 | 
 5 | class _RoIPooling(Module):
 6 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 7 |         super(_RoIPooling, self).__init__()
 8 | 
 9 |         self.pooled_width = int(pooled_width)
10 |         self.pooled_height = int(pooled_height)
11 |         self.spatial_scale = float(spatial_scale)
12 | 
13 |     def forward(self, features, rois):
14 |         assert(rois.shape[1] == 5)
15 |         return RoIPoolFunction(self.pooled_height, self.pooled_width, self.spatial_scale)(features, rois)
16 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_sigmoid_kernel.h:
--------------------------------------------------------------------------------
#ifndef _FOCAL_LOSS_SIGMOID_KERNEL
#define _FOCAL_LOSS_SIGMOID_KERNEL

// Declarations of the sigmoid focal-loss kernel launchers.
// The extern "C" wrapper keeps the symbol names unmangled when this header
// is included from a C++ translation unit.
// NOTE: "Laucher" (sic) is part of the exported symbol names; renaming it
// requires updating every caller and the kernel source as well.
#ifdef __cplusplus
extern "C" {
#endif

// Forward pass: reads logits and targets, writes the result to `losses`,
// with the work issued on `stream`.
// NOTE(review): the meaning of N, the buffer layouts and the int return
// value are defined by the implementation, which is not visible here.
int SigmoidFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses, cudaStream_t stream);

// Backward pass: same inputs as the forward launcher; the result is written
// to `dX_data` (presumably the gradient w.r.t. logits -- confirm in the
// kernel source).
int SigmoidFocalLossBackwardLaucher(
    const int N, const float* logits, 
    const int* targets, float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes,
    cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
25 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_softmax_kernel.h:
--------------------------------------------------------------------------------
#ifndef _FOCAL_LOSS_SOFTMAX_KERNEL
#define _FOCAL_LOSS_SOFTMAX_KERNEL

// Declarations of the softmax focal-loss kernel launchers.
// The extern "C" wrapper keeps the symbol names unmangled when this header
// is included from a C++ translation unit.
// NOTE: "Laucher" (sic) is part of the exported symbol names; renaming it
// requires updating every caller and the kernel source as well.
#ifdef __cplusplus
extern "C" {
#endif

// Forward pass: reads logits and targets and writes to `losses` and
// `priors`, with the work issued on `stream`.
// NOTE(review): `priors` is an output here and a const input of the backward
// launcher, so it is presumably an intermediate saved for the backward pass
// -- confirm in the kernel source. Buffer layouts and the int return value
// are likewise defined by the implementation.
int SoftmaxFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses,
    float* priors, cudaStream_t stream);

// Backward pass: reads logits, targets and `priors`; writes to `dX_data`
// and uses `buff` as an additional writable buffer (purpose not visible
// from this header).
int SoftmaxFocalLossBackwardLaucher(
    const int N, const float* logits, 
    const int* targets, float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes,
    const float* priors, float* buff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
26 | 


--------------------------------------------------------------------------------
/datasets/pycocotools/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | from Cython.Build import cythonize
 3 | from distutils.extension import Extension
 4 | import numpy as np
 5 | 
 6 | # To compile and install locally run "python setup.py build_ext --inplace"
 7 | # To install library to Python site-packages run "python setup.py build_ext install"
 8 | 
 9 | ext_modules = [
10 |     Extension(
11 |         '_mask',
12 |         sources=['common/maskApi.c', '_mask.pyx'],
13 |         include_dirs = [np.get_include(), 'common'],
14 |         extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
15 |     )
16 | ]
17 | 
18 | setup(name='pycocotools',
19 |       packages=['pycocotools'],
20 |       package_dir = {'pycocotools': '.'},
21 |       version='2.0',
22 |       ext_modules=
23 |           cythonize(ext_modules)
24 |       )
25 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling_kernel.h:
--------------------------------------------------------------------------------
#ifndef _ROI_POOLING_KERNEL
#define _ROI_POOLING_KERNEL

// Declarations of the RoI-pooling kernel launchers.
// The extern "C" wrapper keeps the symbol names unmangled when this header
// is included from a C++ translation unit.
// NOTE: "Laucher" (sic) is part of the exported symbol names; renaming it
// requires updating every caller and the kernel source as well.
#ifdef __cplusplus
extern "C" {
#endif

// Forward pass: reads bottom_data and bottom_rois, writes the pooled result
// to top_data and fills argmax_data, with the work issued on `stream`.
// NOTE(review): argmax_data is an output here and a const input of the
// backward launcher, so it presumably records which input element produced
// each output -- confirm in the kernel source. Buffer layouts and the int
// return value are defined by the implementation.
int ROIPoolForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* top_data, int* argmax_data, cudaStream_t stream);


// Backward pass: reads top_diff, bottom_rois and argmax_data and writes to
// bottom_diff. Unlike the forward launcher it additionally takes batch_size.
int ROIPoolBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int pooled_height,
    const int pooled_width, const float* bottom_rois,
    float* bottom_diff, const int* argmax_data, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
25 | 
26 | 


--------------------------------------------------------------------------------
/extensions/_nms/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = ['src/nms.c']
 7 | headers = ['src/nms.h']
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/nms_cuda.c']
14 |     headers += ['src/nms_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/nms_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.nms',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects
31 | )
32 | 
33 | if __name__ == '__main__':
34 |     ffi.build()
35 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = ['src/roi_pooling.c']
 7 | headers = ['src/roi_pooling.h']
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/roi_pooling_cuda.c']
14 |     headers += ['src/roi_pooling_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/roi_pooling.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.roi_pooling',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects
31 | )
32 | 
33 | if __name__ == '__main__':
34 |     ffi.build()
35 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = ['src/bbox_helper.c']
 7 | headers = ['src/bbox_helper.h']
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/bbox_helper_cuda.c']
14 |     headers += ['src/bbox_helper_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects = ['src/cuda/iou_overlap_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | 
23 | ffi = create_extension(
24 |     '_ext.bbox_helper',
25 |     headers=headers,
26 |     sources=sources,
27 |     define_macros=defines,
28 |     relative_to=__file__,
29 |     with_cuda=with_cuda,
30 |     extra_objects=extra_objects
31 | )
32 | 
33 | if __name__ == '__main__':
34 |     ffi.build()
35 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | # sources = ['src/roi_align.c']
 6 | # headers = ['src/roi_align.h']
 7 | sources = []
 8 | headers = []
 9 | defines = []
10 | with_cuda = False
11 | 
12 | if torch.cuda.is_available():
13 |     print('Including CUDA code.')
14 |     sources += ['src/roi_align_cuda.c']
15 |     headers += ['src/roi_align_cuda.h']
16 |     defines += [('WITH_CUDA', None)]
17 |     with_cuda = True
18 | 
19 | this_file = os.path.dirname(os.path.realpath(__file__))
20 | print(this_file)
21 | extra_objects = ['src/roi_align_kernel.cu.o']
22 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
23 | 
24 | ffi = create_extension(
25 |     '_ext.roi_align',
26 |     headers=headers,
27 |     sources=sources,
28 |     define_macros=defines,
29 |     relative_to=__file__,
30 |     with_cuda=with_cuda,
31 |     extra_objects=extra_objects
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/build.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from torch.utils.ffi import create_extension
 4 | 
 5 | 
 6 | sources = []
 7 | headers = []
 8 | defines = []
 9 | with_cuda = False
10 | 
11 | if torch.cuda.is_available():
12 |     print('Including CUDA code.')
13 |     sources += ['src/focal_loss_cuda.c']
14 |     headers += ['src/focal_loss_cuda.h']
15 |     defines += [('WITH_CUDA', None)]
16 |     with_cuda = True
17 | 
18 | this_file = os.path.dirname(os.path.realpath(__file__))
19 | print(this_file)
20 | extra_objects  = ['src/cuda/focal_loss_sigmoid_kernel.cu.o', 'src/cuda/focal_loss_softmax_kernel.cu.o']
21 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects]
22 | print('extra_objects {0}'.format(extra_objects))
23 | 
24 | ffi = create_extension(
25 |     '_ext.focal_loss',
26 |     headers=headers,
27 |     sources=sources,
28 |     define_macros=defines,
29 |     relative_to=__file__,
30 |     with_cuda=with_cuda,
31 |     extra_objects=extra_objects
32 | )
33 | 
34 | if __name__ == '__main__':
35 |     ffi.build()
36 | 


--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/eval_single.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ROOT=../../../..
 3 | export PYTHONPATH=$ROOT:$PYTHONPATH
 4 | #--------------------------
 5 | job_name=Test
 6 | ckdir=4cluster
 7 | mkdir -p ./${ckdir}/${job_name}
 8 | #--------------------------
 9 | 
10 | python -u $ROOT/tools/faster_rcnn_train_val.py \
11 |   --config=config_512.json \
12 |   --dist=0 \
13 |   --fix_num=3 \
14 |   --L1=1 \
15 |   -e \
16 |   --cluster_num=4 \
17 |   --threshold=128 \
18 |   --recon_size=256 \
19 |   --port=21603 \
20 |   --arch=vgg16_FasterRCNN \
21 |   --warmup_epochs=1 \
22 |   --lr=0.0000125 \
23 |   --step_epochs=16,22 \
24 |   --batch-size=1 \
25 |   --epochs=25 \
26 |   --dataset=cityscapes \
27 |   --resume=/path/to/checkpoint.pth \
28 |   --train_meta_file=/path/to/train.txt \
29 |   --target_meta_file=/path/to/foggy_train.txt \
30 |   --val_meta_file=/path/to/foggy_val.txt \
31 |   --datadir=/path/to/leftImg8bit/ \
32 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
33 |   --results_dir=${ckdir}/${job_name}/results_dir \
34 |   --save_dir=${ckdir}/${job_name} \
35 |   2>&1 | tee ${ckdir}/${job_name}/train.log
36 | 


--------------------------------------------------------------------------------
/models/head.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | class NaiveRpnHead(nn.Module):
 4 |     def __init__(self, inplanes, num_classes, num_anchors):
 5 |         '''
 6 |         Args:
 7 |             inplanes: input channel
 8 |             num_classes: as the name implies
 9 |             num_anchors: as the name implies
10 |         '''
11 |         super(NaiveRpnHead, self).__init__()
12 |         self.num_anchors, self.num_classes = num_anchors, num_classes
13 |         self.conv3x3 = nn.Conv2d(inplanes, 512, kernel_size=3, stride=1, padding=1)
14 |         self.relu3x3 = nn.ReLU(inplace=True)
15 |         self.conv_cls = nn.Conv2d(
16 |             512, num_anchors * num_classes, kernel_size=1, stride=1)
17 |         self.conv_loc = nn.Conv2d(
18 |             512, num_anchors * 4, kernel_size=1, stride=1)
19 | 
20 |     def forward(self, x):
21 |         '''
22 |         Args:
23 |             x: [B, inplanes, h, w], input feature
24 |         Return:
25 |             pred_cls: [B, num_anchors, h, w]
26 |             pred_loc: [B, num_anchors*4, h, w]
27 |         '''
28 |         x = self.conv3x3(x)
29 |         x = self.relu3x3(x)
30 |         pred_cls = self.conv_cls(x)
31 |         pred_loc = self.conv_loc(x)
32 |         return pred_cls, pred_loc
33 | 


--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/2cluster.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ROOT=../../../..
 3 | export PYTHONPATH=$ROOT:$PYTHONPATH
 4 | #--------------------------
 5 | job_name=training_2cluster
 6 | ckdir=2cluster
 7 | mkdir -p ./${ckdir}/${job_name}
 8 | #--------------------------
 9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 | 
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | 		--ntasks-per-node=${GPUS_PER_NODE} \
15 |         --job-name=${job_name} \
16 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
17 |   --config=config_512.json \
18 |   --dist=1 \
19 |   --fix_num=0 \
20 |   --L1=1 \
21 |   --cluster_num=2 \
22 |   --threshold=256 \
23 |   --recon_size=512 \
24 |   --port=21603 \
25 |   --arch=vgg16_FasterRCNN \
26 |   --warmup_epochs=1 \
27 |   --lr=0.0000125 \
28 |   --step_epochs=16,22 \
29 |   --batch-size=1 \
30 |   --epochs=25 \
31 |   --dataset=cityscapes \
32 |   --train_meta_file=/path/to/train.txt \
33 |   --target_meta_file=/path/to/foggy_train.txt \
34 |   --val_meta_file=/path/to/foggy_val.txt \
35 |   --datadir=/path/to/leftImg8bit/ \
36 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
37 |   --results_dir=${ckdir}/${job_name}/results_dir \
38 |   --save_dir=${ckdir}/${job_name} \
39 |   2>&1 | tee ${ckdir}/${job_name}/train.log
40 | 


--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/4cluster.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ROOT=../../../..
 3 | export PYTHONPATH=$ROOT:$PYTHONPATH
 4 | #--------------------------
 5 | job_name=training_4cluster
 6 | ckdir=4cluster
 7 | mkdir -p ./${ckdir}/${job_name}
 8 | #--------------------------
 9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 | 
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | 		--ntasks-per-node=${GPUS_PER_NODE} \
15 |         --job-name=${job_name} \
16 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
17 |   --config=config_512.json \
18 |   --dist=1 \
19 |   --fix_num=0 \
20 |   --L1=1 \
21 |   --cluster_num=4 \
22 |   --threshold=128 \
23 |   --recon_size=256 \
24 |   --port=21603 \
25 |   --arch=vgg16_FasterRCNN \
26 |   --warmup_epochs=1 \
27 |   --lr=0.0000125 \
28 |   --step_epochs=16,22 \
29 |   --batch-size=1 \
30 |   --epochs=25 \
31 |   --dataset=cityscapes \
32 |   --train_meta_file=/path/to/train.txt \
33 |   --target_meta_file=/path/to/foggy_train.txt \
34 |   --val_meta_file=/path/to/foggy_val.txt \
35 |   --datadir=/path/to/leftImg8bit/ \
36 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
37 |   --results_dir=${ckdir}/${job_name}/results_dir \
38 |   --save_dir=${ckdir}/${job_name} \
39 |   2>&1 | tee ${ckdir}/${job_name}/train.log
40 | 
41 | 


--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/8cluster.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ROOT=../../../..
 3 | export PYTHONPATH=$ROOT:$PYTHONPATH
 4 | #--------------------------
 5 | job_name=training_8cluster
 6 | ckdir=8cluster
 7 | mkdir -p ./${ckdir}/${job_name}
 8 | #--------------------------
 9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 | 
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | 		--ntasks-per-node=${GPUS_PER_NODE} \
15 |         --job-name=${job_name} \
16 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
17 |   --config=config_512.json \
18 |   --dist=1 \
19 |   --fix_num=0 \
20 |   --L1=1 \
21 |   --cluster_num=8 \
22 |   --threshold=64 \
23 |   --recon_size=128 \
24 |   --port=21603 \
25 |   --arch=vgg16_FasterRCNN \
26 |   --warmup_epochs=1 \
27 |   --lr=0.0000125 \
28 |   --step_epochs=16,22 \
29 |   --batch-size=1 \
30 |   --epochs=25 \
31 |   --dataset=cityscapes \
32 |   --train_meta_file=/path/to/train.txt \
33 |   --target_meta_file=/path/to/foggy_train.txt \
34 |   --val_meta_file=/path/to/foggy_val.txt \
35 |   --datadir=/path/to/leftImg8bit/ \
36 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
37 |   --results_dir=${ckdir}/${job_name}/results_dir \
38 |   --save_dir=${ckdir}/${job_name} \
39 |   2>&1 | tee ${ckdir}/${job_name}/train.log
40 | 
41 | 


--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/eval.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | ROOT=../../../..
 3 | export PYTHONPATH=$ROOT:$PYTHONPATH
 4 | #--------------------------
 5 | job_name=Test
 6 | ckdir=4cluster
 7 | mkdir -p ./${ckdir}/${job_name}
 8 | #--------------------------
 9 | PARTITION=$1
10 | GPUS=${5:-8}
11 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
12 | 
13 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
14 | 		--ntasks-per-node=${GPUS_PER_NODE} \
15 |         --job-name=${job_name} \
16 | python -u $ROOT/tools/faster_rcnn_train_val.py \
17 |   --config=config_512.json \
18 |   --dist=1 \
19 |   --fix_num=3 \
20 |   --L1=1 \
21 |   -e \
22 |   --cluster_num=4 \
23 |   --threshold=128 \
24 |   --recon_size=256 \
25 |   --port=21603 \
26 |   --arch=vgg16_FasterRCNN \
27 |   --warmup_epochs=1 \
28 |   --lr=0.0000125 \
29 |   --step_epochs=16,22 \
30 |   --batch-size=1 \
31 |   --epochs=25 \
32 |   --dataset=cityscapes \
33 |   --resume=/path/to/checkpoint.pth \
34 |   --train_meta_file=/path/to/train.txt \
35 |   --target_meta_file=/path/to/foggy_train.txt \
36 |   --val_meta_file=/path/to/foggy_val.txt \
37 |   --datadir=/path/to/leftImg8bit/ \
38 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
39 |   --results_dir=${ckdir}/${job_name}/results_dir \
40 |   --save_dir=${ckdir}/${job_name} \
41 |   2>&1 | tee ${ckdir}/${job_name}/train.log
42 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_kernel.h:
--------------------------------------------------------------------------------
// Declarations of the RoIAlign CUDA kernels and their host-side launchers.
#ifndef _ROI_ALIGN_KERNEL
#define _ROI_ALIGN_KERNEL

// Give the declarations C linkage when this header is included from C++.
#ifdef __cplusplus
extern "C" {
#endif

// Forward kernel: reads bottom_data and bottom_rois, writes top_data.
// NOTE(review): nthreads is presumably the number of output elements --
// confirm against the definition in roi_align_kernel.cu.
__global__ void ROIAlignForward(const int nthreads, const float* bottom_data,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    const float* bottom_rois, float* top_data);

// Host-side entry point for the forward pass, taking the CUDA stream to use.
// ("Laucher" is the spelling of the exported symbol; it must not be changed
// here without changing the definition and all callers.)
int ROIAlignForwardLaucher(
    const float* bottom_data, const float spatial_scale, const int num_rois, const int height,
    const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* top_data, cudaStream_t stream);

// Backward kernel: reads top_diff and bottom_rois, writes bottom_diff.
__global__ void ROIAlignBackward(const int nthreads, const float* top_diff,
    const float spatial_scale, const int height, const int width,
    const int channels, const int aligned_height, const int aligned_width,
    float* bottom_diff, const float* bottom_rois);

// Host-side entry point for the backward pass. Unlike the forward launcher
// it additionally takes batch_size.
int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois,
    const int height, const int width, const int channels, const int aligned_height,
    const int aligned_width, const float* bottom_rois,
    float* bottom_diff, cudaStream_t stream);

#ifdef __cplusplus
}
#endif

#endif
34 | 
35 | 


--------------------------------------------------------------------------------
/extensions/_nms/pth_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from extensions._nms._ext import nms
 3 | import numpy as np
 4 | 
 5 | def pth_nms(dets, thresh):
 6 |   #"""
 7 |   #dets has to be a tensor
 8 |   #"""
 9 |   #if not dets.is_cuda:
10 |   #  x1 = dets[:, 0]
11 |   #  y1 = dets[:, 1]
12 |   #  x2 = dets[:, 2]
13 |   #  y2 = dets[:, 3]
14 |   #  scores = dets[:, 4]
15 | 
16 |   #  areas = (x2 - x1 + 1) * (y2 - y1 + 1)
17 |   #  order = scores.sort(0, descending=True)[1]
18 |   #  # order = torch.from_numpy(np.ascontiguousarray(scores.numpy().argsort()[::-1])).long()
19 | 
20 |   #  keep = torch.LongTensor(dets.size(0))
21 |   #  num_out = torch.LongTensor(1)
22 |   #  nms.cpu_nms(keep, num_out, dets, order, areas, thresh)
23 | 
24 |   #  return keep[:num_out[0]]
25 |   #else:
26 | 
27 |     #x1 = dets[:, 0]
28 |     #y1 = dets[:, 1]
29 |     #x2 = dets[:, 2]
30 |     #y2 = dets[:, 3]
31 |     # scores = dets[:, 4].cuda().contiguous()
32 |     dets = dets.cuda().contiguous()
33 | 
34 | 
35 |     #areas = (x2 - x1 + 1) * (y2 - y1 + 1)
36 |     # order = scores.sort(0, descending=True)[1][:6000]
37 |     # order = torch.from_numpy(np.ascontiguousarray(scores.cpu().numpy().argsort()[::-1])).long().cuda()
38 | 
39 |     # dets = dets[order].contiguous()
40 | 
41 |     keep = torch.LongTensor(dets.size(0))
42 |     num_out = torch.LongTensor(1)
43 |     # keep = torch.cuda.LongTensor(dets.size(0))
44 |     # num_out = torch.cuda.LongTensor(1)
45 |     nms.gpu_nms(keep, num_out, dets.float(), thresh)
46 | 
47 |     return keep[:num_out[0]].cpu().contiguous()
48 |     # return order[keep[:num_out[0]]].contiguous()
49 | 
50 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/bbox_helper_cuda.c:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
#include <THC/THC.h>
#include <TH/TH.h>
#include <math.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#include "cuda/iou_overlap_kernel.h"
14 | 
15 | 
16 | extern THCState *state;
17 | 
18 | int gpu_iou_overlaps(THCudaTensor * bboxes1, THCudaTensor * bboxes2, THCudaTensor * output){
19 |     // Grad the input tensor
20 |     float * bboxes1_data = THCudaTensor_data(state, bboxes1);
21 |     float * bboxes2_data = THCudaTensor_data(state, bboxes2);
22 |     float * output_data = THCudaTensor_data(state, output);
23 | 
24 |     // Number of boxes
25 |     int num_bbox1 = THCudaTensor_size(state, bboxes1, 0);
26 |     int num_bbox2 = THCudaTensor_size(state, bboxes2, 0);
27 |     int size_bbox1 = THCudaTensor_size(state, bboxes1, 1);
28 |     int size_bbox2 = THCudaTensor_size(state, bboxes2, 1);
29 |     
30 |     assert(size_bbox1 == 4);
31 |     assert(size_bbox2 == 4);
32 |     if(size_bbox1 != 4 || size_bbox2 != 4){
33 |         exit(1);
34 |         return 0;
35 |     }
36 |  
37 |     cudaStream_t stream = THCState_getCurrentStream(state);
38 |     IOUOverlap(
39 |                bboxes1_data,
40 |                bboxes2_data,
41 |                size_bbox1,
42 |                num_bbox1,
43 |                num_bbox2,
44 |                output_data,
45 |                stream);
46 |   return 1;
47 | }
48 | 


--------------------------------------------------------------------------------
/examples/faster-rcnn/cityscapes/vgg/config_512.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "shared": {
 3 |         "gan_model_flag": 2,
 4 |         "scales": [512],
 5 |         "max_size": 1024,
 6 |         "anchor_scales": [2, 4, 8, 16, 32],
 7 |         "anchor_ratios": [0.5, 1, 2],
 8 |         "anchor_stride": 16,
 9 |         "bbox_normalize_stats_precomputed": true,
10 |         "bbox_normalize_stds": [0.1, 0.1, 0.2, 0.2],
11 |         "bbox_normalize_means": [0, 0, 0, 0],
12 |         "num_classes": 9,
13 |         "class_names":[
14 |             "__background__",
15 |             "person", "rider", "car", "truck",
16 |             "bus", "train", "motorcycle", "bicycle"],
17 | 	    "roi_align": false
18 |     },
19 |     "train_anchor_target_cfg": {
20 |         "rpn_batch_size": 256,
21 |         "nms_iou_thresh": 0.7,
22 |         "positive_iou_thresh": 0.7,
23 |         "negative_iou_thresh": 0.3,
24 |         "positive_percent": 0.5,
25 |         "ignore_iou_thresh": 0.5
26 |     },
27 |     "train_rpn_proposal_cfg": {
28 |         "nms_iou_thresh": 0.7,
29 |         "pre_nms_top_n": 12000,
30 |         "post_nms_top_n": 2000,
31 |         "roi_min_size": 2
32 |     },
33 |     "train_proposal_target_cfg": {
34 |         "batch_size": 512,
35 |         "positive_iou_thresh": 0.5,
36 |         "negative_iou_thresh_hi": 0.5,
37 |         "negative_iou_thresh_lo": 0.0,
38 |         "ignore_iou_thresh": 0.5,
39 |         "positive_percent": 0.25,
40 |         "append_gts": true
41 |     },
42 |     "test_rpn_proposal_cfg": {
43 |         "nms_iou_thresh": 0.7,
44 |         "pre_nms_top_n": 6000,
45 |         "post_nms_top_n": 300,
46 |         "roi_min_size": 2
47 |     },
48 |     "test_predict_bbox_cfg": {
49 |         "nms_iou_thresh": 0.5,
50 |         "score_thresh": 0.00,
51 |         "top_n": 100
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/extensions/_cython_bbox/setup.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017-present, Facebook, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | 
16 | from __future__ import absolute_import
17 | from __future__ import division
18 | from __future__ import print_function
19 | 
20 | from Cython.Build import cythonize
21 | from setuptools import Extension
22 | from setuptools import setup
23 | 
24 | import numpy as np
25 | 
# NumPy headers are required by both Cython modules.
_NP_INCLUDE_DIRS = np.get_include()


# Extension modules: each is built from a single .pyx file of the same name,
# with identical compiler flags and include directories.
ext_modules = [
    Extension(
        name=module_name,
        sources=['{}.pyx'.format(module_name)],
        extra_compile_args=['-Wno-cpp'],
        include_dirs=[_NP_INCLUDE_DIRS],
    )
    for module_name in ('cython_bbox', 'cython_nms')
]

setup(
    name='Detectron',
    ext_modules=cythonize(ext_modules),
)
61 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/src/focal_loss_cuda.h:
--------------------------------------------------------------------------------
 1 | 
/*
 * Entry points of the CUDA focal-loss extension (sigmoid and softmax
 * variants, forward and backward). All four take float CUDA tensors for
 * logits and an int CUDA tensor for targets.
 * NOTE(review): exact tensor shapes and the meaning of weight_pos, gamma
 * and alpha are defined in focal_loss_cuda.c / the kernels -- confirm there.
 */

/* Sigmoid focal loss, forward pass; the result is written to `losses`. */
int focal_loss_sigmoid_forward_cuda(
                           int N,
                           THCudaTensor * logits,
                           THCudaIntTensor * targets,
                           float weight_pos,
                           float gamma, 
                           float alpha,
                           int num_classes,
                           THCudaTensor * losses);

/* Sigmoid focal loss, backward pass; takes `dX_data` instead of `losses`
 * (presumably the gradient buffer to fill -- confirm). */
int focal_loss_sigmoid_backward_cuda(
                           int N,
                           THCudaTensor * logits,
                           THCudaIntTensor * targets,
                           THCudaTensor * dX_data,
                           float weight_pos,
                           float gamma,
                           float alpha,
                           int num_classes);

/* Softmax focal loss, forward pass; additionally takes a `priors` tensor. */
int focal_loss_softmax_forward_cuda(
                           int N,
                           THCudaTensor * logits,
                           THCudaIntTensor * targets,
                           float weight_pos,
                           float gamma, 
                           float alpha,
                           int num_classes,
                           THCudaTensor * losses,
                           THCudaTensor * priors);

/* Softmax focal loss, backward pass; additionally takes `priors` and a
 * `buff` tensor (presumably scratch space -- confirm). */
int focal_loss_softmax_backward_cuda(
                           int N,
                           THCudaTensor * logits,
                           THCudaIntTensor * targets,
                           THCudaTensor * dX_data,
                           float weight_pos,
                           float gamma,
                           float alpha,
                           int num_classes,
                           THCudaTensor * priors,
                           THCudaTensor * buff);
44 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/modules/roi_align.py:
--------------------------------------------------------------------------------
 1 | from torch.nn.modules.module import Module
 2 | from torch.nn.functional import avg_pool2d, max_pool2d
 3 | from ..functions.roi_align import RoIAlignFunction
 4 | 
 5 | 
 6 | class RoIAlign(Module):
 7 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 8 |         super(RoIAlign, self).__init__()
 9 | 
10 |         self.aligned_width = int(aligned_width)
11 |         self.aligned_height = int(aligned_height)
12 |         self.spatial_scale = float(spatial_scale)
13 | 
14 |     def forward(self, features, rois):
15 |         return RoIAlignFunction(self.aligned_height, self.aligned_width,
16 |                                 self.spatial_scale)(features, rois)
17 | 
class RoIAlignAvg(Module):
    """RoIAlign followed by a 2x2, stride-1 average pooling.

    ``RoIAlignFunction`` is asked for an output one pixel larger in each
    direction, so that the pooling brings it back to
    (aligned_height, aligned_width).
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignAvg, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Each roi row must carry exactly 5 values.
        assert(rois.shape[1] == 5)
        align_fn = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale)
        oversized = align_fn(features, rois)
        return avg_pool2d(oversized, kernel_size=2, stride=1)
31 | 
class RoIAlignMax(Module):
    """RoIAlign followed by a 2x2, stride-1 max pooling.

    ``RoIAlignFunction`` is asked for an output one pixel larger in each
    direction, so that the pooling brings it back to
    (aligned_height, aligned_width).
    """

    def __init__(self, aligned_height, aligned_width, spatial_scale):
        super(RoIAlignMax, self).__init__()

        self.aligned_width = int(aligned_width)
        self.aligned_height = int(aligned_height)
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        # Each roi row must carry exactly 5 values.
        assert(rois.shape[1] == 5)
        align_fn = RoIAlignFunction(
            self.aligned_height + 1, self.aligned_width + 1, self.spatial_scale)
        oversized = align_fn(features, rois)
        return max_pool2d(oversized, kernel_size=2, stride=1)
45 | 


--------------------------------------------------------------------------------
/utils/distributed_utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | import torch.distributed as dist
 4 | from torch.nn import Module
 5 | import torch.multiprocessing as mp
 6 | import logging
 7 | logger = logging.getLogger('global')
 8 | 
 9 | def average_gradients(model):
10 |     """ average gradients """
11 |     for param in model.parameters():
12 |         if param.requires_grad and not (param.grad is None):
13 |             dist.all_reduce(param.grad.data)
14 | 
def broadcast_params(model):
    """Broadcast every tensor in ``model.state_dict()`` from rank 0.

    Iterating the state dict (rather than ``parameters()``) means buffers
    stored there are broadcast as well.
    """
    state = model.state_dict()
    for tensor in state.values():
        dist.broadcast(tensor, 0)
20 | 
def _master_addr_from_nodelist(node_list):
    """Derive the master address from a SLURM node list string.

    For a bracketed list such as ``prefix-[12-15,20]`` only the first entry
    is kept (``prefix-12``). The first 8 characters are then dropped and the
    remaining dashes become dots.
    NOTE(review): the fixed 8-character prefix assumes host names of the form
    ``XX-XXXX-a-b-c-d`` that embed the IP address -- confirm for your cluster.
    """
    if '[' in node_list:
        beg = node_list.find('[')
        # Cut at the first range ('-') or list (',') separator after '['.
        cuts = [pos for pos in (node_list.find('-', beg), node_list.find(',', beg))
                if pos >= 0]
        end = min(cuts) if cuts else len(node_list)
        # A single-entry list ("prefix-[12]") has no separator; strip the
        # closing bracket as well so it does not end up in the address.
        node_list = node_list[:end].replace('[', '').replace(']', '')
    return node_list[8:].replace('-', '.')


def dist_init(port, backend = 'nccl'):
    """Initialise torch.distributed for a process started by SLURM.

    Reads SLURM_PROCID, SLURM_NTASKS and SLURM_NODELIST, selects a CUDA
    device, exports MASTER_PORT / MASTER_ADDR / WORLD_SIZE / RANK and calls
    ``dist.init_process_group``.

    Args:
        port: master port; an int or a string (it is stringified before
            being written to the environment).
        backend: 'nccl' initialises from the exported environment variables;
            any other value initialises 'gloo' with explicit rank and world
            size.

    Returns:
        (rank, world_size) as reported by torch.distributed.

    Raises:
        KeyError: if one of the SLURM environment variables is missing.
    """
    if mp.get_start_method(allow_none=True) is None:
        mp.set_start_method('spawn')
    # Log the method that is actually in effect (after the default is set).
    logger.info('multiprocessing start method:{}'.format(
        mp.get_start_method(allow_none=True)))

    proc_id = int(os.environ['SLURM_PROCID'])
    ntasks = int(os.environ['SLURM_NTASKS'])
    node_list = os.environ['SLURM_NODELIST']

    # Map the global rank onto a local device; skipped when no GPU is visible
    # so that the modulo cannot divide by zero.
    num_gpus = torch.cuda.device_count()
    if num_gpus > 0:
        torch.cuda.set_device(proc_id % num_gpus)

    addr = _master_addr_from_nodelist(node_list)
    # os.environ only accepts strings; port may arrive as an int.
    os.environ['MASTER_PORT'] = str(port)
    os.environ['MASTER_ADDR'] = addr
    os.environ['WORLD_SIZE'] = str(ntasks)
    os.environ['RANK'] = str(proc_id)
    if backend == 'nccl':
        dist.init_process_group(backend='nccl')
    else:
        dist.init_process_group(backend='gloo', rank=proc_id, world_size=ntasks)

    rank = dist.get_rank()
    world_size = dist.get_world_size()
    return rank, world_size
54 | 
55 | 


--------------------------------------------------------------------------------
/utils/log_helper.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf8
 2 | from __future__ import division
 3 | 
 4 | import os
 5 | import logging
 6 | import math
 7 | 
 8 | logs = set()
 9 | 
10 | def init_log(name, level = logging.INFO):
11 |     if (name, level) in logs: return
12 |     logs.add((name, level))
13 |     logger = logging.getLogger(name)
14 |     logger.setLevel(level)
15 |     ch = logging.StreamHandler()
16 |     ch.setLevel(level)
17 |     if 'SLURM_PROCID' in os.environ:
18 |         rank = int(os.environ['SLURM_PROCID'])
19 |         logger.addFilter(lambda record: rank == 0)
20 |     else:
21 |         rank = 0
22 |     format_str = '%(asctime)s-rk{}-%(filename)s#%(lineno)d:%(message)s'.format(rank)
23 |     formatter = logging.Formatter(format_str)
24 |     ch.setFormatter(formatter)
25 |     logger.addHandler(ch)
26 | 
27 | # init_log('global')
28 | 
def print_speed(i, i_time, n):
    """print_speed(index, index_time, total_iteration)

    Log progress and an ETA on the 'global' logger. The ETA is
    ``(n - i) * i_time`` seconds, split into whole days, hours and minutes.
    """
    seconds_left = (n - i) * i_time
    days = math.floor(seconds_left / 86400)
    hours = math.floor(seconds_left / 3600 - days * 24)
    minutes = math.floor(seconds_left / 60 - days * 1440 - hours * 60)
    percent = i / n * 100

    message = 'Progress: %d / %d [%d%%], Speed: %.3f s/iter, ETA %d:%02d:%02d (D:H:M)\n' % (
        i, n, percent, i_time, days, hours, minutes)
    logging.getLogger('global').info(message)
38 | 
39 | 
def main():
    """Demo: create one logger per level and emit a message at every level."""
    levels = (logging.DEBUG, logging.INFO, logging.WARNING,
              logging.ERROR, logging.CRITICAL)
    for lvl in levels:
        # Each logger is named after the numeric value of its level.
        log_name = str(lvl)
        init_log(log_name, lvl)
        logger = logging.getLogger(log_name)
        print('****cur lvl:{}'.format(lvl))
        emitters = (
            (logger.debug, 'debug'),
            (logger.info, 'info'),
            (logger.warning, 'warning'),
            (logger.error, 'error'),
            (logger.critical, 'critiacal'),
        )
        for emit, text in emitters:
            emit(text)
if __name__ == '__main__':
    main()
53 | 


--------------------------------------------------------------------------------
/utils/lr_helper.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.optim import Optimizer
 3 | 
 4 | class _IterLRScheduler(object):
 5 |     def __init__(self, optimizer, last_iter=-1):
 6 |         if not isinstance(optimizer, Optimizer):
 7 |             raise TypeError('{} is not an Optimizer'.format(
 8 |                 type(optimizer).__name__))
 9 |         self.optimizer = optimizer
10 |         if last_iter == -1:
11 |             for group in optimizer.param_groups:
12 |                 group.setdefault('initial_lr', group['lr'])
13 |         else:
14 |             for i, group in enumerate(optimizer.param_groups):
15 |                 if 'initial_lr' not in group:
16 |                     raise KeyError("param 'initial_lr' is not specified "
17 |                                    "in param_groups[{}] when resuming an optimizer".format(i))
18 |         self.base_lrs = list(map(lambda group: group['initial_lr'], optimizer.param_groups))
19 |         self.step(last_iter + 1)
20 |         self.last_iter = last_iter
21 | 
22 |     def get_lr(self):
23 |         raise NotImplementedError
24 | 
25 |     def step(self, iter=None):
26 |         if iter is None:
27 |             iter = self.last_iter + 1
28 |         self.last_iter = iter
29 |         for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
30 |             param_group['lr'] = lr
31 | 
32 | 
33 | class IterExponentialLR(_IterLRScheduler):
34 |     """Set the learning rate of each parameter group to the initial lr decayed
35 |     by gamma every iteration. When last_iter=-1, sets initial lr as lr.
36 | 
37 |     Args:
38 |         optimizer (Optimizer): Wrapped optimizer.
39 |         gamma (float): Multiplicative factor of learning rate decay.
40 |         last_iter (int): The index of last iter. Default: -1.
41 |     """
42 | 
43 |     def __init__(self, optimizer, gamma, last_iter=-1):
44 |         self.gamma = gamma
45 |         super(IterExponentialLR, self).__init__(optimizer, last_iter)
46 | 
47 |     def get_lr(self):
48 |         return [base_lr * self.gamma ** self.last_iter
49 |                 for base_lr in self.base_lrs]
50 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/functions/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_pooling
 4 | import pdb
 5 | 
 6 | class RoIPoolFunction(Function):
 7 |     def __init__(ctx, pooled_height, pooled_width, spatial_scale):
 8 |         ctx.pooled_width = pooled_width
 9 |         ctx.pooled_height = pooled_height
10 |         ctx.spatial_scale = spatial_scale
11 |         ctx.feature_size = None
12 | 
13 |     def forward(ctx, features, rois): 
14 |         ctx.feature_size = features.size()           
15 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
16 |         num_rois = rois.size(0)
17 |         output = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_()
18 |         ctx.argmax = features.new(num_rois, num_channels, ctx.pooled_height, ctx.pooled_width).zero_().int()
19 |         ctx.rois = rois
20 |         if not features.is_cuda:
21 |             _features = features.permute(0, 2, 3, 1)
22 |             roi_pooling.roi_pooling_forward(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
23 |                                             _features, rois, output)
24 |         else:
25 |             assert(features.is_contiguous())
26 |             assert(rois.is_contiguous())
27 |             roi_pooling.roi_pooling_forward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
28 |                                                  features, rois, output, ctx.argmax)
29 | 
30 |         return output
31 | 
32 |     def backward(ctx, grad_output):
33 |         assert(ctx.feature_size is not None and grad_output.is_cuda)
34 |         batch_size, num_channels, data_height, data_width = ctx.feature_size
35 |         grad_input = grad_output.new(batch_size, num_channels, data_height, data_width).zero_()
36 |         
37 |         assert(grad_output.is_contiguous())
38 |         assert(ctx.rois.is_contiguous())
39 |         roi_pooling.roi_pooling_backward_cuda(ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale,
40 |                                               grad_output, ctx.rois, grad_input, ctx.argmax)
41 | 
42 |         return grad_input, None
43 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/functions/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch.autograd import Function
 3 | from .._ext import roi_align
 4 | 
 5 | 
 6 | # TODO use save_for_backward instead
 7 | class RoIAlignFunction(Function):
 8 |     def __init__(self, aligned_height, aligned_width, spatial_scale):
 9 |         self.aligned_width = int(aligned_width)
10 |         self.aligned_height = int(aligned_height)
11 |         self.spatial_scale = float(spatial_scale)
12 |         self.rois = None
13 |         self.feature_size = None
14 | 
15 |     def forward(self, features, rois):
16 |         self.rois = rois
17 |         self.feature_size = features.size()
18 | 
19 |         batch_size, num_channels, data_height, data_width = features.size()
20 |         num_rois = rois.size(0)
21 | 
22 |         output = features.new(num_rois, num_channels, self.aligned_height, self.aligned_width).zero_()
23 |         assert(features.is_contiguous())
24 |         assert(rois.is_contiguous())
25 |         if features.is_cuda:
26 |             roi_align.roi_align_forward_cuda(self.aligned_height,
27 |                                              self.aligned_width,
28 |                                              self.spatial_scale, features,
29 |                                              rois, output)
30 |         else:
31 |             raise NotImplementedError
32 | 
33 |         return output
34 | 
35 |     def backward(self, grad_output):
36 |         assert(self.feature_size is not None and grad_output.is_cuda)
37 | 
38 |         batch_size, num_channels, data_height, data_width = self.feature_size
39 | 
40 |         grad_input = self.rois.new(batch_size, num_channels, data_height,
41 |                                   data_width).zero_()
42 |         assert(grad_output.is_contiguous())
43 |         assert(self.rois.is_contiguous())
44 |         roi_align.roi_align_backward_cuda(self.aligned_height,
45 |                                           self.aligned_width,
46 |                                           self.spatial_scale, grad_output,
47 |                                           self.rois, grad_input)
48 | 
49 |         # print grad_input
50 | 
51 |         return grad_input, None
52 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/modules/roi_pool_py.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Variable
 4 | import numpy as np
 5 | 
 6 | 
 7 | class RoIPool(nn.Module):
 8 |     def __init__(self, pooled_height, pooled_width, spatial_scale):
 9 |         super(RoIPool, self).__init__()
10 |         self.pooled_width = int(pooled_width)
11 |         self.pooled_height = int(pooled_height)
12 |         self.spatial_scale = float(spatial_scale)
13 | 
14 |     def forward(self, features, rois):
15 |         batch_size, num_channels, data_height, data_width = features.size()
16 |         num_rois = rois.size()[0]
17 |         outputs = Variable(torch.zeros(num_rois, num_channels, self.pooled_height, self.pooled_width)).cuda()
18 | 
19 |         for roi_ind, roi in enumerate(rois):
20 |             batch_ind = int(roi[0].data[0])
21 |             roi_start_w, roi_start_h, roi_end_w, roi_end_h = np.round(
22 |                 roi[1:].data.cpu().numpy() * self.spatial_scale).astype(int)
23 |             roi_width = max(roi_end_w - roi_start_w + 1, 1)
24 |             roi_height = max(roi_end_h - roi_start_h + 1, 1)
25 |             bin_size_w = float(roi_width) / float(self.pooled_width)
26 |             bin_size_h = float(roi_height) / float(self.pooled_height)
27 | 
28 |             for ph in range(self.pooled_height):
29 |                 hstart = int(np.floor(ph * bin_size_h))
30 |                 hend = int(np.ceil((ph + 1) * bin_size_h))
31 |                 hstart = min(data_height, max(0, hstart + roi_start_h))
32 |                 hend = min(data_height, max(0, hend + roi_start_h))
33 |                 for pw in range(self.pooled_width):
34 |                     wstart = int(np.floor(pw * bin_size_w))
35 |                     wend = int(np.ceil((pw + 1) * bin_size_w))
36 |                     wstart = min(data_width, max(0, wstart + roi_start_w))
37 |                     wend = min(data_width, max(0, wend + roi_start_w))
38 | 
39 |                     is_empty = (hend <= hstart) or(wend <= wstart)
40 |                     if is_empty:
41 |                         outputs[roi_ind, :, ph, pw] = 0
42 |                     else:
43 |                         data = features[batch_ind]
44 |                         outputs[roi_ind, :, ph, pw] = torch.max(
45 |                             torch.max(data[:, hstart:hend, wstart:wend], 1)[0], 2)[0].view(-1)
46 | 
47 |         return outputs
48 | 
49 | 


--------------------------------------------------------------------------------
/datasets/pycocotools/common/maskApi.h:
--------------------------------------------------------------------------------
 1 | /**************************************************************************
 2 | * Microsoft COCO Toolbox.      version 2.0
 3 | * Data, paper, and tutorials available at:  http://mscoco.org/
 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 5 | * Licensed under the Simplified BSD License [see coco/license.txt]
 6 | **************************************************************************/
 7 | #pragma once
 8 | 
/* Short aliases used by the prototypes below. */
typedef unsigned int uint;   /* element type of RLE counts, areas and keep flags */
typedef unsigned long siz;   /* used for every size/count parameter below */
typedef unsigned char byte;  /* element type of binary masks and iscrowd flags */
typedef double* BB;          /* pointer to bounding-box data stored as doubles */
/* Run-length encoded mask.
 * NOTE(review): h and w are presumably the mask height and width and m the
 * number of entries in cnts -- confirm against the implementation file. */
typedef struct { siz h, w, m; uint *cnts; } RLE;
14 | 
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 | 
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 | 
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 | 
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 | 
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 | 
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 | 
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 | 
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 | 
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 | 
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 | 
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 | 
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 | 
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 | 
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 | 
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 | 


--------------------------------------------------------------------------------
/utils/load_helper.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import logging
 3 | import pprint
 4 | logger = logging.getLogger('global')
 5 | 
 6 | def check_keys(model, pretrained_state_dict):
 7 |     ckpt_keys = set(pretrained_state_dict.keys())
 8 |     model_keys = set(model.state_dict().keys())
 9 |     used_pretrained_keys = model_keys & ckpt_keys
10 |     unused_pretrained_keys = ckpt_keys - model_keys
11 |     missing_keys = model_keys - ckpt_keys
12 |     pprint.pprint(model_keys)
13 |     pprint.pprint(ckpt_keys)
14 |     logger.info('missing keys:{}'.format(len(missing_keys)))
15 |     logger.info('unused checkpoint keys:{}'.format(len(unused_pretrained_keys)))
16 |     logger.info('used keys:{}'.format(len(used_pretrained_keys)))
17 |     assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint'
18 |     return True
19 | 
20 | 
def remove_prefix(state_dict, prefix):
    ''' Return a copy of state_dict whose keys have a leading prefix removed.

    Old style model is stored with all names of parameters share common prefix 'module.'
    Keys that do not start with the prefix are kept unchanged, and only the
    leading occurrence is removed.

    :param state_dict: mapping from parameter name to value
    :param prefix: string to strip from the start of each key; an empty prefix
                   leaves every key unchanged
    :return: new dict with the renamed keys and the original values
    '''
    logger.info('remove prefix \'{}\''.format(prefix))

    def strip(key):
        # Slice instead of str.split: split() raises ValueError on an empty separator.
        return key[len(prefix):] if key.startswith(prefix) else key

    return {strip(key): value for key, value in state_dict.items()}
26 | 
27 | 
def load_pretrain(model, pretrained_path):
    ''' Load pretrained weights into model (non-strict) and return the model.

    Tensors are mapped onto the current CUDA device while loading. When the
    path ends with 'tar' the weights are read from the checkpoint's
    'state_dict' entry; otherwise the loaded object itself is used. A leading
    'module.' is stripped from every key before loading.
    '''
    logger.info('load pretrained model from {}'.format(pretrained_path))
    device = torch.cuda.current_device()

    def to_current_gpu(storage, loc):
        return storage.cuda(device)

    loaded = torch.load(pretrained_path, map_location=to_current_gpu)
    weights = loaded['state_dict'] if pretrained_path.endswith('tar') else loaded
    pretrained_dict = remove_prefix(weights, 'module.')

    check_keys(model, pretrained_dict)
    model.load_state_dict(pretrained_dict, strict=False)
    return model
39 | 
40 | 
def restore_from(model, optimizer, ckpt_path):
    ''' Restore model weights and training metadata from a checkpoint.

    Returns (model, None, epoch, best_recall, arch). The optimizer state is
    not restored: the passed-in optimizer is ignored and None is returned in
    its place.
    '''
    logger.info('restore from {}'.format(ckpt_path))
    device = torch.cuda.current_device()

    def to_current_gpu(storage, loc):
        return storage.cuda(device)

    ckpt = torch.load(ckpt_path, map_location=to_current_gpu)
    epoch, best_recall, arch = ckpt['epoch'], ckpt['best_recall'], ckpt['arch']

    weights = remove_prefix(ckpt['state_dict'], 'module.')
    check_keys(model, weights)
    model.load_state_dict(weights, strict=False)

    # Loading ckpt['optimizer'] is disabled, so callers get None here.
    return model, None, epoch, best_recall, arch
55 | 


--------------------------------------------------------------------------------
/extensions/_nms/src/nms_cuda.c:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
 7 | #include <THC/THC.h>
 8 | #include <TH/TH.h>
 9 | #include <math.h>
10 | #include <stdio.h>
11 | 
12 | #include "cuda/nms_kernel.h"
13 | 
14 | 
15 | extern THCState *state;
16 | 
17 | int gpu_nms(THLongTensor * keep, THLongTensor* num_out, THCudaTensor * boxes, float nms_overlap_thresh) {
18 |   // boxes has to be sorted
19 |   THArgCheck(THLongTensor_isContiguous(keep), 0, "boxes must be contiguous");
20 |   THArgCheck(THCudaTensor_isContiguous(state, boxes), 2, "boxes must be contiguous");
21 |   // Number of ROIs
22 |   int boxes_num = THCudaTensor_size(state, boxes, 0);
23 |   int boxes_dim = THCudaTensor_size(state, boxes, 1);
24 | 
25 |   float* boxes_flat = THCudaTensor_data(state, boxes);
26 | 
27 |   const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
28 |   THCudaLongTensor * mask = THCudaLongTensor_newWithSize2d(state, boxes_num, col_blocks);
29 |   unsigned long long* mask_flat = THCudaLongTensor_data(state, mask);
30 | 
31 |   _nms(boxes_num, boxes_flat, mask_flat, nms_overlap_thresh);
32 | 
33 |   THLongTensor * mask_cpu = THLongTensor_newWithSize2d(boxes_num, col_blocks);
34 |   THLongTensor_copyCuda(state, mask_cpu, mask);
35 |   THCudaLongTensor_free(state, mask);
36 | 
37 |   unsigned long long * mask_cpu_flat = THLongTensor_data(mask_cpu);
38 | 
39 |   THLongTensor * remv_cpu = THLongTensor_newWithSize1d(col_blocks);
40 |   unsigned long long* remv_cpu_flat = THLongTensor_data(remv_cpu);
41 |   THLongTensor_fill(remv_cpu, 0);
42 | 
43 |   long * keep_flat = THLongTensor_data(keep);
44 |   long num_to_keep = 0;
45 | 
46 |   int i, j;
47 |   for (i = 0; i < boxes_num; i++) {
48 |     int nblock = i / threadsPerBlock;
49 |     int inblock = i % threadsPerBlock;
50 | 
51 |     if (!(remv_cpu_flat[nblock] & (1ULL << inblock))) {
52 |       keep_flat[num_to_keep++] = i;
53 |       unsigned long long *p = &mask_cpu_flat[0] + i * col_blocks;
54 |       for (j = nblock; j < col_blocks; j++) {
55 |         remv_cpu_flat[j] |= p[j];
56 |       }
57 |     }
58 |   }
59 | 
60 |   long * num_out_flat = THLongTensor_data(num_out);
61 |   * num_out_flat = num_to_keep;
62 | 
63 |   THLongTensor_free(mask_cpu);
64 |   THLongTensor_free(remv_cpu);
65 | 
66 |   return 1;
67 | }
68 | 


--------------------------------------------------------------------------------
/datasets/target_dataset.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # @Time    : 18-5-3 4:40
 3 | # @Author  : Xinge
 4 | 
 5 | from __future__ import division
 6 | import torch
 7 | from torch.utils.data import DataLoader, Dataset
 8 | import torchvision.transforms as transforms
 9 | import numpy as np
10 | from io import StringIO
11 | from PIL import Image
12 | import pickle as pk
13 | import os
14 | 
def pil_loader(img_str):
    """Decode an in-memory encoded image and return it as an RGB PIL image.

    :param img_str: raw bytes of an encoded image file
    :return: PIL image converted to mode 'RGB'
    """
    # Encoded images are binary data; a text StringIO rejects bytes, so the
    # buffer must be a BytesIO. Imported locally because the module only
    # imports StringIO.
    from io import BytesIO
    with Image.open(BytesIO(img_str)) as img:
        return img.convert('RGB')
22 | 
class TargetDataset(Dataset):
    """Dataset of unlabelled images listed one relative path per line.

    Each item is the image resized to ``(new_w, new_h)``, converted to a
    tensor and, when ``normalize_fn`` is given, passed through it.
    """

    def __init__(self, root_dir, list_file, normalize_fn=None, memcached=False, new_w=1024, new_h=512):
        """
        :param root_dir: directory that the listed paths are relative to
        :param list_file: text file with one image path per line
        :param normalize_fn: optional callable applied to the image tensor
        :param memcached: accepted for interface compatibility; not used
        :param new_w: width every image is resized to
        :param new_h: height every image is resized to
        """
        self.root_dir = root_dir
        self.normalize_fn = normalize_fn
        self.new_w = new_w
        self.new_h = new_h
        with open(list_file) as f:
            stripped = (line.strip() for line in f)
            # Skip blank lines: an empty entry would resolve to root_dir
            # itself and fail when the item is opened.
            self.metas = [name for name in stripped if name]

        self.num = len(self.metas)

    def __len__(self):
        return self.num

    def __getitem__(self, idx):
        """Load, resize and tensorize the image at position ``idx``."""
        filename = os.path.join(self.root_dir, self.metas[idx])
        # The context manager closes the underlying file once the resized
        # copy has been produced.
        with Image.open(filename) as src:
            if src.mode == 'L':
                # Grayscale images are expanded to three channels.
                src = src.convert('RGB')
            img = self.transform(src, self.new_w, self.new_h)
        img = transforms.ToTensor()(img)
        if self.normalize_fn is not None:
            img = self.normalize_fn(img)
        return img

    def transform(self, img, new_w, new_h):
        """Return ``img`` resized to ``(new_w, new_h)``.

        :param img: PIL image to resize
        :param new_w: target width
        :param new_h: target height
        """
        return img.resize((new_w, new_h))


--------------------------------------------------------------------------------
/utils/coco_eval.py:
--------------------------------------------------------------------------------
 1 | from __future__ import division
 2 | 
 3 | from datasets.pycocotools.coco import COCO
 4 | from datasets.pycocotools.cocoeval import COCOeval
 5 | from datasets.coco_dataset import COCODataset
 6 | import sys,os
 7 | import re
 8 | import logging
 9 | 
10 | logger = logging.getLogger('global')
def eval_coco_ap_from_results_txt(result_dir, test_type, anno_file):
    """Evaluate detection results stored as text files with the COCO API.

    Every file in ``result_dir`` whose name contains 'results.txt.rank' is
    read. Each non-empty line is whitespace separated:
    ``image_path x1 y1 x2 y2 ... score`` for 'proposal', and
    ``image_path x1 y1 x2 y2 ... score class_index`` otherwise. The image id
    is the integer file stem of ``image_path``.

    :param result_dir: directory holding the per-rank result files
    :param test_type: one of 'segm', 'bbox', 'keypoints', 'person_bbox',
                      'person_proposal', 'proposal'
    :param anno_file: COCO annotation file used as ground truth
    """
    logger.info("start eval coco ...")

    assert(test_type in ['segm', 'bbox', 'keypoints', 'person_bbox', 'person_proposal', 'proposal'])

    coco_gt = COCO(anno_file)
    category_ids = {anno['category_id'] for anno in coco_gt.anns.values()}
    # Class indices are 1-based positions in the sorted category ids.
    class_to_category = {i+1: c for i, c in enumerate(sorted(category_ids))}

    all_res = []
    for f in os.listdir(result_dir):
        if 'results.txt.rank' not in f:
            continue
        # Close every result file deterministically instead of leaking the handle.
        with open(os.path.join(result_dir, f), 'r') as result_file:
            for aline in result_file:
                aline = aline.rstrip().split()
                if not aline:
                    # Tolerate blank lines rather than failing on aline[0].
                    continue
                res = {}
                res["image_id"] = int(re.split('[/.]', aline[0])[-2])
                x1 = float(aline[1])
                y1 = float(aline[2])
                x2 = float(aline[3])
                y2 = float(aline[4])
                if test_type == 'proposal':
                    # NOTE(review): proposals keep corner coordinates while the
                    # other types are converted to (x, y, w, h) -- confirm this
                    # is intended.
                    res["bbox"] = [x1, y1, x2, y2]
                    res["score"]= float(aline[-1])
                    res["category_id"] = 1
                else:
                    res["bbox"] = [x1, y1, x2 - x1, y2 - y1]
                    res["score"]= float(aline[-2])
                    res["category_id"] = class_to_category[int(aline[-1])]
                all_res.append(res)

    logger.info("all res line: {}".format(len(all_res)))

    iou_type = {'keypoints':'keypoints', 'person_bbox':'bbox',
            'bbox':'bbox', 'segm':'segm',
            'proposal': 'bbox', 'person_proposal':'bbox'}[test_type]

    logger.info('loading annotations from %s\n' % anno_file)
    coco_dt = coco_gt.loadRes(all_res)
    coco_eval = COCOeval(coco_gt, coco_dt, iou_type)

    if test_type.find('proposal') >= 0:
        # Proposal-style evaluation ignores categories and uses larger detection caps.
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = [1,100,1000]
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()
62 | 
63 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_align_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_align_forward_cuda(int aligned_height, int aligned_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output)
 9 | {
10 |     // Grab the input tensor
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 | 
16 |     // Number of ROIs
17 |     int num_rois = THCudaTensor_size(state, rois, 0);
18 |     int size_rois = THCudaTensor_size(state, rois, 1);
19 |     if (size_rois != 5)
20 |     {
21 |         return 0;
22 |     }
23 | 
24 |     // data height
25 |     int data_height = THCudaTensor_size(state, features, 2);
26 |     // data width
27 |     int data_width = THCudaTensor_size(state, features, 3);
28 |     // Number of channels
29 |     int num_channels = THCudaTensor_size(state, features, 1);
30 | 
31 |     cudaStream_t stream = THCState_getCurrentStream(state);
32 | 
33 |     ROIAlignForwardLaucher(
34 |         data_flat, spatial_scale, num_rois, data_height,
35 |         data_width, num_channels, aligned_height,
36 |         aligned_width, rois_flat,
37 |         output_flat, stream);
38 | 
39 |     return 1;
40 | }
41 | 
42 | int roi_align_backward_cuda(int aligned_height, int aligned_width, float spatial_scale,
43 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad)
44 | {
45 |     // Grab the input tensor
46 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
47 |     float * rois_flat = THCudaTensor_data(state, rois);
48 | 
49 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
50 | 
51 |     // Number of ROIs
52 |     int num_rois = THCudaTensor_size(state, rois, 0);
53 |     int size_rois = THCudaTensor_size(state, rois, 1);
54 |     if (size_rois != 5)
55 |     {
56 |         return 0;
57 |     }
58 | 
59 |     // batch size
60 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
61 |     // data height
62 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
63 |     // data width
64 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
65 |     // Number of channels
66 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
67 | 
68 |     cudaStream_t stream = THCState_getCurrentStream(state);
69 |     ROIAlignBackwardLaucher(
70 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
71 |         data_width, num_channels, aligned_height,
72 |         aligned_width, rois_flat,
73 |         bottom_grad_flat, stream);
74 | 
75 |     return 1;
76 | }
77 | 


--------------------------------------------------------------------------------
/datasets/example_loader.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | 
 3 | import torch
 4 | import torch.nn.functional as F
 5 | import numpy as np
 6 | import logging
 7 | 
 8 | class ExampleDataLoader(torch.utils.data.DataLoader):
 9 |     def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
10 |                  num_workers=0, pin_memory=False, drop_last=False):
11 |         super(ExampleDataLoader, self).__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
12 |                                         num_workers, self._collate_fn, pin_memory, drop_last)
13 |     def _collate_fn(self, batch):
14 |         batch_size = len(batch)
15 | 
16 |         zip_batch = list(zip(*batch))
17 |         images = zip_batch[0]
18 |         unpad_image_sizes = zip_batch[1]
19 |         ground_truth_bboxes = zip_batch[2]
20 |         ignores = zip_batch[3]
21 |         filenames = zip_batch[4]
22 | 
23 |         max_img_h = max([_.shape[-2] for _ in images])
24 |         max_img_w = max([_.shape[-1] for _ in images])
25 |         max_num_gt_bboxes = max([_.shape[0] for _ in ground_truth_bboxes])
26 |         max_num_ig_bboxes = max([_.shape[0] for _ in ignores])
27 | 
28 | 
29 |         padded_images = []
30 |         padded_gt_bboxes = []
31 |         padded_ig_bboxes = []
32 |         for b_ix in range(batch_size):
33 |             img = images[b_ix]
34 |             # pad zeros to right bottom of each image
35 |             pad_size = (0, max_img_w - img.shape[-1], 0, max_img_h - img.shape[-2])
36 |             padded_images.append(F.pad(img, pad_size, 'constant', 0).data.cpu())
37 | 
38 |             # pad zeros to gt_bboxes
39 |             gt_bboxes = ground_truth_bboxes[b_ix].numpy()
40 |             new_gt_bboxes = np.zeros([max_num_gt_bboxes, gt_bboxes.shape[-1]])
41 |             new_gt_bboxes[range(gt_bboxes.shape[0]), :] = gt_bboxes
42 |             padded_gt_bboxes.append(new_gt_bboxes)
43 | 
44 |             # pad zeros to ig_bboxes
45 |             ig_bboxes = ignores[b_ix].numpy()
46 |             new_ig_bboxes = np.zeros([max_num_ig_bboxes, ig_bboxes.shape[-1]])
47 |             new_ig_bboxes[range(ig_bboxes.shape[0]), :] = ig_bboxes
48 |             padded_ig_bboxes.append(new_ig_bboxes)
49 | 
50 |         padded_images = images = torch.cat(padded_images, dim = 0)
51 |         padded_gt_bboxes = torch.from_numpy(np.stack(padded_gt_bboxes, axis = 0))
52 |         padded_ig_bboxes = torch.from_numpy(np.stack(padded_ig_bboxes, axis = 0))
53 |         unpad_image_sizes = torch.stack(unpad_image_sizes, dim = 0)
54 |         #logger = logging.getLogger('global')
55 |         #logger.debug('{0},{1},{2}'.format(padded_images.shape, padded_gt_bboxes.shape, unpad_image_sizes.shape))
56 |         return padded_images, unpad_image_sizes, padded_gt_bboxes, padded_ig_bboxes, filenames
57 | 


--------------------------------------------------------------------------------
/extensions/_cython_bbox/cython_bbox.pyx:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2017-present, Facebook, Inc.
 2 | #
 3 | # Licensed under the Apache License, Version 2.0 (the "License");
 4 | # you may not use this file except in compliance with the License.
 5 | # You may obtain a copy of the License at
 6 | #
 7 | #     http://www.apache.org/licenses/LICENSE-2.0
 8 | #
 9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | ##############################################################################
15 | #
16 | # Based on:
17 | # --------------------------------------------------------
18 | # Fast R-CNN
19 | # Copyright (c) 2015 Microsoft
20 | # Licensed under The MIT License [see LICENSE for details]
21 | # Written by Sergey Karayev
22 | # --------------------------------------------------------
23 | 
24 | cimport cython
25 | import numpy as np
26 | cimport numpy as np
27 | 
28 | DTYPE = np.float32
29 | ctypedef np.float32_t DTYPE_t
30 | 
@cython.boundscheck(False)
def bbox_overlaps(
        np.ndarray[DTYPE_t, ndim=2] boxes,
        np.ndarray[DTYPE_t, ndim=2] query_boxes):
    """
    Compute the pairwise intersection-over-union of two sets of boxes.

    Columns 0/2 and 1/3 of each row are used as the low/high coordinates on
    the two axes (presumably x1, y1, x2, y2). Widths and heights are plain
    coordinate differences; no +1 is added. Pairs whose intersection is not
    strictly positive on both axes keep an overlap of 0.

    Parameters
    ----------
    boxes: (N, 4) ndarray of float
    query_boxes: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    cdef unsigned int N = boxes.shape[0]
    cdef unsigned int K = query_boxes.shape[0]
    # Starts as all zeros; only overlapping pairs are written below.
    cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
    cdef DTYPE_t iw, ih, box_area
    cdef DTYPE_t ua
    cdef unsigned int k, n
    with nogil:
        for k in range(K):
            # Area of the query box, reused for every box in the inner loop.
            box_area = (
                (query_boxes[k, 2] - query_boxes[k, 0]) *
                (query_boxes[k, 3] - query_boxes[k, 1])
            )
            for n in range(N):
                # Intersection width; non-positive means no overlap.
                iw = (
                    min(boxes[n, 2], query_boxes[k, 2]) -
                    max(boxes[n, 0], query_boxes[k, 0])
                )
                if iw > 0:
                    # Intersection height, computed only when the widths overlap.
                    ih = (
                        min(boxes[n, 3], query_boxes[k, 3]) -
                        max(boxes[n, 1], query_boxes[k, 1])
                    )
                    if ih > 0:
                        # Union area = area(box) + area(query) - intersection.
                        ua = float(
                            (boxes[n, 2] - boxes[n, 0]) *
                            (boxes[n, 3] - boxes[n, 1]) +
                            box_area - iw * ih
                        )
                        overlaps[n, k] = iw * ih / ua
    return overlaps
74 | 


--------------------------------------------------------------------------------
/extensions/_nms/src/nms.c:
--------------------------------------------------------------------------------
 1 | #include <TH/TH.h>
 2 | #include <math.h>
 3 | 
 4 | int cpu_nms(THLongTensor * keep_out, THLongTensor * num_out, THFloatTensor * boxes, THLongTensor * order, THFloatTensor * areas, float nms_overlap_thresh) {
 5 |     // boxes has to be sorted
 6 |     THArgCheck(THLongTensor_isContiguous(keep_out), 0, "keep_out must be contiguous");
 7 |     THArgCheck(THLongTensor_isContiguous(boxes), 2, "boxes must be contiguous");
 8 |     THArgCheck(THLongTensor_isContiguous(order), 3, "order must be contiguous");
 9 |     THArgCheck(THLongTensor_isContiguous(areas), 4, "areas must be contiguous");
10 |     // Number of ROIs
11 |     long boxes_num = THFloatTensor_size(boxes, 0);
12 |     long boxes_dim = THFloatTensor_size(boxes, 1);
13 | 
14 |     long * keep_out_flat = THLongTensor_data(keep_out);
15 |     float * boxes_flat = THFloatTensor_data(boxes);
16 |     long * order_flat = THLongTensor_data(order);
17 |     float * areas_flat = THFloatTensor_data(areas);
18 | 
19 |     THByteTensor* suppressed = THByteTensor_newWithSize1d(boxes_num);
20 |     THByteTensor_fill(suppressed, 0);
21 |     unsigned char * suppressed_flat =  THByteTensor_data(suppressed);
22 | 
23 |     // nominal indices
24 |     int i, j;
25 |     // sorted indices
26 |     int _i, _j;
27 |     // temp variables for box i's (the box currently under consideration)
28 |     float ix1, iy1, ix2, iy2, iarea;
29 |     // variables for computing overlap with box j (lower scoring box)
30 |     float xx1, yy1, xx2, yy2;
31 |     float w, h;
32 |     float inter, ovr;
33 | 
34 |     long num_to_keep = 0;
35 |     for (_i=0; _i < boxes_num; ++_i) {
36 |         i = order_flat[_i];
37 |         if (suppressed_flat[i] == 1) {
38 |             continue;
39 |         }
40 |         keep_out_flat[num_to_keep++] = i;
41 |         ix1 = boxes_flat[i * boxes_dim];
42 |         iy1 = boxes_flat[i * boxes_dim + 1];
43 |         ix2 = boxes_flat[i * boxes_dim + 2];
44 |         iy2 = boxes_flat[i * boxes_dim + 3];
45 |         iarea = areas_flat[i];
46 |         for (_j = _i + 1; _j < boxes_num; ++_j) {
47 |             j = order_flat[_j];
48 |             if (suppressed_flat[j] == 1) {
49 |                 continue;
50 |             }
51 |             xx1 = fmaxf(ix1, boxes_flat[j * boxes_dim]);
52 |             yy1 = fmaxf(iy1, boxes_flat[j * boxes_dim + 1]);
53 |             xx2 = fminf(ix2, boxes_flat[j * boxes_dim + 2]);
54 |             yy2 = fminf(iy2, boxes_flat[j * boxes_dim + 3]);
55 |             w = fmaxf(0.0, xx2 - xx1 + 1);
56 |             h = fmaxf(0.0, yy2 - yy1 + 1);
57 |             inter = w * h;
58 |             ovr = inter / (iarea + areas_flat[j] - inter);
59 |             if (ovr >= nms_overlap_thresh) {
60 |                 suppressed_flat[j] = 1;
61 |             }
62 |         }
63 |     }
64 | 
65 |     long *num_out_flat = THLongTensor_data(num_out);
66 |     *num_out_flat = num_to_keep;
67 |     THByteTensor_free(suppressed);
68 |     return 1;
69 | }


--------------------------------------------------------------------------------
/functions/predict_bbox.py:
--------------------------------------------------------------------------------
 1 | #encoding:utf8
 2 | from utils import bbox_helper
 3 | from extensions import nms
 4 | import torch
 5 | import logging
 6 | import numpy as np
 7 | def to_np_array(x):
 8 |     if x is None:
 9 |         return None
10 |     # if isinstance(x, Variable): x = x.data
11 |     return x.cpu().data.numpy() if torch.is_tensor(x) else x
12 | 
def compute_predicted_bboxes(rois, pred_cls, pred_loc, image_info, cfg):
    '''
    Decode per-class boxes from RoIs, then run NMS per image and per class.

    :param rois: [N, k] k>=5, batch_ix, x1, y1, x2, y2
    :param pred_cls: [N, num_classes, 1, 1]
    :param pred_loc: [N, num_classes * 4, 1, 1]
    :param image_info: indexed by batch index; each row is handed to
        bbox_helper.clip_bbox (documented upstream as [N, 3])
    :param cfg: config; reads 'bbox_normalize_stats_precomputed',
        'bbox_normalize_stds', 'bbox_normalize_means', 'score_thresh',
        'nms_iou_thresh' and 'top_n'
    :return: bboxes: [M, 7] CUDA float tensor,
        batch_ix, x1, y1, x2, y2, score, cls (M may be 0)
    '''
    rois, pred_cls, pred_loc = map(to_np_array, [rois, pred_cls, pred_loc])
    N, num_classes = pred_cls.shape[0:2]
    assert(N == rois.shape[0])
    if N == 0:
        # No RoIs at all: max() below would raise, so answer with an empty result.
        return torch.zeros((0, 7)).float().cuda()
    B = int(rois[:, 0].max()) + 1

    denormalize = cfg['bbox_normalize_stats_precomputed']
    if denormalize:
        # Loop-invariant, so build the broadcastable stats once.
        stds = np.array(cfg['bbox_normalize_stds'])[np.newaxis, :]
        means = np.array(cfg['bbox_normalize_means'])[np.newaxis, :]

    nmsed_bboxes = []
    # Class 0 is skipped (presumably background -- confirm against the head).
    for cls in range(1, num_classes):
        # reshape (not squeeze) keeps the N axis even when N == 1.
        scores = pred_cls[:, cls].reshape(N)
        deltas = pred_loc[:, cls*4:cls*4+4].reshape(N, 4)
        if denormalize:
            deltas = deltas * stds + means
        bboxes = bbox_helper.compute_loc_bboxes(rois[:, 1:1+4], deltas)
        bboxes = np.hstack([bboxes, scores[:, np.newaxis]])
        # for each image, do nms
        for b_ix in range(B):
            rois_ix = np.where(rois[:, 0] == b_ix)[0]
            pre_scores = scores[rois_ix]
            # Fancy indexing copies, so clipping does not touch `bboxes`.
            pre_bboxes = bboxes[rois_ix]
            pre_bboxes[:, :4] = bbox_helper.clip_bbox(pre_bboxes[:, :4], image_info[b_ix])
            if cfg['score_thresh'] > 0:
                keep_ix = np.where(pre_scores > cfg['score_thresh'])[0]
                pre_scores = pre_scores[keep_ix]
                pre_bboxes = pre_bboxes[keep_ix]
            if pre_scores.size == 0:
                continue
            # NMS receives the boxes sorted by descending score.
            order = pre_scores.argsort()[::-1]
            pre_bboxes = pre_bboxes[order, :]
            keep_index = nms(torch.from_numpy(pre_bboxes).float().cuda(), cfg['nms_iou_thresh']).numpy()
            post_bboxes = pre_bboxes[keep_index]
            batch_ix = np.full(post_bboxes.shape[0], b_ix)
            batch_cls = np.full(post_bboxes.shape[0], cls)
            post_bboxes = np.hstack([batch_ix[:, np.newaxis], post_bboxes, batch_cls[:, np.newaxis]])
            nmsed_bboxes.append(post_bboxes)

    if not nmsed_bboxes:
        # Nothing survived thresholding/NMS; np.vstack([]) would raise.
        return torch.zeros((0, 7)).float().cuda()
    nmsed_bboxes = np.vstack(nmsed_bboxes)

    if cfg['top_n'] > 0:
        # Keep only the top_n highest scoring detections of each image.
        top_n_bboxes = []
        for b_ix in range(B):
            bboxes = nmsed_bboxes[nmsed_bboxes[:, 0] == b_ix]
            scores = bboxes[:, -2]
            order = scores.argsort()[::-1][:cfg['top_n']]
            top_n_bboxes.append(bboxes[order])
        nmsed_bboxes = np.vstack(top_n_bboxes)
    nmsed_bboxes = (torch.from_numpy(nmsed_bboxes)).float().cuda()
    return nmsed_bboxes
67 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling_cuda.c:
--------------------------------------------------------------------------------
 1 | #include <THC/THC.h>
 2 | #include <math.h>
 3 | #include "roi_pooling_kernel.h"
 4 | 
 5 | extern THCState *state;
 6 | 
 7 | int roi_pooling_forward_cuda(int pooled_height, int pooled_width, float spatial_scale,
 8 |                         THCudaTensor * features, THCudaTensor * rois, THCudaTensor * output, THCudaIntTensor * argmax)
 9 | {
10 |     // Grab the input tensor
11 |     float * data_flat = THCudaTensor_data(state, features);
12 |     float * rois_flat = THCudaTensor_data(state, rois);
13 | 
14 |     float * output_flat = THCudaTensor_data(state, output);
15 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
16 | 
17 |     // Number of ROIs
18 |     int num_rois = THCudaTensor_size(state, rois, 0);
19 |     int size_rois = THCudaTensor_size(state, rois, 1);
20 |     if (size_rois != 5)
21 |     {
22 |         return 0;
23 |     }
24 | 
25 |     // batch size
26 |     // int batch_size = THCudaTensor_size(state, features, 0);
27 |     // if (batch_size != 1)
28 |     // {
29 |     //     return 0;
30 |     // }
31 |     // data height
32 |     int data_height = THCudaTensor_size(state, features, 2);
33 |     // data width
34 |     int data_width = THCudaTensor_size(state, features, 3);
35 |     // Number of channels
36 |     int num_channels = THCudaTensor_size(state, features, 1);
37 | 
38 |     cudaStream_t stream = THCState_getCurrentStream(state);
39 | 
40 |     ROIPoolForwardLaucher(
41 |         data_flat, spatial_scale, num_rois, data_height,
42 |         data_width, num_channels, pooled_height,
43 |         pooled_width, rois_flat,
44 |         output_flat, argmax_flat, stream);
45 | 
46 |     return 1;
47 | }
48 | 
49 | int roi_pooling_backward_cuda(int pooled_height, int pooled_width, float spatial_scale,
50 |                         THCudaTensor * top_grad, THCudaTensor * rois, THCudaTensor * bottom_grad, THCudaIntTensor * argmax)
51 | {
52 |     // Grab the input tensor
53 |     float * top_grad_flat = THCudaTensor_data(state, top_grad);
54 |     float * rois_flat = THCudaTensor_data(state, rois);
55 | 
56 |     float * bottom_grad_flat = THCudaTensor_data(state, bottom_grad);
57 |     int * argmax_flat = THCudaIntTensor_data(state, argmax);
58 | 
59 |     // Number of ROIs
60 |     int num_rois = THCudaTensor_size(state, rois, 0);
61 |     int size_rois = THCudaTensor_size(state, rois, 1);
62 |     if (size_rois != 5)
63 |     {
64 |         return 0;
65 |     }
66 | 
67 |     // batch size
68 |     int batch_size = THCudaTensor_size(state, bottom_grad, 0);
69 |     // if (batch_size != 1)
70 |     // {
71 |     //     return 0;
72 |     // }
73 |     // data height
74 |     int data_height = THCudaTensor_size(state, bottom_grad, 2);
75 |     // data width
76 |     int data_width = THCudaTensor_size(state, bottom_grad, 3);
77 |     // Number of channels
78 |     int num_channels = THCudaTensor_size(state, bottom_grad, 1);
79 | 
80 |     cudaStream_t stream = THCState_getCurrentStream(state);
81 |     ROIPoolBackwardLaucher(
82 |         top_grad_flat, spatial_scale, batch_size, num_rois, data_height,
83 |         data_width, num_channels, pooled_height,
84 |         pooled_width, rois_flat,
85 |         bottom_grad_flat, argmax_flat, stream);
86 | 
87 |     return 1;
88 | }
89 | 


--------------------------------------------------------------------------------
/extensions/_nms/src/cuda/nms_kernel.cu:
--------------------------------------------------------------------------------
 1 | // ------------------------------------------------------------------
 2 | // Faster R-CNN
 3 | // Copyright (c) 2015 Microsoft
 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
 5 | // Written by Shaoqing Ren
 6 | // ------------------------------------------------------------------
 7 | #ifdef __cplusplus
 8 | extern "C" {
 9 | #endif
10 | 
11 | #include <math.h>
12 | #include <stdio.h>
13 | #include <float.h>
14 | #include "nms_kernel.h"
15 | 
// Intersection-over-union of boxes `a` and `b`, each read as
// (x1, y1, x2, y2) from its first four floats.
// Every extent is computed as (hi - lo + 1), for box areas and overlap alike.
__device__ inline float devIoU(float const * const a, float const * const b) {
  const float ix1 = fmaxf(a[0], b[0]);
  const float iy1 = fmaxf(a[1], b[1]);
  const float ix2 = fminf(a[2], b[2]);
  const float iy2 = fminf(a[3], b[3]);
  // Clamp to zero so disjoint boxes contribute no intersection.
  const float iw = fmaxf(ix2 - ix1 + 1, 0.f);
  const float ih = fmaxf(iy2 - iy1 + 1, 0.f);
  const float inter = iw * ih;
  const float area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const float area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
  return inter / (area_a + area_b - inter);
}
25 | 
// For one (row tile, column tile) pair of boxes, records which column-tile
// boxes overlap each row-tile box by more than nms_overlap_thresh.
//   n_boxes            - total number of boxes in dev_boxes
//   nms_overlap_thresh - IoU threshold; only strictly greater IoU sets a bit
//   dev_boxes          - boxes, 5 floats per box (devIoU reads the first four)
//   dev_mask           - output: one unsigned long long bitmask per
//                        (box, column tile), laid out as [box][column tile]
// NOTE(review): threadsPerBlock and DIVUP come from nms_kernel.h (not visible
// here); the `1ULL << i` below presumably relies on threadsPerBlock <= 64.
__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
                           const float *dev_boxes, unsigned long long *dev_mask) {
  // blockIdx.y selects the tile of "current" boxes, blockIdx.x the tile they
  // are compared against.
  const int row_start = blockIdx.y;
  const int col_start = blockIdx.x;

  // if (row_start > col_start) return;

  // The last tile in each direction may be only partially filled.
  const int row_size =
        fminf(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
  const int col_size =
        fminf(n_boxes - col_start * threadsPerBlock, threadsPerBlock);

  // Stage the column tile's boxes in shared memory; each thread copies one box.
  __shared__ float block_boxes[threadsPerBlock * 5];
  if (threadIdx.x < col_size) {
    block_boxes[threadIdx.x * 5 + 0] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
    block_boxes[threadIdx.x * 5 + 1] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
    block_boxes[threadIdx.x * 5 + 2] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
    block_boxes[threadIdx.x * 5 + 3] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
    block_boxes[threadIdx.x * 5 + 4] =
        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
  }
  // All copies must land before any thread reads block_boxes.
  __syncthreads();

  if (threadIdx.x < row_size) {
    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
    const float *cur_box = dev_boxes + cur_box_idx * 5;
    int i = 0;
    // Bit i of t is set when column-tile box i overlaps the current box.
    unsigned long long t = 0;
    int start = 0;
    // On the diagonal tile, compare only against boxes that come after the
    // current one, so a box is never tested against itself or earlier boxes.
    if (row_start == col_start) {
      start = threadIdx.x + 1;
    }
    for (i = start; i < col_size; i++) {
      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
        t |= 1ULL << i;
      }
    }
    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
    dev_mask[cur_box_idx * col_blocks + col_start] = t;
  }
}
71 | 
72 | 
// Launches nms_kernel over a square grid of box tiles: one block per
// (row tile, column tile) pair and threadsPerBlock threads per block.
// The launch uses the default stream and does not check for errors.
void _nms(int boxes_num, float * boxes_dev,
          unsigned long long * mask_dev, float nms_overlap_thresh) {

  dim3 grid_dim(DIVUP(boxes_num, threadsPerBlock),
                DIVUP(boxes_num, threadsPerBlock));
  dim3 block_dim(threadsPerBlock);

  nms_kernel<<<grid_dim, block_dim>>>(
      boxes_num, nms_overlap_thresh, boxes_dev, mask_dev);
}
84 | 
85 | #ifdef __cplusplus
86 | }
87 | #endif
88 | 


--------------------------------------------------------------------------------
/functions/rpn_proposal.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | from utils import bbox_helper
 3 | from utils import anchor_helper
 4 | from extensions import nms
 5 | import torch
 6 | import torch.nn.functional as F
 7 | import numpy as np
 8 | import logging
 9 | logger = logging.getLogger('global')
10 | 
def to_np_array(x):
    """Turn a torch tensor into a NumPy array; leave everything else alone.

    ``None`` stays ``None`` and non-tensor values are returned as-is.
    """
    if x is None:
        return None
    if torch.is_tensor(x):
        # Host copy first, then ``.data`` to drop autograd tracking.
        return x.cpu().data.numpy()
    return x
16 | 
def compute_rpn_proposals(conv_cls, conv_loc, cfg, image_info):
    '''
    Turn RPN conv outputs into scored, NMS-filtered proposals.

    :argument
        cfg: configs; reads 'anchor_ratios', 'anchor_scales', 'anchor_stride',
            'pre_nms_top_n', 'post_nms_top_n', 'roi_min_size', 'nms_iou_thresh'
        conv_cls: FloatTensor, [batch, num_anchors * x, h, w], conv output of classification
        conv_loc: FloatTensor, [batch, num_anchors * 4, h, w], conv output of localization
        image_info: FloatTensor, [batch, 3], image size
    :returns
        proposals: CPU FloatTensor, 2-dim, one row per kept proposal:
            batch_ix, decoded box columns, score
            (presumably [N, 6] with a 4-column box -- the box width comes from
            bbox_helper.compute_loc_bboxes, which is not visible here)
    '''

    batch_size, num_anchors_4, featmap_h, featmap_w = conv_loc.shape
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(featmap_h, featmap_w,
                                                             cfg['anchor_ratios'], cfg['anchor_scales'], cfg['anchor_stride'])
    B = batch_size
    A = num_anchors_4 // 4
    assert(A * 4 == num_anchors_4)
    K = featmap_h * featmap_w

    # Channels-last so that each row of the flattened view is one anchor.
    # detach() lets tensors that still track gradients be converted to NumPy.
    cls_view = conv_cls.detach().permute(0, 2, 3, 1).contiguous().view(B, K*A, -1).cpu().numpy()
    loc_view = conv_loc.detach().permute(0, 2, 3, 1).contiguous().view(B, K*A, 4).cpu().numpy()
    if torch.is_tensor(image_info):
        image_info = image_info.cpu().numpy()

    batch_proposals = []
    pre_nms_top_n = cfg['pre_nms_top_n']
    post_nms_top_n = cfg['post_nms_top_n']
    for b_ix in range(B):
        scores = cls_view[b_ix, :, -1] # to compatible with sigmoid
        # argpartition needs kth < len(scores); when the requested top-n covers
        # every anchor a plain full sort gives the same result.
        if pre_nms_top_n <= 0 or pre_nms_top_n >= scores.shape[0]:
            order = scores.argsort()[::-1]
        else:
            # Select the top-n cheaply, then sort just those by descending score.
            inds = np.argpartition(-scores, pre_nms_top_n)[:pre_nms_top_n]
            order = inds[np.argsort(-scores[inds])]
        loc_delta = loc_view[b_ix, order, :]
        loc_anchors = anchors_overplane[order, :]
        scores = scores[order]
        boxes = bbox_helper.compute_loc_bboxes(loc_anchors, loc_delta)
        boxes = bbox_helper.clip_bbox(boxes, image_info[b_ix])
        proposals = np.hstack([boxes, scores[:, np.newaxis]])
        # Drop boxes smaller than roi_min_size along either side.
        proposals = proposals[(proposals[:, 2] - proposals[:, 0] + 1 >= cfg['roi_min_size'])
                            & (proposals[:, 3] - proposals[:, 1] + 1 >= cfg['roi_min_size'])]
        if proposals.shape[0] > 0:
            # Skip NMS entirely when the size filter removed every proposal.
            keep_index = nms(torch.from_numpy(proposals).float().cuda(), cfg['nms_iou_thresh']).numpy()
            if post_nms_top_n > 0:
                keep_index = keep_index[:post_nms_top_n]
            proposals = proposals[keep_index]
        batch_ix = np.full((proposals.shape[0],), b_ix)
        proposals = np.hstack([batch_ix[:, np.newaxis], proposals])
        batch_proposals.append(proposals)
    batch_proposals = (torch.from_numpy(np.vstack(batch_proposals))).float()
    if batch_proposals.dim() < 2:
        # unsqueeze is not in-place: the result has to be re-bound.
        batch_proposals = batch_proposals.unsqueeze(dim=0)
    return batch_proposals
75 | 


--------------------------------------------------------------------------------
/extensions/_bbox_helper/src/cuda/iou_overlap_kernel.cu:
--------------------------------------------------------------------------------
  1 | // #ifdef __cplusplus
  2 | // extern "C" {
  3 | // #endif
  4 | 
  5 | #include <math.h>
  6 | #include <stdio.h>
  7 | #include <float.h>
  8 | #include "iou_overlap_kernel.h"
  9 | 
 10 | 
 11 | #define DIVUP(m, n) ((m) / (m) + ((m) % (n) > 0))
 12 | 
 13 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 14 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 15 |        i += blockDim.x * gridDim.x)
 16 | 
 17 | // CUDA: grid stride looping
 18 | #define CUDA_KERNEL_LOOP(i, n) \
 19 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
 20 |        i < (n); \
 21 |        i += blockDim.x * gridDim.x)
 22 | 
 23 | //__device__ inline float devIoU(float const * const a, float const * const b) {
 24 | //  float left = fmaxf(a[0], b[0]), right = fminf(a[2], b[2]);
 25 | //  float top = fmaxf(a[1], b[1]), bottom = fminf(a[3], b[3]);
 26 | //  float width = fmaxf(right - left + 1, 0.f), height = fmaxf(bottom - top + 1, 0.f);
 27 | //  float interS = width * height;
 28 | //  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
 29 | //  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
 30 | //  return interS / (Sa + Sb - interS);
 31 | //}
 32 | 
 33 | __global__ void IOUOverlapKernel(
 34 |     const float* bbox1,
 35 |     const float* bbox2,
 36 |     const int size_bbox,
 37 |     const int num_bbox1,
 38 |     const int num_bbox2,
 39 |     float* top_data){
 40 |     CUDA_KERNEL_LOOP(index, num_bbox1 * num_bbox2){
 41 |         int b1 = index / num_bbox2;
 42 |         int b2 = index % num_bbox2;
 43 | 
 44 |         int base1 = b1 * size_bbox;
 45 |         float b1_x1 = bbox1[base1];
 46 |         float b1_y1 = bbox1[base1 + 1];
 47 |         float b1_x2 = bbox1[base1 + 2];
 48 |         float b1_y2 = bbox1[base1 + 3];
 49 |         float b1_area = (b1_x2 - b1_x1) * (b1_y2 - b1_y1); 
 50 | 
 51 |         int base2 = b2 * size_bbox;
 52 |         float b2_x1 = bbox2[base2];
 53 |         float b2_y1 = bbox2[base2 + 1];
 54 |         float b2_x2 = bbox2[base2 + 2];
 55 |         float b2_y2 = bbox2[base2 + 3];
 56 |         float b2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1); 
 57 | 
 58 |         float left = fmaxf(b1_x1, b2_x1), right  = fminf(b1_x2, b2_x2);
 59 |         float top  = fmaxf(b1_y1, b2_y1), bottom = fminf(b1_y2, b2_y2);
 60 |         float width = fmaxf(right - left, 0.f), height = fmaxf(bottom - top, 0.f);
 61 |         float interS = width * height;
 62 |         float unionS = fmaxf(b1_area + b2_area - interS, 1.0);
 63 |         top_data[b1 * num_bbox2 + b2] = interS / unionS;
 64 |     }
 65 | }
 66 | 
 67 | int IOUOverlap(
 68 |     const float* bboxes1_data, 
 69 |     const float* bboxes2_data, 
 70 |     const int size_bbox,
 71 |     const int num_bbox1,
 72 |     const int num_bbox2,
 73 |     float* top_data,
 74 |     cudaStream_t stream){
 75 |         const int kThreadsPerBlock = 1024;
 76 |         int output_size = num_bbox1 * num_bbox2;
 77 |         //int output_size = num_bbox1;
 78 |         cudaError_t err;
 79 | 
 80 |         err = cudaGetLastError();
 81 |         if(cudaSuccess != err)
 82 |         {
 83 |             fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
 84 |                 __LINE__, cudaGetErrorString( err ) );
 85 |             exit( -1 );
 86 |         }
 87 | 
 88 |         IOUOverlapKernel<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
 89 |                      bboxes1_data, bboxes2_data, size_bbox, num_bbox1, num_bbox2, top_data);
 90 | 
 91 |     err = cudaGetLastError();
 92 |     if(cudaSuccess != err)
 93 |     {
 94 |         fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
 95 |                 __LINE__, cudaGetErrorString( err ) );
 96 |         exit( -1 );
 97 |     }
 98 | 
 99 |     return 1;
100 | }
101 | 
102 | // #ifdef __cplusplus
103 | // }
104 | // #endif
105 | 


--------------------------------------------------------------------------------
/utils/anchor_helper.py:
--------------------------------------------------------------------------------
 1 | #encoding: utf-8
 2 | import numpy as np
 3 | 
 4 | def get_anchors_over_grid(ratios, scales, stride):
 5 |     """
 6 |     Generate anchor (reference) windows by enumerating aspect ratios X
 7 |     scales wrt a reference (0, 0, stride-1, stride-1) window.
 8 |     """
 9 |     # ratios, scales = np.meshgrid(ratios, scales)
10 |     scales = np.array(scales) * stride
11 |     return generate_anchors(stride=stride, sizes=scales)
12 | 
13 |     scales, ratios = np.meshgrid(scales, ratios)
14 |     sqrt_ratios = np.sqrt(ratios)
15 |     ws = (scales / sqrt_ratios).reshape(-1,1)
16 |     hs = (scales * sqrt_ratios).reshape(-1,1)
17 |     x = np.round(ws / 2.0)
18 |     y = np.round(hs / 2.0)
19 |     return np.hstack([-x,-y,x,y]) + stride / 2
20 | 
def get_anchors_over_plane(featmap_h, featmap_w, anchor_ratios, anchor_scales, anchor_stride):
    """Replicate the per-cell anchors over every cell of a feature map.

    Returns an array of shape [featmap_h * featmap_w * A, 4], where A is the
    number of anchors per cell. Rows are ordered cell by cell (row-major over
    the feature map), with the A anchors of a cell kept together.
    """
    cell_anchors = get_anchors_over_grid(anchor_ratios, anchor_scales, anchor_stride)

    # Top-left offset of every cell, in input pixels.
    xs = np.arange(0, featmap_w) * anchor_stride
    ys = np.arange(0, featmap_h) * anchor_stride
    grid_x, grid_y = np.meshgrid(xs, ys)  # each [featmap_h, featmap_w]
    flat_x = grid_x.ravel()
    flat_y = grid_y.ravel()
    # One (dx, dy, dx, dy) row per cell so it can be added to (x1, y1, x2, y2).
    shifts = np.stack([flat_x, flat_y, flat_x, flat_y], axis=1)

    num_cell_anchors = cell_anchors.shape[0]
    num_cells = shifts.shape[0]
    # Broadcast [1, A, 4] + [K, 1, 4] -> [K, A, 4].
    all_anchors = (cell_anchors.reshape((1, num_cell_anchors, 4)) +
                   shifts.reshape((num_cells, 1, 4)))
    return all_anchors.reshape((num_cells * num_cell_anchors, 4))
36 | 
def generate_anchors(
    stride=16, sizes=(32, 64), aspect_ratios=(0.5, 1, 2)
):
    """Generates a matrix of anchor boxes in (x1, y1, x2, y2) format. Anchors
    are centered on stride / 2, have (approximate) sqrt areas of the specified
    sizes, and aspect ratios as given.

    Rows are grouped by aspect ratio; within a group they follow ``sizes``.
    """
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    return _generate_anchors(
        stride,
        np.array(sizes, dtype=np.float64) / stride,
        np.array(aspect_ratios, dtype=np.float64)
    )


def _generate_anchors(base_size, scales, aspect_ratios):
    """Generate anchor (reference) windows by enumerating aspect ratios X
    scales wrt a reference (0, 0, base_size - 1, base_size - 1) window.
    """
    anchor = np.array([1, 1, base_size, base_size], dtype=np.float64) - 1
    anchors = _ratio_enum(anchor, aspect_ratios)
    anchors = np.vstack(
        [_scale_enum(anchors[i, :], scales) for i in range(anchors.shape[0])]
    )
    return anchors


def _ratio_enum(anchor, ratios):
    """Enumerate a set of anchors for each aspect ratio wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    size = w * h
    size_ratios = size / ratios
    # Keep the area roughly constant while making height / width == ratio.
    ws = np.round(np.sqrt(size_ratios))
    hs = np.round(ws * ratios)
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _scale_enum(anchor, scales):
    """Enumerate a set of anchors for each scale wrt an anchor."""
    w, h, x_ctr, y_ctr = _whctrs(anchor)
    ws = w * scales
    hs = h * scales
    anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
    return anchors


def _whctrs(anchor):
    """Return width, height, x center, and y center for an anchor (window)."""
    # Corners are inclusive, hence the "+ 1" / "- 1".
    w = anchor[2] - anchor[0] + 1
    h = anchor[3] - anchor[1] + 1
    x_ctr = anchor[0] + 0.5 * (w - 1)
    y_ctr = anchor[1] + 0.5 * (h - 1)
    return w, h, x_ctr, y_ctr


def _mkanchors(ws, hs, x_ctr, y_ctr):
    """Given a vector of widths (ws) and heights (hs) around a center
    (x_ctr, y_ctr), output a set of anchors (windows).
    """
    ws = ws[:, np.newaxis]
    hs = hs[:, np.newaxis]
    anchors = np.hstack(
        (
            x_ctr - 0.5 * (ws - 1),
            y_ctr - 0.5 * (hs - 1),
            x_ctr + 0.5 * (ws - 1),
            y_ctr + 0.5 * (hs - 1)
        )
    )
    return anchors
97 | 
98 | 
99 | 


--------------------------------------------------------------------------------
/datasets/pycocotools/common/gason.h:
--------------------------------------------------------------------------------
  1 | // https://github.com/vivkin/gason - pulled January 10, 2016
  2 | #pragma once
  3 | 
  4 | #include <stdint.h>
  5 | #include <stddef.h>
  6 | #include <assert.h>
  7 | 
  8 | enum JsonTag {
  9 |     JSON_NUMBER = 0,
 10 |     JSON_STRING,
 11 |     JSON_ARRAY,
 12 |     JSON_OBJECT,
 13 |     JSON_TRUE,
 14 |     JSON_FALSE,
 15 |     JSON_NULL = 0xF
 16 | };
 17 | 
 18 | struct JsonNode;
 19 | 
// Bit layout used to pack non-number values into the 64 bits of a double:
//   low 47 bits : payload (a pointer)
//   4 bits at 47: JsonTag
//   NAN_MASK    : exponent bits all set plus the top mantissa bit
#define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
#define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
#define JSON_VALUE_TAG_MASK 0xF
#define JSON_VALUE_TAG_SHIFT 47

// A JSON value stored in 64 bits: either a plain double, or a tag plus a
// pointer payload OR-ed onto JSON_VALUE_NAN_MASK.
union JsonValue {
    uint64_t ival;
    double fval;

    // Number value: the double is stored as-is.
    JsonValue(double x)
        : fval(x) {
    }
    // Tagged value: the payload pointer must fit in the payload mask.
    JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
        assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
        ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
    }
    // True when the bits, compared as a signed 64-bit integer, do not exceed
    // the NaN mask; tagged values with a non-zero tag or payload compare greater.
    bool isDouble() const {
        return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
    }
    // JSON_NUMBER for doubles, otherwise the 4-bit tag field.
    JsonTag getTag() const {
        return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
    }
    // Payload bits of a tagged value; asserts that this is not a double.
    uint64_t getPayload() const {
        assert(!isDouble());
        return ival & JSON_VALUE_PAYLOAD_MASK;
    }
    // The stored double; asserts the tag is JSON_NUMBER.
    double toNumber() const {
        assert(getTag() == JSON_NUMBER);
        return fval;
    }
    // Payload reinterpreted as a char pointer; asserts the tag is JSON_STRING.
    char *toString() const {
        assert(getTag() == JSON_STRING);
        return (char *)getPayload();
    }
    // Payload reinterpreted as the first child node; asserts the tag is
    // JSON_ARRAY or JSON_OBJECT.
    JsonNode *toNode() const {
        assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
        return (JsonNode *)getPayload();
    }
};
 59 | 
// One element of a singly linked list of values; JsonIterator walks the list
// through `next`.
struct JsonNode {
    JsonValue value;
    JsonNode *next;  // following node; iteration stops at nullptr
    char *key;       // presumably the member name for object entries -- TODO confirm in the parser
};
 65 | 
 66 | struct JsonIterator {
 67 |     JsonNode *p;
 68 | 
 69 |     void operator++() {
 70 |         p = p->next;
 71 |     }
 72 |     bool operator!=(const JsonIterator &x) const {
 73 |         return p != x.p;
 74 |     }
 75 |     JsonNode *operator*() const {
 76 |         return p;
 77 |     }
 78 |     JsonNode *operator->() const {
 79 |         return p;
 80 |     }
 81 | };
 82 | 
 83 | inline JsonIterator begin(JsonValue o) {
 84 |     return JsonIterator{o.toNode()};
 85 | }
 86 | inline JsonIterator end(JsonValue) {
 87 |     return JsonIterator{nullptr};
 88 | }
 89 | 
// X-macro table of parser error codes: XX(NAME, "message").
// The order of the entries fixes the numeric value of each JSON_<NAME>
// enumerator below (OK is 0), so new entries belong at the end.
#define JSON_ERRNO_MAP(XX)                           \
    XX(OK, "ok")                                     \
    XX(BAD_NUMBER, "bad number")                     \
    XX(BAD_STRING, "bad string")                     \
    XX(BAD_IDENTIFIER, "bad identifier")             \
    XX(STACK_OVERFLOW, "stack overflow")             \
    XX(STACK_UNDERFLOW, "stack underflow")           \
    XX(MISMATCH_BRACKET, "mismatch bracket")         \
    XX(UNEXPECTED_CHARACTER, "unexpected character") \
    XX(UNQUOTED_KEY, "unquoted key")                 \
    XX(BREAKING_BAD, "breaking bad")                 \
    XX(ALLOCATION_FAILURE, "allocation failure")

// Expands the table into JSON_OK, JSON_BAD_NUMBER, ... in table order.
enum JsonErrno {
#define XX(no, str) JSON_##no,
    JSON_ERRNO_MAP(XX)
#undef XX
};
108 | 
109 | const char *jsonStrError(int err);
110 | 
// Owner of a singly linked chain of memory zones. Move-only: copying is
// deleted, moving transfers the chain and leaves the source empty. The
// destructor releases the chain through deallocate().
class JsonAllocator {
    struct Zone {
        Zone *next;
        size_t used;
    } *head = nullptr;

public:
    JsonAllocator() = default;
    JsonAllocator(const JsonAllocator &) = delete;
    JsonAllocator &operator=(const JsonAllocator &) = delete;
    // Takes over x's zones; x is left empty.
    JsonAllocator(JsonAllocator &&x) : head(x.head) {
        x.head = nullptr;
    }
    // Releases the zones currently owned, then takes over x's zones.
    // Self-assignment is a no-op, so the chain is neither lost nor freed.
    JsonAllocator &operator=(JsonAllocator &&x) {
        if (this != &x) {
            deallocate();
            head = x.head;
            x.head = nullptr;
        }
        return *this;
    }
    ~JsonAllocator() {
        deallocate();
    }
    // Both defined out of line (not visible in this header).
    void *allocate(size_t size);
    void deallocate();
};
135 | 
136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
137 | 


--------------------------------------------------------------------------------
/functions/proposal_assign.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | import logging
  4 | #from utils.timer import Timer
  5 | 
  6 | def to_np_array(x):
  7 |     if x is None:
  8 |         return None
  9 |     # if isinstance(x, Variable): x = x.data
 10 |     return x.cpu().data.numpy() if torch.is_tensor(x) else np.array(x)
 11 | 
 12 | def get_rois_target_levels(levels, base_scale, base_level, rois):
 13 |     '''assign proposals to different level feature map to roi pooling
 14 |         Args:
 15 |             rois: [R, 5], batch_ix,x1,y1,x2,y2
 16 |             levels: [L], levels. e.g.[2,3,4,5,6]
 17 |     '''
 18 |     rois = to_np_array(rois)
 19 |     w = rois[:, 3] - rois[:, 1] + 1
 20 |     h = rois[:, 4] - rois[:, 2] + 1
 21 |     scale = (w * h)**0.5
 22 |     eps = 1e-6
 23 |     target_levels = np.floor(base_level + np.log2(scale/base_scale+eps)).astype(np.int32)
 24 |     min_level, max_level = min(levels), max(levels)
 25 |     return np.clip(target_levels, min_level, max_level)
 26 | 
 27 | def get_rois_by_level(levels, base_scale, base_level, rois):
 28 |     rois = to_np_array(rois)
 29 |     target_lvls = get_rois_target_levels(levels, base_scale, base_level, rois)
 30 |     rois_by_level, rois_ix_by_level = [], []
 31 |     for lvl in levels:
 32 |         ix = np.where(target_lvls == lvl)[0]
 33 |         rois_by_level.append(rois[ix])
 34 |         rois_ix_by_level.append(ix)
 35 |     return rois_by_level, rois_ix_by_level
 36 | 
 37 | def assign_args_by_level(levels, base_scale, base_level, rois, *args):
 38 |     '''
 39 |         Args:
 40 |             rois: [R, 5], batch_ix,x1,y1,x2,y2
 41 |             levels: [L], levels. e.g.[2,3,4,5,6]
 42 |         return:
 43 |             args by level
 44 |     '''
 45 |     args_by_level = []
 46 |     rois = to_np_array(rois)
 47 |     rois_by_level, rois_ix_by_level = \
 48 |             get_rois_by_level(levels, base_scale, base_level, rois)
 49 | 
 50 |     args_by_level.append(rois_by_level)
 51 |     for arg in args:
 52 |         # assign arg to each level
 53 |         arg = to_np_array(arg)
 54 |         arg_by_level = []
 55 |         for ix in rois_ix_by_level:
 56 |             arg_by_level.append(arg[ix])
 57 |         args_by_level.append(arg_by_level)
 58 |     return args_by_level
 59 | 
 60 | def get_proposals_assign(proposals, base_scale=224, layer_index=4):
 61 |     '''
 62 |     :arguement
 63 |         proposals:[N, k], k>=5, batch_idx, x1, y1, x2, y2
 64 |         base_scale: base scale
 65 |         layer_index: the layer RoI with wxh=224x22 should be mapped into
 66 |     returns:
 67 |         p*: [N, 5]
 68 |     '''
 69 |     #logger = logging.getLogger('global')
 70 |     #p = map(lambda x: x.cpu().numpy() if torch.is_tensor(x) else x, [proposals])
 71 |     p = to_np_array(proposals)
 72 |     w = p[:,3] - p[:,1] + 1
 73 |     h = p[:,4] - p[:,2] + 1
 74 |     area = (w*h)**0.5
 75 |     k = np.floor(layer_index + np.log2(area/base_scale))
 76 |     p2 = p[k <= 2] 
 77 |     p3 = p[k == 3] 
 78 |     p4 = p[k == 4] 
 79 |     p5 = p[k >= 5] 
 80 |     return p2, p3, p4, p5
 81 | 
 82 | def get_rois_assign(rois, cls_targets, loc_targets, loc_weights, base_scale=224, layer_index=4):
 83 |     #logger = logging.getLogger('global')
 84 |     #T = Timer()
 85 |     #roi = rois.data.cpu().numpy()
 86 |     #cls_t = cls_targets.data.cpu().numpy()
 87 |     #loc_t = loc_targets.data.cpu().numpy()
 88 |     #loc_w = loc_weights.data.cpu().numpy()
 89 |     roi = rois
 90 |     cls_t = cls_targets
 91 |     loc_t = loc_targets
 92 |     loc_w = loc_weights
 93 | 
 94 |     w = roi[:,3] - roi[:,1] + 1
 95 |     h = roi[:,4] - roi[:,2] + 1
 96 |     area = (w*h)**0.5
 97 |     k = np.floor(layer_index + np.log2(area/base_scale))
 98 |     p2 = k <= 2
 99 |     p3 = k == 3
100 |     p4 = k == 4
101 |     p5 = k >= 5
102 |     roi_new = []
103 |     cls_t_new = []
104 |     loc_t_new = []
105 |     loc_w_new = []
106 |     for p in [p2, p3, p4, p5]:
107 |          roi_new.append(roi[p])
108 |          if np.where(p==True)[0].size > 0:
109 |              cls_t_new.append(cls_t[p])
110 |              loc_t_new.append(loc_t[p])
111 |              loc_w_new.append(loc_w[p])
112 | 
113 |     cuda_device = rois.device
114 |     f = lambda x: (torch.from_numpy(x)).cuda()
115 |     cls_ts = f(np.concatenate(cls_t_new)).long()
116 |     loc_ts = f(np.vstack(loc_t_new)).float()
117 |     loc_ws = f(np.vstack(loc_w_new)).float()
118 |     return roi_new, cls_ts, loc_ts, loc_ws
119 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/src/focal_loss_cuda.c:
--------------------------------------------------------------------------------
  1 | #include <math.h>
  2 | #include <THC/THC.h>
  3 | #include <assert.h>
  4 | #include <stdio.h>
  5 | #include "cuda/focal_loss_sigmoid_kernel.h"
  6 | #include "cuda/focal_loss_softmax_kernel.h"
  7 | 
  8 | extern THCState *state;
  9 | 
 10 | int focal_loss_sigmoid_forward_cuda(
 11 |                            int N,
 12 |                            THCudaTensor * logits,
 13 |                            THCudaIntTensor * targets,
 14 |                            float weight_pos,
 15 |                            float gamma, 
 16 |                            float alpha,
 17 |                            int num_classes,
 18 |                            THCudaTensor * losses){
 19 |     // Grab the input tensor
 20 |     float * logits_flat = THCudaTensor_data(state, logits);
 21 |     int * targets_flat = THCudaIntTensor_data(state, targets);
 22 | 
 23 |     float * losses_flat = THCudaTensor_data(state, losses);
 24 | 
 25 |     cudaStream_t stream = THCState_getCurrentStream(state);
 26 | 
 27 |     SigmoidFocalLossForwardLaucher(
 28 |         N, logits_flat, targets_flat, weight_pos, 
 29 |         gamma, alpha, num_classes, losses_flat, stream);
 30 | 
 31 |     return 1;
 32 | }
 33 | 
 34 | int focal_loss_sigmoid_backward_cuda(
 35 |                            int N,
 36 |                            THCudaTensor * logits,
 37 |                            THCudaIntTensor * targets,
 38 |                            THCudaTensor * dX_data,
 39 |                            float weight_pos,
 40 |                            float gamma,
 41 |                            float alpha,
 42 |                            int num_classes){
 43 |     // Grab the input tensor
 44 |     float * logits_flat = THCudaTensor_data(state, logits);
 45 |     int * targets_flat = THCudaIntTensor_data(state, targets);
 46 | 
 47 |     float * dX_data_flat = THCudaTensor_data(state, dX_data);
 48 | 
 49 |     cudaStream_t stream = THCState_getCurrentStream(state);
 50 |     SigmoidFocalLossBackwardLaucher(
 51 |         N, logits_flat, targets_flat, dX_data_flat,
 52 |         weight_pos, gamma, alpha, num_classes, stream);
 53 | 
 54 |     return 1;
 55 | }
 56 | 
 57 | int focal_loss_softmax_forward_cuda(
 58 |                            int N,
 59 |                            THCudaTensor * logits,
 60 |                            THCudaIntTensor * targets,
 61 |                            float weight_pos,
 62 |                            float gamma, 
 63 |                            float alpha,
 64 |                            int num_classes,
 65 |                            THCudaTensor * losses,
 66 |                            THCudaTensor * priors){
 67 |     // Grab the input tensor
 68 |     float * logits_flat = THCudaTensor_data(state, logits);
 69 |     int * targets_flat = THCudaIntTensor_data(state, targets);
 70 | 
 71 |     float * losses_flat = THCudaTensor_data(state, losses);
 72 |     float * priors_flat = THCudaTensor_data(state, priors);
 73 | 
 74 |     cudaStream_t stream = THCState_getCurrentStream(state);
 75 | 
 76 |     SoftmaxFocalLossForwardLaucher(
 77 |         N, logits_flat, targets_flat, weight_pos, 
 78 |         gamma, alpha, num_classes, losses_flat, priors_flat, stream);
 79 | 
 80 |     return 1;
 81 | }
 82 | 
 83 | int focal_loss_softmax_backward_cuda(
 84 |                            int N,
 85 |                            THCudaTensor * logits,
 86 |                            THCudaIntTensor * targets,
 87 |                            THCudaTensor * dX_data,
 88 |                            float weight_pos,
 89 |                            float gamma,
 90 |                            float alpha,
 91 |                            int num_classes,
 92 |                            THCudaTensor * priors,
 93 |                            THCudaTensor * buff){
 94 |     // Grab the input tensor
 95 |     float * logits_flat = THCudaTensor_data(state, logits);
 96 |     int * targets_flat = THCudaIntTensor_data(state, targets);
 97 | 
 98 |     float * dX_data_flat = THCudaTensor_data(state, dX_data);
 99 |     float * priors_flat = THCudaTensor_data(state, priors);
100 |     float * buff_flat = THCudaTensor_data(state, buff);
101 | 
102 |     cudaStream_t stream = THCState_getCurrentStream(state);
103 |     SoftmaxFocalLossBackwardLaucher(
104 |         N, logits_flat, targets_flat, dX_data_flat,
105 |         weight_pos, gamma, alpha, num_classes, priors_flat, buff_flat, stream);
106 | 
107 |     return 1;
108 | }
109 | 


--------------------------------------------------------------------------------
/extensions/_roi_pooling/src/roi_pooling.c:
--------------------------------------------------------------------------------
  1 | #include <TH/TH.h>
  2 | #include <math.h>
  3 | 
  4 | int roi_pooling_forward(int pooled_height, int pooled_width, float spatial_scale,
  5 |                         THFloatTensor * features, THFloatTensor * rois, THFloatTensor * output)
  6 | {
  7 |     // Grab the input tensor
  8 |     float * data_flat = THFloatTensor_data(features);
  9 |     float * rois_flat = THFloatTensor_data(rois);
 10 | 
 11 |     float * output_flat = THFloatTensor_data(output);
 12 | 
 13 |     // Number of ROIs
 14 |     int num_rois = THFloatTensor_size(rois, 0);
 15 |     int size_rois = THFloatTensor_size(rois, 1);
 16 |     // batch size
 17 |     int batch_size = THFloatTensor_size(features, 0);
 18 |     if(batch_size != 1)
 19 |     {
 20 |         return 0;
 21 |     }
 22 |     // data height
 23 |     int data_height = THFloatTensor_size(features, 1);
 24 |     // data width
 25 |     int data_width = THFloatTensor_size(features, 2);
 26 |     // Number of channels
 27 |     int num_channels = THFloatTensor_size(features, 3);
 28 | 
 29 |     // Set all element of the output tensor to -inf.
 30 |     THFloatStorage_fill(THFloatTensor_storage(output), -1);
 31 | 
 32 |     // For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R
 33 |     int index_roi = 0;
 34 |     int index_output = 0;
 35 |     int n;
 36 |     for (n = 0; n < num_rois; ++n)
 37 |     {
 38 |         int roi_batch_ind = rois_flat[index_roi + 0];
 39 |         int roi_start_w = round(rois_flat[index_roi + 1] * spatial_scale);
 40 |         int roi_start_h = round(rois_flat[index_roi + 2] * spatial_scale);
 41 |         int roi_end_w = round(rois_flat[index_roi + 3] * spatial_scale);
 42 |         int roi_end_h = round(rois_flat[index_roi + 4] * spatial_scale);
 43 |         //      CHECK_GE(roi_batch_ind, 0);
 44 |         //      CHECK_LT(roi_batch_ind, batch_size);
 45 | 
 46 |         int roi_height = fmaxf(roi_end_h - roi_start_h + 1, 1);
 47 |         int roi_width = fmaxf(roi_end_w - roi_start_w + 1, 1);
 48 |         float bin_size_h = (float)(roi_height) / (float)(pooled_height);
 49 |         float bin_size_w = (float)(roi_width) / (float)(pooled_width);
 50 | 
 51 |         int index_data = roi_batch_ind * data_height * data_width * num_channels;
 52 |         const int output_area = pooled_width * pooled_height;
 53 | 
 54 |         int c, ph, pw;
 55 |         for (ph = 0; ph < pooled_height; ++ph)
 56 |         {
 57 |             for (pw = 0; pw < pooled_width; ++pw)
 58 |             {
 59 |                 int hstart = (floor((float)(ph) * bin_size_h));
 60 |                 int wstart = (floor((float)(pw) * bin_size_w));
 61 |                 int hend = (ceil((float)(ph + 1) * bin_size_h));
 62 |                 int wend = (ceil((float)(pw + 1) * bin_size_w));
 63 | 
 64 |                 hstart = fminf(fmaxf(hstart + roi_start_h, 0), data_height);
 65 |                 hend = fminf(fmaxf(hend + roi_start_h, 0), data_height);
 66 |                 wstart = fminf(fmaxf(wstart + roi_start_w, 0), data_width);
 67 |                 wend = fminf(fmaxf(wend + roi_start_w, 0), data_width);
 68 | 
 69 |                 const int pool_index = index_output + (ph * pooled_width + pw);
 70 |                 int is_empty = (hend <= hstart) || (wend <= wstart);
 71 |                 if (is_empty)
 72 |                 {
 73 |                     for (c = 0; c < num_channels * output_area; c += output_area)
 74 |                     {
 75 |                         output_flat[pool_index + c] = 0;
 76 |                     }
 77 |                 }
 78 |                 else
 79 |                 {
 80 |                     int h, w, c;
 81 |                     for (h = hstart; h < hend; ++h)
 82 |                     {
 83 |                         for (w = wstart; w < wend; ++w)
 84 |                         {
 85 |                             for (c = 0; c < num_channels; ++c)
 86 |                             {
 87 |                                 const int index = (h * data_width + w) * num_channels + c;
 88 |                                 if (data_flat[index_data + index] > output_flat[pool_index + c * output_area])
 89 |                                 {
 90 |                                     output_flat[pool_index + c * output_area] = data_flat[index_data + index];
 91 |                                 }
 92 |                             }
 93 |                         }
 94 |                     }
 95 |                 }
 96 |             }
 97 |         }
 98 | 
 99 |         // Increment ROI index
100 |         index_roi += size_rois;
101 |         index_output += pooled_height * pooled_width * num_channels;
102 |     }
103 |     return 1;
104 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | ![License CC BY-NC-SA 4.0](https://img.shields.io/badge/license-CC4.0-blue.svg)
  2 | ![Python 3.6](https://img.shields.io/badge/python-3.6-green.svg)
  3 | 
  4 | # SCDA
  5 | 
  6 | Code for our paper **"Adapting Object Detectors via Selective Cross-Domain Alignment" (CVPR 2019)**.
  7 | 
  8 | ![img|center](./img/pipeline4.png)
  9 | 
 10 | 
 11 | ## Quick View
 12 | A quick overview of the paper can be found in these [slides](https://drive.google.com/open?id=1P7gf9dicO1K07l-btBRgG3hZM9ofl0kC).
 13 | 
 14 | ## Installation
 15 | #### Requirements
 16 | - PyTorch 0.4.1 (also tested with 0.4.0)
 17 | - torchvision 0.2.1
 18 | - OpenCV
 19 | - scikit-learn
 20 | - Cython
 21 | - GCC >= 4.9 (tested with 5.4)
 22 | 
 23 | #### Install
 24 | 1. cd extensions; ./build_all.sh
 25 | 2. cd datasets/pycocotools && make  (install pycocotools according to its guidelines)
 26 | 
 27 | ## Data Preparation
 28 | Download the cityscapes and foggy-cityscapes datasets from [cityscapes](https://www.cityscapes-dataset.com/downloads/).
 29 | 
 30 | We provide the meta-files for training and validation; you can find them at this [url](https://drive.google.com/open?id=1Cv6pLJh0E5elvhhTcXaH6eZXDvN8KfEX). They consist of train.txt, foggy_train.txt and foggy_val.txt. If you want to train on your own datasets, please customize these meta-files for your setting.
 31 | 
 32 | ## Training
 33 | We provide several training scripts for our three types of models. Following MMDetection, we use slurm for distributed training (details can be found [here](https://github.com/open-mmlab/mmdetection/blob/master/tools/slurm_train.sh)).
 34 | 
 35 | 1. Modify the hyper-parameters in the bash scripts under ./example/faster-rcnn/cityscapes/vgg/, for example train_meta_file, target_meta_file and val_meta_file.
 36 | You also need to download the ImageNet-pretrained models, such as vgg16.
 37 | 2. The detection hyper-parameters are stored in config_512.json, and you can customize them.
 38 | 3. training:
 39 | ```bash
 40 | cd ./example/faster-rcnn/cityscapes/vgg/
 41 | sh 4cluster.sh # for our type-2
 42 | 
 43 | # Details (we follow the slurm training in MMDetection); 
 44 | # For the detailed descriptions of these hyperparameters, please refer to the ./tools/faster_rcnn_train_val.py
 45 | 
 46 | #!/bin/bash
 47 | ROOT=../../../..
 48 | export PYTHONPATH=$ROOT:$PYTHONPATH
 49 | #--------------------------
 50 | job_name=training_4cluster
 51 | ckdir=4cluster
 52 | mkdir ./${ckdir}/${job_name}
 53 | #--------------------------
 54 | PARTITION=$1
 55 | GPUS=${5:-8}
 56 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
 57 | 
 58 | srun -p ${PARTITION} --ntasks=${GPUS} --gres=gpu:${GPUS_PER_NODE} \
 59 | 		--ntasks-per-node=${GPUS_PER_NODE} \
 60 |     --job-name=${job_name} \
 61 | python -u -W ignore $ROOT/tools/faster_rcnn_train_val.py \
 62 |   --config=config_512.json \
 63 |   --dist=1 \
 64 |   --fix_num=0 \
 65 |   --L1=1 \
 66 |   --cluster_num=4 \
 67 |   --threshold=128 \
 68 |   --recon_size=256 \
 69 |   --port=21603 \
 70 |   --arch=vgg16_FasterRCNN \
 71 |   --warmup_epochs=1 \
 72 |   --lr=0.0000125 \
 73 |   --step_epochs=16,22 \
 74 |   --batch-size=1 \
 75 |   --epochs=25 \
 76 |   --dataset=cityscapes \
 77 |   --train_meta_file=/path/to/train.txt \
 78 |   --target_meta_file=/path/to/foggy_train.txt \
 79 |   --val_meta_file=/path/to/foggy_val.txt \
 80 |   --datadir=/path/to/leftImg8bit/ \
 81 |   --pretrained=/path/to/torchvision_models/vgg16-397923af.pth \
 82 |   --results_dir=${ckdir}/${job_name}/results_dir \
 83 |   --save_dir=${ckdir}/${job_name} \
 84 |   2>&1 | tee ${ckdir}/${job_name}/train.log
 85 | 
 86 | ```
 87 | 
 88 | 
 89 | ## Evaluation
 90 | We provide our pre-trained model at this [url](https://drive.google.com/open?id=1FlQePvlavZVgmzBik6IdcG_xWh0xtROz). You can download it and evaluate it (please modify the parameters in the evaluation scripts first).
 91 | 
 92 | We support slurm evaluation and single-gpu evaluation. Please check the eval.sh and eval_single.sh
 93 | ```bash
 94 | sh eval_single.sh # evaluation with single gpu ==> mAP:33.91
 95 | ```
 96 | 
 97 | ## Citation
 98 | If you find our work useful in your research, please consider citing our paper:
 99 | ```
100 | @inproceedings{zhu2019adapting,
101 |   title={Adapting Object Detectors via Selective Cross-Domain Alignment},
102 |   author={Zhu, Xinge and Pang, Jiangmiao and Yang, Ceyuan and Shi, Jianping and Lin, Dahua},
103 |   booktitle={Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition},
104 |   pages={687--696},
105 |   year={2019}
106 | }
107 | ```
108 | 
109 | ## Acknowledgments
110 | We thank the authors of the open-source codebases [mmdetection](https://github.com/open-mmlab/mmdetection) and [Detectron.pytorch](https://github.com/roytseng-tw/Detectron.pytorch).
111 | 
112 | 


--------------------------------------------------------------------------------
/utils/bbox_helper.py:
--------------------------------------------------------------------------------
  1 | #encoding: utf-8
  2 | 
  3 | import numpy as np
  4 | import warnings
  5 | from extensions._cython_bbox import cython_bbox
  6 | 
  7 | 
  8 | def bbox_iou_overlaps(b1, b2):
  9 |     return cython_bbox.bbox_overlaps(b1.astype(np.float32), b2.astype(np.float32))
 10 |     '''
 11 |     :argument
 12 |         b1,b2: [n, k], k>=4, x1,y1,x2,y2,...
 13 |     :returns
 14 |         intersection-over-union pair-wise.
 15 |     '''
 16 |     area1 = (b1[:, 2] - b1[:, 0]) * (b1[:, 3] - b1[:, 1])
 17 |     area2 = (b2[:, 2] - b2[:, 0]) * (b2[:, 3] - b2[:, 1])
 18 |     inter_xmin = np.maximum(b1[:, 0].reshape(-1, 1), b2[:, 0].reshape(1, -1))
 19 |     inter_ymin = np.maximum(b1[:, 1].reshape(-1, 1), b2[:, 1].reshape(1, -1))
 20 |     inter_xmax = np.minimum(b1[:, 2].reshape(-1, 1), b2[:, 2].reshape(1, -1))
 21 |     inter_ymax = np.minimum(b1[:, 3].reshape(-1, 1), b2[:, 3].reshape(1, -1))
 22 |     inter_h = np.maximum(inter_xmax - inter_xmin, 0)
 23 |     inter_w = np.maximum(inter_ymax - inter_ymin, 0)
 24 |     inter_area = inter_h * inter_w
 25 |     union_area1 = area1.reshape(-1, 1) + area2.reshape(1, -1)
 26 |     union_area2 = (union_area1 - inter_area)
 27 |     return inter_area / np.maximum(union_area2, 1)
 28 |     
 29 | def bbox_iof_overlaps(b1, b2):
 30 |     '''
 31 |     :argument
 32 |         b1,b2: [n, k], k>=4 with x1,y1,x2,y2,....
 33 |     :returns
 34 |         intersection-over-former-box pair-wise
 35 |     '''
 36 |     area1 = (b1[:, 2] - b1[:, 0]) * (b1[:, 3] - b1[:, 1])
 37 |     # area2 = (b2[:, 2] - b2[:, 0]) * (b2[:, 3] - b2[:, 1])
 38 |     inter_xmin = np.maximum(b1[:, 0].reshape(-1, 1), b2[:, 0].reshape(1, -1))
 39 |     inter_ymin = np.maximum(b1[:, 1].reshape(-1, 1), b2[:, 1].reshape(1, -1))
 40 |     inter_xmax = np.minimum(b1[:, 2].reshape(-1, 1), b2[:, 2].reshape(1, -1))
 41 |     inter_ymax = np.minimum(b1[:, 3].reshape(-1, 1), b2[:, 3].reshape(1, -1))
 42 |     inter_h = np.maximum(inter_xmax - inter_xmin, 0)
 43 |     inter_w = np.maximum(inter_ymax - inter_ymin, 0)
 44 |     inter_area = inter_h * inter_w
 45 |     return inter_area / np.maximum(area1[:,np.newaxis], 1)
 46 | 
 47 | def center_to_corner(boxes):
 48 |     '''
 49 |     :argument
 50 |         boxes: [N, 4] of center_x, center_y, w, h
 51 |     :returns
 52 |         boxes: [N, 4] of xmin, ymin, xmax, ymax
 53 |     '''
 54 |     xmin = boxes[:, 0] - boxes[:, 2] / 2.
 55 |     ymin = boxes[:, 1] - boxes[:, 3] / 2.
 56 |     xmax = boxes[:, 0] + boxes[:, 2] / 2.
 57 |     ymax = boxes[:, 1] + boxes[:, 3] / 2.
 58 |     return np.vstack([xmin, ymin, xmax, ymax]).transpose()
 59 | 
 60 | def corner_to_center(boxes):
 61 |     '''
 62 |         inverse of center_to_corner
 63 |     '''
 64 |     cx = (boxes[:, 0] + boxes[:, 2]) / 2.
 65 |     cy = (boxes[:, 1] + boxes[:, 3]) / 2.
 66 |     w = (boxes[:, 2] - boxes[:, 0])
 67 |     h = (boxes[:, 3] - boxes[:, 1])
 68 |     return np.vstack([cx, cy, w, h]).transpose()
 69 | 
 70 | def compute_loc_targets(raw_bboxes, gt_bboxes):
 71 |     '''
 72 |     :argument
 73 |         raw_bboxes, gt_bboxes:[N, k] first dim must be equal
 74 |     :returns
 75 |         loc_targets:[N, 4]
 76 |     '''
 77 |     bb = corner_to_center(raw_bboxes) # cx, cy, w, h
 78 |     gt = corner_to_center(gt_bboxes)
 79 |     assert (np.all(bb[:, 2] > 0))
 80 |     assert (np.all(bb[:, 3] > 0))
 81 |     trgt_ctr_x = (gt[:, 0] - bb[:, 0]) / bb[:, 2]
 82 |     trgt_ctr_y = (gt[:, 1] - bb[:, 1]) / bb[:, 3]
 83 |     trgt_w = np.log(gt[:, 2] / bb[:, 2])
 84 |     trgt_h = np.log(gt[:, 3] / bb[:, 3])
 85 |     return np.vstack([trgt_ctr_x, trgt_ctr_y, trgt_w, trgt_h]).transpose()
 86 | 
 87 | 
 88 | def compute_loc_bboxes(raw_bboxes, deltas):
 89 |     '''
 90 |     :argument
 91 |         raw_bboxes, delta:[N, k] first dim must be equal
 92 |     :returns
 93 |         bboxes:[N, 4]
 94 |     '''
 95 |     with warnings.catch_warnings(record=True) as w:
 96 |         warnings.simplefilter("always")
 97 |         bb = corner_to_center(raw_bboxes) # cx, cy, w, h
 98 |         dt_cx = deltas[:, 0] * bb[:, 2] + bb[:, 0]
 99 |         dt_cy = deltas[:, 1] * bb[:, 3] + bb[:, 1]
100 |         dt_w = np.exp(deltas[:, 2]) * bb[:, 2]
101 |         dt_h = np.exp(deltas[:, 3]) * bb[:, 3]
102 |         dt = np.vstack([dt_cx, dt_cy, dt_w, dt_h]).transpose()
103 |         return center_to_corner(dt)
104 | 
def clip_bbox(bbox, img_size):
    '''
    Clip the first four columns (x1, y1, x2, y2) of bbox to the image, in place.

    :argument
        bbox: [N, k], k>=4
        img_size: sequence whose first two entries are (height, width)
    :returns
        the same bbox array, with x clipped to [0, w-1] and y to [0, h-1]
    '''
    h, w = img_size[:2]
    # (column, upper bound): x columns use the width, y columns the height.
    for col, upper in ((0, w - 1), (1, h - 1), (2, w - 1), (3, h - 1)):
        bbox[:, col] = np.clip(bbox[:, col], 0, upper)
    return bbox
112 | 
def compute_recall(box_pred, box_gt):
    '''
    Count the ground-truth boxes matched by at least one prediction.

    :argument
        box_pred, box_gt: box arrays accepted by bbox_iou_overlaps
    :returns
        (n_rc, n_gt): number of gt boxes whose best IoU with any prediction
        exceeds 0.5, and the total number of gt boxes
    '''
    n_gt = box_gt.shape[0]
    # Nothing can be recalled without predictions or without ground truth.
    if n_gt == 0 or box_pred.size == 0:
        return 0, n_gt

    # Rows are gt boxes, columns are predictions.
    best_iou = np.max(bbox_iou_overlaps(box_gt, box_pred), axis=1)
    n_rc = np.where(best_iou > 0.5)[0].size
    return n_rc, n_gt
122 | 
123 | 


--------------------------------------------------------------------------------
/datasets/pycocotools/mask.py:
--------------------------------------------------------------------------------
  1 | __author__ = 'tsungyi'
  2 | 
  3 | #import pycocotools._mask as _mask
  4 | from . import _mask
  5 | 
  6 | # Interface for manipulating masks stored in RLE format.
  7 | #
  8 | # RLE is a simple yet efficient format for storing binary masks. RLE
  9 | # first divides a vector (or vectorized image) into a series of piecewise
 10 | # constant regions and then for each piece simply stores the length of
 11 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
 12 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
 13 | # (note that the odd counts are always the numbers of zeros). Instead of
 14 | # storing the counts directly, additional compression is achieved with a
 15 | # variable bitrate representation based on a common scheme called LEB128.
 16 | #
 17 | # Compression is greatest given large piecewise constant regions.
 18 | # Specifically, the size of the RLE is proportional to the number of
 19 | # *boundaries* in M (or for an image the number of boundaries in the y
 20 | # direction). Assuming fairly simple shapes, the RLE representation is
 21 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
 22 | # is substantially lower, especially for large simple objects (large n).
 23 | #
 24 | # Many common operations on masks can be computed directly using the RLE
 25 | # (without need for decoding). This includes computations such as area,
 26 | # union, intersection, etc. All of these operations are linear in the
 27 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
 28 | # of the object. Computing these operations on the original mask is O(n).
 29 | # Thus, using the RLE can result in substantial computational savings.
 30 | #
 31 | # The following API functions are defined:
 32 | #  encode         - Encode binary masks using RLE.
 33 | #  decode         - Decode binary masks encoded via RLE.
 34 | #  merge          - Compute union or intersection of encoded masks.
 35 | #  iou            - Compute intersection over union between masks.
 36 | #  area           - Compute area of encoded masks.
 37 | #  toBbox         - Get bounding boxes surrounding encoded masks.
 38 | #  frPyObjects    - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
 39 | #
 40 | # Usage:
 41 | #  Rs     = encode( masks )
 42 | #  masks  = decode( Rs )
 43 | #  R      = merge( Rs, intersect=false )
 44 | #  o      = iou( dt, gt, iscrowd )
 45 | #  a      = area( Rs )
 46 | #  bbs    = toBbox( Rs )
 47 | #  Rs     = frPyObjects( [pyObjects], h, w )
 48 | #
 49 | # In the API the following formats are used:
 50 | #  Rs      - [dict] Run-length encoding of binary masks
 51 | #  R       - dict Run-length encoding of binary mask
 52 | #  masks   - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
 53 | #  iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
 54 | #  bbs     - [nx4] Bounding box(es) stored as [x y w h]
 55 | #  poly    - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
 56 | #  dt,gt   - May be either bounding boxes or encoded masks
 57 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
 58 | #
 59 | # Finally, a note about the intersection over union (iou) computation.
 60 | # The standard iou of a ground truth (gt) and detected (dt) object is
 61 | #  iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
 62 | # For "crowd" regions, we use a modified criteria. If a gt object is
 63 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
 64 | # Choosing gt' in the crowd gt that best matches the dt can be done using
 65 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
 66 | #  iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
 67 | # For crowd gt regions we use this modified criteria above for the iou.
 68 | #
 69 | # To compile run "python setup.py build_ext --inplace"
 70 | # Please do not contact us for help with compiling.
 71 | #
 72 | # Microsoft COCO Toolbox.      version 2.0
 73 | # Data, paper, and tutorials available at:  http://mscoco.org/
 74 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 75 | # Licensed under the Simplified BSD License [see coco/license.txt]
 76 | 
# Re-export the `_mask` extension routines that need no Python-side wrapping.
iou         = _mask.iou
merge       = _mask.merge
frPyObjects = _mask.frPyObjects
 80 | 
 81 | def encode(bimask):
 82 |     if len(bimask.shape) == 3:
 83 |         return _mask.encode(bimask)
 84 |     elif len(bimask.shape) == 2:
 85 |         h, w = bimask.shape
 86 |         return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
 87 | 
 88 | def decode(rleObjs):
 89 |     if type(rleObjs) == list:
 90 |         return _mask.decode(rleObjs)
 91 |     else:
 92 |         return _mask.decode([rleObjs])[:,:,0]
 93 | 
 94 | def area(rleObjs):
 95 |     if type(rleObjs) == list:
 96 |         return _mask.area(rleObjs)
 97 |     else:
 98 |         return _mask.area([rleObjs])[0]
 99 | 
def toBbox(rleObjs):
    """Compute bounding box(es) of RLE mask(s) with `_mask.toBbox`.

    A list (or list subclass) is processed as a batch and the result returned
    unchanged. Any other object is treated as a single RLE: it is wrapped in
    a one-element list and the first entry of the result is returned.
    """
    if isinstance(rleObjs, list):
        return _mask.toBbox(rleObjs)
    return _mask.toBbox([rleObjs])[0]
105 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_sigmoid_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <math.h>
  3 | #include <float.h>
  4 | #include "focal_loss_sigmoid_kernel.h"
  5 | 
  6 | #define DIVUP(m, n) ((m) / (m) + ((m) % (n) > 0))
  7 | 
  8 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
  9 |   for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 10 |        i += blockDim.x * gridDim.x)
 11 | 
 12 | __global__ void SigmoidFocalLossKernel(
 13 |     const int N, const float* logits,
 14 |     const int* targets, const float weight_pos,
 15 |     const float gamma, const float alpha,
 16 |     const int num_classes, float* losses) {
 17 |   CUDA_1D_KERNEL_LOOP(i, N) {
 18 |       int d = i % num_classes;   //current class
 19 |       int tmp = i / num_classes; //targets index
 20 |       int t = targets[tmp];
 21 | 
 22 |     // check whether the class is true class or not.
 23 |     // The target classes are in range 1 - 81 and the d is in range 0-80
 24 |     // because we predict A*80 dim, so for comparison purpose, compare t and (d+1)
 25 |     float c1 = (t == (d + 1));
 26 |     float c2 = (t != -1 & t != (d + 1));
 27 | 
 28 |     float Np = max(weight_pos, 1.0);
 29 |     float zn = (1.0 - alpha) / Np;
 30 |     float zp = alpha / Np;
 31 | 
 32 |     // p = 1. / 1. + expf(-x)
 33 |     float p = 1. / (1. + expf(-logits[i]));
 34 | 
 35 |     // (1 - p)**gamma * log(p) where
 36 |     float term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN));
 37 |     // p**gamma * log(1 - p)
 38 |     float term2 =
 39 |         powf(p, gamma) *
 40 |         (-1. * logits[i] * (logits[i] >= 0) -
 41 |          logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0))));
 42 | 
 43 |     losses[i] = 0.0;
 44 |     losses[i] += -c1 * term1 * zp;
 45 |     losses[i] += -c2 * term2 * zn;
 46 |   }
 47 | }
 48 | 
 49 | __global__ void SigmoidFocalLossGradientKernel(
 50 |     const int N, const float* logits,
 51 |     const int* targets, float* dX_data, const float weight_pos,
 52 |     const float gamma, const float alpha, const int num_classes) {
 53 |   CUDA_1D_KERNEL_LOOP(i, N) {
 54 |       int d = i % num_classes;   //current class
 55 |       int tmp = i / num_classes; //targets index
 56 |       int t = targets[tmp];
 57 | 
 58 |       float Np = max(weight_pos, 1.0);
 59 |       float zn = (1.0 - alpha) / Np;
 60 |       float zp = alpha / Np;
 61 |       //int t = targets[n * (H * W * A) + a * (H * W) + y * W + x];
 62 | 
 63 |       float c1 = (t == (d + 1));
 64 |       float c2 = (t != -1 & t != (d + 1));
 65 |       float p = 1. / (1. + expf(-logits[i]));
 66 | 
 67 |       // (1-p)**g * (1 - p - g*p*log(p))
 68 |       float term1 =
 69 |           powf((1. - p), gamma) *
 70 |           (1. - p - (p * gamma * logf(max(p, FLT_MIN))));
 71 |       // (p**g) * (g*(1-p)*log(1-p) - p)
 72 |       float term2 =
 73 |           powf(p, gamma) *
 74 |           ((-1. * logits[i] * (logits[i] >= 0) -
 75 |            logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) *
 76 |            (1. - p) * gamma - p);
 77 |       dX_data[i] = 0.0;
 78 |       dX_data[i] += -c1 * zp * term1;
 79 |       dX_data[i] += -c2 * zn * term2;
 80 |   }
 81 | }
 82 | 
 83 | int SigmoidFocalLossForwardLaucher(
 84 |     const int N, const float* logits,
 85 |     const int* targets, const float weight_pos,
 86 |     const float gamma, const float alpha,
 87 |     const int num_classes, float* losses, cudaStream_t stream){
 88 | 
 89 |     const int kThreadsPerBlock = 1024;
 90 |     int output_size = N;
 91 |     cudaError_t err;
 92 | 
 93 |     err = cudaGetLastError();
 94 |     if(cudaSuccess != err)
 95 |     {
 96 |         fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
 97 |                 __LINE__, cudaGetErrorString( err ) );
 98 |         exit( -1 );
 99 |     }
100 |     SigmoidFocalLossKernel<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
101 |       N, logits, targets, weight_pos, gamma, alpha, num_classes, losses);
102 |     err = cudaGetLastError();
103 |     if(cudaSuccess != err)
104 |     {
105 |         fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
106 |                 __LINE__, cudaGetErrorString( err ) );
107 |         exit( -1 );
108 |     }
109 | 
110 |     return 1;
111 | }
112 | 
113 | 
// Launches SigmoidFocalLossGradientKernel on `stream` with one thread per
// element (1024 threads per block, enough blocks to cover N).
//
// A CUDA error reported right after the launch is printed to stderr and
// terminates the process with exit(-1).
// Returns 1 otherwise, including when N <= 0, in which case nothing is
// launched.
int SigmoidFocalLossBackwardLaucher(
    const int N, const float* logits, const int* targets,
    float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes,
    cudaStream_t stream){

    const int kThreadsPerBlock = 1024;
    cudaError_t err;

    // An empty problem would produce a zero-block grid, which CUDA rejects as
    // an invalid launch configuration; there is nothing to compute anyway.
    if (N <= 0)
    {
        return 1;
    }

    const int num_blocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;
    SigmoidFocalLossGradientKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
        N, logits, targets, dX_data, weight_pos, gamma, alpha, num_classes);
    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/functions/anchor_target.py:
--------------------------------------------------------------------------------
  1 | #encoding: utf-8
  2 | # from utils.debug_helper import debugger
  3 | from utils import bbox_helper
  4 | from utils import anchor_helper
  5 | import numpy as np
  6 | import torch
  7 | import logging
  8 | logger = logging.getLogger('global')
  9 | 
 10 | def to_np_array(x):
 11 |     if x is None:
 12 |         return None
 13 |     # if isinstance(x, Variable): x = x.data
 14 |     return x.cpu().data.numpy() if torch.is_tensor(x) else np.array(x)
 15 | 
def compute_anchor_targets(feature_size, cfg, ground_truth_bboxes, image_info, ignore_regions = None):
    r'''
    Label every anchor of the feature map as positive (1), negative (0) or
    ignored (-1), sub-sample the labels, and build the box regression targets
    for the positive anchors.

    :argument
        cfg.keys(): {
            'anchor_ratios', anchor_scales, anchor_stride,
            negative_iou_thresh, ignore_iou_thresh,positive_iou_thresh,
            positive_percent, rpn_batch_size
        }
        feature_size: IntTensor, [4]. i.e. batch, num_anchors * 4, height, width
        ground_truth_bboxes: FloatTensor, [batch, max_num_gt_bboxes, 5]
        image_info: FloatTensor, [batch, 3]
        ignore_regions: FloatTensor, [batch, max_num_ignore_regions, 4]
    :returns
        cls_targets: Variable, [batch, num_anchors * 1, height, width]
        loc_targets, loc_masks: Variable, [batch, num_anchors * 4, height, width]
        loc_nomalizer: int, number of anchors whose final label is >= 0 (at least 1)
    '''
    # NOTE(review): cuda_device is assigned here but never used below; the
    # outputs are moved with a plain .cuda() call instead.
    cuda_device = ground_truth_bboxes.device
    # NOTE(review): image_info is converted but not read again in this function.
    ground_truth_bboxes, image_info, ignore_regions = \
        map(to_np_array, [ground_truth_bboxes, image_info, ignore_regions])

    batch_size, num_anchors_4, featmap_h, featmap_w = feature_size
    num_anchors = num_anchors_4 // 4
    # the channel count must be an exact multiple of 4
    assert(num_anchors * 4 == num_anchors_4)
    # [K*A, 4]
    anchors_overplane = anchor_helper.get_anchors_over_plane(
            featmap_h, featmap_w, cfg['anchor_ratios'], cfg['anchor_scales'],
            cfg['anchor_stride'])

    B = batch_size
    A = num_anchors
    K = featmap_h * featmap_w
    # NOTE(review): G is not used after this assignment.
    G = ground_truth_bboxes.shape[1]

    # compute overlaps between anchors and gt_bboxes within each batch
    # shape: [B, K*A, G]
    overlaps = np.stack([bbox_helper.bbox_iou_overlaps(anchors_overplane,
                                                       ground_truth_bboxes[ix]) for ix in range(B)], axis = 0)

    # shape of [B, K*A]: best-matching gt index and its overlap for each anchor
    argmax_overlaps = overlaps.argmax(axis = 2)
    max_overlaps = overlaps.max(axis = 2)

    # [B, G]: best overlap achieved by any anchor for each gt
    gt_max_overlaps = overlaps.max(axis=1)
    # ignore those gt_max_overlaps that are too small; presumably overlaps are
    # never negative, so -1 matches no anchor in the equality test below
    gt_max_overlaps[gt_max_overlaps < 0.1] = -1
    # every (batch, anchor, gt) triple where the anchor attains that gt's best overlap
    gt_argmax_b_ix, gt_argmax_ka_ix, gt_argmax_g_ix = \
        np.where(overlaps == gt_max_overlaps[:, np.newaxis, :])
    # match each anchor to the ground truth bbox
    argmax_overlaps[gt_argmax_b_ix, gt_argmax_ka_ix] = gt_argmax_g_ix

    # start with every anchor ignored (-1), then mark low-overlap anchors negative (0)
    labels = np.empty([B, K*A], dtype=np.int64)
    labels.fill(-1)
    labels[max_overlaps < cfg['negative_iou_thresh']] = 0

    # remove negatives located in ignore regions
    if ignore_regions is not None:
        iof_overlaps = np.stack([bbox_helper.bbox_iof_overlaps
                                     (anchors_overplane, ignore_regions[ix]) for ix in range(B)], axis=0)
        max_iof_overlaps = iof_overlaps.max(axis=2)  # [B, K*A]
        labels[max_iof_overlaps > cfg['ignore_iou_thresh']] = -1

    # positives are assigned last, so they override negative/ignored labels
    labels[gt_argmax_b_ix, gt_argmax_ka_ix] = 1
    labels[max_overlaps > cfg['positive_iou_thresh']] = 1

    # sampling: the budget is shared across the whole batch, not per image
    num_pos_sampling = int(cfg['positive_percent'] * cfg['rpn_batch_size'] * batch_size)
    pos_b_ix, pos_ka_ix = np.where(labels > 0)
    num_positives = len(pos_b_ix)
    if num_positives > num_pos_sampling:
        # randomly demote surplus positives to ignored
        remove_ix = np.random.choice(num_positives, size = num_positives - num_pos_sampling, replace = False)
        labels[pos_b_ix[remove_ix], pos_ka_ix[remove_ix]] = -1
        num_positives = num_pos_sampling
    # negatives fill whatever remains of the budget
    num_neg_sampling = cfg['rpn_batch_size'] * batch_size - num_positives
    neg_b_ix, neg_ka_ix = np.where(labels == 0)
    num_negatives = len(neg_b_ix)
    if num_negatives > num_neg_sampling:
        # randomly demote surplus negatives to ignored
        remove_ix = np.random.choice(num_negatives, size = num_negatives - num_neg_sampling, replace = False)
        labels[neg_b_ix[remove_ix], neg_ka_ix[remove_ix]] = -1

    # positives that survived sampling
    pos_b_ix, pos_ka_ix = np.where(labels > 0)
    pos_anchors = anchors_overplane[pos_ka_ix, :]

    # gt box matched to each surviving positive anchor
    pos_target_ix = argmax_overlaps[pos_b_ix, pos_ka_ix]
    pos_target_gt = ground_truth_bboxes[pos_b_ix, pos_target_ix]
    pos_loc_targets = bbox_helper.compute_loc_targets(pos_anchors, pos_target_gt)

    # regression targets and masks are zero everywhere except at positive anchors
    loc_targets = np.zeros([B, K*A, 4], dtype = np.float32)
    loc_targets[pos_b_ix, pos_ka_ix, :] = pos_loc_targets
    # loc_weights = np.zeros([B, K*A, 4])
    loc_masks = np.zeros([B, K*A, 4], dtype = np.float32)
    loc_masks[pos_b_ix, pos_ka_ix, :] = 1.

    # transpose to match the predicted convolution shape

    cls_targets = torch.from_numpy(labels).long().view(B, featmap_h, featmap_w, A).permute(0, 3, 1, 2).cuda().contiguous()
    loc_targets = torch.from_numpy(loc_targets).float().view(B, featmap_h, featmap_w, A * 4).permute(0, 3, 1, 2).cuda().contiguous()
    loc_masks = torch.from_numpy(loc_masks).float().view(B, featmap_h, featmap_w, A * 4).permute(0, 3, 1, 2).cuda().contiguous()
    # number of sampled (non-ignored) anchors, clamped to at least 1
    loc_nomalizer = max(1,len(np.where(labels >= 0)[0]))
    logger.debug('positive anchors:%d' % len(pos_b_ix))
    return cls_targets, loc_targets, loc_masks, loc_nomalizer
117 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/src/cuda/focal_loss_softmax_kernel.cu:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <math.h>
  3 | #include <float.h>
  4 | #include "focal_loss_softmax_kernel.h"
  5 | 
  6 | #define DIVUP(m, n) ((m) / (m) + ((m) % (n) > 0))
  7 | 
// Grid-stride loop over [0, n): each thread starts at its global 1-D index
// and advances by the total number of launched threads (blockDim.x * gridDim.x).
// NOTE(review): n is expanded without parentheses in `i < n`; the call sites
// visible in this file pass `N` or `N / num_classes`, which expand correctly.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)
 11 | 
 12 | __global__ void SpatialSoftmaxKernel(const int N, const float* Xdata, float* Pdata,
 13 |     const int num_classes) {
 14 |   CUDA_1D_KERNEL_LOOP(index, N / num_classes) {
 15 |     int base = index * num_classes; //base index
 16 | 
 17 |     // Subtract max on each cell for numerical reasons
 18 |     float max_val = -FLT_MAX;
 19 |     for(int c = 0; c < num_classes; ++c) {
 20 |       max_val = max(max_val, Xdata[base + c]);
 21 |     }
 22 |     // Exponentiate
 23 |     float expsum = 0.0f;
 24 |     for(int c = 0; c < num_classes; ++c) {
 25 |       float expx = expf(Xdata[base + c] - max_val);
 26 |       Pdata[base + c] = expx;
 27 |       expsum += expx;
 28 |     }
 29 |     // Normalize
 30 |     for(int c = 0; c < num_classes; ++c) {
 31 |       Pdata[base + c] /= expsum;
 32 |     }
 33 |   }
 34 | }
 35 | 
 36 | __global__ void SoftmaxFocalLossKernel(
 37 |     const int N, 
 38 |     const float* Pdata, const int* targets, float* losses,
 39 |     const float weight_pos, const float gamma, const float alpha,
 40 |     const int num_classes) {
 41 |   CUDA_1D_KERNEL_LOOP(i, N / num_classes) {
 42 | 
 43 |     int base = i * num_classes;
 44 |     const int label = static_cast<int>(targets[i]);
 45 | 
 46 |     float Np = max(weight_pos, 1.0);
 47 |     float z = (label == 0) * (1 - alpha) / Np +
 48 |               (label >= 1) * alpha / Np;
 49 | 
 50 |     losses[i] = 0.0;
 51 |     if (label >= 0) {
 52 |       losses[i] =
 53 |           -(powf(1.0 - Pdata[base + label], gamma) *
 54 |           log(max(Pdata[base + label], FLT_MIN))) * z;
 55 |     }
 56 |   }
 57 | }
 58 | 
 59 | __global__ void SoftmaxFocalLossGradientWeightKernel(
 60 |     const int N,
 61 |     const float* Pdata, const int* targets, float* buff,
 62 |     const float weight_pos, const float gamma, const float alpha,
 63 |     const int num_classes) {
 64 |   CUDA_1D_KERNEL_LOOP(i, N / num_classes) {
 65 | 
 66 |     int base = i * num_classes;
 67 |     const int label = static_cast<int>(targets[i]);
 68 |     float Np = max(weight_pos, 1.0);
 69 |     float z =  (label == 0) * (1 - alpha) / Np +
 70 |                (label >= 1) * alpha / Np;
 71 | 
 72 |     buff[i] = 0.0;
 73 |     if (label >= 0) {
 74 |       float onemp = 1. - Pdata[base + label];
 75 |       float p = Pdata[base + label];
 76 |       buff[i] =
 77 |           (-powf(onemp, gamma) +
 78 |           gamma * powf(onemp, gamma - 1) * p * log(max(p, FLT_MIN))) * z;
 79 |     }
 80 |   }
 81 | }
 82 | 
 83 | 
 84 | __global__ void SoftmaxFocalLossGradientKernel(
 85 |     const int N,
 86 |     const float* Pdata, const int* targets, const float* buff,
 87 |     float* dX, const int num_classes) {
 88 |   CUDA_1D_KERNEL_LOOP(i, N) {
 89 | 
 90 |     int ind = i / num_classes;
 91 |     int cls = i % num_classes;
 92 | 
 93 |     const int label = static_cast<int>(targets[ind]);
 94 | 
 95 |     float c1 = (label >= 0) * 1.0;
 96 |     float c2 = (label == cls) * 1.0;
 97 |     dX[i] = 0.0;
 98 |     dX[i] = c1 * buff[ind] * (c2 - Pdata[i]);
 99 |   }
100 | }
101 | 
// Host-side launcher for the forward pass: runs SpatialSoftmaxKernel to fill
// `priors` from `logits`, then SoftmaxFocalLossKernel to fill `losses`, both
// on `stream`. Prints the CUDA error and exits the process if an error is
// pending before or after the launches. Returns 1 on success.
int SoftmaxFocalLossForwardLaucher(
    const int N, const float* logits,
    const int* targets, const float weight_pos,
    const float gamma, const float alpha,
    const int num_classes, float* losses,
    float* priors, cudaStream_t stream){

    const int kThreadsPerBlock = 1024;
    // Both kernels iterate over N / num_classes rows, so they share one grid size.
    const int num_rows = N / num_classes;
    const int num_blocks = (num_rows + kThreadsPerBlock - 1) / kThreadsPerBlock;

    // Abort if an error is already pending before launching anything.
    cudaError_t err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    SpatialSoftmaxKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, logits, priors, num_classes);

    SoftmaxFocalLossKernel<<<num_blocks, kThreadsPerBlock, 0, stream>>>(
      N, priors, targets, losses, weight_pos, gamma, alpha, num_classes);

    // Catch errors raised by the two launches above.
    err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
137 | 
138 | 
// Host-side launcher for the backward pass: fills `buff` with the per-row
// gradient weight, then fills `dX_data` with the per-element gradient, both
// on `stream`. `logits` is accepted but not read here; the kernels work from
// `priors`. Prints the CUDA error and exits the process on failure.
// Returns 1 on success.
int SoftmaxFocalLossBackwardLaucher(
    const int N, const float* logits, const int* targets,
    float* dX_data, const float weight_pos,
    const float gamma, const float alpha, const int num_classes,
    const float* priors, float* buff, cudaStream_t stream){

    const int kThreadsPerBlock = 1024;
    // The first kernel covers N / num_classes rows, the second all N elements.
    const int row_blocks = (N / num_classes + kThreadsPerBlock - 1) / kThreadsPerBlock;
    const int elem_blocks = (N + kThreadsPerBlock - 1) / kThreadsPerBlock;

    SoftmaxFocalLossGradientWeightKernel<<<row_blocks, kThreadsPerBlock, 0, stream>>>(
        N, priors, targets, buff, weight_pos, gamma, alpha, num_classes);

    SoftmaxFocalLossGradientKernel<<<elem_blocks, kThreadsPerBlock, 0, stream>>>(
        N, priors, targets, buff, dX_data, num_classes);

    const cudaError_t err = cudaGetLastError();
    if(cudaSuccess != err)
    {
        fprintf( stderr, "%s#%d: cudaCheckError() failed : %s\n", __FILE__,
                __LINE__, cudaGetErrorString( err ) );
        exit( -1 );
    }

    return 1;
}
165 | 
166 | 
167 | 


--------------------------------------------------------------------------------
/datasets/coco_loader.py:
--------------------------------------------------------------------------------
  1 | #encoding: utf-8
  2 | 
  3 | import torch
  4 | import torch.utils.data
  5 | import torch.nn.functional as F
  6 | from torch.autograd import Variable
  7 | import numpy as np
  8 | import logging
  9 | #logger = logging.getLogger('global')
 10 | 
 11 | def to_np_array(x):
 12 |     if x is None:
 13 |         return None
 14 |     if isinstance(x, Variable): x = x.data
 15 |     return x.cpu().numpy() if torch.is_tensor(x) else np.array(x)
 16 | 
 17 | class COCODataLoader(torch.utils.data.DataLoader):
 18 |     def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None, batch_sampler=None,
 19 |                  num_workers=0, pin_memory=False, drop_last=False):
 20 |         super(COCODataLoader, self).__init__(dataset, batch_size, shuffle, sampler, batch_sampler,
 21 |                                         num_workers, self._collate_fn, pin_memory, drop_last)
 22 |     def _collate_fn(self, batch):
 23 |         '''
 24 |         Return: a mini-batch of data:
 25 |             image_data: FloatTensor of image, with shape of [b, 3, max_h, max_w]
 26 |             image_info: np.array of shape [b, 5], (resized_image_h, resized_image_w, resize_scale, origin_image_h, origin_image_w)
 27 |             bboxes: np.array of shape [b, max_num_gts, 5]
 28 |             keypoints: np.array of shape[b, max_num_gts, k, 2]
 29 |             masks: np.array of shape [b, max_num_gts, max_h, max_w]
 30 |             filename: list of str
 31 |         '''
 32 |         batch_size = len(batch)
 33 | 
 34 |         zip_batch = list(zip(*batch))
 35 |         images = zip_batch[0]
 36 |         unpad_image_sizes = zip_batch[1]
 37 |         ground_truth_bboxes = zip_batch[2]
 38 |         ignore_regions = zip_batch[3]
 39 |         ground_truth_keypoints = zip_batch[4]
 40 |         ground_truth_masks = zip_batch[5]
 41 |         filenames = zip_batch[6]
 42 |         has_keyp = ground_truth_keypoints[0] is not None
 43 |         has_mask = ground_truth_masks[0] is not None
 44 | 
 45 | 
 46 |         max_img_h = max([_.shape[-2] for _ in images])
 47 |         max_img_w = max([_.shape[-1] for _ in images])
 48 | 
 49 |         max_img_h = int(np.ceil(max_img_h / 128.0) * 128)
 50 |         max_img_w = int(np.ceil(max_img_w / 128.0) * 128)
 51 | 
 52 |         max_num_gt_bboxes = max([_.shape[0] for _ in ground_truth_bboxes])
 53 |         max_num_ig_bboxes = max([_.shape[0] for _ in ignore_regions])
 54 |         assert(max_num_gt_bboxes > 0)
 55 |         assert(max_num_ig_bboxes > 0)
 56 | 
 57 |         padded_images = []
 58 |         padded_gt_bboxes = []
 59 |         padded_ig_bboxes = []
 60 |         padded_gt_keypoints = [] if has_keyp else None
 61 |         padded_gt_masks = [] if has_mask else None
 62 |         for b_ix in range(batch_size):
 63 |             img = images[b_ix]
 64 | 
 65 |             # pad zeros to right bottom of each image
 66 |             pad_size = (0, max_img_w - img.shape[-1], 0, max_img_h - img.shape[-2])
 67 |             padded_images.append(F.pad(img, pad_size, 'constant', 0).data.cpu())
 68 | 
 69 |             # pad zeros to gt_bboxes
 70 |             gt_bboxes = to_np_array(ground_truth_bboxes[b_ix])
 71 |             new_gt_bboxes = np.zeros([max_num_gt_bboxes, gt_bboxes.shape[-1]])
 72 |             new_gt_bboxes[range(gt_bboxes.shape[0]), :] = gt_bboxes
 73 |             padded_gt_bboxes.append(new_gt_bboxes)
 74 | 
 75 |             # pad zeros to ig_bboxes
 76 |             ig_bboxes = to_np_array(ignore_regions[b_ix])
 77 |             new_ig_bboxes = np.zeros([max_num_ig_bboxes, ig_bboxes.shape[-1]])
 78 |             new_ig_bboxes[range(ig_bboxes.shape[0]), :] = ig_bboxes
 79 |             padded_ig_bboxes.append(new_ig_bboxes)
 80 | 
 81 |             # pad zero to keypoints
 82 |             if has_keyp:
 83 |                 keypoints = to_np_array(ground_truth_keypoints[b_ix])
 84 |                 shape = keypoints.shape
 85 |                 new_keypoints = np.zeros([max_num_gt_bboxes, shape[1], shape[2]])
 86 |                 new_keypoints[range(keypoints.shape[0]), ...] = keypoints
 87 |                 padded_gt_keypoints.append(new_keypoints)
 88 | 
 89 |             # pad zeros to masks
 90 |             if has_mask:
 91 |                 # [n, img_h, img_w] -> [n, max_img_h, max_img_w]
 92 |                 masks = torch.from_numpy(ground_truth_masks[b_ix])
 93 |                 masks = F.pad(Variable(masks), pad_size, 'constant', 0).data.cpu()
 94 |                 # [n, max_img_h, max_img_w] -> [max_num_gt_bboxes, max_img_h, max_img_w]
 95 |                 if masks.shape[0] < max_num_gt_bboxes:
 96 |                     pad_masks = masks.new(max_num_gt_bboxes - masks.shape[0], max_img_h, max_img_w).zero_()
 97 |                     masks = torch.cat([masks, pad_masks], dim=0)
 98 |                 padded_gt_masks.append(masks.numpy())
 99 | 
100 |         padded_images = torch.cat(padded_images, dim = 0)
101 |         unpad_image_sizes = np.stack(unpad_image_sizes, axis = 0)
102 |         stack_fn = lambda x : np.stack(x, axis=0) if x else np.array([])
103 |         padded_gt_bboxes = stack_fn(padded_gt_bboxes)
104 |         padded_ig_bboxes = stack_fn(padded_ig_bboxes)
105 |         padded_gt_keypoints = stack_fn(padded_gt_keypoints)
106 |         padded_gt_masks = stack_fn(padded_gt_masks)
107 | 
108 |         #logger.debug('image.shape:{}'.format(padded_images.shape))
109 |         #logger.debug('gt_box.shape:{}'.format(padded_gt_bboxes.shape))
110 |         #logger.debug('image_info.shape:{}'.format(unpad_image_sizes.shape))
111 |         #logger.debug('gt_kpts.shape:{}'.format(padded_gt_keypoints.shape))
112 |         #logger.debug('gt_mask.shape:{}'.format(padded_gt_masks.shape))
113 |         return [padded_images,
114 |                 unpad_image_sizes,
115 |                 padded_gt_bboxes,
116 |                 padded_ig_bboxes,
117 |                 padded_gt_keypoints,
118 |                 padded_gt_masks,
119 |                 filenames]
120 | 
121 | 
def validate(anno_file):
    """Print annotation statistics for a COCO annotation file.

    Reports the number of distinct image ids that appear in any annotation,
    and the number that appear in an annotation with ``num_keypoints`` > 0.
    """
    from pycocotools.coco import COCO
    annotations = COCO(anno_file).anns.values()
    image_a = {anno['image_id'] for anno in annotations}
    image_b = {anno['image_id'] for anno in annotations if anno['num_keypoints'] > 0}
    print('total images of person :{}\n'.format(len(image_a)))
    print('images with annotated keypoints:{}\n'.format(len(image_b)))
133 | 
134 | 


--------------------------------------------------------------------------------
/extensions/_focal_loss/focal_loss.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch.autograd import Function
  3 | from ._ext import focal_loss
  4 | import time
  5 | import logging
  6 | 
  7 | class SigmoidFocalLossFunction(Function):
  8 |     def __init__(self, gamma, alpha, num_classes):
  9 |         self.gamma = gamma
 10 |         self.alpha = alpha
 11 |         self.num_classes = num_classes
 12 |         
 13 |         self.weight_pos = None
 14 |         self.preds = None
 15 |         self.targets = None
 16 | 
 17 |     def forward(self, preds, targets, weight_pos):
 18 |         # preds shape: [Batch * h * w * num_anchors, num_classes]
 19 |         # targets shape: [Batch * h * w * num_anchors]
 20 |         preds_size  = preds.size()
 21 |         targets_size = targets.size()
 22 | 
 23 |         assert(preds_size[0] == targets_size[0])
 24 |         assert(preds_size[1] == self.num_classes)
 25 | 
 26 |         losses = preds.new(preds_size[0], preds_size[1]).zero_()
 27 |         weight_pos = float(weight_pos[0])
 28 |         N = preds_size[0] * preds_size[1]
 29 | 
 30 |         assert(losses.is_contiguous())
 31 |         assert(preds.is_contiguous())
 32 |         assert(targets.is_contiguous())
 33 | 
 34 |         assert(preds.is_cuda and targets.is_cuda)
 35 |         focal_loss.focal_loss_sigmoid_forward_cuda(N,
 36 |                                                    preds,
 37 |                                                    targets,
 38 |                                                    weight_pos,
 39 |                                                    self.gamma,
 40 |                                                    self.alpha,
 41 |                                                    self.num_classes,
 42 |                                                    losses)
 43 |         self.preds = preds
 44 |         self.targets = targets
 45 |         self.weight_pos = weight_pos
 46 |         return torch.cuda.FloatTensor([losses.sum()])
 47 | 
 48 |     def backward(self, grad_output):
 49 |         # grad_output: 1.0 / num_of_gpus
 50 |         preds_size = self.preds.size()
 51 |         grad_input = self.preds.new(preds_size[0], preds_size[1]).zero_()
 52 |         N = preds_size[0] * preds_size[1]
 53 | 
 54 |         assert(self.preds.is_contiguous())
 55 |         assert(self.targets.is_contiguous())
 56 |         assert(grad_input.is_contiguous())
 57 | 
 58 |         assert(self.preds.is_cuda and self.targets.is_cuda and grad_input.is_cuda)
 59 |         focal_loss.focal_loss_sigmoid_backward_cuda(N,
 60 |                                               self.preds,
 61 |                                               self.targets,
 62 |                                               grad_input,
 63 |                                               self.weight_pos,
 64 |                                               self.gamma,
 65 |                                               self.alpha,
 66 |                                               self.num_classes)
 67 |         grad_input = grad_input * grad_output
 68 |         return grad_input, None, None
 69 | 
 70 | class SoftmaxFocalLossFunction(Function):
 71 |     def __init__(self, gamma, alpha, num_classes):
 72 |         self.gamma = gamma
 73 |         self.alpha = alpha
 74 |         self.num_classes = num_classes
 75 |         
 76 |         self.weight_pos = None
 77 |         self.preds = None
 78 |         self.targets = None
 79 | 
 80 |     def forward(self, preds, targets, weight_pos):
 81 |         # preds shape: [Batch * h * w * num_anchors, num_classes]
 82 |         # targets shape: [Batch * h * w * num_anchors]
 83 |         preds_size  = preds.size()
 84 |         targets_size = targets.size()
 85 | 
 86 |         assert(preds_size[0] == targets_size[0])
 87 |         assert(preds_size[1] == self.num_classes)
 88 | 
 89 |         losses = preds.new(preds_size[0]).zero_()
 90 |         priors = preds.new(preds_size[0], preds_size[1]).zero_()
 91 | 
 92 |         weight_pos = float(weight_pos[0])
 93 |         N = preds_size[0] * preds_size[1]
 94 | 
 95 | 
 96 |         assert(losses.is_contiguous())
 97 |         assert(preds.is_contiguous())
 98 |         assert(targets.is_contiguous())
 99 |         assert(priors.is_contiguous())
100 | 
101 |         assert(preds.is_cuda and targets.is_cuda)
102 |         focal_loss.focal_loss_softmax_forward_cuda(N,
103 |                                                    preds,
104 |                                                    targets,
105 |                                                    weight_pos,
106 |                                                    self.gamma,
107 |                                                    self.alpha,
108 |                                                    self.num_classes,
109 |                                                    losses,
110 |                                                    priors)
111 | 
112 |         self.preds = preds
113 |         self.targets = targets
114 |         self.weight_pos = weight_pos
115 |         self.priors = priors
116 |         return torch.cuda.FloatTensor([losses.sum()])
117 | 
118 |     def backward(self, grad_output):
119 |         # grad_output: 1.0 / num_of_gpus
120 |         preds_size = self.preds.size()
121 |         grad_input = self.preds.new(preds_size[0], preds_size[1]).zero_()
122 |         buff = self.preds.new(preds_size[0]).zero_()
123 |         N = preds_size[0] * preds_size[1]
124 | 
125 |         assert(self.preds.is_contiguous())
126 |         assert(self.targets.is_contiguous())
127 |         assert(grad_input.is_contiguous())
128 |         assert(buff.is_contiguous())
129 | 
130 |         assert(self.preds.is_cuda and self.targets.is_cuda and grad_input.is_cuda and buff.is_cuda)
131 |         focal_loss.focal_loss_softmax_backward_cuda(N,
132 |                                               self.preds,
133 |                                               self.targets,
134 |                                               grad_input,
135 |                                               self.weight_pos,
136 |                                               self.gamma,
137 |                                               self.alpha,
138 |                                               self.num_classes,
139 |                                               self.priors,
140 |                                               buff)
141 |         grad_input = grad_input * grad_output
142 |         return grad_input, None, None
143 | 


--------------------------------------------------------------------------------
/datasets/example_dataset.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import torch
  3 | from torch.utils.data import DataLoader, Dataset
  4 | import torchvision.transforms as transforms
  5 | import numpy as np
  6 | from io import StringIO
  7 | from PIL import Image
  8 | import pickle as pk
  9 | import os
 10 | import logging
 11 | 
 12 | def pil_loader(img_str):
 13 |     #buff = StringIO.StringIO()
 14 |     buff = StringIO()
 15 |     buff.write(img_str)
 16 |     buff.seek(0)
 17 |     with Image.open(buff) as img:
 18 |         return img.convert('RGB')
 19 |  
 20 | class ExampleDataset(Dataset):
 21 |     def __init__(self, root_dir, list_file, transform_fn, normalize_fn=None, memcached=False):
 22 |         #self.logger = logging.getLogger('global')
 23 |         self.root_dir = root_dir
 24 |         self.transform_fn = transform_fn
 25 |         self.normalize_fn = normalize_fn
 26 |         # self.memcached = memcached
 27 | 
 28 |         #self.logger.info("building dataset from %s" % list_file)
 29 |         save_name = 'meta_%s'%(list_file.split('.')[0].strip('/').replace('/', '_'))
 30 |         ## load annotations if exist
 31 |         if os.path.exists(save_name):
 32 |             with open(save_name, 'rb') as f:
 33 |                 self.metas = pk.load(f)
 34 |                 self.num = len(self.metas)
 35 |                 # aspect ratio of images for sampler sort
 36 |                 self.aspect_ratios = [float(m[1])/m[2] for m in self.metas]
 37 |             return
 38 |         ## otherwise parse annotations
 39 |         with open(list_file) as f:
 40 |             lines = f.readlines()
 41 |         self.metas = []
 42 |         count = 0
 43 |         i = 0
 44 |         while i < len(lines):
 45 |             img_ig = []
 46 |             img_gt = []
 47 |             labels = []
 48 |             img_name = lines[i + 1].rstrip()
 49 |             img_height = float(lines[i + 3])
 50 |             img_width = float(lines[i + 4])
 51 |             img_ig_size = int (lines[i + 6])
 52 |             i += 7
 53 |             for j in range(img_ig_size):
 54 |                 sp = lines[i + j].split()
 55 |                 img_ig.append([float(sp[0]), float(sp[1]), float(sp[2]), float(sp[3])])
 56 |             if len(img_ig) == 0:
 57 |                 img_ig.append([0,0,0,0])
 58 |             i += img_ig_size
 59 |             img_gt_size = int(lines[i])
 60 |             i += 1
 61 |             for j in range(img_gt_size):
 62 |                 sp = lines[i + j].split()
 63 |                 img_gt.append([float(sp[1]),float(sp[2]),float(sp[3]),float(sp[4])])
 64 |                 labels.append(int(sp[0]))
 65 |             i += img_gt_size
 66 |             count += 1
 67 |             #if count % 100 == 0:
 68 |             #    self.logger.info(count)
 69 |             self.metas.append([img_name, img_height, img_width, np.array(img_gt), np.array(labels), np.array(img_ig)])
 70 |         with open(save_name, 'wb') as f:
 71 |             pk.dump(self.metas, f)
 72 |         #self.logger.info("read meta done")
 73 |         self.num = len(self.metas)
 74 |         # aspect ratio of images for sampler sort
 75 |         self.aspect_ratios = [float(m[1])/m[2] for m in self.metas]
 76 |  
 77 |     def __len__(self):
 78 |         return self.num
 79 |  
 80 |     def __getitem__(self, idx):
 81 |         filename = os.path.join(self.root_dir, self.metas[idx][0])
 82 |         h, w, bbox, labels, ignores = self.metas[idx][1:]
 83 |         bbox = bbox.astype(np.float32)
 84 |         ignores = ignores.astype(np.float32)
 85 |         labels = labels.astype(np.float32)
 86 |         img = Image.open(filename)
 87 |         if img.mode == 'L':
 88 |             img = img.convert('RGB')
 89 |         assert(img.size[0]==w and img.size[1]==h)
 90 |         ## det transform
 91 |         img, bbox, resize_scale, ignores = self.transform_fn(img, bbox, ignores)
 92 |         new_w, new_h = img.size
 93 |         ## to tensor
 94 |         to_tensor = transforms.ToTensor()
 95 |         img = to_tensor(img)
 96 |         if self.normalize_fn != None:
 97 |             img = self.normalize_fn(img)
 98 |         bbox = np.hstack([bbox, labels[:, np.newaxis]])
 99 |         return [img.unsqueeze(0),
100 |                 torch.Tensor([new_h, new_w, resize_scale]),
101 |                 torch.from_numpy(bbox),
102 |                 torch.from_numpy(ignores),
103 |                 filename]
104 | 
105 | 
class ExampleTransform(object):
    """Random-scale resize with optional horizontal flip for an image and its boxes.

    ``sizes`` is a single target size or a list of sizes; a size is drawn
    uniformly between the smallest and largest entry (inclusive) on every call.
    """

    def __init__(self, sizes, max_size, flip=False):
        candidates = sizes if isinstance(sizes, list) else [sizes]
        self.scale_min = min(candidates)
        self.scale_max = max(candidates)
        self.max_size = max_size
        self.flip = flip

    @staticmethod
    def _scale_boxes(boxes, scale):
        """Scale boxes in place: floor the first two columns, ceil the next two."""
        boxes[:, 0:2] = np.floor(boxes[:, 0:2] * scale)
        boxes[:, 2:4] = np.ceil(boxes[:, 2:4] * scale)

    @staticmethod
    def _flip_boxes(boxes, width):
        """Mirror columns 0 and 2 of boxes in place about an image of ``width``."""
        mirrored_lo = width - boxes[:, 2]
        mirrored_hi = width - boxes[:, 0]
        boxes[:, 0] = mirrored_lo
        boxes[:, 2] = mirrored_hi

    def __call__(self, img, bbox, ignores):
        """Return ``(resized_img, scaled_bbox, scale, scaled_ignores)``."""
        w, h = img.size

        # The short side is scaled to the drawn size unless that would push
        # the long side beyond max_size.
        size = np.random.randint(self.scale_min, self.scale_max + 1)
        scale = min(size / min(w, h), self.max_size / max(w, h))
        new_w, new_h = int(w * scale), int(h * scale)
        new_img = img.resize((new_w, new_h))

        # Work on copies so the caller's arrays are left untouched.
        new_bbox = np.array(bbox)
        self._scale_boxes(new_bbox, scale)
        new_ignores = np.array(ignores)
        has_ignores = new_ignores.shape[0] > 0
        if has_ignores:
            self._scale_boxes(new_ignores, scale)

        # Flip with probability 0.5 when flipping is enabled.
        if self.flip and np.random.random() < 0.5:
            new_img = new_img.transpose(Image.FLIP_LEFT_RIGHT)
            self._flip_boxes(new_bbox, new_w)
            if has_ignores:
                self._flip_boxes(new_ignores, new_w)
        return new_img, new_bbox, scale, new_ignores
146 | 


--------------------------------------------------------------------------------
/utils/cal_mAP.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #-------------------------------------------
  3 | # cal mAP | base on pytorch example dataset
  4 | # for cityscapes specifically
  5 | # pang jiangmiao | 2018.04.15
  6 | #-------------------------------------------
  7 | # import sys
  8 | import numpy as np
  9 | from collections import defaultdict
 10 | import subprocess
 11 | 
 12 | # import pprint
 13 | # import pdb
 14 | import logging
 15 | logger = logging.getLogger('global')
 16 | def parse_gts(gts_list, num_classes):
 17 |     '''parse detection ground truths list
 18 |         dict[img_name] = {height:, width:, bbox_num:, bbox:{cls:[[x1,y1,x2,y2],...], ...} }
 19 |     '''
 20 |     logger.info('Start parsing gts list......')
 21 |     index_info = [temp for temp in enumerate(gts_list) if temp[1].startswith('#')]
 22 |     gts = defaultdict(list)
 23 |     gts['num'] = np.zeros(num_classes)
 24 |     for i in range(len(index_info)):
 25 |         index = index_info[i][0]
 26 |         img_name = gts_list[index + 1].strip()      # val/folder/img_name.png
 27 |         pure_name = img_name.split('/')[-1][0:-4]  # img_name
 28 |         gts[pure_name] = defaultdict(list)
 29 |         gts[pure_name]['height'] = gts_list[index+3].strip()
 30 |         gts[pure_name]['width'] = gts_list[index+4].strip()
 31 |         gts[pure_name]['bbox_num'] = int(gts_list[index+7])
 32 |         gts[pure_name]['bbox'] = defaultdict(list)
 33 |         for b in gts_list[index+8:index+8+int(gts_list[index+7])]:
 34 |             b = b.split()
 35 |             label = int(b[0])
 36 |             x1 = int(b[1])
 37 |             y1 = int(b[2])
 38 |             x2 = int(b[3])
 39 |             y2 = int(b[4])
 40 |             gts[pure_name]['bbox'][label].append([x1, y1, x2, y2])
 41 |             gts['num'][label] += 1
 42 |         gts[pure_name]['is_det'] = defaultdict(list)
 43 |         for l in range(1, num_classes):
 44 |             gts[pure_name]['is_det'][l] = np.zeros(len(gts[pure_name]['bbox'][l]))
 45 |     logger.info('Done!')
 46 |     return gts
 47 | 
 48 | def parse_res(res_list):
 49 |     '''parse results list
 50 |         dict[cls] = [[x1, y1, x2, y2, score, img_name], ...]
 51 |     '''
 52 |     logger.info('Start parsing results list......')
 53 |     results = defaultdict(list)
 54 |     for r in res_list:
 55 |         r = r.split()
 56 |         img_name = r[0]  # img_name no extension
 57 |         label = int(r[6])
 58 |         score = float(r[5])
 59 |         x1 = int(float(r[1]))
 60 |         y1 = int(float(r[2]))
 61 |         x2 = int(float(r[3]))
 62 |         y2 = int(float(r[4]))
 63 |         results[label].append([x1, y1, x2, y2, score, img_name])
 64 |     logger.info('Done!')
 65 |     return results
 66 | 
 67 | def calIoU(result, gt_i):
 68 |     # result: [x1, y1, x2, y2, score, img_name]
 69 |     # gts: [[x1, x2, y1, y2], []...]
 70 |     x1 = result[0]
 71 |     y1 = result[1]
 72 |     x2 = result[2]
 73 |     y2 = result[3]
 74 |     overmax = -1
 75 |     is_which = -1
 76 |     for k, gt in enumerate(gt_i):
 77 |         gt_x1 = gt[0]
 78 |         gt_y1 = gt[1]
 79 |         gt_x2 = gt[2]
 80 |         gt_y2 = gt[3]
 81 |         inter_x1 = max(x1, gt_x1)
 82 |         inter_y1 = max(y1, gt_y1)
 83 |         inter_x2 = min(x2, gt_x2)
 84 |         inter_y2 = min(y2, gt_y2)
 85 |         if inter_x1 < inter_x2 and inter_y1 < inter_y2:
 86 |             area_inter = (inter_x2 - inter_x1 + 1) * (inter_y2 - inter_y1 + 1)
 87 |             area_sum1 = (x2 - x1 + 1) * (y2 - y1 + 1)
 88 |             area_sum2 = (gt_x2 - gt_x1 + 1) * (gt_y2 - gt_y1 + 1)
 89 |             IoU = area_inter/(area_sum1 + area_sum2 - area_inter)
 90 |             if IoU > overmax:
 91 |                 overmax = IoU
 92 |                 is_which = k
 93 |     return overmax, is_which
 94 | 
 95 | def cal_mAP(gts, results, num_classes, overlap_thre):
 96 |     ap = np.zeros(num_classes)
 97 |     max_recall = np.zeros(num_classes)
 98 |     for class_i in range(1, num_classes):
 99 |         results_i = results[class_i]
100 |         res_num = len(results_i)
101 |         tp = np.zeros(res_num)
102 |         fp = np.zeros(res_num)
103 |         sum_gt = gts['num'][class_i]
104 |         logger.info('sum_gt: {}'.format(sum_gt))
105 |         results_i = sorted(results_i, key = lambda xx : xx[4], reverse=True)
106 |         for k, res in enumerate(results_i):
107 |             img_name = res[-1]
108 |             gts_i = gts[img_name]['bbox'][int(class_i)]
109 |             overmax, is_which = calIoU(res, gts_i)
110 |             if overmax >= overlap_thre and gts[img_name]['is_det'][class_i][is_which] == 0:
111 |                 tp[k] = 1
112 |                 gts[img_name]['is_det'][class_i][is_which] = 1
113 |             else:
114 |                 fp[k] = 1
115 |         rec = np.zeros(res_num)
116 |         prec = np.zeros(res_num)
117 |         for v in range(res_num):
118 |             if v > 0:
119 |                 tp[v] = tp[v] + tp[v-1]
120 |                 fp[v] = fp[v] + fp[v-1]
121 |             rec[v] = tp[v] / sum_gt
122 |             prec[v] = tp[v] / (tp[v] + fp[v])
123 |         for v in range(res_num-2, -1, -1):
124 |             prec[v] = max(prec[v], prec[v+1])
125 |         for v in range(res_num):
126 |             if v == 0:
127 |                 ap[class_i] += rec[v] * prec[v]
128 |             else:
129 |                 ap[class_i] += (rec[v] - rec[v-1]) * prec[v]
130 |         max_recall[class_i] = np.max(rec)
131 |         logger.info('class {} --- ap: {}   max recall: {}'.format(class_i, ap[class_i], max_recall[class_i]))
132 |     return ap, max_recall
133 | 
134 | 
def Cal_MAP1(res_list, gts_list, num_classes):
    """Evaluate already-loaded result and ground-truth lines at IoU 0.5.

    Args:
        res_list: detection lines, as accepted by ``parse_res``.
        gts_list: ground-truth lines, as accepted by ``parse_gts``.
        num_classes: number of classes (converted with ``int``).

    Returns:
        The mean AP over classes 1..num_classes-1. The mAP and the mean of
        the per-class maximum recall are also written to the logger.
    """
    overlap_thre = 0.5
    class_count = int(num_classes)
    parsed_gts = parse_gts(gts_list, class_count)
    parsed_res = parse_res(res_list)

    ap, max_recall = cal_mAP(parsed_gts, parsed_res, class_count, overlap_thre)
    # index 0 is excluded from both averages
    mAP = np.mean(ap[1:])
    mean_recall = np.mean(max_recall[1:])
    logger.info('mAP: {}   max recall: {}'.format(mAP, mean_recall))
    return mAP
152 | 
def Cal_MAP(res_dir, gts_list, num_classes):
    """Merge per-rank result files and evaluate them at IoU 0.5.

    All files matching ``<res_dir>/results.txt.rank*`` are concatenated (in
    sorted name order) into ``<res_dir>/results.txt``, which is then parsed
    together with the ground-truth file.

    Args:
        res_dir: directory holding the ``results.txt.rank*`` files.
        gts_list: path of the ground-truth listing file.
        num_classes: number of classes (converted with ``int``).

    Returns:
        The mean AP over classes 1..num_classes-1; the summary is also
        printed.
    """
    import glob
    import os
    import shutil

    overlap_thre = 0.5
    num_classes = int(num_classes)
    merged_path = "{}/{}".format(res_dir, 'results.txt')

    # Concatenate in Python instead of a shell "cat": no shell quoting
    # problems with unusual directory names and no dependency on a Unix shell.
    rank_files = sorted(glob.glob(os.path.join(glob.escape(res_dir), 'results.txt.rank*')))
    with open(merged_path, 'wb') as f_merged:
        for rank_file in rank_files:
            with open(rank_file, 'rb') as f_rank:
                shutil.copyfileobj(f_rank, f_merged)

    with open(merged_path, 'r', encoding='utf-8') as f_res:
        res_list = f_res.readlines()
    with open(gts_list, 'r', encoding='utf-8') as f_gts:
        gts_list = f_gts.readlines()

    gts = parse_gts(gts_list, num_classes)
    results = parse_res(res_list)

    ap, max_recall = cal_mAP(gts, results, num_classes, overlap_thre)
    mAP = np.mean(ap[1:])
    m_rec = np.mean(max_recall[1:])
    print('--------------------')
    print('mAP: {}   max recall: {}'.format(mAP, m_rec))
    print('--------------------')
    return mAP
172 | 
173 | 
174 | 
175 | 


--------------------------------------------------------------------------------
/extensions/_cython_bbox/cython_nms.pyx:
--------------------------------------------------------------------------------
  1 | # Copyright (c) 2017-present, Facebook, Inc.
  2 | #
  3 | # Licensed under the Apache License, Version 2.0 (the "License");
  4 | # you may not use this file except in compliance with the License.
  5 | # You may obtain a copy of the License at
  6 | #
  7 | #     http://www.apache.org/licenses/LICENSE-2.0
  8 | #
  9 | # Unless required by applicable law or agreed to in writing, software
 10 | # distributed under the License is distributed on an "AS IS" BASIS,
 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | # See the License for the specific language governing permissions and
 13 | # limitations under the License.
 14 | ##############################################################################
 15 | #
 16 | # Based on:
 17 | # --------------------------------------------------------
 18 | # Fast R-CNN
 19 | # Copyright (c) 2015 Microsoft
 20 | # Licensed under The MIT License [see LICENSE for details]
 21 | # Written by Ross Girshick
 22 | # --------------------------------------------------------
 23 | 
 24 | cimport cython
 25 | import numpy as np
 26 | cimport numpy as np
 27 | 
 28 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b) nogil:
 29 |     return a if a >= b else b
 30 | 
 31 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b) nogil:
 32 |     return a if a <= b else b
 33 | 
 34 | @cython.boundscheck(False)
 35 | @cython.cdivision(True)
 36 | @cython.wraparound(False)
 37 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float32_t thresh):
 38 |     cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
 39 |     cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
 40 |     cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
 41 |     cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
 42 |     cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
 43 | 
 44 |     cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
 45 |     cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
 46 | 
 47 |     cdef int ndets = dets.shape[0]
 48 |     cdef np.ndarray[np.int_t, ndim=1] suppressed = \
 49 |             np.zeros((ndets), dtype=np.int)
 50 | 
 51 |     # nominal indices
 52 |     cdef int _i, _j
 53 |     # sorted indices
 54 |     cdef int i, j
 55 |     # temp variables for box i's (the box currently under consideration)
 56 |     cdef np.float32_t ix1, iy1, ix2, iy2, iarea
 57 |     # variables for computing overlap with box j (lower scoring box)
 58 |     cdef np.float32_t xx1, yy1, xx2, yy2
 59 |     cdef np.float32_t w, h
 60 |     cdef np.float32_t inter, ovr
 61 | 
 62 |     with nogil:
 63 |       for _i in range(ndets):
 64 |           i = order[_i]
 65 |           if suppressed[i] == 1:
 66 |               continue
 67 |           ix1 = x1[i]
 68 |           iy1 = y1[i]
 69 |           ix2 = x2[i]
 70 |           iy2 = y2[i]
 71 |           iarea = areas[i]
 72 |           for _j in range(_i + 1, ndets):
 73 |               j = order[_j]
 74 |               if suppressed[j] == 1:
 75 |                   continue
 76 |               xx1 = max(ix1, x1[j])
 77 |               yy1 = max(iy1, y1[j])
 78 |               xx2 = min(ix2, x2[j])
 79 |               yy2 = min(iy2, y2[j])
 80 |               w = max(0.0, xx2 - xx1 + 1)
 81 |               h = max(0.0, yy2 - yy1 + 1)
 82 |               inter = w * h
 83 |               ovr = inter / (iarea + areas[j] - inter)
 84 |               if ovr >= thresh:
 85 |                   suppressed[j] = 1
 86 | 
 87 |     return np.where(suppressed == 0)[0]
 88 | 
 89 | # ----------------------------------------------------------
 90 | # Soft-NMS: Improving Object Detection With One Line of Code
 91 | # Copyright (c) University of Maryland, College Park
 92 | # Licensed under The MIT License [see LICENSE for details]
 93 | # Written by Navaneeth Bodla and Bharat Singh
 94 | # ----------------------------------------------------------
@cython.boundscheck(False)
@cython.cdivision(True)
@cython.wraparound(False)
def soft_nms(
    np.ndarray[float, ndim=2] boxes_in,
    float sigma=0.5,
    float Nt=0.3,
    float threshold=0.001,
    unsigned int method=0
):
    """Soft-NMS: decay the scores of overlapping boxes instead of dropping them.

    Works on a copy of ``boxes_in``; columns 0-3 are read as x1, y1, x2, y2
    and column 4 as the score.

    Args:
        boxes_in: 2-D float array of boxes with scores.
        sigma: divisor in the gaussian weight ``exp(-(ov * ov) / sigma)``.
        Nt: overlap above which the linear and hard variants change a score.
        threshold: a box whose decayed score falls below this is discarded.
        method: 1 = linear decay, 2 = gaussian decay, anything else = hard
            (original) NMS.

    Returns:
        ``(boxes[:N], inds[:N])``: the surviving rows with their updated
        scores, and the row index each of them had in ``boxes_in``.
    """
    boxes = boxes_in.copy()
    cdef unsigned int N = boxes.shape[0]
    cdef float iw, ih, box_area
    cdef float ua
    cdef int pos = 0
    cdef float maxscore = 0
    cdef int maxpos = 0
    cdef float x1, x2, y1, y2, tx1, tx2, ty1, ty2, ts, area, weight, ov
    # inds is permuted together with boxes so original indices can be returned
    inds = np.arange(N)

    for i in range(N):
        maxscore = boxes[i, 4]
        maxpos = i

        # remember row i so it can be moved to where the max box was
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]
        ti = inds[i]

        pos = i + 1
        # get max box
        while pos < N:
            if maxscore < boxes[pos, 4]:
                maxscore = boxes[pos, 4]
                maxpos = pos
            pos = pos + 1

        # add max box as a detection
        boxes[i,0] = boxes[maxpos,0]
        boxes[i,1] = boxes[maxpos,1]
        boxes[i,2] = boxes[maxpos,2]
        boxes[i,3] = boxes[maxpos,3]
        boxes[i,4] = boxes[maxpos,4]
        inds[i] = inds[maxpos]

        # swap ith box with position of max box
        boxes[maxpos,0] = tx1
        boxes[maxpos,1] = ty1
        boxes[maxpos,2] = tx2
        boxes[maxpos,3] = ty2
        boxes[maxpos,4] = ts
        inds[maxpos] = ti

        # from here on t* describe the max box now stored in row i
        tx1 = boxes[i,0]
        ty1 = boxes[i,1]
        tx2 = boxes[i,2]
        ty2 = boxes[i,3]
        ts = boxes[i,4]

        pos = i + 1
        # NMS iterations, note that N changes if detection boxes fall below
        # threshold
        while pos < N:
            x1 = boxes[pos, 0]
            y1 = boxes[pos, 1]
            x2 = boxes[pos, 2]
            y2 = boxes[pos, 3]
            s = boxes[pos, 4]

            area = (x2 - x1 + 1) * (y2 - y1 + 1)
            # width/height of the intersection; non-positive means no overlap
            iw = (min(tx2, x2) - max(tx1, x1) + 1)
            if iw > 0:
                ih = (min(ty2, y2) - max(ty1, y1) + 1)
                if ih > 0:
                    # union area = area of max box + area of this box - intersection
                    ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
                    ov = iw * ih / ua #iou between max box and detection box

                    if method == 1: # linear
                        if ov > Nt:
                            weight = 1 - ov
                        else:
                            weight = 1
                    elif method == 2: # gaussian
                        weight = np.exp(-(ov * ov)/sigma)
                    else: # original NMS
                        if ov > Nt:
                            weight = 0
                        else:
                            weight = 1

                    boxes[pos, 4] = weight*boxes[pos, 4]

                    # if box score falls below threshold, discard the box by
                    # swapping with last box update N
                    if boxes[pos, 4] < threshold:
                        boxes[pos,0] = boxes[N-1, 0]
                        boxes[pos,1] = boxes[N-1, 1]
                        boxes[pos,2] = boxes[N-1, 2]
                        boxes[pos,3] = boxes[N-1, 3]
                        boxes[pos,4] = boxes[N-1, 4]
                        inds[pos] = inds[N-1]
                        N = N - 1
                        # step back so the row just moved into pos is examined too
                        pos = pos - 1

            pos = pos + 1

    return boxes[:N], inds[:N]
204 | 


--------------------------------------------------------------------------------
/models/losses.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # @Time    : 18-4-19
  3 | # @Author  : Xinge
  4 | # import torch
  5 | import torch.nn as nn
  6 | # import numpy as np
  7 | import torch
  8 | import torch.nn.functional as F
  9 | from torch.autograd import Variable, Function
 10 | # import numpy as np
 11 | from math import exp
 12 | 
 13 | 
 14 | class Losses(nn.Module):
 15 |     def __init__(self):
 16 |         super(Losses, self).__init__()
 17 |         # self.loss = nn.functional.kl_div
 18 | 
 19 | 
 20 |     def forward(self, input1, input2):
 21 |         """
 22 |         KL divergence loss
 23 |         :param input1:
 24 |         :param input2:
 25 |         :return:
 26 |         """
 27 |         # return 0.5 * (self.loss(input1, input2) + self.loss(input2, input1))
 28 |         # assert input1.size() == 2, "more than two dimensions"
 29 |         input1 = nn.functional.log_softmax(input1, dim = 1)
 30 |         input2 = nn.functional.softmax(input2, dim = 1)
 31 |         # loss_output = (input2 * (input2.log() - input1) ).sum() / input1.size(0)
 32 |         final_loss = (input2 * (input2.log() - input1.log())).mean()
 33 |         return final_loss * input1.size(0)
 34 | 
 35 | class Losses_triplet(nn.Module):
 36 |     def __init__(self):
 37 |         super(Losses_triplet, self).__init__()
 38 |         self.loss = nn.functional.kl_div
 39 | 
 40 | 
 41 |     def forward(self, real_img, input1, input2):
 42 |         """
 43 |         KL divergence loss
 44 |         :param input1: fake source
 45 |         :param input2: fake target
 46 |         :param real_img: real source
 47 |         :return:
 48 |         """
 49 |         # return 0.5 * (self.loss(input1, input2) + self.loss(input2, input1))
 50 |         # assert input1.size() == 2, "more than two dimensions"
 51 |         input1_log = nn.functional.log_softmax(input1, dim = 1)
 52 |         input2_log = nn.functional.log_softmax(input2, dim = 1)
 53 |         # input1 = nn.functional.softmax(input1, dim = 1)
 54 |         # input2 = nn.functional.softmax(input2, dim = 1)
 55 |         real_img = nn.functional.softmax(real_img, dim = 1)
 56 |         positive_loss = self.loss(input2_log, real_img, size_average=True) * 1000.0
 57 |         # negative_loss = torch.max(0, 1.0 - self.loss(input1_log, real_img, size_average=True))
 58 |         negative_loss = 1.0 - self.loss(input1_log, real_img, size_average=True) * 1000.0
 59 |         if (negative_loss.data < 0.0).all():
 60 |             negative_loss.data = torch.cuda.FloatTensor([0.0])
 61 |         # print("posi: ", positive_loss)
 62 |         # print("nega: ", negative_loss)
 63 |         # loss_output = (input2 * (input2.log() - input1) ).sum() / input1.size(0)
 64 |         return positive_loss + negative_loss
 65 | 
 66 | class Losses_triplet_nll(nn.Module):
 67 |     def __init__(self):
 68 |         super(Losses_triplet_nll, self).__init__()
 69 |         self.loss = nn.functional.mse_loss
 70 | 
 71 | 
 72 |     def forward(self, real_img, input1, input2):
 73 |         """
 74 |         KL divergence loss
 75 |         :param input1: fake source
 76 |         :param input2: fake target
 77 |         :param real_img: real source
 78 |         :return:
 79 |         """
 80 |         # return 0.5 * (self.loss(input1, input2) + self.loss(input2, input1))
 81 |         # assert input1.size() == 2, "more than two dimensions"
 82 |         posi_dist = self.loss(input2, real_img)
 83 |         nega_dist = self.loss(input1, real_img)
 84 | 
 85 |         Pt = torch.exp(nega_dist) / (torch.exp(nega_dist) + torch.exp(posi_dist))
 86 | 
 87 |         loss_pt = -1.0 * torch.log(Pt)
 88 | 
 89 |         return loss_pt
 90 | 
 91 | 
 92 | class GradReverse(Function):
 93 | 
 94 |     def __init__(self, lambd):
 95 |         self.lambd = lambd
 96 | 
 97 |     def forward(self, x):
 98 |         return x.view_as(x)
 99 | 
100 |     def backward(self, grad_output):
101 |         return (grad_output * -self.lambd)
102 | 
103 | 
104 | def grad_reverse(x, lambd):
105 |     return GradReverse(lambd)(x)
106 | 
107 | 
class Losses3(nn.Module):
    """Batch-averaged KL divergence between two sets of logits."""

    def __init__(self):
        super(Losses3, self).__init__()

    def forward(self, input1, input2):
        """
        KL(softmax(input2) || softmax(input1)), summed over all elements and
        divided by the size of dim 0.

        :param input1: logits of the approximating distribution (classes on dim 1)
        :param input2: logits of the reference distribution (classes on dim 1)
        :return: scalar loss
        """
        log_q = nn.functional.log_softmax(input1, dim=1)
        p = nn.functional.softmax(input2, dim=1)
        pointwise = p * (p.log() - log_q)
        batch_size = log_q.size(0)
        return pointwise.sum() / batch_size
127 | 
class Losses2(nn.Module):
    """Thin wrapper around a bias-free ``nn.Bilinear`` layer."""

    def __init__(self, in1_size, in2_size, out_size):
        super(Losses2, self).__init__()
        # fourth positional argument of nn.Bilinear is ``bias``
        bilinear = nn.Bilinear(in1_size, in2_size, out_size, False)
        self.loss = bilinear

    def forward(self, input1, input2):
        """
        Bilinear Transform Loss
        :param input1: (N, in1_size)
        :param input2: (N, in2_size)
        :return: (N, out_size)
        """
        output = self.loss(input1, input2)
        return output
141 | 
142 | 
143 | 
144 | 
def gaussian(window_size, sigma):
    """Return a 1-D gaussian kernel of length ``window_size`` that sums to 1.

    The kernel is centred on index ``window_size // 2`` and has standard
    deviation ``sigma``.
    """
    center = window_size // 2
    weights = []
    for position in range(window_size):
        exponent = -(position - center) ** 2 / float(2 * sigma ** 2)
        weights.append(exp(exponent))
    kernel = torch.Tensor(weights)
    return kernel / kernel.sum()
148 | 
149 | 
def create_window(window_size, channel):
    """Build a ``(channel, 1, window_size, window_size)`` gaussian window.

    The 2-D kernel is the outer product of a 1-D gaussian (sigma 1.5) with
    itself, repeated once per channel.
    """
    column = gaussian(window_size, 1.5).unsqueeze(1)
    kernel_2d = column.mm(column.t()).float()
    kernel_4d = kernel_2d.unsqueeze(0).unsqueeze(0)
    per_channel = kernel_4d.expand(channel, 1, window_size, window_size)
    return Variable(per_channel.contiguous())
155 | 
156 | 
157 | def _ssim(img1, img2, window, window_size, channel, size_average=True):
158 |     mu1 = F.conv2d(img1, window, padding=window_size // 2, groups=channel)
159 |     mu2 = F.conv2d(img2, window, padding=window_size // 2, groups=channel)
160 | 
161 |     mu1_sq = mu1.pow(2)
162 |     mu2_sq = mu2.pow(2)
163 |     mu1_mu2 = mu1 * mu2
164 | 
165 |     sigma1_sq = F.conv2d(img1 * img1, window, padding=window_size // 2, groups=channel) - mu1_sq
166 |     sigma2_sq = F.conv2d(img2 * img2, window, padding=window_size // 2, groups=channel) - mu2_sq
167 |     sigma12 = F.conv2d(img1 * img2, window, padding=window_size // 2, groups=channel) - mu1_mu2
168 | 
169 |     C1 = 0.01 ** 2
170 |     C2 = 0.03 ** 2
171 | 
172 |     ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
173 | 
174 |     if size_average:
175 |         return ssim_map.mean()
176 |     else:
177 |         return ssim_map.mean(1).mean(1).mean(1)
178 | 
179 | 
class SSIM(torch.nn.Module):
    """SSIM module that caches its gaussian window between calls."""

    def __init__(self, window_size=110, size_average=True):
        super(SSIM, self).__init__()
        self.window_size = window_size
        self.size_average = size_average
        # start with a single-channel window; forward() rebuilds it on demand
        self.channel = 1
        self.window = create_window(window_size, self.channel)

    def forward(self, img1, img2):
        """Return the SSIM of two 4-D image batches.

        The cached window is reused only while the channel count and tensor
        type of ``img1`` match it; otherwise a new window is created, moved
        to the device/type of ``img1`` and cached.
        """
        (_, channel, _, _) = img1.size()

        reusable = channel == self.channel and self.window.data.type() == img1.data.type()
        if not reusable:
            fresh = create_window(self.window_size, channel)
            if img1.is_cuda:
                fresh = fresh.cuda(img1.get_device())
            self.window = fresh.type_as(img1)
            self.channel = channel

        return _ssim(img1, img2, self.window, self.window_size, channel, self.size_average)
204 | 
205 | 
def ssim(img1, img2, window_size=110, size_average=True):
    """Functional SSIM: build a window matching ``img1`` and evaluate ``_ssim``.

    ``img1`` must be 4-D; its second dimension is used as the channel count.
    """
    (_, channel, _, _) = img1.size()
    kernel = create_window(window_size, channel)
    if img1.is_cuda:
        kernel = kernel.cuda(img1.get_device())
    kernel = kernel.type_as(img1)
    return _ssim(img1, img2, kernel, window_size, channel, size_average)
215 | 
216 | 


--------------------------------------------------------------------------------
/models/faster_rcnn/vgg_adver_expansion_cluster.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # @Time    : 18-6-22 9:30
  3 | # @Author  : Xinge
  4 | 
  5 | from extensions import RoIPool
  6 | from .faster_rcnn_adver_expansion_reweight_cluster import FasterRCNN_AdEx
  7 | from models.head import NaiveRpnHead
  8 | import torch.nn as nn
  9 | import torch.utils.model_zoo as model_zoo
 10 | import math
 11 | # from .common_net import LayerNorm
 12 | 
 13 | __all__ = [
 14 |     'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
 15 |     'vgg19_bn', 'vgg19',
 16 | ]
 17 | 
 18 | 
# Checkpoint download URLs keyed by architecture name; the vggXX()
# constructors in this file look them up when called with pretrained=True.
model_urls = {
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
    'vgg11_bn': 'https://download.pytorch.org/models/vgg11_bn-6002323d.pth',
    'vgg13_bn': 'https://download.pytorch.org/models/vgg13_bn-abd245e5.pth',
    'vgg16_bn': 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth',
    'vgg19_bn': 'https://download.pytorch.org/models/vgg19_bn-c79401a0.pth',
}
 29 | 
 30 | class VGG(FasterRCNN_AdEx):
 31 | 
 32 |     def __init__(self, features, cfg):
 33 |         super(VGG, self).__init__(cfg['gan_model_flag'])
 34 | 
 35 |         self.features = features
 36 |         # self.features2 = features
 37 |         #drop out last pooling layer so that feature stride is 2^4
 38 |         last_pooling = self.features._modules.popitem(last = True)
 39 |         # last_pooling2 = self.features2._modules.popitem(last = True)
 40 |         # rpn head
 41 |         num_anchors = len(cfg['anchor_scales']) * len(cfg['anchor_ratios'])
 42 |         self.rpn_head = NaiveRpnHead(512, num_classes=2, num_anchors=num_anchors)
 43 | 
 44 |         # rcnn head
 45 |         self.roipooling = RoIPool(7, 7, 1.0 / cfg['anchor_stride'])
 46 |         self.classifier = nn.Sequential(
 47 |             nn.Linear(512 * 7 * 7, 4096),
 48 |             # nn.BatchNorm1d(num_features=4096),
 49 |             # LayerNorm(4096),
 50 |             nn.ReLU(True),
 51 |             nn.Dropout(),
 52 |             nn.Linear(4096, 4096),
 53 |             # LayerNorm(4096),
 54 |             # nn.BatchNorm1d(num_features=4096),
 55 |             nn.ReLU(True),
 56 |             nn.Dropout(),
 57 |             # nn.Linear(4096, num_classes),
 58 |         )
 59 |         self.fc_rcnn_cls = nn.Linear(4096, cfg['num_classes'])
 60 |         self.fc_rcnn_loc = nn.Linear(4096, cfg['num_classes'] * 4)
 61 | 
 62 |         self._initialize_weights()
 63 | 
 64 |     def feature_extractor(self, x):
 65 |         return self.features(x)
 66 | 
 67 |     # def feature_extractor2(self, x):
 68 |     #     return self.features2(x)
 69 | 
 70 |     def rpn(self, x):
 71 |         return self.rpn_head(x)
 72 | 
 73 |     def rcnn(self, x, rois):
 74 |         assert(rois.shape[1] == 5)
 75 |         x = self.roipooling(x, rois) # x.size(): [512, 512, 7, 7]
 76 |         x = x.view(x.size(0), -1)
 77 |         x_fea = self.classifier(x) # torch.Size([512, 4096])
 78 |         rcnn_pred_cls = self.fc_rcnn_cls(x_fea)
 79 |         rcnn_pred_loc = self.fc_rcnn_loc(x_fea)
 80 |         return x_fea, rcnn_pred_cls, rcnn_pred_loc
 81 | 
 82 |     def _initialize_weights(self):
 83 |         # count = 1
 84 |         for m in self.modules():
 85 |             if isinstance(m, nn.Conv2d):
 86 |                 n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
 87 |                 m.weight.data.normal_(0, math.sqrt(2. / n))
 88 |                 if m.bias is not None:
 89 |                     m.bias.data.zero_()
 90 |                 # if count <= 2:
 91 |                 #     m.eval()
 92 |                 #     count += 1
 93 |             elif isinstance(m, nn.BatchNorm2d):
 94 |                 m.weight.data.fill_(1)
 95 |                 m.bias.data.zero_()
 96 |             elif isinstance(m, nn.Linear):
 97 |                 m.weight.data.normal_(0, 0.01)
 98 |                 m.bias.data.zero_()
 99 | 
100 | 
def make_layers(cfg, batch_norm=False):
    """Turn a layer layout into an ``nn.Sequential`` backbone.

    Args:
        cfg: iterable in which ``'M'`` adds a 2x2 max-pool (stride 2) and any
            other entry ``v`` adds a 3x3 convolution (padding 1) with ``v``
            output channels followed by an in-place ReLU.
        batch_norm: insert ``BatchNorm2d`` between each convolution and ReLU.

    The first convolution expects 3 input channels.
    """
    modules = []
    channels_in = 3
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels_in, entry, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(nn.ReLU(inplace=True))
        channels_in = entry
    return nn.Sequential(*modules)
115 | 
116 | 
# Layer layouts consumed by make_layers(): an integer adds a 3x3 convolution
# with that many output channels, 'M' adds a 2x2 max-pool. The keys are the
# configuration letters quoted in the vggXX() docstrings
# (A: 11 layers, B: 13, D: 16, E: 19).
cfg = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}
123 | 
124 | 
def vgg11(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['A']), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11']), strict=False)
    return model
135 | 
136 | 
def vgg11_bn(pretrained=False, **kwargs):
    """VGG 11-layer model (configuration "A") with batch normalization

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['A'], batch_norm=True), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg11_bn']), strict=False)
    return model
147 | 
148 | 
def vgg13(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B")

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['B']), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg13']), strict=False)
    return model
159 | 
160 | 
def vgg13_bn(pretrained=False, **kwargs):
    """VGG 13-layer model (configuration "B") with batch normalization

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['B'], batch_norm=True), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg13_bn']), strict=False)
    return model
171 | 
172 | 
def vgg16(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D")

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['D']), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg16']), strict=False)
    return model
183 | 
184 | 
def vgg16_bn(pretrained=False, **kwargs):
    """VGG 16-layer model (configuration "D") with batch normalization

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['D'], batch_norm=True), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg16_bn']), strict=False)
    return model
195 | 
196 | 
def vgg19(pretrained=False, **kwargs):
    """VGG 19-layer model (configuration "E")

    Args:
        pretrained (bool): If True, initialise the matching layers from the
            ImageNet checkpoint listed in ``model_urls``
        **kwargs: forwarded to ``VGG`` (which requires a ``cfg`` mapping)
    """
    model = VGG(make_layers(cfg['E']), **kwargs)
    if pretrained:
        # This model adds detection heads (rpn_head, fc_rcnn_*) that a plain
        # VGG checkpoint cannot contain, so a strict load would fail on the
        # key mismatch; load the keys that match and leave the rest as is.
        model.load_state_dict(model_zoo.load_url(model_urls['vgg19']), strict=False)
    return model
207 | 
208 | 
def vgg19_bn(pretrained=False, **kwargs):
    """Build the 19-layer VGG network (configuration "E") with batch norm.

    Args:
        pretrained (bool): If True, load the ImageNet pre-trained weights
            published under ``model_urls['vgg19_bn']`` into the model.
        **kwargs: Forwarded unchanged to the ``VGG`` constructor.

    Returns:
        The constructed ``VGG`` instance.
    """
    features = make_layers(cfg['E'], batch_norm=True)
    net = VGG(features, **kwargs)
    if not pretrained:
        return net
    weights = model_zoo.load_url(model_urls['vgg19_bn'])
    net.load_state_dict(weights)
    return net
219 | 


--------------------------------------------------------------------------------
/extensions/_roi_align/src/roi_align_kernel.cu:
--------------------------------------------------------------------------------
  1 | #ifdef __cplusplus
  2 | extern "C" {
  3 | #endif
  4 | 
  5 | #include <stdio.h>
  6 | #include <math.h>
  7 | #include <float.h>
  8 | #include "roi_align_kernel.h"
  9 | 
 10 | #define CUDA_1D_KERNEL_LOOP(i, n)                            \
 11 |     for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
 12 |             i += blockDim.x * gridDim.x)
 13 | 
 14 | 
    // ROIAlignForward: produces one output value per loop iteration by
    // bilinearly sampling the input feature map at a point inside an ROI.
    //
    //   nthreads        total number of output elements to produce
    //   bottom_data     input features, read at offset
    //                   ((batch * channels + c) * height + h) * width + w
    //   spatial_scale   factor applied to the ROI coordinates before sampling
    //   height, width   spatial size of one input feature plane
    //   channels        number of feature planes per image
    //   aligned_height,
    //   aligned_width   spatial size of the per-ROI output grid
    //   bottom_rois     5 floats per ROI: [batch index, x1, y1, x2, y2]
    //   top_data        output, written at offset
    //                   ((n * channels + c) * aligned_height + ph) * aligned_width + pw
    __global__ void ROIAlignForward(const int nthreads, const float* bottom_data, const float spatial_scale, const int height, const int width,
                                    const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data) {
        // Grid-stride loop (see CUDA_1D_KERNEL_LOOP): each thread handles
        // indices index, index + blockDim.x * gridDim.x, ... below nthreads.
        CUDA_1D_KERNEL_LOOP(index, nthreads) {
            // (n, c, ph, pw) is an element in the aligned output
            // int n = index;
            // int pw = n % aligned_width;
            // n /= aligned_width;
            // int ph = n % aligned_height;
            // n /= aligned_height;
            // int c = n % channels;
            // n /= channels;

            // Decompose the flat output index into (ROI n, channel c, row ph, column pw).
            int pw = index % aligned_width;
            int ph = (index / aligned_width) % aligned_height;
            int c  = (index / aligned_width / aligned_height) % channels;
            int n  = index / aligned_width / aligned_height / channels;

            // bottom_rois += n * 5;
            // Read ROI n and map its corners into feature-map coordinates.
            float roi_batch_ind = bottom_rois[n * 5 + 0];
            float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
            float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
            float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
            float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;

            // Clamp the ROI extent to be non-negative (a malformed ROI with
            // end < start - 1 collapses to size 0, not 1x1).
            float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
            float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
            // Spacing between sample points: the aligned_* samples span
            // [roi_start, roi_start + roi_extent] inclusive.
            // NOTE(review): the divisor is 0 when aligned_height or
            // aligned_width is 1 -- confirm callers never pass 1.
            float bin_size_h = roi_height / (aligned_height - 1.);
            float bin_size_w = roi_width / (aligned_width - 1.);

            // Continuous sampling position for this output cell.
            float h = (float)(ph) * bin_size_h + roi_start_h;
            float w = (float)(pw) * bin_size_w + roi_start_w;

            // Top-left integer neighbour, capped so that hstart + 1 and
            // wstart + 1 stay inside the plane.
            // NOTE(review): for height == 1 or width == 1 the cap yields -1.
            int hstart = fminf(floor(h), height - 2);
            int wstart = fminf(floor(w), width - 2);

            // Offset of the first element of the image this ROI belongs to
            // (the float batch index is converted to int by the assignment).
            int img_start = roi_batch_ind * channels * height * width;

            // bilinear interpolation
            if (h < 0 || h >= height || w < 0 || w >= width) {
                // Sample point lies outside the feature map: emit zero.
                top_data[index] = 0.;
            } else {
                // Fractional distances from the top-left neighbour.
                float h_ratio = h - (float)(hstart);
                float w_ratio = w - (float)(wstart);
                // Flat offsets of the four surrounding input elements.
                int upleft = img_start + (c * height + hstart) * width + wstart;
                int upright = upleft + 1;
                int downleft = upleft + width;
                int downright = downleft + 1;

                // Weighted sum of the four neighbours.
                top_data[index] = bottom_data[upleft] * (1. - h_ratio) * (1. - w_ratio)
                    + bottom_data[upright] * (1. - h_ratio) * w_ratio
                    + bottom_data[downleft] * h_ratio * (1. - w_ratio)
                    + bottom_data[downright] * h_ratio * w_ratio;
            }
        }
    }
 71 | 
 72 | 
 73 |     int ROIAlignForwardLaucher(const float* bottom_data, const float spatial_scale, const int num_rois, const int height, const int width,
 74 |                                const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* top_data, cudaStream_t stream) {
 75 |         const int kThreadsPerBlock = 1024;
 76 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
 77 |         cudaError_t err;
 78 | 
 79 | 
 80 |         ROIAlignForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
 81 |           output_size, bottom_data, spatial_scale, height, width, channels,
 82 |           aligned_height, aligned_width, bottom_rois, top_data);
 83 | 
 84 |         err = cudaGetLastError();
 85 |         if(cudaSuccess != err) {
 86 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
 87 |             exit( -1 );
 88 |         }
 89 | 
 90 |         return 1;
 91 |     }
 92 | 
 93 | 
    // ROIAlignBackward: scatters the gradient of each aligned output element
    // onto the four input elements that the forward pass interpolated from.
    //
    //   nthreads        total number of output-gradient elements
    //   top_diff        gradient w.r.t. the aligned output, read at offset
    //                   ((n * channels + c) * aligned_height + ph) * aligned_width + pw
    //   spatial_scale   factor applied to the ROI coordinates before sampling
    //   height, width   spatial size of one input feature plane
    //   channels        number of feature planes per image
    //   aligned_height,
    //   aligned_width   spatial size of the per-ROI output grid
    //   bottom_diff     gradient w.r.t. the input features; only accumulated
    //                   into here (presumably zero-initialised by the caller --
    //                   TODO confirm)
    //   bottom_rois     5 floats per ROI: [batch index, x1, y1, x2, y2]
    __global__ void ROIAlignBackward(const int nthreads, const float* top_diff, const float spatial_scale, const int height, const int width,
                                     const int channels, const int aligned_height, const int aligned_width, float* bottom_diff, const float* bottom_rois) {
        // Grid-stride loop (see CUDA_1D_KERNEL_LOOP): each thread handles
        // indices index, index + blockDim.x * gridDim.x, ... below nthreads.
        CUDA_1D_KERNEL_LOOP(index, nthreads) {

            // (n, c, ph, pw) is an element in the aligned output
            int pw = index % aligned_width;
            int ph = (index / aligned_width) % aligned_height;
            int c  = (index / aligned_width / aligned_height) % channels;
            int n  = index / aligned_width / aligned_height / channels;

            // Read ROI n and map its corners into feature-map coordinates.
            float roi_batch_ind = bottom_rois[n * 5 + 0];
            float roi_start_w = bottom_rois[n * 5 + 1] * spatial_scale;
            float roi_start_h = bottom_rois[n * 5 + 2] * spatial_scale;
            float roi_end_w = bottom_rois[n * 5 + 3] * spatial_scale;
            float roi_end_h = bottom_rois[n * 5 + 4] * spatial_scale;
            /* int roi_start_w = round(bottom_rois[1] * spatial_scale); */
            /* int roi_start_h = round(bottom_rois[2] * spatial_scale); */
            /* int roi_end_w = round(bottom_rois[3] * spatial_scale); */
            /* int roi_end_h = round(bottom_rois[4] * spatial_scale); */

            // Clamp the ROI extent to be non-negative (a malformed ROI with
            // end < start - 1 collapses to size 0, not 1x1).
            float roi_width = fmaxf(roi_end_w - roi_start_w + 1., 0.);
            float roi_height = fmaxf(roi_end_h - roi_start_h + 1., 0.);
            // Spacing between sample points; same grid as the forward kernel.
            // NOTE(review): the divisor is 0 when aligned_height or
            // aligned_width is 1 -- confirm callers never pass 1.
            float bin_size_h = roi_height / (aligned_height - 1.);
            float bin_size_w = roi_width / (aligned_width - 1.);

            // Continuous sampling position for this output cell.
            float h = (float)(ph) * bin_size_h + roi_start_h;
            float w = (float)(pw) * bin_size_w + roi_start_w;

            // Top-left integer neighbour, capped so that hstart + 1 and
            // wstart + 1 stay inside the plane.
            int hstart = fminf(floor(h), height - 2);
            int wstart = fminf(floor(w), width - 2);

            // Offset of the first element of the image this ROI belongs to.
            int img_start = roi_batch_ind * channels * height * width;

            // bilinear interpolation
            // Sample points outside the feature map contribute no gradient.
            if (!(h < 0 || h >= height || w < 0 || w >= width)) {
                float h_ratio = h - (float)(hstart);
                float w_ratio = w - (float)(wstart);
                // Flat offsets of the four surrounding input elements.
                int upleft = img_start + (c * height + hstart) * width + wstart;
                int upright = upleft + 1;
                int downleft = upleft + width;
                int downright = downleft + 1;

                // Different output elements can map to the same input
                // element, so the accumulation uses atomicAdd.
                atomicAdd(bottom_diff + upleft, top_diff[index] * (1. - h_ratio) * (1 - w_ratio));
                atomicAdd(bottom_diff + upright, top_diff[index] * (1. - h_ratio) * w_ratio);
                atomicAdd(bottom_diff + downleft, top_diff[index] * h_ratio * (1 - w_ratio));
                atomicAdd(bottom_diff + downright, top_diff[index] * h_ratio * w_ratio);
            }
        }
    }
144 | 
145 |     int ROIAlignBackwardLaucher(const float* top_diff, const float spatial_scale, const int batch_size, const int num_rois, const int height, const int width,
146 |                                 const int channels, const int aligned_height, const int aligned_width, const float* bottom_rois, float* bottom_diff, cudaStream_t stream) {
147 |         const int kThreadsPerBlock = 1024;
148 |         const int output_size = num_rois * aligned_height * aligned_width * channels;
149 |         cudaError_t err;
150 | 
151 |         ROIAlignBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>(
152 |           output_size, top_diff, spatial_scale, height, width, channels,
153 |           aligned_height, aligned_width, bottom_diff, bottom_rois);
154 | 
155 |         err = cudaGetLastError();
156 |         if(cudaSuccess != err) {
157 |             fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) );
158 |             exit( -1 );
159 |         }
160 | 
161 |         return 1;
162 |     }
163 | 
164 | 
165 | #ifdef __cplusplus
166 | }
167 | #endif
168 | 


--------------------------------------------------------------------------------
/models/faster_rcnn/test_module.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | # @Time    : 18-6-23 1:48
  3 | # @Author  : Xinge
  4 | 
  5 | 
  6 | import torch.nn as nn
  7 | import torch
  8 | # from torch.autograd import Variable
  9 | from common_net import *
 10 | import torch.nn.functional as F
 11 | 
 12 | class GAN_dis_AE(nn.Module):
 13 |     def __init__(self, params):
 14 |         super(GAN_dis_AE, self).__init__()
 15 |         ch = params['ch']  # 32
 16 |         input_dim_a = params['input_dim_a']  # 3
 17 | 
 18 |         n_layer = params['n_layer'] # 5
 19 |         self.model_A = self._make_net(ch, input_dim_a, n_layer - 1)  # for the first stage
 20 |         self.model_A.apply(gaussian_weights_init)
 21 |         self.model_B = self._make_net(ch, input_dim_a, n_layer - 1)  # for the first stage
 22 |         self.model_B.apply(gaussian_weights_init)
 23 | 
 24 | 
 25 | 
 26 |     def _make_net(self, ch, input_dim, n_layer):
 27 |         model = []
 28 |         model += [LeakyReLUConv2d(input_dim, ch, kernel_size=3, stride=2, padding=1)]  # 16
 29 |         tch = ch
 30 |         for i in range(0, n_layer):
 31 |             model += [LeakyReLUConv2d(tch, tch * 2, kernel_size=3, stride=2, padding=1)]  # 8
 32 |             tch *= 2
 33 |         model += [nn.Conv2d(tch, 1, kernel_size=1, stride=1, padding=0)]  # 1
 34 |         return nn.Sequential(*model)
 35 | 
 36 |     def forward(self, x_aa, x_bb):
 37 |         """
 38 |         :param x_bA: the concatenation of
 39 |         :param x_aB:
 40 |         :param rois_feature: (512 x 4096)
 41 |         :return:
 42 |         """
 43 |         # x_aa, x_bb = torch.split(x_A, x_A.size(0) // 2, 0)
 44 |         out_A = self.model_A(x_aa)
 45 |         out_A = out_A.view(out_A.size(0), -1)
 46 |         out_B = self.model_B(x_bb)
 47 |         out_B = out_B.view(out_B.size(0), -1)
 48 | 
 49 |         # out = torch.cat((out_A, out_B), 0)
 50 |         return out_A, out_B
 51 | 
 52 | 
 53 | 
 54 | class GAN_dis_AE_patch(nn.Module):
 55 |     def __init__(self):
 56 |         super(GAN_dis_AE_patch, self).__init__()
 57 |         # for source domain only
 58 |         model_A_patch = [ResDis_cluster(n_in=128, n_out=256, kernel_size=3, stride=2, padding=1, w=64, h=64)]
 59 |         self.model_A_patch = nn.Sequential(*model_A_patch)
 60 |         # self.model_A_patch.apply(gaussian_weights_init)
 61 | 
 62 |     def forward(self, rois_features):
 63 |         out_C = self.model_A_patch(rois_features)
 64 |         out_C = torch.sigmoid(out_C) # size(4, 512)
 65 |         return out_C
 66 | 
 67 | # class GAN_dis_AE_patch_tar(nn.Module):
 68 | #     def __init__(self):
 69 | #         super(GAN_dis_AE_patch_tar, self).__init__()
 70 | #         # for source domain only
 71 | #         model_A_patch = [ResDis_cluster(n_in=512, n_out=512, kernel_size=3, stride=2, padding=1, w=64, h=64)]
 72 | #         self.model_A_patch = nn.Sequential(*model_A_patch)
 73 | #         self.model_A_patch.apply(gaussian_weights_init)
 74 | #
 75 | #     def forward(self, rois_features):
 76 | #         out_C = self.model_A_patch(rois_features)
 77 | #         out_C = torch.sigmoid(out_C)
 78 | #         return out_C
 79 | 
 80 | class GAN_decoder_AE(nn.Module):
 81 |     def __init__(self, params):
 82 |         super(GAN_decoder_AE, self).__init__()
 83 |         input_dim_b = params['input_dim_b']
 84 |         ch = params['ch'] # 32
 85 |         # n_gen_shared_blk = params['n_gen_shared_blk']
 86 |         n_gen_res_blk    = params['n_gen_res_blk']   # 4
 87 |         n_gen_front_blk  = params['n_gen_front_blk'] # 3
 88 |         if 'res_dropout_ratio' in params.keys():
 89 |             res_dropout_ratio = params['res_dropout_ratio']
 90 |         else:
 91 |             res_dropout_ratio = 0
 92 | 
 93 |         # self.embedding1= nn.Linear(4096, 2048, bias=None)
 94 |         # self.embedding2 = nn.Linear(4096, 2048, bias=None)
 95 |         if 'neww' in params.keys():
 96 |             neww = params['neww']
 97 |         else:
 98 |             neww = 64
 99 | 
100 |         if 'newh' in params.keys():
101 |             newh = params['newh']
102 |         else:
103 |             newh = 64
104 | 
105 |         tch = ch
106 |         decB = []
107 |         decA = []
108 |         decB += [LinUnsRes_cluster(128, neww, newh)]
109 |         decA += [LinUnsRes_cluster(128, neww, newh)]
110 | 
111 |         for i in range(0, n_gen_res_blk):
112 |             decB += [INSResBlock(tch, tch, dropout=res_dropout_ratio)]
113 |             decA += [INSResBlock(tch, tch, dropout=res_dropout_ratio)]
114 |         for i in range(0, n_gen_front_blk-1):
115 |             decB += [LeakyReLUConvTranspose2d_2(tch, tch//2, kernel_size=3, stride=1, padding=1, output_padding=0)]
116 |             decA += [LeakyReLUConvTranspose2d_2(tch, tch//2, kernel_size=3, stride=1, padding=1, output_padding=0)]
117 |             tch = tch//2
118 |         # decB += [nn.Conv2d(tch, input_dim_b, kernel_size=1, stride=1, padding=0)]
119 |         decB += [nn.ConvTranspose2d(tch, input_dim_b, kernel_size=1, stride=1, padding=0)]
120 |         decA += [nn.ConvTranspose2d(tch, input_dim_b, kernel_size=1, stride=1, padding=0)]
121 |         decB += [nn.Tanh()]
122 |         decA += [nn.Tanh()]
123 | 
124 |         # decB += [nn.LeakyReLU(inplace=True)]
125 |         # self.dec_shared = nn.Sequential(*dec_shared)
126 |         self.decode_B = nn.Sequential(*decB)
127 |         self.decode_B.apply(gaussian_weights_init)
128 |         self.decode_A = nn.Sequential(*decA)
129 |         self.decode_A.apply(gaussian_weights_init)
130 | 
131 |     def forward(self, x_aa, x_bb):
132 |         # x_aa and x_bb is 512 x 4096 ==> 512 x 64 x 64
133 |         # out = self.dec_shared(x_A)
134 |         # x_aa, x_bb = torch.split(x_A, x_A.size(0) // 2, 0)
135 |         out1 = self.decode_A(x_aa)
136 |         out2 = self.decode_B(x_bb)
137 |         # out = torch.cat((out1, out2), 0)
138 |         return out1, out2
139 | 
class GAN_decoder_AE_de(nn.Module):
    """Twin decoders (``decode_A`` / ``decode_B``) using stride-2 transpose convs.

    ``params`` keys:
        input_dim_b: number of output channels of both decoders.
        ch: channel width entering the residual blocks.
        n_gen_res_blk: number of ``INSResBlock`` layers.
        n_gen_front_blk: ``n_gen_front_blk - 1`` stride-2 transpose-conv
            blocks follow, each halving the channel width.
        res_dropout_ratio (optional, default 0): dropout for the res blocks.
        neww / newh (optional, default 64): passed to ``LinUnsRes_cluster``.
    """

    def __init__(self, params):
        super(GAN_decoder_AE_de, self).__init__()
        input_dim_b = params['input_dim_b']
        ch = params['ch']
        n_gen_res_blk = params['n_gen_res_blk']
        n_gen_front_blk = params['n_gen_front_blk']
        res_dropout_ratio = params.get('res_dropout_ratio', 0)
        neww = params.get('neww', 64)
        newh = params.get('newh', 64)

        # Layers for B and A are created alternately (B first) so that the
        # order of random parameter initialisation stays fixed.
        width = ch
        stack_b = [LinUnsRes_cluster(128, neww, newh)]
        stack_a = [LinUnsRes_cluster(128, neww, newh)]

        for _ in range(n_gen_res_blk):
            stack_b.append(INSResBlock(width, width, dropout=res_dropout_ratio))
            stack_a.append(INSResBlock(width, width, dropout=res_dropout_ratio))

        for _ in range(n_gen_front_blk - 1):
            halved = width // 2
            stack_b.append(LeakyReLUConvTranspose2d(width, halved, kernel_size=3, stride=2, padding=1, output_padding=1))
            stack_a.append(LeakyReLUConvTranspose2d(width, halved, kernel_size=3, stride=2, padding=1, output_padding=1))
            width = halved

        # 1x1 projection to the output channel count, squashed by tanh.
        stack_b.append(nn.ConvTranspose2d(width, input_dim_b, kernel_size=1, stride=1, padding=0))
        stack_a.append(nn.ConvTranspose2d(width, input_dim_b, kernel_size=1, stride=1, padding=0))
        stack_b.append(nn.Tanh())
        stack_a.append(nn.Tanh())

        self.decode_B = nn.Sequential(*stack_b)
        self.decode_B.apply(gaussian_weights_init)
        self.decode_A = nn.Sequential(*stack_a)
        self.decode_A.apply(gaussian_weights_init)

    def forward(self, x_aa, x_bb):
        """Decode ``x_aa`` with ``decode_A`` and ``x_bb`` with ``decode_B``.

        Returns the pair ``(decode_A(x_aa), decode_B(x_bb))``.
        """
        decoded_a = self.decode_A(x_aa)
        decoded_b = self.decode_B(x_bb)
        return decoded_a, decoded_b
200 | 
201 | 
202 | 


--------------------------------------------------------------------------------
/utils/visualize_helper.py:
--------------------------------------------------------------------------------
  1 | #encoding:utf8
  2 | 
  3 | from utils import bbox_helper
  4 | try:
  5 |     from graphviz import Digraph
  6 | except Exception as e:
  7 |     print(e)
  8 | import torch
  9 | import numpy as np
 10 | import cv2
 11 | import os
 12 | 
 13 | classes = [
 14 |     '__background__',  # always index 0
 15 |     'aeroplane', 'bicycle', 'bird', 'boat',
 16 |     'bottle', 'bus', 'car', 'cat', 'chair',
 17 |     'cow', 'diningtable', 'dog', 'horse',
 18 |     'motorbike', 'person', 'pottedplant',
 19 |     'sheep', 'sofa', 'train', 'tvmonitor'
 20 | ]
 21 | 
 22 | def draw_bbox(img, bbox, color = (255,0,0)):
 23 |     box = np.array(bbox).astype(np.int32)
 24 |     return cv2.rectangle(img, tuple(box[0:2]), tuple(box[2:4]), color)
 25 | 
 26 | def draw_keypoint(img, keypoints, color = (255,0,0)):
 27 |     kpts = keypoints.reshape(-1, 2).astype(np.int32)
 28 |     for k in range(kpts.shape[0]):
 29 |         if k&1:
 30 |             cv2.circle(img, tuple(kpts[k]), 2, color, thickness=2) # left parts:blue
 31 |         else:
 32 |             cv2.circle(img, tuple(kpts[k]), 2, color[::-1], thickness=2) # right parts: red
 33 |     return img
 34 | def draw_mask(img, mask, thresh = 0.5):
 35 |     assert img.shape == mask.shape, 'img.shape:{} vs mask.shape'.format(img.shape, mask.shape)
 36 |     mask = (mask > thresh).astype(np.uint8) * 250
 37 |     img *= 0.5
 38 |     img += mask[..., np.newaxis] * 0.5
 39 |     return img
 40 | 
 41 | 
 42 | def vis_results(results_dir,image_info, bboxes, keypoints, masks, heatmap, class_names):
 43 |     from utils.debug_helper import debugger
 44 |     import logging
 45 |     logger = logging.getLogger('global')
 46 |     batch_size = len(image_info)
 47 |     if not os.path.exists(results_dir):
 48 |         os.makedirs(results_dir)
 49 |     for b_ix in range(batch_size):
 50 |         image_size = image_info[b_ix]
 51 |         keep_ix = np.where(bboxes[:, 0] == b_ix)[0]
 52 |         bbs = bboxes[keep_ix]
 53 |         kps = keypoints[keep_ix, :, :2] if keypoints else None
 54 |         msks = [masks[ix] for ix in keep_ix] if masks else None
 55 | 
 56 |         hmap = heatmap[keep_ix]
 57 |         filename = debugger.get_filename(b_ix).split('/')[-1].split('.')[0]
 58 |         for r_ix, B in enumerate(bbs):
 59 |             box_score, class_id = B[-2:]
 60 |             if box_score < 0.9:
 61 |                 continue
 62 | 
 63 |             image = debugger.get_image(b_ix).copy()
 64 |             x1, y1, x2, y2 = map(int, B[1:1+4])
 65 |             r_h = y2 - y1
 66 |             r_w = x2 - x1
 67 |             draw_bbox(image, B[1:1+4])
 68 |             category_name = class_names[int(class_id)]
 69 |             cv2.putText(image, 'category:{0}, score:{1}'.format(category_name,box_score), (100, 100), 2, 1, (0, 0, 255))
 70 |             logger.info('{0}/{1}_{2}.jpg'.format(results_dir, filename, r_ix))
 71 | 
 72 |             if kps:
 73 |                 draw_keypoint(image, kps[r_ix])
 74 |                 #for k in range(hmap.shape[1]):
 75 |                 #    hp = hmap[r_ix, k]
 76 |                 #    hp = cv2.resize(hp, (r_w, r_h)) * 250
 77 |                 #    hp[hp < 0] = 0
 78 |                 #    img = image.copy()
 79 |                 #    img[y1:y2, x1:x2, ...] *= 0.5
 80 |                 #    img[y1:y2, x1:x2, ...] += hp[..., np.newaxis] * 0.5
 81 |                 #    cv2.imwrite('{0}/{1}_{2}_{3}.jpg'.format(results_dir, filename, r_ix, k), img)
 82 |                 cv2.imwrite('{0}/{1}_{2}_keypoints.jpg'.format(results_dir, filename, r_ix), image)
 83 |                 hp = cv2.resize(np.max(hmap[r_ix], axis=0), (r_w, r_h)) * 100
 84 |                 hp[hp < 0] = 0
 85 |                 image[y1:y2, x1:x2, ...] *= 0.5
 86 |                 image[y1:y2, x1:x2, ...] += hp[..., np.newaxis] * 0.5
 87 |                 cv2.imwrite('{0}/{1}_{2}_heatmap.jpg'.format(results_dir, filename, r_ix), image)
 88 |             if msks:
 89 |                 draw_mask(image, msks[r_ix])
 90 |                 cv2.imwrite('{0}/{1}_{2}_mask.jpg'.format(results_dir, filename, r_ix), image)
 91 | 
 92 | def vis_detections(img, bboxes, gts, img_name, score_thresh):
 93 |     vis_dir = 'visualize'
 94 |     if not os.path.exists(vis_dir):
 95 |         os.makedirs(vis_dir)
 96 |     img_name = img_name.rsplit('/',1)[-1].split('.')[0]
 97 |     overlaps = bbox_helper.bbox_iou_overlaps(bboxes, gts)
 98 |     max_overlaps = overlaps.max(axis=1)
 99 |     for box_ix in range(bboxes.shape[0]):
100 |         box = bboxes[box_ix, :4].astype(np.int32)
101 |         score = bboxes[box_ix, 4]
102 |         if score < score_thresh:
103 |             continue
104 |         cls = int(bboxes[box_ix, 5])
105 |         img_cpy = img.copy()
106 |         ov = max_overlaps[box_ix]
107 |         text = 'label:%s, iou:%.3f, score:%.3f' % (classes[cls], ov, score)
108 |         cv2.putText(img_cpy, text, (30, 30), 2, 0.8, (0, 0, 255))
109 |         vis = cv2.rectangle(img_cpy, tuple(box[0:2]), tuple(box[2:4]), (255, 0, 0))
110 |         cv2.imwrite('%s/%s_%d.jpg' %(vis_dir, img_name, box_ix), vis)
111 | 
def vis_batch(input, output_dir, prefix):
    """Dump every ground-truth box of a batch, with its keypoints, as a JPEG.

    ``input`` is indexed positionally: ``[0]`` images (stored through the
    debugger when it is a tensor), ``[1]`` image info, ``[2]`` ground-truth
    boxes, ``[3]`` ignore regions, ``[4]`` keypoints, ``[5]`` masks. Only the
    boxes and keypoints are drawn. Output files are named
    ``<prefix>_<batch index>_<box index>.jpg`` inside ``output_dir``.
    """
    from utils.debug_helper import debugger
    import logging
    logger = logging.getLogger('global')

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if torch.is_tensor(input[0]):
        debugger.store_tensor_as_image(input[0])

    # All six slots are read even though only boxes and keypoints are drawn.
    image_info, gt_boxes, ignores, kpts, masks = input[1], input[2], input[3], input[4], input[5]

    for b in range(gt_boxes.shape[0]):
        base_image = debugger.get_image(b)
        boxes_b = gt_boxes[b]
        kpts_b = kpts[b]
        for ix in range(boxes_b.shape[0]):
            # Draw each box on its own copy so the annotations do not pile up.
            canvas = base_image.copy()
            draw_bbox(canvas, boxes_b[ix])
            draw_keypoint(canvas, kpts_b[ix])
            out_path = os.path.join(output_dir, '{0}_{1}_{2}.jpg'.format(prefix, b, ix))
            cv2.imwrite(out_path, canvas)
143 |         #for ix in range(igs.shape[0]):
144 |         #    img_cpy = imgs[b].copy()
145 |         #    draw_bbox(img_cpy, igs[ix], color=(0,0,255))
146 |         #    filename = os.path.join(test_dir, '{0}_{1}_{2}.jpg'.format(prefix, b, ix + n))
147 |         #    cv2.imwrite(filename, img_cpy)
148 | 
def make_dot(var, params=None):
    """Build a Graphviz ``Digraph`` of the autograd graph that produced ``var``.

    Light-blue nodes are graph nodes exposing a ``variable`` attribute, orange
    nodes are tensors saved for backward, and every other node is labelled
    with the type name of its autograd function.

    Args:
        var: output tensor; traversal starts from ``var.grad_fn``.
        params: optional dict of (name, tensor) used to label the light-blue
            nodes. When given, every such node must be present in it.

    Returns:
        The populated ``Digraph`` (SVG output format).
    """
    if params is not None:
        param_map = {id(tensor): name for name, tensor in params.items()}

    node_attr = {
        'style': 'filled',
        'shape': 'box',
        'align': 'left',
        'fontsize': '12',
        'ranksep': '0.1',
        'height': '0.2',
    }
    dot = Digraph(node_attr=node_attr, graph_attr=dict(size="20,20"), format='svg')
    seen = set()

    def size_to_str(size):
        # Render a size as "(d0, d1, ...)".
        return '(' + ', '.join('%d' % dim for dim in size) + ')'

    def add_nodes(node):
        # Depth-first walk; each node is emitted once.
        if node in seen:
            return
        node_id = str(id(node))
        if torch.is_tensor(node):
            dot.node(node_id, size_to_str(node.size()), fillcolor='orange')
        elif hasattr(node, 'variable'):
            leaf = node.variable
            name = param_map[id(leaf)] if params is not None else ''
            label = '%s\n %s' % (name, size_to_str(leaf.size()))
            dot.node(node_id, label, fillcolor='lightblue')
        else:
            dot.node(node_id, str(type(node).__name__))
        seen.add(node)
        if hasattr(node, 'next_functions'):
            for entry in node.next_functions:
                parent = entry[0]
                if parent is not None:
                    dot.edge(str(id(parent)), node_id)
                    add_nodes(parent)
        if hasattr(node, 'saved_tensors'):
            for saved in node.saved_tensors:
                dot.edge(str(id(saved)), node_id)
                add_nodes(saved)

    add_nodes(var.grad_fn)
    return dot
199 | 
def visualize(var, filename):
    """Render the autograd graph of ``var`` to ``filename`` with Graphviz.

    The previous body called ``make_dot()`` with no arguments, which always
    raised ``TypeError`` and ignored both parameters.

    Args:
        var: output tensor whose autograd graph is drawn.
        filename: target path handed to ``Digraph.render``.

    Returns:
        Whatever ``Digraph.render`` returns (the path of the rendered file).
    """
    dot = make_dot(var)
    return dot.render(filename)
202 | 


--------------------------------------------------------------------------------