├── .DS_Store ├── _init_paths.py ├── cfgs └── snet.yml ├── demo.py ├── lib ├── __init__.py ├── build │ ├── lib.linux-x86_64-3.6 │ │ └── model │ │ │ └── _C.cpython-36m-x86_64-linux-gnu.so │ └── temp.linux-x86_64-3.6 │ │ └── mnt │ │ └── data1 │ │ └── yanghuiyu │ │ └── project │ │ └── object_detect │ │ └── Thundernet_new │ │ └── lib │ │ └── model │ │ └── csrc │ │ ├── cpu │ │ ├── ROIAlign_cpu.o │ │ └── nms_cpu.o │ │ ├── cuda │ │ ├── ROIAlign_cuda.o │ │ ├── ROIPool_cuda.o │ │ └── nms.o │ │ └── vision.o ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imagenet.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── pascal_voc_rbg.py │ ├── tools │ │ └── mcg_munge.py │ ├── vg.py │ ├── vg_eval.py │ └── voc_eval.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── model │ ├── _C.cpython-36m-x86_64-linux-gnu.so │ ├── build │ │ └── lib.linux-x86_64-3.6 │ │ │ └── model │ │ │ └── _C.cpython-36m-x86_64-linux-gnu.so │ ├── csrc │ │ ├── ROIAlign.h │ │ ├── ROIPool.h │ │ ├── cpu │ │ │ ├── ROIAlign_cpu.cpp │ │ │ ├── nms_cpu.cpp │ │ │ └── vision.h │ │ ├── cuda │ │ │ ├── ROIAlign_cuda.cu │ │ │ ├── ROIPool_cuda.cu │ │ │ ├── nms.cu │ │ │ └── vision.h │ │ ├── nms.h │ │ └── vision.cpp │ ├── faster_rcnn │ │ ├── Snet.py │ │ ├── __init__.py │ │ ├── faster_rcnn.py │ │ └── modules.py │ ├── loss │ │ ├── __init__.py │ │ └── losses.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── centernet_rpn.py │ │ ├── generate_anchors.py │ │ ├── proposal_layer.py │ │ ├── proposal_target_layer_cascade.py │ │ └── rpn.py │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── cente_decode.py │ │ ├── config.py │ │ ├── layer_utils.py │ │ ├── logger.py │ │ └── net_utils.py ├── psroialign │ ├── PSROIAlign │ │ ├── .gitattributes │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── build.sh │ │ ├── build │ │ │ ├── lib.linux-x86_64-3.6 │ │ │ │ └── model │ │ │ │ │ └── _C.cpython-36m-x86_64-linux-gnu.so │ │ │ └── temp.linux-x86_64-3.6 │ │ │ │ └── mnt │ │ │ │ └── data1 │ │ │ │ └── yanghuiyu │ │ │ │ └── myself │ │ │ │ └── object_detect │ │ │ │ └── light_head_rcnn │ │ │ │ └── psroialign │ │ │ │ └── PSROIAlign │ │ │ │ └── model │ │ │ │ └── csrc │ │ │ │ ├── cuda │ │ │ │ ├── PSROIAlign_cuda.o │ │ │ │ └── PSROIPool_cuda.o │ │ │ │ └── vision.o │ │ ├── model │ │ │ ├── _C.cpython-36m-x86_64-linux-gnu.so │ │ │ ├── __init__.py │ │ │ ├── csrc │ │ │ │ ├── .DS_Store │ │ │ │ ├── PSROIAlign.h │ │ │ │ ├── PSROIPool.h │ │ │ │ ├── cuda │ │ │ │ │ ├── PSROIAlign_cuda.cpp │ │ │ │ │ ├── PSROIAlign_cuda.cu │ │ │ │ │ ├── PSROIPool_cuda.cu │ │ │ │ │ └── vision.h │ │ │ │ └── vision.cpp │ │ │ ├── example.py │ │ │ └── roi_layers │ │ │ │ ├── __init__.py │ │ │ │ ├── ps_roi_align.py │ │ │ │ └── ps_roi_pool.py │ │ └── setup.py │ ├── __init__.py │ ├── pollers.py │ └── psroialign.py ├── roi_data_layer │ ├── __init__.py │ ├── augmentation.py │ ├── minibatch.py │ ├── roibatchLoader.py │ ├── roidb.py │ └── utils.py └── setup.py ├── onnx ├── __init__.py ├── onnx_infer.py ├── rcnn_head_to_onnx.py ├── rpn_to_onnx.py └── utils.py ├── readme.md ├── script ├── pre.sh ├── train_146.sh ├── train_49.sh └── train_535.sh ├── test_net.py ├── trainval_net.py ├── utils.py ├── voc_images ├── .DS_Store └── input │ ├── 2008_000005.jpg │ ├── 2008_000038.jpg │ └── 2008_000175.jpg └── weights ├── snet_146.tar └── thundernet146_voc_map67.pth /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/.DS_Store -------------------------------------------------------------------------------- /_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /cfgs/snet.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: snet 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_POST_NMS_TOP_N: 2000 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | BG_THRESH_HI: 0.3 11 | DISPLAY: 20 12 | BATCH_SIZE: 128 13 | WEIGHT_DECAY: 0.0001 14 | DOUBLE_BIAS: False 15 | SNAPSHOT_PREFIX: Snet 16 | LEARNING_RATE: 0.01 17 | SIZE: [240,320,480] 18 | TEST: 19 | HAS_RPN: True 20 | SIZE: 320 21 | RPN_POST_NMS_TOP_N: 200 22 | POOLING_SIZE: 7 23 | POOLING_MODE: align 24 | CROP_RESIZE_WITH_MAX_POOL: False 25 | FEAT_STRIDE: 16 26 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/__init__.py -------------------------------------------------------------------------------- /lib/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/ROIAlign_cpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/ROIAlign_cpu.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/nms_cpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/nms_cpu.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIAlign_cuda.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIAlign_cuda.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIPool_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIPool_cuda.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/nms.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/vision.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/vision.o -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 
| 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE 
for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | 17 | 18 | for year in ['2007', '2012']: 19 | for split in ['train', 'val', 'trainval', 'test']: 20 | name = 'voc_{}_{}'.format(year, split) 21 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 22 | 23 | # Set up coco_2017_ 24 | 25 | for year in ['2017']: 26 | for split in ['train', 'val']: 27 | name = 'coco_{}_{}'.format(year, split) 28 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 29 | 30 | # Set up vg_ 31 | # for version in ['1600-400-20']: 32 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']: 33 | # name = 'vg_{}_{}'.format(version,split) 34 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 35 | # 36 | # for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']: 37 | # for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']: 38 | # name = 'vg_{}_{}'.format(version,split) 39 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 40 | # 41 | # # set up image net. 42 | # for split in ['train', 'val', 'val1', 'val2', 'test']: 43 | # name = 'imagenet_{}'.format(split) 44 | # devkit_path = 'data/imagenet/ILSVRC/devkit' 45 | # data_path = 'data/imagenet/ILSVRC' 46 | # __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path)) 47 | 48 | def get_imdb(name): 49 | """Get an imdb (image database) by name.""" 50 | # if name not in __sets: 51 | # raise KeyError('Unknown dataset: {}'.format(name)) 52 | return __sets[name]() 53 | 54 | 55 | def list_imdbs(): 56 | """List all registered imdbs.""" 57 | return list(__sets.keys()) 58 | -------------------------------------------------------------------------------- /lib/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import datasets 10 | import datasets.imagenet 11 | import os, sys 12 | from datasets.imdb import imdb 13 | import xml.dom.minidom as minidom 14 | import numpy as np 15 | import scipy.sparse 16 | import scipy.io as sio 17 | import subprocess 18 | import pdb 19 | import pickle 20 | try: 21 | xrange # Python 2 22 | except NameError: 23 | xrange = range # Python 3 24 | 25 | 26 | class imagenet(imdb): 27 | def __init__(self, image_set, devkit_path, data_path): 28 | imdb.__init__(self, image_set) 29 | self._image_set = image_set 30 | self._devkit_path = devkit_path 31 | self._data_path = data_path 32 | synsets_image = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_det.mat')) 33 | synsets_video = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_vid.mat')) 34 | self._classes_image = ('__background__',) 35 | self._wnid_image = (0,) 36 | 37 | self._classes = ('__background__',) 38 | self._wnid = (0,) 39 | 40 | for i in 
xrange(200): 41 | self._classes_image = self._classes_image + (synsets_image['synsets'][0][i][2][0],) 42 | self._wnid_image = self._wnid_image + (synsets_image['synsets'][0][i][1][0],) 43 | 44 | for i in xrange(30): 45 | self._classes = self._classes + (synsets_video['synsets'][0][i][2][0],) 46 | self._wnid = self._wnid + (synsets_video['synsets'][0][i][1][0],) 47 | 48 | self._wnid_to_ind_image = dict(zip(self._wnid_image, xrange(201))) 49 | self._class_to_ind_image = dict(zip(self._classes_image, xrange(201))) 50 | 51 | self._wnid_to_ind = dict(zip(self._wnid, xrange(31))) 52 | self._class_to_ind = dict(zip(self._classes, xrange(31))) 53 | 54 | #check for valid intersection between video and image classes 55 | self._valid_image_flag = [0]*201 56 | 57 | for i in range(1,201): 58 | if self._wnid_image[i] in self._wnid_to_ind: 59 | self._valid_image_flag[i] = 1 60 | 61 | self._image_ext = ['.JPEG'] 62 | 63 | self._image_index = self._load_image_set_index() 64 | # Default to roidb handler 65 | self._roidb_handler = self.gt_roidb 66 | 67 | # Specific config options 68 | self.config = {'cleanup' : True, 69 | 'use_salt' : True, 70 | 'top_k' : 2000} 71 | 72 | assert os.path.exists(self._devkit_path), 'Devkit path does not exist: {}'.format(self._devkit_path) 73 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 74 | 75 | def image_path_at(self, i): 76 | """ 77 | Return the absolute path to image i in the image sequence. 78 | """ 79 | return self.image_path_from_index(self._image_index[i]) 80 | 81 | def image_path_from_index(self, index): 82 | """ 83 | Construct an image path from the image's "index" identifier. 84 | """ 85 | image_path = os.path.join(self._data_path, 'Data', self._image_set, index + self._image_ext[0]) 86 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 87 | return image_path 88 | 89 | def _load_image_set_index(self): 90 | """ 91 | Load the indexes listed in this dataset's image set file. 
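        For the 'train' split this first looks for a cached list in
        ImageSets/trainr.txt; if that file is missing it builds a roughly
        class-balanced list (2000 entries per class, repeating indices with
        wrap-around when a class has fewer images) and writes the cache.
        Every other split is read directly from ImageSets/val.txt, as in the
        implementation below.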
92 | """ 93 | # Example path to image set file: 94 | # self._data_path + /ImageSets/val.txt 95 | 96 | if self._image_set == 'train': 97 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'trainr.txt') 98 | image_index = [] 99 | if os.path.exists(image_set_file): 100 | f = open(image_set_file, 'r') 101 | data = f.read().split() 102 | for lines in data: 103 | if lines != '': 104 | image_index.append(lines) 105 | f.close() 106 | return image_index 107 | 108 | for i in range(1,200): 109 | print(i) 110 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'DET', 'train_' + str(i) + '.txt') 111 | with open(image_set_file) as f: 112 | tmp_index = [x.strip() for x in f.readlines()] 113 | vtmp_index = [] 114 | for line in tmp_index: 115 | line = line.split(' ') 116 | image_list = os.popen('ls ' + self._data_path + '/Data/DET/train/' + line[0] + '/*.JPEG').read().split() 117 | tmp_list = [] 118 | for imgs in image_list: 119 | tmp_list.append(imgs[:-5]) 120 | vtmp_index = vtmp_index + tmp_list 121 | 122 | num_lines = len(vtmp_index) 123 | ids = np.random.permutation(num_lines) 124 | count = 0 125 | while count < 2000: 126 | image_index.append(vtmp_index[ids[count % num_lines]]) 127 | count = count + 1 128 | 129 | for i in range(1,201): 130 | if self._valid_image_flag[i] == 1: 131 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'train_pos_' + str(i) + '.txt') 132 | with open(image_set_file) as f: 133 | tmp_index = [x.strip() for x in f.readlines()] 134 | num_lines = len(tmp_index) 135 | ids = np.random.permutation(num_lines) 136 | count = 0 137 | while count < 2000: 138 | image_index.append(tmp_index[ids[count % num_lines]]) 139 | count = count + 1 140 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'trainr.txt') 141 | f = open(image_set_file, 'w') 142 | for lines in image_index: 143 | f.write(lines + '\n') 144 | f.close() 145 | else: 146 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'val.txt') 147 | with open(image_set_file) as f: 148 | image_index = [x.strip() for x in f.readlines()] 149 | return image_index 150 | 151 | def gt_roidb(self): 152 | """ 153 | Return the database of ground-truth regions of interest. 154 | This function loads/saves from/to a cache file to speed up future calls. 155 | """ 156 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 157 | if os.path.exists(cache_file): 158 | with open(cache_file, 'rb') as fid: 159 | roidb = pickle.load(fid) 160 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 161 | return roidb 162 | 163 | gt_roidb = [self._load_imagenet_annotation(index) 164 | for index in self.image_index] 165 | with open(cache_file, 'wb') as fid: 166 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 167 | print('wrote gt roidb to {}'.format(cache_file)) 168 | 169 | return gt_roidb 170 | 171 | 172 | def _load_imagenet_annotation(self, index): 173 | """ 174 | Load image and bounding boxes info from txt files of imagenet. 
175 | """ 176 | filename = os.path.join(self._data_path, 'Annotations', self._image_set, index + '.xml') 177 | 178 | # print 'Loading: {}'.format(filename) 179 | def get_data_from_tag(node, tag): 180 | return node.getElementsByTagName(tag)[0].childNodes[0].data 181 | 182 | with open(filename) as f: 183 | data = minidom.parseString(f.read()) 184 | 185 | objs = data.getElementsByTagName('object') 186 | num_objs = len(objs) 187 | 188 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 189 | gt_classes = np.zeros((num_objs), dtype=np.int32) 190 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 191 | 192 | # Load object bounding boxes into a data frame. 193 | for ix, obj in enumerate(objs): 194 | x1 = float(get_data_from_tag(obj, 'xmin')) 195 | y1 = float(get_data_from_tag(obj, 'ymin')) 196 | x2 = float(get_data_from_tag(obj, 'xmax')) 197 | y2 = float(get_data_from_tag(obj, 'ymax')) 198 | cls = self._wnid_to_ind[ 199 | str(get_data_from_tag(obj, "name")).lower().strip()] 200 | boxes[ix, :] = [x1, y1, x2, y2] 201 | gt_classes[ix] = cls 202 | overlaps[ix, cls] = 1.0 203 | 204 | overlaps = scipy.sparse.csr_matrix(overlaps) 205 | 206 | return {'boxes' : boxes, 207 | 'gt_classes': gt_classes, 208 | 'gt_overlaps' : overlaps, 209 | 'flipped' : False} 210 | 211 | if __name__ == '__main__': 212 | d = datasets.imagenet('val', '') 213 | res = d.roidb 214 | from IPython import embed; embed() 215 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /lib/datasets/vg_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Fast/er R-CNN 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Bharath Hariharan 6 | # -------------------------------------------------------- 7 | 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import numpy as np 11 | from .voc_eval import voc_ap 12 | 13 | def vg_eval( detpath, 14 | gt_roidb, 15 | image_index, 16 | classindex, 17 | ovthresh=0.5, 18 | use_07_metric=False, 19 | eval_attributes=False): 20 | """rec, prec, ap, sorted_scores, npos = voc_eval( 21 | detpath, 22 | gt_roidb, 23 | image_index, 24 | classindex, 25 | [ovthresh], 26 | [use_07_metric]) 27 | 28 | Top level function that does the Visual Genome evaluation. 29 | 30 | detpath: Path to detections 31 | gt_roidb: List of ground truth structs. 32 | image_index: List of image ids. 
33 | classindex: Category index 34 | [ovthresh]: Overlap threshold (default = 0.5) 35 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 36 | (default False) 37 | """ 38 | # extract gt objects for this class 39 | class_recs = {} 40 | npos = 0 41 | for item,imagename in zip(gt_roidb,image_index): 42 | if eval_attributes: 43 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :] 44 | else: 45 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :] 46 | difficult = np.zeros((bbox.shape[0],)).astype(np.bool) 47 | det = [False] * bbox.shape[0] 48 | npos = npos + sum(~difficult) 49 | class_recs[str(imagename)] = {'bbox': bbox, 50 | 'difficult': difficult, 51 | 'det': det} 52 | if npos == 0: 53 | # No ground truth examples 54 | return 0,0,0,0,npos 55 | 56 | # read dets 57 | with open(detpath, 'r') as f: 58 | lines = f.readlines() 59 | if len(lines) == 0: 60 | # No detection examples 61 | return 0,0,0,0,npos 62 | 63 | splitlines = [x.strip().split(' ') for x in lines] 64 | image_ids = [x[0] for x in splitlines] 65 | confidence = np.array([float(x[1]) for x in splitlines]) 66 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 67 | 68 | # sort by confidence 69 | sorted_ind = np.argsort(-confidence) 70 | sorted_scores = -np.sort(-confidence) 71 | BB = BB[sorted_ind, :] 72 | image_ids = [image_ids[x] for x in sorted_ind] 73 | 74 | # go down dets and mark TPs and FPs 75 | nd = len(image_ids) 76 | tp = np.zeros(nd) 77 | fp = np.zeros(nd) 78 | for d in range(nd): 79 | R = class_recs[image_ids[d]] 80 | bb = BB[d, :].astype(float) 81 | ovmax = -np.inf 82 | BBGT = R['bbox'].astype(float) 83 | 84 | if BBGT.size > 0: 85 | # compute overlaps 86 | # intersection 87 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 88 | iymin = np.maximum(BBGT[:, 1], bb[1]) 89 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 90 | iymax = np.minimum(BBGT[:, 3], bb[3]) 91 | iw = np.maximum(ixmax - ixmin + 1., 0.) 92 | ih = np.maximum(iymax - iymin + 1., 0.) 93 | inters = iw * ih 94 | 95 | # union 96 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 97 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 98 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 99 | 100 | overlaps = inters / uni 101 | ovmax = np.max(overlaps) 102 | jmax = np.argmax(overlaps) 103 | 104 | if ovmax > ovthresh: 105 | if not R['difficult'][jmax]: 106 | if not R['det'][jmax]: 107 | tp[d] = 1. 108 | R['det'][jmax] = 1 109 | else: 110 | fp[d] = 1. 111 | else: 112 | fp[d] = 1. 
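    # Note: the loop above is the standard greedy matching used in
    # PASCAL-style evaluation -- detections are visited in descending
    # confidence order, and a detection is a true positive only if it overlaps
    # a not-yet-matched ground-truth box by more than ovthresh; any further
    # detection on the same ground truth (or a low-overlap one) is a false
    # positive.
    # Tiny illustrative example of the cumulative step below: with
    # tp = [1, 0, 1], fp = [0, 1, 0] and npos = 2, np.cumsum gives
    # tp = [1, 1, 2] and fp = [0, 1, 1], so rec = [0.5, 0.5, 1.0] and
    # prec = [1.0, 0.5, 0.667].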
113 | 114 | # compute precision recall 115 | fp = np.cumsum(fp) 116 | tp = np.cumsum(tp) 117 | rec = tp / float(npos) 118 | # avoid divide by zero in case the first detection matches a difficult 119 | # ground truth 120 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 121 | ap = voc_ap(rec, prec, use_07_metric) 122 | 123 | return rec, prec, ap, sorted_scores, npos 124 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | 69 | def voc_eval(detpath, 70 | annopath, 71 | imagesetfile, 72 | classname, 73 | cachedir, 74 | ovthresh=0.5, 75 | use_07_metric=False): 76 | """rec, prec, ap = voc_eval(detpath, 77 | annopath, 78 | imagesetfile, 79 | classname, 80 | [ovthresh], 81 | [use_07_metric]) 82 | 83 | Top level function that does the PASCAL VOC evaluation. 84 | 85 | detpath: Path to detections 86 | detpath.format(classname) should produce the detection results file. 87 | annopath: Path to annotations 88 | annopath.format(imagename) should be the xml annotations file. 89 | imagesetfile: Text file containing the list of images, one image per line. 
90 | classname: Category name (duh) 91 | cachedir: Directory for caching the annotations 92 | [ovthresh]: Overlap threshold (default = 0.5) 93 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 94 | (default False) 95 | """ 96 | # assumes detections are in detpath.format(classname) 97 | # assumes annotations are in annopath.format(imagename) 98 | # assumes imagesetfile is a text file with each line an image name 99 | # cachedir caches the annotations in a pickle file 100 | 101 | # first load gt 102 | if not os.path.isdir(cachedir): 103 | os.mkdir(cachedir) 104 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile) 105 | # read list of images 106 | with open(imagesetfile, 'r') as f: 107 | lines = f.readlines() 108 | imagenames = [x.strip() for x in lines] 109 | 110 | if not os.path.isfile(cachefile): 111 | # load annotations 112 | recs = {} 113 | for i, imagename in enumerate(imagenames): 114 | recs[imagename] = parse_rec(annopath.format(imagename)) 115 | if i % 100 == 0: 116 | print('Reading annotation for {:d}/{:d}'.format( 117 | i + 1, len(imagenames))) 118 | # save 119 | print('Saving cached annotations to {:s}'.format(cachefile)) 120 | with open(cachefile, 'wb') as f: 121 | pickle.dump(recs, f) 122 | else: 123 | # load 124 | with open(cachefile, 'rb') as f: 125 | try: 126 | recs = pickle.load(f) 127 | except: 128 | recs = pickle.load(f, encoding='bytes') 129 | 130 | # extract gt objects for this class 131 | class_recs = {} 132 | npos = 0 133 | for imagename in imagenames: 134 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 135 | bbox = np.array([x['bbox'] for x in R]) 136 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 137 | det = [False] * len(R) 138 | npos = npos + sum(~difficult) 139 | class_recs[imagename] = {'bbox': bbox, 140 | 'difficult': difficult, 141 | 'det': det} 142 | 143 | # read dets 144 | detfile = detpath.format(classname) 145 | with open(detfile, 'r') as f: 146 | lines = f.readlines() 147 | 148 | splitlines = [x.strip().split(' ') for x in lines] 149 | image_ids = [x[0] for x in splitlines] 150 | confidence = np.array([float(x[1]) for x in splitlines]) 151 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 152 | 153 | nd = len(image_ids) 154 | tp = np.zeros(nd) 155 | fp = np.zeros(nd) 156 | 157 | if BB.shape[0] > 0: 158 | # sort by confidence 159 | sorted_ind = np.argsort(-confidence) 160 | sorted_scores = np.sort(-confidence) 161 | BB = BB[sorted_ind, :] 162 | image_ids = [image_ids[x] for x in sorted_ind] 163 | 164 | # go down dets and mark TPs and FPs 165 | for d in range(nd): 166 | R = class_recs[image_ids[d]] 167 | bb = BB[d, :].astype(float) 168 | ovmax = -np.inf 169 | BBGT = R['bbox'].astype(float) 170 | 171 | if BBGT.size > 0: 172 | # compute overlaps 173 | # intersection 174 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 175 | iymin = np.maximum(BBGT[:, 1], bb[1]) 176 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 177 | iymax = np.minimum(BBGT[:, 3], bb[3]) 178 | iw = np.maximum(ixmax - ixmin + 1., 0.) 179 | ih = np.maximum(iymax - iymin + 1., 0.) 180 | inters = iw * ih 181 | 182 | # union 183 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 184 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 185 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 186 | 187 | overlaps = inters / uni 188 | ovmax = np.max(overlaps) 189 | jmax = np.argmax(overlaps) 190 | 191 | if ovmax > ovthresh: 192 | if not R['difficult'][jmax]: 193 | if not R['det'][jmax]: 194 | tp[d] = 1. 
195 | R['det'][jmax] = 1 196 | else: 197 | fp[d] = 1. 198 | else: 199 | fp[d] = 1. 200 | 201 | # compute precision recall 202 | fp = np.cumsum(fp) 203 | tp = np.cumsum(tp) 204 | rec = tp / float(npos) 205 | # avoid divide by zero in case the first detection matches a difficult 206 | # ground truth 207 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 208 | ap = voc_ap(rec, prec, use_07_metric) 209 | 210 | return rec, prec, ap 211 | -------------------------------------------------------------------------------- /lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/external/__init__.py -------------------------------------------------------------------------------- /lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /lib/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/model/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/model/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /lib/model/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /lib/model/csrc/cpu/ROIAlign_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "cpu/vision.h" 3 | 4 | // implementation taken from Caffe2 5 | template 6 | struct PreCalc { 7 | int pos1; 8 | int pos2; 9 | int pos3; 10 | int pos4; 11 | T w1; 12 | T w2; 13 | T w3; 14 | T w4; 15 | }; 16 | 17 | template 18 | void pre_calc_for_bilinear_interpolate( 19 | const int height, 20 | const int width, 21 | const int pooled_height, 22 | const int pooled_width, 23 | const int iy_upper, 24 | const int ix_upper, 25 | T roi_start_h, 26 | T roi_start_w, 27 | T bin_size_h, 28 | T bin_size_w, 29 | int roi_bin_grid_h, 30 | int roi_bin_grid_w, 31 | std::vector>& pre_calc) { 32 | int pre_calc_index = 0; 33 | for (int ph = 0; ph < pooled_height; ph++) { 34 | for (int pw = 0; pw < pooled_width; pw++) { 35 | for (int iy = 0; iy < iy_upper; iy++) { 36 | const T yy = roi_start_h + ph * bin_size_h + 37 | static_cast(iy + .5f) * bin_size_h / 38 | static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 39 | for (int ix = 0; ix < ix_upper; ix++) { 40 | const T xx = roi_start_w + pw * bin_size_w + 41 | static_cast(ix + .5f) * bin_size_w / 42 | static_cast(roi_bin_grid_w); 43 | 44 | T x = xx; 45 | T y = yy; 46 | // deal with: inverse elements are out of feature map boundary 47 | if (y < -1.0 || y > height || x < -1.0 || x > width) { 48 | // empty 49 | PreCalc pc; 50 | pc.pos1 = 0; 51 | pc.pos2 = 0; 52 | pc.pos3 = 0; 53 | pc.pos4 = 0; 54 | pc.w1 = 0; 55 | pc.w2 = 0; 56 | pc.w3 = 0; 57 | pc.w4 = 0; 58 | pre_calc[pre_calc_index] = pc; 59 | pre_calc_index += 1; 60 | continue; 61 | } 62 | 63 | if (y <= 0) { 64 | y = 0; 65 | } 66 | if (x <= 0) { 67 | x = 0; 68 | } 69 | 70 | int y_low = (int)y; 71 | int x_low = (int)x; 72 | int y_high; 73 | int x_high; 74 | 75 | if (y_low >= height - 1) { 76 | y_high = y_low = height - 1; 77 | y = (T)y_low; 78 | } else { 79 | y_high = y_low + 1; 80 | } 81 | 82 | if (x_low >= width - 1) { 83 | x_high = x_low = width - 1; 84 | x = (T)x_low; 85 | } else { 86 | x_high = x_low + 1; 87 | } 88 | 89 | T ly = y - y_low; 90 | T lx = x - x_low; 91 | T hy = 1. - ly, hx = 1. 
- lx; 92 | T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; 93 | 94 | // save weights and indeces 95 | PreCalc pc; 96 | pc.pos1 = y_low * width + x_low; 97 | pc.pos2 = y_low * width + x_high; 98 | pc.pos3 = y_high * width + x_low; 99 | pc.pos4 = y_high * width + x_high; 100 | pc.w1 = w1; 101 | pc.w2 = w2; 102 | pc.w3 = w3; 103 | pc.w4 = w4; 104 | pre_calc[pre_calc_index] = pc; 105 | 106 | pre_calc_index += 1; 107 | } 108 | } 109 | } 110 | } 111 | } 112 | 113 | template 114 | void ROIAlignForward_cpu_kernel( 115 | const int nthreads, 116 | const T* bottom_data, 117 | const T& spatial_scale, 118 | const int channels, 119 | const int height, 120 | const int width, 121 | const int pooled_height, 122 | const int pooled_width, 123 | const int sampling_ratio, 124 | const T* bottom_rois, 125 | //int roi_cols, 126 | T* top_data) { 127 | //AT_ASSERT(roi_cols == 4 || roi_cols == 5); 128 | int roi_cols = 5; 129 | 130 | int n_rois = nthreads / channels / pooled_width / pooled_height; 131 | // (n, c, ph, pw) is an element in the pooled output 132 | // can be parallelized using omp 133 | // #pragma omp parallel for num_threads(32) 134 | for (int n = 0; n < n_rois; n++) { 135 | int index_n = n * channels * pooled_width * pooled_height; 136 | 137 | // roi could have 4 or 5 columns 138 | const T* offset_bottom_rois = bottom_rois + n * roi_cols; 139 | int roi_batch_ind = 0; 140 | if (roi_cols == 5) { 141 | roi_batch_ind = offset_bottom_rois[0]; 142 | offset_bottom_rois++; 143 | } 144 | 145 | // Do not using rounding; this implementation detail is critical 146 | T roi_start_w = offset_bottom_rois[0] * spatial_scale; 147 | T roi_start_h = offset_bottom_rois[1] * spatial_scale; 148 | T roi_end_w = offset_bottom_rois[2] * spatial_scale; 149 | T roi_end_h = offset_bottom_rois[3] * spatial_scale; 150 | // T roi_start_w = round(offset_bottom_rois[0] * spatial_scale); 151 | // T roi_start_h = round(offset_bottom_rois[1] * spatial_scale); 152 | // T roi_end_w = round(offset_bottom_rois[2] * spatial_scale); 153 | // T roi_end_h = round(offset_bottom_rois[3] * spatial_scale); 154 | 155 | // Force malformed ROIs to be 1x1 156 | T roi_width = std::max(roi_end_w - roi_start_w, (T)1.); 157 | T roi_height = std::max(roi_end_h - roi_start_h, (T)1.); 158 | T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); 159 | T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); 160 | 161 | // We use roi_bin_grid to sample the grid and mimic integral 162 | int roi_bin_grid_h = (sampling_ratio > 0) 163 | ? sampling_ratio 164 | : ceil(roi_height / pooled_height); // e.g., = 2 165 | int roi_bin_grid_w = 166 | (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); 167 | 168 | // We do average (integral) pooling inside a bin 169 | const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 170 | 171 | // we want to precalculate indeces and weights shared by all chanels, 172 | // this is the key point of optimiation 173 | std::vector> pre_calc( 174 | roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); 175 | pre_calc_for_bilinear_interpolate( 176 | height, 177 | width, 178 | pooled_height, 179 | pooled_width, 180 | roi_bin_grid_h, 181 | roi_bin_grid_w, 182 | roi_start_h, 183 | roi_start_w, 184 | bin_size_h, 185 | bin_size_w, 186 | roi_bin_grid_h, 187 | roi_bin_grid_w, 188 | pre_calc); 189 | 190 | for (int c = 0; c < channels; c++) { 191 | int index_n_c = index_n + c * pooled_width * pooled_height; 192 | const T* offset_bottom_data = 193 | bottom_data + (roi_batch_ind * channels + c) * height * width; 194 | int pre_calc_index = 0; 195 | 196 | for (int ph = 0; ph < pooled_height; ph++) { 197 | for (int pw = 0; pw < pooled_width; pw++) { 198 | int index = index_n_c + ph * pooled_width + pw; 199 | 200 | T output_val = 0.; 201 | for (int iy = 0; iy < roi_bin_grid_h; iy++) { 202 | for (int ix = 0; ix < roi_bin_grid_w; ix++) { 203 | PreCalc pc = pre_calc[pre_calc_index]; 204 | output_val += pc.w1 * offset_bottom_data[pc.pos1] + 205 | pc.w2 * offset_bottom_data[pc.pos2] + 206 | pc.w3 * offset_bottom_data[pc.pos3] + 207 | pc.w4 * offset_bottom_data[pc.pos4]; 208 | 209 | pre_calc_index += 1; 210 | } 211 | } 212 | output_val /= count; 213 | 214 | top_data[index] = output_val; 215 | } // for pw 216 | } // for ph 217 | } // for c 218 | } // for n 219 | } 220 | 221 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 222 | const at::Tensor& rois, 223 | const float spatial_scale, 224 | const int pooled_height, 225 | const int pooled_width, 226 | const int sampling_ratio) { 227 | AT_ASSERTM(!input.type().is_cuda(), "input must be a CPU tensor"); 228 | AT_ASSERTM(!rois.type().is_cuda(), "rois must be a CPU tensor"); 229 | 230 | auto num_rois = rois.size(0); 231 | auto channels = input.size(1); 232 | auto height = input.size(2); 233 | auto width = input.size(3); 234 | 235 | auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); 236 | auto output_size = num_rois * pooled_height * pooled_width * channels; 237 | 238 | if (output.numel() == 0) { 239 | return output; 240 | } 241 | 242 | AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { 243 | ROIAlignForward_cpu_kernel( 244 | output_size, 245 | input.data(), 246 | spatial_scale, 247 | channels, 248 | height, 249 | width, 250 | pooled_height, 251 | pooled_width, 252 | sampling_ratio, 253 | rois.data(), 254 | output.data()); 255 | }); 256 | return output; 257 | } 258 | -------------------------------------------------------------------------------- /lib/model/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /lib/model/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /lib/model/csrc/cuda/ROIPool_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | // TODO make it in a common file 11 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 12 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 13 | i += blockDim.x * gridDim.x) 14 | 15 | 16 | template 17 | __global__ void RoIPoolFForward(const int nthreads, const T* bottom_data, 18 | const T spatial_scale, const int channels, const int height, 19 | const int width, const int pooled_height, const int pooled_width, 20 | const T* bottom_rois, T* top_data, int* argmax_data) { 21 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 22 | // (n, c, ph, pw) is an element in the pooled output 23 | int pw = index % pooled_width; 24 | int ph = (index / pooled_width) % pooled_height; 25 | int c = (index / pooled_width / pooled_height) % channels; 26 | int n = index / pooled_width / pooled_height / channels; 27 | 28 | const T* offset_bottom_rois = bottom_rois + n * 5; 29 | int roi_batch_ind = offset_bottom_rois[0]; 30 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 31 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 32 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 33 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 34 | 35 | // Force malformed ROIs to be 1x1 36 | int roi_width = max(roi_end_w - roi_start_w + 1, 1); 37 | int roi_height = max(roi_end_h - roi_start_h + 1, 1); 38 | T bin_size_h = static_cast(roi_height) 39 | / static_cast(pooled_height); 40 | T bin_size_w = static_cast(roi_width) 41 | / static_cast(pooled_width); 42 | 43 | int hstart = static_cast(floor(static_cast(ph) 44 | * bin_size_h)); 45 | int wstart = static_cast(floor(static_cast(pw) 46 | * bin_size_w)); 47 | int hend = static_cast(ceil(static_cast(ph + 1) 48 | * bin_size_h)); 49 | int wend = static_cast(ceil(static_cast(pw + 1) 50 | * bin_size_w)); 51 | 52 | // Add roi offsets and clip to input boundaries 53 | hstart = min(max(hstart + roi_start_h, 0), height); 54 | hend = min(max(hend + roi_start_h, 0), height); 55 | wstart = min(max(wstart + roi_start_w, 0), width); 56 | wend = min(max(wend + roi_start_w, 0), width); 57 | bool is_empty = (hend <= hstart) || (wend <= wstart); 58 | 59 | // Define an empty pooling region to be zero 60 | T maxval = is_empty ? 
0 : -FLT_MAX; 61 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 62 | int maxidx = -1; 63 | const T* offset_bottom_data = 64 | bottom_data + (roi_batch_ind * channels + c) * height * width; 65 | for (int h = hstart; h < hend; ++h) { 66 | for (int w = wstart; w < wend; ++w) { 67 | int bottom_index = h * width + w; 68 | if (offset_bottom_data[bottom_index] > maxval) { 69 | maxval = offset_bottom_data[bottom_index]; 70 | maxidx = bottom_index; 71 | } 72 | } 73 | } 74 | top_data[index] = maxval; 75 | argmax_data[index] = maxidx; 76 | } 77 | } 78 | 79 | template 80 | __global__ void RoIPoolFBackward(const int nthreads, const T* top_diff, 81 | const int* argmax_data, const int num_rois, const T spatial_scale, 82 | const int channels, const int height, const int width, 83 | const int pooled_height, const int pooled_width, T* bottom_diff, 84 | const T* bottom_rois) { 85 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 86 | // (n, c, ph, pw) is an element in the pooled output 87 | int pw = index % pooled_width; 88 | int ph = (index / pooled_width) % pooled_height; 89 | int c = (index / pooled_width / pooled_height) % channels; 90 | int n = index / pooled_width / pooled_height / channels; 91 | 92 | const T* offset_bottom_rois = bottom_rois + n * 5; 93 | int roi_batch_ind = offset_bottom_rois[0]; 94 | int bottom_offset = (roi_batch_ind * channels + c) * height * width; 95 | int top_offset = (n * channels + c) * pooled_height * pooled_width; 96 | const T* offset_top_diff = top_diff + top_offset; 97 | T* offset_bottom_diff = bottom_diff + bottom_offset; 98 | const int* offset_argmax_data = argmax_data + top_offset; 99 | 100 | int argmax = offset_argmax_data[ph * pooled_width + pw]; 101 | if (argmax != -1) { 102 | atomicAdd( 103 | offset_bottom_diff + argmax, 104 | static_cast(offset_top_diff[ph * pooled_width + pw])); 105 | 106 | } 107 | } 108 | } 109 | 110 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 111 | const at::Tensor& rois, 112 | const float spatial_scale, 113 | const int pooled_height, 114 | const int pooled_width) { 115 | AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 116 | AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); 117 | 118 | auto num_rois = rois.size(0); 119 | auto channels = input.size(1); 120 | auto height = input.size(2); 121 | auto width = input.size(3); 122 | 123 | auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); 124 | auto output_size = num_rois * pooled_height * pooled_width * channels; 125 | auto argmax = at::zeros({num_rois, channels, pooled_height, pooled_width}, input.options().dtype(at::kInt)); 126 | 127 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 128 | 129 | dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L)); 130 | dim3 block(512); 131 | 132 | if (output.numel() == 0) { 133 | THCudaCheck(cudaGetLastError()); 134 | return std::make_tuple(output, argmax); 135 | } 136 | 137 | AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] { 138 | RoIPoolFForward<<>>( 139 | output_size, 140 | input.contiguous().data(), 141 | spatial_scale, 142 | channels, 143 | height, 144 | width, 145 | pooled_height, 146 | pooled_width, 147 | rois.contiguous().data(), 148 | output.data(), 149 | argmax.data()); 150 | }); 151 | THCudaCheck(cudaGetLastError()); 152 | return std::make_tuple(output, argmax); 153 | } 154 | 155 | // TODO remove the dependency on input and use instead its sizes -> save memory 156 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 
157 | const at::Tensor& input, 158 | const at::Tensor& rois, 159 | const at::Tensor& argmax, 160 | const float spatial_scale, 161 | const int pooled_height, 162 | const int pooled_width, 163 | const int batch_size, 164 | const int channels, 165 | const int height, 166 | const int width) { 167 | AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); 168 | AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); 169 | // TODO add more checks 170 | 171 | auto num_rois = rois.size(0); 172 | auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); 173 | 174 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 175 | 176 | dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 4096L)); 177 | dim3 block(512); 178 | 179 | // handle possibly empty gradients 180 | if (grad.numel() == 0) { 181 | THCudaCheck(cudaGetLastError()); 182 | return grad_input; 183 | } 184 | 185 | AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIPool_backward", [&] { 186 | RoIPoolFBackward<<>>( 187 | grad.numel(), 188 | grad.contiguous().data(), 189 | argmax.data(), 190 | num_rois, 191 | spatial_scale, 192 | channels, 193 | height, 194 | width, 195 | pooled_height, 196 | pooled_width, 197 | grad_input.data(), 198 | rois.contiguous().data()); 199 | }); 200 | THCudaCheck(cudaGetLastError()); 201 | return grad_input; 202 | } 203 | -------------------------------------------------------------------------------- /lib/model/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + 
cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /lib/model/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
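// Declarations for the CUDA kernels that back the compiled _C extension:
// RoIAlign and RoIPool forward/backward plus GPU NMS. Note that nms_cuda
// takes a single N x 5 tensor laid out as (x1, y1, x2, y2, score) that is
// already resident on the GPU and returns the indices of the boxes to keep;
// the concatenation of (dets, scores) into that layout is done by the nms()
// wrapper in nms.h below.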
2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& input, 33 | const at::Tensor& rois, 34 | const at::Tensor& argmax, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 44 | 45 | 46 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 47 | const int height, 48 | const int width); 49 | -------------------------------------------------------------------------------- /lib/model/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /lib/model/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
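// vision.cpp is the pybind11 entry point for the compiled extension (the
// _C.cpython-36m-x86_64-linux-gnu.so shipped under lib/model/). It registers
// nms, roi_align_forward/backward and roi_pool_forward/backward on the module
// named by TORCH_EXTENSION_NAME. The nms() wrapper pulled in via nms.h
// dispatches on dets.type().is_cuda(), so the same Python-facing function
// covers both the CPU path (nms_cpu) and, when built WITH_CUDA, the GPU path.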
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | } 14 | -------------------------------------------------------------------------------- /lib/model/faster_rcnn/Snet.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | 3 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 4 | from model.utils.config import cfg 5 | 6 | class SnetExtractor(nn.Module): 7 | cfg = { 8 | 49: [24, 60, 120, 240, 512], 9 | 146: [24, 132, 264, 528], 10 | 535: [48, 248, 496, 992], 11 | } 12 | 13 | def __init__(self, version = 146 ,model_path=None , **kwargs): 14 | 15 | super(SnetExtractor,self).__init__() 16 | num_layers = [4, 8, 4] 17 | self.model_path = model_path 18 | 19 | self.num_layers = num_layers 20 | channels = self.cfg[version] 21 | self.channels = channels 22 | 23 | 24 | 25 | self.conv1 = conv_bn( 26 | 3, channels[0], kernel_size=3, stride=2,pad = 1 27 | ) 28 | self.maxpool = nn.MaxPool2d( 29 | kernel_size=3, stride=2, padding=1, 30 | ) 31 | 32 | 33 | self.stage1 = self._make_layer( 34 | num_layers[0], channels[0], channels[1], **kwargs) 35 | self.stage2 = self._make_layer( 36 | num_layers[1], channels[1], channels[2], **kwargs) 37 | self.stage3 = self._make_layer( 38 | num_layers[2], channels[2], channels[3], **kwargs) 39 | if len(self.channels) == 5: 40 | self.conv5 = conv_bn( 41 | channels[3], channels[4], kernel_size=1, stride=1 ,pad=0 ) 42 | 43 | 44 | 45 | if len(channels) == 5: 46 | self.cem = CEM(channels[-3], channels[-1], channels[-1] ,cfg.FEAT_STRIDE) 47 | else: 48 | self.cem = CEM(channels[-2], channels[-1], channels[-1],cfg.FEAT_STRIDE) 49 | self.avgpool = nn.AdaptiveAvgPool2d(1) 50 | self._initialize_weights() 51 | 52 | def _make_layer(self, num_layers, in_channels, out_channels, **kwargs): 53 | layers = [] 54 | for i in range(num_layers): 55 | if i == 0: 56 | layers.append(ShuffleV2Block(in_channels, out_channels, mid_channels=out_channels // 2, ksize=5, stride=2)) 57 | else: 58 | layers.append(ShuffleV2Block(in_channels // 2, out_channels, 59 | mid_channels=out_channels // 2, ksize=5, stride=1)) 60 | in_channels = out_channels 61 | return nn.Sequential(*layers) 62 | 63 | 64 | 65 | 66 | def _initialize_weights(self): 67 | 68 | def set_bn_fix(m): 69 | classname = m.__class__.__name__ 70 | if classname.find('BatchNorm') != -1: 71 | for p in m.parameters(): p.requires_grad = False 72 | 73 | if self.model_path is not None: 74 | 75 | print("Loading pretrained weights from %s" % (self.model_path)) 76 | if torch.cuda.is_available(): 77 | state_dict = torch.load(self.model_path)["state_dict"] 78 | else: 79 | state_dict = torch.load( 80 | self.model_path, map_location=lambda storage, loc: storage)["state_dict"] 81 | keys = [] 82 | for k, v in state_dict.items(): 83 | keys.append(k) 84 | for k in keys: 85 | state_dict[k.replace("module.", "")] = state_dict.pop(k) 86 | 87 | self.load_state_dict(state_dict,strict = False) 88 | 89 | for para in self.conv1.parameters(): 90 | para.requires_grad = False 91 | print('extractor conv1 freezed') 92 | for para in self.stage1.parameters(): 93 | para.requires_grad = False 94 | 
print('extractor stage1 freezed') 95 | # for para in self.stage2.parameters(): 96 | # para.requires_grad = False 97 | # print('extractor stage2 freezed') 98 | # for para in self.stage3.parameters(): 99 | # para.requires_grad = False 100 | # print('extractor stage3 freezed') 101 | set_bn_fix(self.conv1) 102 | set_bn_fix(self.stage1) 103 | set_bn_fix(self.stage2) 104 | set_bn_fix(self.stage3) 105 | 106 | else: 107 | for name, m in self.named_modules(): 108 | if isinstance(m, nn.Conv2d): 109 | if 'first' in name: 110 | nn.init.normal_(m.weight, 0, 0.01) 111 | else: 112 | nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) 113 | if m.bias is not None: 114 | nn.init.constant_(m.bias, 0) 115 | elif isinstance(m, nn.BatchNorm2d): 116 | nn.init.constant_(m.weight, 1) 117 | if m.bias is not None: 118 | nn.init.constant_(m.bias, 0.0001) 119 | nn.init.constant_(m.running_mean, 0) 120 | elif isinstance(m, nn.BatchNorm1d): 121 | nn.init.constant_(m.weight, 1) 122 | if m.bias is not None: 123 | nn.init.constant_(m.bias, 0.0001) 124 | nn.init.constant_(m.running_mean, 0) 125 | elif isinstance(m, nn.Linear): 126 | nn.init.normal_(m.weight, 0, 0.01) 127 | if m.bias is not None: 128 | nn.init.constant_(m.bias, 0) 129 | 130 | def forward(self, x): 131 | 132 | x = self.conv1(x) 133 | x = self.maxpool(x) 134 | c3 = self.stage1(x) 135 | c4 = self.stage2(c3) 136 | c5 = self.stage3(c4) 137 | if len(self.channels) == 5: 138 | c5 = self.conv5(c5) 139 | 140 | Cglb_lat = self.avgpool(c5) 141 | 142 | if cfg.FEAT_STRIDE == 16: 143 | cem_out = self.cem([c4, c5, Cglb_lat]) 144 | elif cfg.FEAT_STRIDE == 8: 145 | cem_out = self.cem([c3,c4, c5, Cglb_lat]) 146 | 147 | return cem_out 148 | 149 | class snet(_fasterRCNN): 150 | def __init__(self, 151 | classes, 152 | layer , 153 | pretrained_path=None, 154 | class_agnostic=False, 155 | ): 156 | self.pretrained_path = pretrained_path 157 | 158 | self.class_agnostic = class_agnostic 159 | 160 | self.dout_base_model = 256 161 | self.layer = layer 162 | 163 | self.dout_lh_base_model = 245 164 | 165 | _fasterRCNN.__init__(self, 166 | classes, 167 | class_agnostic, 168 | compact_mode=True) 169 | 170 | def _init_modules(self): 171 | snet = SnetExtractor(self.layer, self.pretrained_path) 172 | 173 | 174 | 175 | 176 | # Build snet. 
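        # Note on the head dimensions: the SAM output fed to position-sensitive
        # RoI align has 245 channels, which PSRoIAlign splits into a 7x7 grid of
        # bins with 5 channels each, so every pooled RoI flattens to
        # 5 * 7 * 7 = 245 values. That is why dout_lh_base_model is 245 and why
        # RCNN_top below is a single nn.Linear(5 * 7 * 7, 1024).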
177 | self.RCNN_base = snet 178 | 179 | # Fix Layers 180 | # if self.pretrained: 181 | # for layer in self.RCNN_base: 182 | # print(layer) 183 | # for p in self.RCNN_base[layer].parameters(): 184 | # p.requires_grad = False 185 | 186 | 187 | self.RCNN_top = nn.Sequential(nn.Linear(5 * 7 * 7, 1024), 188 | nn.ReLU(inplace=True), 189 | 190 | ) 191 | 192 | 193 | c_in = 1024 194 | 195 | self.RCNN_cls_score = nn.Linear(c_in, self.n_classes) 196 | if self.class_agnostic: 197 | self.RCNN_bbox_pred = nn.Linear(c_in, 4) 198 | else: 199 | self.RCNN_bbox_pred = nn.Linear(c_in, 4 * self.n_classes) 200 | 201 | def train(self, mode=True): 202 | # Override train so that the training mode is set as we want 203 | nn.Module.train(self, mode) 204 | if mode: 205 | # Set fixed blocks to be in eval mode 206 | self.RCNN_base.conv1.eval() 207 | self.RCNN_base.stage1.eval() 208 | self.RCNN_base.stage2.train() 209 | self.RCNN_base.stage3.train() 210 | 211 | 212 | def set_bn_eval(m): 213 | classname = m.__class__.__name__ 214 | if classname.find('BatchNorm') != -1: 215 | m.eval() 216 | 217 | set_bn_eval(self.RCNN_base.conv1) 218 | set_bn_eval(self.RCNN_base.stage1) 219 | set_bn_eval(self.RCNN_base.stage2) 220 | set_bn_eval(self.RCNN_base.stage3) 221 | 222 | 223 | def _head_to_tail(self, pool5): 224 | pool5_flat = pool5.view(pool5.size(0), -1) 225 | fc7 = self.RCNN_top(pool5_flat) # or two large fully-connected layers 226 | 227 | return fc7 228 | 229 | -------------------------------------------------------------------------------- /lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | import torchvision.models as models 9 | from torchvision.ops import RoIAlign, RoIPool 10 | 11 | from psroialign.psroialign import PSROIAlignhandle,PSROIPoolhandle 12 | 13 | # from psroi_pooling.modules.psroi_pool import PSRoIPool 14 | 15 | from .modules import RPN,SAM 16 | from model.utils.config import cfg 17 | # from model.rpn.rpn import _RPN 18 | from model.rpn.rpn import _RPN 19 | # from model.rpn.centernet_rpn import _RPN 20 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer 21 | from model.loss.losses import _smooth_l1_loss 22 | 23 | from model.loss.losses import OHEM_loss,hard_negative_mining 24 | 25 | 26 | 27 | 28 | 29 | class _fasterRCNN(nn.Module): 30 | """ faster RCNN """ 31 | def __init__(self, 32 | classes, 33 | class_agnostic, 34 | compact_mode=False): 35 | super(_fasterRCNN, self).__init__() 36 | self.classes = classes 37 | self.n_classes = len(classes) 38 | self.class_agnostic = class_agnostic 39 | 40 | 41 | # loss 42 | self.RCNN_loss_cls = 0 43 | self.RCNN_loss_bbox = 0 44 | # self.focalloss_handle = FocalLossV4(num_class=21, alpha=0.25, gamma=2.0, balance_index=2) 45 | # define Large Separable Convolution Layer 46 | 47 | self.rpn = RPN(in_channels=245, f_channels=256) 48 | 49 | 50 | self.sam = SAM(256,245) 51 | # define rpn 52 | self.RCNN_rpn = _RPN(self.dout_base_model) 53 | self.RCNN_proposal_target = 
_ProposalTargetLayer(self.n_classes) 54 | 55 | self.rpn_time = None 56 | self.pre_roi_time = None 57 | self.roi_pooling_time = None 58 | self.subnet_time = None 59 | self.psroiAlign = PSROIAlignhandle(1./cfg.FEAT_STRIDE, 7,2, 5) 60 | self.psroiPool = PSROIPoolhandle(7,7,1./cfg.FEAT_STRIDE,7,5) 61 | 62 | 63 | 64 | def _roi_pool_layer(self, bottom, rois): 65 | return self.psroiPool.forward(bottom, rois) 66 | 67 | def _roi_align_layer(self, bottom, rois): 68 | return self.psroiAlign.forward(bottom, rois) 69 | 70 | def forward(self, im_data, im_info, gt_boxes, num_boxes, 71 | # hm,reg_mask,wh,offset,ind 72 | ): 73 | batch_size = im_data.size(0) 74 | 75 | 76 | im_info = im_info.data 77 | gt_boxes = gt_boxes.data 78 | num_boxes = num_boxes.data 79 | # hm = hm.data 80 | # reg_mask = reg_mask.data 81 | # wh = wh.data 82 | # offset = offset.data 83 | # ind = ind.data 84 | 85 | # feed image data to base model to obtain base feature map 86 | start = time.time() 87 | basefeat = self.RCNN_base(im_data) 88 | 89 | # feed base feature map tp RPN to obtain rois 90 | rpn_feat= self.rpn(basefeat) 91 | 92 | 93 | # rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(rpn_feat, im_info, gt_boxes, num_boxes,hm,reg_mask,wh,offset,ind) 94 | rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(rpn_feat, im_info, gt_boxes, num_boxes) 95 | 96 | rpn_time = time.time() 97 | self.rpn_time = rpn_time - start 98 | # if it is training phrase, then use ground trubut bboxes for refining 99 | if self.training: 100 | roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) 101 | rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data 102 | 103 | rois_label = Variable(rois_label.view(-1).long()) 104 | rois_target = Variable(rois_target.view(-1, rois_target.size(2))) 105 | rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) 106 | rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2))) 107 | else: 108 | rois_label = None 109 | rois_target = None 110 | rois_inside_ws = None 111 | rois_outside_ws = None 112 | rpn_loss_cls = 0 113 | rpn_loss_bbox = 0 114 | 115 | rois = Variable(rois) 116 | 117 | pre_roi_time = time.time() 118 | self.pre_roi_time = pre_roi_time - rpn_time 119 | 120 | base_feat = self.sam([basefeat,rpn_feat]) 121 | 122 | 123 | 124 | # do roi pooling based on predicted rois 125 | if cfg.POOLING_MODE == 'align': 126 | pooled_feat = self._roi_align_layer(base_feat, rois.view(-1, 5)) 127 | elif cfg.POOLING_MODE == 'pool': 128 | pooled_feat = self._roi_pool_layer(base_feat, rois.view(-1, 5)) 129 | 130 | roi_pool_time = time.time() 131 | self.roi_pooling_time = roi_pool_time - pre_roi_time 132 | 133 | # feed pooled features to top model 134 | pooled_feat = self._head_to_tail(pooled_feat) 135 | 136 | # compute bbox offset 137 | bbox_pred = self.RCNN_bbox_pred(pooled_feat) 138 | if self.training and not self.class_agnostic: 139 | # select the corresponding columns according to roi labels 140 | bbox_pred_view = bbox_pred.view(bbox_pred.size(0), 141 | int(bbox_pred.size(1) / 4), 4) 142 | bbox_pred_select = torch.gather( 143 | bbox_pred_view, 1, 144 | rois_label.view(rois_label.size(0), 1, 145 | 1).expand(rois_label.size(0), 1, 4)) 146 | 147 | bbox_pred = bbox_pred_select.squeeze(1) 148 | 149 | 150 | # compute object classification probability 151 | cls_score = self.RCNN_cls_score(pooled_feat) 152 | cls_prob = F.softmax(cls_score, 1) 153 | 154 | RCNN_loss_cls = 0 155 | RCNN_loss_bbox = 0 156 | 157 | 158 | if self.training: 159 | # classification loss 160 | # 
RCNN_loss_cls = OHEM_loss(cls_score,rois_label) 161 | # from collections import Counter 162 | # label = rois_label.cpu().numpy() 163 | # print(Counter(label)) 164 | 165 | loss = -F.log_softmax(cls_score, dim=1)[:, 0] 166 | mask, num_pos = hard_negative_mining(loss, rois_label) 167 | confidence = cls_score[mask, :] 168 | RCNN_loss_cls = F.cross_entropy(confidence, rois_label[mask], reduction='mean') 169 | 170 | 171 | # bounding box regression L1 loss 172 | 173 | RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) 174 | RCNN_loss_bbox = RCNN_loss_bbox * 2 # "to balance multi-task training" 175 | 176 | cls_prob = cls_prob.view(batch_size, rois.size(1), -1) 177 | bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) 178 | 179 | subnet_time = time.time() 180 | self.subnet_time = subnet_time - roi_pool_time 181 | time_measure = [ 182 | self.rpn_time, self.pre_roi_time, self.roi_pooling_time, 183 | self.subnet_time 184 | ] 185 | 186 | return time_measure, rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label 187 | 188 | def _init_weights(self): 189 | def normal_init(m, mean, stddev, truncated=False): 190 | """ 191 | weight initalizer: truncated normal and random normal. 192 | """ 193 | # x is a parameter 194 | if truncated: 195 | # not a perfect approximation 196 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) 197 | else: 198 | m.weight.data.normal_(mean, stddev) 199 | m.bias.data.zero_() 200 | 201 | normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 202 | normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED) 203 | 204 | # normal_init(self.RCNN_rpn.RPN_hm_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 205 | # normal_init(self.RCNN_rpn.PRN_wh_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 206 | # normal_init(self.RCNN_rpn.PRN_offset_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 207 | 208 | 209 | normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 210 | normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED) 211 | 212 | def create_architecture(self): 213 | self._init_modules() 214 | self._init_weights() 215 | -------------------------------------------------------------------------------- /lib/model/loss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/loss/__init__.py -------------------------------------------------------------------------------- /lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /lib/model/rpn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import numpy as np 4 | import pdb 5 | 6 | def bbox_transform(ex_rois, gt_rois): 7 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 8 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 9 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 10 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 11 | 12 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 13 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 14 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 15 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 16 | 17 | targets_dx = (gt_ctr_x - 
ex_ctr_x) / ex_widths 18 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 19 | targets_dw = torch.log(gt_widths / ex_widths) 20 | targets_dh = torch.log(gt_heights / ex_heights) 21 | 22 | targets = torch.stack( 23 | (targets_dx, targets_dy, targets_dw, targets_dh),1) 24 | 25 | return targets 26 | 27 | def bbox_transform_batch(ex_rois, gt_rois): 28 | 29 | if ex_rois.dim() == 2: 30 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 31 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 32 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 33 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 34 | 35 | gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0 36 | gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0 37 | gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths 38 | gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights 39 | 40 | targets_dx = (gt_ctr_x - ex_ctr_x.view(1,-1).expand_as(gt_ctr_x)) / ex_widths 41 | targets_dy = (gt_ctr_y - ex_ctr_y.view(1,-1).expand_as(gt_ctr_y)) / ex_heights 42 | targets_dw = torch.log(gt_widths / ex_widths.view(1,-1).expand_as(gt_widths)) 43 | targets_dh = torch.log(gt_heights / ex_heights.view(1,-1).expand_as(gt_heights)) 44 | 45 | elif ex_rois.dim() == 3: 46 | ex_widths = ex_rois[:, :, 2] - ex_rois[:, :, 0] + 1.0 47 | ex_heights = ex_rois[:,:, 3] - ex_rois[:,:, 1] + 1.0 48 | ex_ctr_x = ex_rois[:, :, 0] + 0.5 * ex_widths 49 | ex_ctr_y = ex_rois[:, :, 1] + 0.5 * ex_heights 50 | 51 | gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0 52 | gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0 53 | gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths 54 | gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights 55 | 56 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 57 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 58 | targets_dw = torch.log(gt_widths / ex_widths) 59 | targets_dh = torch.log(gt_heights / ex_heights) 60 | else: 61 | raise ValueError('ex_roi input dimension is not correct.') 62 | 63 | targets = torch.stack( 64 | (targets_dx, targets_dy, targets_dw, targets_dh),2) 65 | 66 | return targets 67 | 68 | def bbox_transform_inv(boxes, deltas, batch_size): 69 | widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0 70 | heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0 71 | ctr_x = boxes[:, :, 0] + 0.5 * widths 72 | ctr_y = boxes[:, :, 1] + 0.5 * heights 73 | 74 | dx = deltas[:, :, 0::4] 75 | dy = deltas[:, :, 1::4] 76 | dw = deltas[:, :, 2::4] 77 | dh = deltas[:, :, 3::4] 78 | 79 | pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2) 80 | pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2) 81 | pred_w = torch.exp(dw) * widths.unsqueeze(2) 82 | pred_h = torch.exp(dh) * heights.unsqueeze(2) 83 | 84 | pred_boxes = deltas.clone() 85 | # x1 86 | pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w 87 | # y1 88 | pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h 89 | # x2 90 | pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w 91 | # y2 92 | pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h 93 | 94 | return pred_boxes 95 | 96 | def clip_boxes_batch(boxes, im_shape, batch_size): 97 | """ 98 | Clip boxes to image boundaries. 
99 | """ 100 | num_rois = boxes.size(1) 101 | 102 | boxes[boxes < 0] = 0 103 | # batch_x = (im_shape[:,0]-1).view(batch_size, 1).expand(batch_size, num_rois) 104 | # batch_y = (im_shape[:,1]-1).view(batch_size, 1).expand(batch_size, num_rois) 105 | 106 | batch_x = im_shape[:, 1] - 1 107 | batch_y = im_shape[:, 0] - 1 108 | 109 | boxes[:,:,0][boxes[:,:,0] > batch_x] = batch_x 110 | boxes[:,:,1][boxes[:,:,1] > batch_y] = batch_y 111 | boxes[:,:,2][boxes[:,:,2] > batch_x] = batch_x 112 | boxes[:,:,3][boxes[:,:,3] > batch_y] = batch_y 113 | 114 | return boxes 115 | 116 | def clip_boxes(boxes, im_shape, batch_size): 117 | 118 | for i in range(batch_size): 119 | boxes[i,:,0::4].clamp_(0, im_shape[i, 1]-1) 120 | boxes[i,:,1::4].clamp_(0, im_shape[i, 0]-1) 121 | boxes[i,:,2::4].clamp_(0, im_shape[i, 1]-1) 122 | boxes[i,:,3::4].clamp_(0, im_shape[i, 0]-1) 123 | 124 | return boxes 125 | 126 | 127 | def bbox_overlaps(anchors, gt_boxes): 128 | """ 129 | anchors: (N, 4) ndarray of float 130 | gt_boxes: (K, 4) ndarray of float 131 | 132 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 133 | """ 134 | N = anchors.size(0) 135 | K = gt_boxes.size(0) 136 | 137 | gt_boxes_area = ((gt_boxes[:,2] - gt_boxes[:,0] + 1) * 138 | (gt_boxes[:,3] - gt_boxes[:,1] + 1)).view(1, K) 139 | 140 | anchors_area = ((anchors[:,2] - anchors[:,0] + 1) * 141 | (anchors[:,3] - anchors[:,1] + 1)).view(N, 1) 142 | 143 | boxes = anchors.view(N, 1, 4).expand(N, K, 4) 144 | query_boxes = gt_boxes.view(1, K, 4).expand(N, K, 4) 145 | 146 | iw = (torch.min(boxes[:,:,2], query_boxes[:,:,2]) - 147 | torch.max(boxes[:,:,0], query_boxes[:,:,0]) + 1) 148 | iw[iw < 0] = 0 149 | 150 | ih = (torch.min(boxes[:,:,3], query_boxes[:,:,3]) - 151 | torch.max(boxes[:,:,1], query_boxes[:,:,1]) + 1) 152 | ih[ih < 0] = 0 153 | 154 | ua = anchors_area + gt_boxes_area - (iw * ih) 155 | overlaps = iw * ih / ua 156 | 157 | return overlaps 158 | 159 | def bbox_overlaps_batch(anchors, gt_boxes): 160 | """ 161 | anchors: (N, 4) ndarray of float 162 | gt_boxes: (b, K, 5) ndarray of float 163 | 164 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 165 | """ 166 | batch_size = gt_boxes.size(0) 167 | 168 | 169 | if anchors.dim() == 2: 170 | 171 | N = anchors.size(0) 172 | K = gt_boxes.size(1) 173 | 174 | anchors = anchors.view(1, N, 4).expand(batch_size, N, 4).contiguous() 175 | gt_boxes = gt_boxes[:,:,:4].contiguous() 176 | 177 | 178 | gt_boxes_x = (gt_boxes[:,:,2] - gt_boxes[:,:,0] + 1) 179 | gt_boxes_y = (gt_boxes[:,:,3] - gt_boxes[:,:,1] + 1) 180 | gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K) 181 | 182 | anchors_boxes_x = (anchors[:,:,2] - anchors[:,:,0] + 1) 183 | anchors_boxes_y = (anchors[:,:,3] - anchors[:,:,1] + 1) 184 | anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1) 185 | 186 | gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1) 187 | anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1) 188 | 189 | boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4) 190 | query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(batch_size, N, K, 4) 191 | 192 | iw = (torch.min(boxes[:,:,:,2], query_boxes[:,:,:,2]) - 193 | torch.max(boxes[:,:,:,0], query_boxes[:,:,:,0]) + 1) 194 | iw[iw < 0] = 0 195 | 196 | ih = (torch.min(boxes[:,:,:,3], query_boxes[:,:,:,3]) - 197 | torch.max(boxes[:,:,:,1], query_boxes[:,:,:,1]) + 1) 198 | ih[ih < 0] = 0 199 | ua = anchors_area + gt_boxes_area - (iw * ih) 200 | overlaps = iw * ih / ua 201 | 202 | # mask the overlap here. 
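        # Ground-truth rows used purely as padding are all zeros, so under the
        # "+ 1" width/height convention they come out with width == height == 1;
        # gt_area_zero flags them and their overlap columns are forced to 0 so no
        # anchor can be matched to a padding box. Degenerate anchors are filled
        # with -1 instead, letting downstream code tell an invalid anchor apart
        # from a real anchor that simply has zero overlap.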
203 | overlaps.masked_fill_(gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0) 204 | overlaps.masked_fill_(anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1) 205 | 206 | elif anchors.dim() == 3: 207 | N = anchors.size(1) 208 | K = gt_boxes.size(1) 209 | 210 | if anchors.size(2) == 4: 211 | anchors = anchors[:,:,:4].contiguous() 212 | else: 213 | anchors = anchors[:,:,1:5].contiguous() 214 | 215 | gt_boxes = gt_boxes[:,:,:4].contiguous() 216 | 217 | gt_boxes_x = (gt_boxes[:,:,2] - gt_boxes[:,:,0] + 1) 218 | gt_boxes_y = (gt_boxes[:,:,3] - gt_boxes[:,:,1] + 1) 219 | gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K) 220 | 221 | anchors_boxes_x = (anchors[:,:,2] - anchors[:,:,0] + 1) 222 | anchors_boxes_y = (anchors[:,:,3] - anchors[:,:,1] + 1) 223 | anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1) 224 | 225 | gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1) 226 | anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1) 227 | 228 | boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4) 229 | query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(batch_size, N, K, 4) 230 | 231 | iw = (torch.min(boxes[:,:,:,2], query_boxes[:,:,:,2]) - 232 | torch.max(boxes[:,:,:,0], query_boxes[:,:,:,0]) + 1) 233 | iw[iw < 0] = 0 234 | 235 | ih = (torch.min(boxes[:,:,:,3], query_boxes[:,:,:,3]) - 236 | torch.max(boxes[:,:,:,1], query_boxes[:,:,:,1]) + 1) 237 | ih[ih < 0] = 0 238 | ua = anchors_area + gt_boxes_area - (iw * ih) 239 | 240 | overlaps = iw * ih / ua 241 | 242 | # mask the overlap here. 243 | overlaps.masked_fill_(gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0) 244 | overlaps.masked_fill_(anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1) 245 | else: 246 | raise ValueError('anchors input dimension is not correct.') 247 | 248 | return overlaps 249 | -------------------------------------------------------------------------------- /lib/model/rpn/centernet_rpn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from model.utils.config import cfg 7 | from model.loss.losses import FocalLoss,RegL1Loss 8 | from torch.autograd import Variable 9 | from model.utils.cente_decode import ctdet_decode 10 | 11 | 12 | class _ProposalLayer(nn.Module): 13 | """ 14 | Outputs object detection proposals by applying estimated bounding-box 15 | transformations to a set of regular boxes (called "anchors"). 
16 | """ 17 | 18 | def __init__(self, feat_stride): 19 | super(_ProposalLayer, self).__init__() 20 | 21 | self._feat_stride = feat_stride 22 | 23 | 24 | # rois blob: holds R regions of interest, each is a 5-tuple 25 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 26 | # rectangle (x1, y1, x2, y2) 27 | # top[0].reshape(1, 5) 28 | # 29 | # # scores blob: holds scores for R regions of interest 30 | # if len(top) > 1: 31 | # top[1].reshape(1, 1, 1, 1) 32 | 33 | def forward(self, input): 34 | 35 | # Algorithm: 36 | # 37 | # for each (H, W) location i 38 | # generate A anchor boxes centered on cell i 39 | # apply predicted bbox deltas at cell i to each of the A anchors 40 | # clip predicted boxes to image 41 | # remove predicted boxes with either height or width < threshold 42 | # sort all (proposal, score) pairs by score from highest to lowest 43 | # take top pre_nms_topN proposals before NMS 44 | # apply NMS with threshold 0.7 to remaining proposals 45 | # take after_nms_topN proposals after NMS 46 | # return the top proposals (-> RoIs top, scores top) 47 | 48 | 49 | # the first set of _num_anchors channels are bg probs 50 | # the second set are the fg probs 51 | scores = input[0] 52 | wh_deltas = input[1] 53 | offset_deltas = input[2] 54 | im_info = input[3] 55 | cfg_key = input[4] 56 | 57 | 58 | 59 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 60 | 61 | detections = ctdet_decode(scores,wh_deltas,offset_deltas,K=post_nms_topN) 62 | 63 | 64 | 65 | 66 | detections[:, :, :4] *= self._feat_stride 67 | batch_size = scores.size(0) 68 | 69 | 70 | 71 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 72 | 73 | for i in range(batch_size): 74 | # # 3. remove predicted boxes with either height or width < threshold 75 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 76 | 77 | output[i,:,0] = i 78 | 79 | output[i,:,1:] = detections[i,:,:4] 80 | 81 | return output 82 | 83 | def backward(self, top, propagate_down, bottom): 84 | """This layer does not propagate gradients.""" 85 | pass 86 | 87 | def reshape(self, bottom, top): 88 | """Reshaping happens during the call to forward.""" 89 | pass 90 | 91 | def _filter_boxes(self, boxes, min_size): 92 | """Remove all boxes with any side smaller than min_size.""" 93 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 94 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 95 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 96 | return keep 97 | 98 | 99 | 100 | 101 | class _RPN(nn.Module): 102 | """ region proposal network """ 103 | def __init__(self, din): 104 | super(_RPN, self).__init__() 105 | 106 | self.din = din # get depth of input feature map, e.g., 512 107 | 108 | self.feat_stride = cfg.FEAT_STRIDE 109 | 110 | 111 | self.RPN_hm_score = nn.Conv2d(self.din, 1 , 1, 1, 0) 112 | self.PRN_wh_score = nn.Conv2d(self.din, 2 , 1, 1, 0) 113 | self.PRN_offset_score = nn.Conv2d(self.din, 2 , 1, 1, 0) 114 | self.RPN_proposal = _ProposalLayer(self.feat_stride) 115 | 116 | self.crit = FocalLoss() 117 | # self.crit =torch.nn.MSELoss() 118 | self.crit_offset = RegL1Loss() 119 | self.crit_wh = RegL1Loss() 120 | 121 | self.rpn_loss_hm = 0 122 | self.rpn_loss_wh = 0 123 | self.rpn_loss_offset = 0 124 | 125 | 126 | @staticmethod 127 | def reshape(x, d): 128 | input_shape = x.size() 129 | x = x.view(input_shape[0], int(d), 130 | int(float(input_shape[1] * input_shape[2]) / float(d)), 131 | input_shape[3]) 132 | return x 133 | 134 | def forward(self, base_feat, im_info, gt_boxes, 
num_boxes,hm,reg_mask,wh,offset,ind): 135 | 136 | batch_size = base_feat.size(0) 137 | 138 | rpn_hm_score = self.RPN_hm_score(base_feat) 139 | rpn_cls_prob = F.sigmoid(rpn_hm_score) 140 | rpn_wh_pred = self.PRN_wh_score(base_feat) 141 | rpn_offset_pred = self.PRN_offset_score(base_feat) 142 | 143 | cfg_key = 'TRAIN' if self.training else 'TEST' 144 | 145 | 146 | 147 | self.rpn_loss_cls = 0 148 | self.rpn_loss_box = 0 149 | 150 | # generating training labels and build the rpn loss 151 | if self.training: 152 | assert gt_boxes is not None 153 | 154 | hm_loss = self.crit(rpn_cls_prob, hm) 155 | 156 | offset_loss = self.crit_offset(rpn_offset_pred, reg_mask, 157 | ind, offset) 158 | 159 | wh_loss = self.crit_wh(rpn_wh_pred, reg_mask, 160 | ind, wh) 161 | 162 | self.rpn_loss_cls = hm_loss + offset_loss 163 | self.rpn_loss_box = wh_loss 164 | 165 | rois = self.RPN_proposal( 166 | (rpn_cls_prob, rpn_wh_pred, rpn_offset_pred, im_info, cfg_key)) 167 | return rois, self.rpn_loss_cls, self.rpn_loss_box 168 | -------------------------------------------------------------------------------- /lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pdb 5 | 6 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 7 | # 8 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 9 | # >> anchors 10 | # 11 | # anchors = 12 | # 13 | # -83 -39 100 56 14 | # -175 -87 192 104 15 | # -359 -183 376 200 16 | # -55 -55 72 72 17 | # -119 -119 136 136 18 | # -247 -247 264 264 19 | # -35 -79 52 96 20 | # -79 -167 96 184 21 | # -167 -343 184 360 22 | 23 | #array([[ -83., -39., 100., 56.], 24 | # [-175., -87., 192., 104.], 25 | # [-359., -183., 376., 200.], 26 | # [ -55., -55., 72., 72.], 27 | # [-119., -119., 136., 136.], 28 | # [-247., -247., 264., 264.], 29 | # [ -35., -79., 52., 96.], 30 | # [ -79., -167., 96., 184.], 31 | # [-167., -343., 184., 360.]]) 32 | 33 | try: 34 | xrange # Python 2 35 | except NameError: 36 | xrange = range # Python 3 37 | 38 | 39 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 40 | scales=2**np.arange(3, 6)): 41 | """ 42 | Generate anchor (reference) windows by enumerating aspect ratios X 43 | scales wrt a reference (0, 0, 15, 15) window. 44 | """ 45 | 46 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 47 | ratio_anchors = _ratio_enum(base_anchor, ratios) 48 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 49 | for i in xrange(ratio_anchors.shape[0])]) 50 | return anchors 51 | 52 | def _whctrs(anchor): 53 | """ 54 | Return width, height, x center, and y center for an anchor (window). 55 | """ 56 | 57 | w = anchor[2] - anchor[0] + 1 58 | h = anchor[3] - anchor[1] + 1 59 | x_ctr = anchor[0] + 0.5 * (w - 1) 60 | y_ctr = anchor[1] + 0.5 * (h - 1) 61 | return w, h, x_ctr, y_ctr 62 | 63 | def _mkanchors(ws, hs, x_ctr, y_ctr): 64 | """ 65 | Given a vector of widths (ws) and heights (hs) around a center 66 | (x_ctr, y_ctr), output a set of anchors (windows). 67 | """ 68 | 69 | ws = ws[:, np.newaxis] 70 | hs = hs[:, np.newaxis] 71 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 72 | y_ctr - 0.5 * (hs - 1), 73 | x_ctr + 0.5 * (ws - 1), 74 | y_ctr + 0.5 * (hs - 1))) 75 | return anchors 76 | 77 | def _ratio_enum(anchor, ratios): 78 | """ 79 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
80 | """ 81 | 82 | w, h, x_ctr, y_ctr = _whctrs(anchor) 83 | size = w * h 84 | size_ratios = size / ratios 85 | ws = np.round(np.sqrt(size_ratios)) 86 | hs = np.round(ws * ratios) 87 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 88 | return anchors 89 | 90 | def _scale_enum(anchor, scales): 91 | """ 92 | Enumerate a set of anchors for each scale wrt an anchor. 93 | """ 94 | 95 | w, h, x_ctr, y_ctr = _whctrs(anchor) 96 | ws = w * scales 97 | hs = h * scales 98 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 99 | return anchors 100 | 101 | if __name__ == '__main__': 102 | import time 103 | t = time.time() 104 | a = generate_anchors() 105 | print(time.time() - t) 106 | print(a) 107 | from IPython import embed; embed() 108 | -------------------------------------------------------------------------------- /lib/model/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | # -------------------------------------------------------- 9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu 10 | # -------------------------------------------------------- 11 | 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | import math 16 | import yaml 17 | from model.utils.config import cfg 18 | from .generate_anchors import generate_anchors 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch 20 | from torchvision.ops import nms 21 | from external.nms import soft_nms 22 | import pdb 23 | 24 | DEBUG = False 25 | 26 | class _ProposalLayer(nn.Module): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 
30 | """ 31 | 32 | def __init__(self, feat_stride, scales, ratios): 33 | super(_ProposalLayer, self).__init__() 34 | 35 | self._feat_stride = feat_stride 36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 37 | ratios=np.array(ratios))).float() 38 | self._num_anchors = self._anchors.size(0) 39 | 40 | # rois blob: holds R regions of interest, each is a 5-tuple 41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 42 | # rectangle (x1, y1, x2, y2) 43 | # top[0].reshape(1, 5) 44 | # 45 | # # scores blob: holds scores for R regions of interest 46 | # if len(top) > 1: 47 | # top[1].reshape(1, 1, 1, 1) 48 | 49 | def forward(self, input): 50 | 51 | # Algorithm: 52 | # 53 | # for each (H, W) location i 54 | # generate A anchor boxes centered on cell i 55 | # apply predicted bbox deltas at cell i to each of the A anchors 56 | # clip predicted boxes to image 57 | # remove predicted boxes with either height or width < threshold 58 | # sort all (proposal, score) pairs by score from highest to lowest 59 | # take top pre_nms_topN proposals before NMS 60 | # apply NMS with threshold 0.7 to remaining proposals 61 | # take after_nms_topN proposals after NMS 62 | # return the top proposals (-> RoIs top, scores top) 63 | 64 | 65 | # the first set of _num_anchors channels are bg probs 66 | # the second set are the fg probs 67 | scores = input[0][:, self._num_anchors:, :, :] 68 | bbox_deltas = input[1] 69 | im_info = input[2] 70 | cfg_key = input[3] 71 | 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | batch_size = bbox_deltas.size(0) 78 | 79 | feat_height, feat_width = scores.size(2), scores.size(3) 80 | shift_x = np.arange(0, feat_width) * self._feat_stride 81 | shift_y = np.arange(0, feat_height) * self._feat_stride 82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(), 84 | shift_x.ravel(), shift_y.ravel())).transpose()) 85 | shifts = shifts.contiguous().type_as(scores).float() 86 | 87 | A = self._num_anchors 88 | K = shifts.size(0) 89 | 90 | self._anchors = self._anchors.type_as(scores) 91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous() 92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) 93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) 94 | 95 | # Transpose and reshape predicted bbox transformations to get them 96 | # into the same order as the anchors: 97 | 98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() 99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4) 100 | 101 | # Same story for the scores: 102 | scores = scores.permute(0, 2, 3, 1).contiguous() 103 | scores = scores.view(batch_size, -1) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info, batch_size) 110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size) 111 | 112 | # assign the score to 0 if it's non keep. 
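        # The min-size filtering and per-image index trimming from the original
        # implementation are left commented out below, so every decoded proposal
        # is kept, sorted by score and passed straight to NMS; re-enabling
        # _filter_boxes would drop boxes smaller than RPN_MIN_SIZE scaled by the
        # image scale stored in im_info[:, 2].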
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2]) 114 | 115 | # trim keep index to make it euqal over batch 116 | # keep_idx = torch.cat(tuple(keep_idx), 0) 117 | 118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size) 119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4) 120 | 121 | # _, order = torch.sort(scores_keep, 1, True) 122 | 123 | scores_keep = scores 124 | proposals_keep = proposals 125 | _, order = torch.sort(scores_keep, 1, True) 126 | 127 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 128 | for i in range(batch_size): 129 | # # 3. remove predicted boxes with either height or width < threshold 130 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 131 | proposals_single = proposals_keep[i] 132 | scores_single = scores_keep[i] 133 | 134 | # # 4. sort all (proposal, score) pairs by score from highest to lowest 135 | # # 5. take top pre_nms_topN (e.g. 6000) 136 | order_single = order[i] 137 | 138 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel(): 139 | order_single = order_single[:pre_nms_topN] 140 | 141 | proposals_single = proposals_single[order_single, :] 142 | scores_single = scores_single[order_single].view(-1,1) 143 | 144 | # 6. apply nms (e.g. threshold = 0.7) 145 | # 7. take after_nms_topN (e.g. 300) 146 | # 8. return the top proposals (-> RoIs top) 147 | keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh) 148 | 149 | # keep_idx_i = soft_nms(torch.cat((proposals_single, scores_single), 1).cpu().numpy(), Nt=0.5, method=2) 150 | # keep_idx_i = torch.as_tensor(keep_idx_i, dtype=torch.long) 151 | # 152 | # keep_idx_i = keep_idx_i.long().view(-1) 153 | 154 | if post_nms_topN > 0: 155 | keep_idx_i = keep_idx_i[:post_nms_topN] 156 | proposals_single = proposals_single[keep_idx_i, :] 157 | scores_single = scores_single[keep_idx_i, :] 158 | 159 | # padding 0 at the end. 
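            # output was pre-zeroed with shape (batch_size, post_nms_topN, 5):
            # column 0 stores the image index within the batch and columns 1:5
            # the proposal coordinates. When NMS keeps fewer than post_nms_topN
            # boxes the trailing rows simply stay all-zero, acting as padding so
            # the RoI tensor has a fixed size per image.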
160 | num_proposal = proposals_single.size(0) 161 | output[i,:,0] = i 162 | output[i,:num_proposal,1:] = proposals_single 163 | 164 | return output 165 | 166 | def backward(self, top, propagate_down, bottom): 167 | """This layer does not propagate gradients.""" 168 | pass 169 | 170 | def reshape(self, bottom, top): 171 | """Reshaping happens during the call to forward.""" 172 | pass 173 | 174 | def _filter_boxes(self, boxes, min_size): 175 | """Remove all boxes with any side smaller than min_size.""" 176 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 177 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 178 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 179 | return keep 180 | -------------------------------------------------------------------------------- /lib/model/rpn/rpn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from model.utils.config import cfg 7 | from model.loss.losses import _smooth_l1_loss 8 | from torch.autograd import Variable 9 | 10 | from .anchor_target_layer import _AnchorTargetLayer 11 | from .proposal_layer import _ProposalLayer 12 | from model.loss.losses import hard_negative_mining 13 | 14 | 15 | 16 | 17 | class _RPN(nn.Module): 18 | """ region proposal network """ 19 | def __init__(self, din): 20 | super(_RPN, self).__init__() 21 | 22 | self.din = din # get depth of input feature map, e.g., 512 23 | self.anchor_scales = cfg.ANCHOR_SCALES 24 | self.anchor_ratios = cfg.ANCHOR_RATIOS 25 | self.feat_stride = cfg.FEAT_STRIDE 26 | 27 | # define the convrelu layers processing input feature map 28 | # self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 29 | 30 | # define bg/fg classifcation score layer 31 | self.nc_score_out = len(self.anchor_scales) * len( 32 | self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors) 33 | 34 | self.RPN_cls_score = nn.Conv2d(self.din, self.nc_score_out, 1, 1, 0) 35 | 36 | # define anchor box offset prediction layer 37 | self.nc_bbox_out = len(self.anchor_scales) * len( 38 | self.anchor_ratios) * 4 # 4(coords) * 9 (anchors) 39 | self.RPN_bbox_pred = nn.Conv2d(self.din, self.nc_bbox_out, 1, 1, 0) 40 | 41 | # define proposal layer 42 | self.RPN_proposal = _ProposalLayer(self.feat_stride, 43 | self.anchor_scales, 44 | self.anchor_ratios) 45 | 46 | # define anchor target layer 47 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, 48 | self.anchor_scales, 49 | self.anchor_ratios) 50 | 51 | self.rpn_loss_cls = 0 52 | self.rpn_loss_box = 0 53 | 54 | 55 | 56 | @staticmethod 57 | def reshape(x, d): 58 | input_shape = x.size() 59 | x = x.view(input_shape[0], int(d), 60 | int(float(input_shape[1] * input_shape[2]) / float(d)), 61 | input_shape[3]) 62 | return x 63 | 64 | def forward(self, base_feat, im_info, gt_boxes, num_boxes): 65 | batch_size = base_feat.size(0) 66 | 67 | # return feature map after convrelu layer 68 | # rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True) 69 | # get rpn classification score 70 | rpn_cls_score = self.RPN_cls_score(base_feat) 71 | 72 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 73 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1) 74 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 75 | 76 | # get rpn offsets to the anchor boxes 77 | rpn_bbox_pred = self.RPN_bbox_pred(base_feat) 78 | 79 | # proposal layer 80 | cfg_key = 'TRAIN' if self.training else 
'TEST' 81 | 82 | rois = self.RPN_proposal( 83 | (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key)) 84 | 85 | self.rpn_loss_cls = 0 86 | self.rpn_loss_box = 0 87 | 88 | # generating training labels and build the rpn loss 89 | if self.training: 90 | assert gt_boxes is not None 91 | 92 | rpn_data = self.RPN_anchor_target( 93 | (rpn_cls_score.data, gt_boxes, im_info, num_boxes)) 94 | 95 | # compute classification loss 96 | rpn_cls_score = rpn_cls_score_reshape.permute( 97 | 0, 2, 3, 1).contiguous().view(batch_size, -1, 2) 98 | rpn_label = rpn_data[0].view(batch_size, -1) 99 | 100 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1)) 101 | 102 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0, 103 | rpn_keep) 104 | 105 | rpn_label = torch.index_select(rpn_label.view(-1), 0, 106 | rpn_keep.data) 107 | rpn_label = Variable(rpn_label.long()) 108 | 109 | # from collections import Counter 110 | # label = rpn_label.cpu().numpy() 111 | # print(Counter(label)) 112 | 113 | loss = -F.log_softmax(rpn_cls_score, dim=1)[:, 0] 114 | mask ,num_pos = hard_negative_mining(loss, rpn_label) 115 | confidence = rpn_cls_score[mask, :] 116 | self.rpn_loss_cls = F.cross_entropy(confidence.reshape(-1, 2), rpn_label[mask], reduction='mean') 117 | 118 | # self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label) 119 | # self.rpn_loss_cls = OHEM_loss(rpn_cls_score, rpn_label) 120 | 121 | 122 | 123 | 124 | fg_cnt = torch.sum(rpn_label.data.ne(0)) 125 | 126 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[ 127 | 1:] 128 | 129 | # compute bbox regression loss 130 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights) 131 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights) 132 | rpn_bbox_targets = Variable(rpn_bbox_targets) 133 | 134 | 135 | 136 | 137 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, 138 | rpn_bbox_targets, 139 | rpn_bbox_inside_weights, 140 | rpn_bbox_outside_weights, 141 | sigma=3, 142 | dim=[1, 2, 3], 143 | ) 144 | 145 | 146 | 147 | return rois, self.rpn_loss_cls, self.rpn_loss_box 148 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | 
""" 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | """Blob helper functions.""" 8 | 9 | import numpy as np 10 | # from scipy.misc import imread, imresize 11 | import cv2 12 | 13 | try: 14 | xrange # Python 2 15 | except NameError: 16 | xrange = range # Python 3 17 | 18 | 19 | def im_list_to_blob(ims): 20 | """Convert a list of images into a network input. 21 | 22 | Assumes images are already prepared (means subtracted, BGR order, ...). 
23 | """ 24 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 25 | num_images = len(ims) 26 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 27 | dtype=np.float32) 28 | for i in xrange(num_images): 29 | im = ims[i] 30 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 31 | 32 | return blob 33 | 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | # print(target_size) 43 | # print(im_shape) 44 | im_size_min = np.min(im_shape[0:2]) 45 | im_size_max = np.max(im_shape[0:2]) 46 | im_scale = float(target_size) / float(im_size_min) 47 | # Prevent the biggest axis from being more than MAX_SIZE 48 | # if np.round(im_scale * im_size_max) > max_size: 49 | # im_scale = float(max_size) / float(im_size_max) 50 | # im = imresize(im, im_scale) 51 | im = cv2.resize(im, 52 | None, 53 | None, 54 | fx=im_scale, 55 | fy=im_scale, 56 | interpolation=cv2.INTER_LINEAR) 57 | 58 | return im, im_scale 59 | -------------------------------------------------------------------------------- /lib/model/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.misc 5 | try: 6 | from StringIO import StringIO # Python 2.7 7 | except ImportError: 8 | from io import BytesIO # Python 3.x 9 | 10 | 11 | class Logger(object): 12 | def __init__(self, log_dir): 13 | """Create a summary writer logging to log_dir.""" 14 | self.writer = tf.summary.FileWriter(log_dir) 15 | 16 | def scalar_summary(self, tag, value, step): 17 | """Log a scalar variable.""" 18 | summary = tf.Summary( 19 | value=[tf.Summary.Value(tag=tag, simple_value=value)]) 20 | self.writer.add_summary(summary, step) 21 | 22 | def image_summary(self, tag, images, step): 23 | """Log a list of images.""" 24 | 25 | img_summaries = [] 26 | for i, img in enumerate(images): 27 | # Write the image to a string 28 | try: 29 | s = StringIO() 30 | except: 31 | s = BytesIO() 32 | scipy.misc.toimage(img).save(s, format="png") 33 | 34 | # Create an Image object 35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 36 | height=img.shape[0], 37 | width=img.shape[1]) 38 | # Create a Summary value 39 | img_summaries.append( 40 | tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 41 | 42 | # Create and write Summary 43 | summary = tf.Summary(value=img_summaries) 44 | self.writer.add_summary(summary, step) 45 | 46 | def histo_summary(self, tag, values, step, bins=1000): 47 | """Log a histogram of the tensor of values.""" 48 | 49 | # Create a histogram using numpy 50 | counts, bin_edges = np.histogram(values, bins=bins) 51 | 52 | # Fill the fields of the histogram proto 53 | hist = tf.HistogramProto() 54 | hist.min = float(np.min(values)) 55 | hist.max = float(np.max(values)) 56 | hist.num = int(np.prod(values.shape)) 57 | hist.sum = float(np.sum(values)) 58 | hist.sum_squares = float(np.sum(values**2)) 59 | 60 | # Drop the start of the first bin 61 | bin_edges = bin_edges[1:] 62 | 63 | # Add bin edges and counts 64 | for edge in bin_edges: 65 | hist.bucket_limit.append(edge) 66 | for c in counts: 67 | hist.bucket.append(c) 68 | 69 | # Create and write Summary 70 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 71 | 
self.writer.add_summary(summary, step) 72 | self.writer.flush() 73 | -------------------------------------------------------------------------------- /lib/model/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | from model.utils.config import cfg 8 | import cv2 9 | 10 | 11 | def save_net(fname, net): 12 | import h5py 13 | h5f = h5py.File(fname, mode='w') 14 | for k, v in net.state_dict().items(): 15 | h5f.create_dataset(k, data=v.cpu().numpy()) 16 | 17 | 18 | def load_net(fname, net): 19 | import h5py 20 | h5f = h5py.File(fname, mode='r') 21 | for k, v in net.state_dict().items(): 22 | param = torch.from_numpy(np.asarray(h5f[k])) 23 | v.copy_(param) 24 | 25 | 26 | def weights_normal_init(model, dev=0.01): 27 | if isinstance(model, list): 28 | for m in model: 29 | weights_normal_init(m, dev) 30 | else: 31 | for m in model.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | m.weight.data.normal_(0.0, dev) 34 | elif isinstance(m, nn.Linear): 35 | m.weight.data.normal_(0.0, dev) 36 | 37 | 38 | def clip_gradient(model, clip_norm): 39 | """Computes a gradient clipping coefficient based on gradient norm.""" 40 | totalnorm = 0 41 | for p in model.parameters(): 42 | if p.requires_grad and p.grad is not None: 43 | modulenorm = p.grad.norm() 44 | totalnorm += modulenorm**2 45 | totalnorm = torch.sqrt(totalnorm).item() 46 | norm = (clip_norm / max(totalnorm, clip_norm)) 47 | for p in model.parameters(): 48 | if p.requires_grad and p.grad is not None: 49 | p.grad.mul_(norm) 50 | 51 | 52 | def vis_detections(img, class_name,c , dets, thresh=0.8): 53 | for i in range(np.minimum(10, dets.shape[0])): 54 | bbox = tuple(int(np.round(x)) for x in dets[i, :4]) 55 | score = dets[i, -1] 56 | if score < thresh: 57 | continue 58 | 59 | 60 | 61 | txt = '{}:{:.2f}'.format( class_name,score) 62 | font = cv2.FONT_HERSHEY_SIMPLEX 63 | cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0] 64 | cv2.rectangle( 65 | img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), c, 2) 66 | 67 | cv2.rectangle(img, 68 | (bbox[0], bbox[1] - cat_size[1] - 2), 69 | (bbox[0] + cat_size[0], bbox[1] - 2), c, -1) 70 | cv2.putText(img, txt, (bbox[0], bbox[1] - 2), 71 | font, 0.5, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA) 72 | 73 | 74 | def adjust_learning_rate(optimizer, decay=0.1): 75 | """Sets the learning rate to the initial LR decayed by 0.5 every 20 epochs""" 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = decay * param_group['lr'] 78 | 79 | 80 | def save_checkpoint(state, filename): 81 | torch.save(state, filename) 82 | 83 | 84 | 85 | 86 | def _crop_pool_layer(bottom, rois, max_pool=True): 87 | # code modified from 88 | # https://github.com/ruotianluo/pytorch-faster-rcnn 89 | # implement it using stn 90 | # box to affine 91 | # input (x1,y1,x2,y2) 92 | """ 93 | [ x2-x1 x1 + x2 - W + 1 ] 94 | [ ----- 0 --------------- ] 95 | [ W - 1 W - 1 ] 96 | [ ] 97 | [ y2-y1 y1 + y2 - H + 1 ] 98 | [ 0 ----- --------------- ] 99 | [ H - 1 H - 1 ] 100 | """ 101 | rois = rois.detach() 102 | batch_size = bottom.size(0) 103 | D = bottom.size(1) 104 | H = bottom.size(2) 105 | W = bottom.size(3) 106 | roi_per_batch = rois.size(0) / batch_size 107 | x1 = rois[:, 1::4] / 16.0 108 | y1 = rois[:, 2::4] / 16.0 109 | x2 = rois[:, 3::4] / 16.0 110 | y2 = rois[:, 4::4] / 16.0 111 | 112 | height = bottom.size(2) 113 | 
width = bottom.size(3) 114 | 115 | # affine theta 116 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 117 | theta = torch.cat([\ 118 | (x2 - x1) / (width - 1), 119 | zero, 120 | (x1 + x2 - width + 1) / (width - 1), 121 | zero, 122 | (y2 - y1) / (height - 1), 123 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 124 | 125 | if max_pool: 126 | pre_pool_size = cfg.POOLING_SIZE * 2 127 | grid = F.affine_grid( 128 | theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size))) 129 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 130 | .contiguous().view(-1, D, H, W) 131 | crops = F.grid_sample(bottom, grid) 132 | crops = F.max_pool2d(crops, 2, 2) 133 | else: 134 | grid = F.affine_grid( 135 | theta, 136 | torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE))) 137 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 138 | .contiguous().view(-1, D, H, W) 139 | crops = F.grid_sample(bottom, grid) 140 | 141 | return crops, grid 142 | 143 | 144 | def _affine_grid_gen(rois, input_size, grid_size): 145 | 146 | rois = rois.detach() 147 | x1 = rois[:, 1::4] / 16.0 148 | y1 = rois[:, 2::4] / 16.0 149 | x2 = rois[:, 3::4] / 16.0 150 | y2 = rois[:, 4::4] / 16.0 151 | 152 | height = input_size[0] 153 | width = input_size[1] 154 | 155 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 156 | theta = torch.cat([\ 157 | (x2 - x1) / (width - 1), 158 | zero, 159 | (x1 + x2 - width + 1) / (width - 1), 160 | zero, 161 | (y2 - y1) / (height - 1), 162 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 163 | 164 | grid = F.affine_grid(theta, 165 | torch.Size((rois.size(0), 1, grid_size, grid_size))) 166 | 167 | return grid 168 | 169 | 170 | def _affine_theta(rois, input_size): 171 | 172 | rois = rois.detach() 173 | x1 = rois[:, 1::4] / 16.0 174 | y1 = rois[:, 2::4] / 16.0 175 | x2 = rois[:, 3::4] / 16.0 176 | y2 = rois[:, 4::4] / 16.0 177 | 178 | height = input_size[0] 179 | width = input_size[1] 180 | 181 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 182 | 183 | # theta = torch.cat([\ 184 | # (x2 - x1) / (width - 1), 185 | # zero, 186 | # (x1 + x2 - width + 1) / (width - 1), 187 | # zero, 188 | # (y2 - y1) / (height - 1), 189 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 190 | 191 | theta = torch.cat([\ 192 | (y2 - y1) / (height - 1), 193 | zero, 194 | (y1 + y2 - height + 1) / (height - 1), 195 | zero, 196 | (x2 - x1) / (width - 1), 197 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3) 198 | 199 | return theta 200 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Do Lin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software 
is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/README.md: -------------------------------------------------------------------------------- 1 | # PSROIAlign with multi-batch training support - PyTorch 2 | **Position-Sensitive-Region-of-Interests-Alignment** has been widely used throughout numerous well known deep object detectors, s.t. [R-FCN](https://arxiv.org/pdf/1605.06409.pdf), [LightHead R-CNN](https://arxiv.org/pdf/1711.07264.pdf), etc. However there are not that much implementations support ***multi-batch training*** in the world of PyTorch. With just one image per GPU, models would hardly be aware of the statistical information of the training data especially in cases that rarely one or two GPUs at hand. 3 | 4 | This CUDA based implementation fully supports multi-batch training, and can be easily integrated into your PyTorch object detectors. 5 | 6 | 7 | ## Prerequisite 8 | ``` 9 | python3 10 | pytorch >= 1.0 with CUDA support 11 | ``` 12 | 13 | 14 | ## Build the module 15 | ```bash 16 | sh build.sh 17 | ``` 18 | 19 | 20 | ## Use Case 21 | ```python 22 | import torch 23 | import torch.nn as nn 24 | from model.roi_layers import PSROIAlign 25 | ``` 26 | 27 | ```python 28 | class PSROIAlignExample(nn.Module): 29 | """ 30 | :spatial_scale: stride of the backbone 31 | :roi_size: output size of the pooled feature 32 | :sample_ratio: sample ratio of bilinear interpolation 33 | :pooled_dim: output channel of the pooled feature 34 | """ 35 | def __init__(self, 36 | spatial_scale=1./16., 37 | roi_size=7, 38 | sample_ratio=2, 39 | pooled_dim=10): 40 | 41 | super(PSROIAlignExample, self).__init__() 42 | self.psroialign = PSROIAlign(spatial_scale=spatial_scale, 43 | roi_size=roi_size, 44 | sampling_ratio=sample_ratio, 45 | pooled_dim=pooled_dim) 46 | 47 | def forward(self, feat, rois): 48 | return self.psroialign(feat, rois) 49 | ``` 50 | 51 | #### Feature Map to be pooled 52 | ```python 53 | batch_size = 4 54 | feat_height = 30 55 | feat_width = 40 56 | roi_size = 7 57 | oup_dim = 10 58 | 59 | feature = torch.randn((batch_size, 60 | roi_size * roi_size * oup_dim, 61 | feat_height, 62 | feat_width), 63 | requires_grad=True).cuda() 64 | ``` 65 | 66 | #### RoIs should be formatted as **(batch_index, x1, y1, x2, y2)** 67 | ```python 68 | rois = torch.tensor([ 69 | [0, 1., 1., 5., 5.], 70 | [0, 3., 3., 9., 9.], 71 | [1, 5., 5., 10., 10.], 72 | [1, 7., 7., 12., 12.] 73 | ]).cuda() 74 | ``` 75 | 76 | #### Essential Job 77 | ```python 78 | psroialign_pooled_feat = psroialign_example(feature, rois) 79 | ``` 80 | 81 | 82 | Play with ***example.py*** to get more details. 
83 | 84 | 85 | ## License 86 | [MIT](LICENSE) -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/__init__.py -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | rm -rf ./build/ ./model/_C* 4 | python setup.py build_ext --inplace -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIAlign_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIAlign_cuda.o -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIPool_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIPool_cuda.o -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/vision.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/vision.o -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/model/__init__.py -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/model/csrc/.DS_Store -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/PSROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | 7 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 8 | #define CHECK_CONTIGUOUS(x) \ 9 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 10 | #define CHECK_INPUT(x) \ 11 | CHECK_CUDA(x); \ 12 | CHECK_CONTIGUOUS(x) 13 | 14 | extern THCState* state; 15 | 16 | #endif 17 | 18 | 19 | int PSROIAlign_forward( 20 | at::Tensor bottom_data, 21 | at::Tensor bottom_rois, 22 | at::Tensor top_data, 23 | at::Tensor argmax_data, 24 | float spatial_scale, 25 | int group_size, 26 | int sampling_ratio) { 27 | 28 | #ifdef WITH_CUDA 29 | CHECK_INPUT(bottom_data); 30 | CHECK_INPUT(bottom_rois); 31 | CHECK_INPUT(top_data); 32 | CHECK_INPUT(argmax_data); 33 | 34 | int size_rois = bottom_rois.size(1); 35 | 36 | if (size_rois != 5) { 37 | printf("wrong roi size. (roi size should be 5)\n"); 38 | return 0; 39 | } 40 | 41 | cudaStream_t stream = THCState_getCurrentStream(state); 42 | 43 | PSROIAlignForwardLaucher(bottom_data, 44 | bottom_rois, 45 | top_data, 46 | argmax_data, 47 | spatial_scale, 48 | group_size, 49 | sampling_ratio, 50 | stream); 51 | #endif 52 | return 1; 53 | } 54 | 55 | int PSROIAlign_backward( 56 | at::Tensor top_diff, 57 | at::Tensor argmax_data, 58 | at::Tensor bottom_rois, 59 | at::Tensor bottom_diff, 60 | float spatial_scale, 61 | int group_size, 62 | int sampling_ratio) { 63 | 64 | #ifdef WITH_CUDA 65 | CHECK_INPUT(top_diff); 66 | CHECK_INPUT(bottom_rois); 67 | CHECK_INPUT(bottom_diff); 68 | CHECK_INPUT(argmax_data); 69 | 70 | int size_rois = bottom_rois.size(1); 71 | 72 | if (size_rois != 5) { 73 | printf("wrong roi size. (roi size should be 5)\n"); 74 | return 0; 75 | } 76 | 77 | cudaStream_t stream = THCState_getCurrentStream(state); 78 | 79 | PSROIAlignBackwardLaucher(top_diff, 80 | argmax_data, 81 | bottom_rois, 82 | bottom_diff, 83 | spatial_scale, 84 | group_size, 85 | sampling_ratio, 86 | stream); 87 | #endif 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/PSROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | 8 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 9 | #define CHECK_CONTIGUOUS(x) \ 10 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 11 | #define CHECK_INPUT(x) \ 12 | CHECK_CUDA(x); \ 13 | CHECK_CONTIGUOUS(x) 14 | 15 | extern THCState* state; 16 | 17 | #endif 18 | 19 | 20 | int PSROIPool_forward(int pooled_height, 21 | int pooled_width, 22 | float spatial_scale, 23 | int group_size, 24 | int output_dim, 25 | at::Tensor features, 26 | at::Tensor rois, 27 | at::Tensor output, 28 | at::Tensor mappingchannel) { 29 | #ifdef WITH_CUDA 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(mappingchannel); 34 | 35 | // Get # of Rois 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | if (size_rois != 5) { 39 | printf("wrong roi size\n"); 40 | return 0; 41 | } 42 | 43 | int data_height = features.size(2); 44 | int data_width = features.size(3); 45 | int num_channels = features.size(1); 46 | 47 | cudaStream_t stream = THCState_getCurrentStream(state); 48 | 49 | // call the gpu kernel for psroi_pooling 50 | PSROIPoolForwardLauncher(features, 51 | spatial_scale, 52 | num_rois, 53 | data_height, 54 | data_width, 55 | num_channels, 56 | pooled_height, 57 | pooled_width, 58 | rois, 59 | group_size, 60 | output_dim, 61 | output, 62 | mappingchannel, 63 | stream); 64 | #endif 65 | return 1; 66 | } 67 | 68 | 69 | int PSROIPool_backward(int pooled_height, 70 | int pooled_width, 71 | float spatial_scale, 72 | int output_dim, 73 | at::Tensor top_grad, 74 | at::Tensor rois, 75 | at::Tensor bottom_grad, 76 | at::Tensor mappingchannel) { 77 | #ifdef WITH_CUDA 78 | CHECK_INPUT(top_grad); 79 | CHECK_INPUT(rois); 80 | CHECK_INPUT(bottom_grad); 81 | CHECK_INPUT(mappingchannel); 82 | 83 | int batch_size = bottom_grad.size(0); 84 | 85 | // Number of ROIs 86 | int num_rois = rois.size(0); 87 | int size_rois = rois.size(1); 88 | if (size_rois != 5) { 89 | return 0; 90 | } 91 | 92 | // data height 93 | int data_height = bottom_grad.size(2); 94 | // data width 95 | int data_width = bottom_grad.size(3); 96 | // Number of channels 97 | int num_channels = bottom_grad.size(1); 98 | 99 | cudaStream_t stream = THCState_getCurrentStream(state); 100 | 101 | PSROIPoolBackwardLauncher(top_grad, 102 | mappingchannel, 103 | batch_size, 104 | num_rois, 105 | spatial_scale, 106 | num_channels, 107 | data_height, 108 | data_width, 109 | pooled_width, 110 | pooled_height, 111 | output_dim, 112 | bottom_grad, 113 | rois, 114 | stream); 115 | #endif 116 | return 1; 117 | } 118 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/cuda/PSROIPool_cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 10 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 11 | i += blockDim.x * gridDim.x) 12 | 13 | 14 | __global__ void PSROIPoolForward( 15 | const int nthreads, // (B*K) * 10 * 7 * 7 16 | const float* __restrict__ bottom_data, // (B, 490, H, W) 17 | const float spatial_scale, // 1./16. 
18 | const int height, // H 19 | const int width, // W 20 | const int channels, // 490 21 | const int pooled_height, // 7 22 | const int pooled_width, // 7 23 | const int group_size, // 7 24 | const int output_dim, // 10 25 | const float* __restrict__ bottom_rois, // (B*K, 5) 26 | float* __restrict__ top_data, // (B*K, 10, 7, 7) 27 | int* __restrict__ mapping_channel // (B*K, 10, 7, 7) 28 | ) { 29 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 30 | /* (n, ctop, ph, pw) is an element in the pooled output. 31 | * Whole size is up to (B*K, 10, 7, 7), where 32 | * n is up to B*K, e.g. K = 128, 33 | * ctop is up to 10, 34 | * ph is up to 7 35 | * pw is up to 7 36 | */ 37 | int pw = index % pooled_width; 38 | int ph = (index / pooled_width) % pooled_height; 39 | int ctop = (index / pooled_width / pooled_height) % output_dim; 40 | int n = index / pooled_width / pooled_height / output_dim; 41 | 42 | bottom_rois += n * 5; 43 | int roi_batch_ind = bottom_rois[0]; 44 | float roi_start_w = static_cast(round(bottom_rois[1])) * spatial_scale; 45 | float roi_start_h = static_cast(round(bottom_rois[2])) * spatial_scale; 46 | float roi_end_w = static_cast(round(bottom_rois[3]) + 1.) * spatial_scale; 47 | float roi_end_h = static_cast(round(bottom_rois[4]) + 1.) * spatial_scale; 48 | 49 | // Force malformed ROIs to be 1x1 50 | float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 51 | float roi_height = max(roi_end_h - roi_start_h, 0.1); 52 | 53 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 54 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 55 | 56 | int hstart = floor(static_cast(ph) * bin_size_h + roi_start_h); 57 | int wstart = floor(static_cast(pw)* bin_size_w + roi_start_w); 58 | int hend = ceil(static_cast(ph + 1) * bin_size_h + roi_start_h); 59 | int wend = ceil(static_cast(pw + 1) * bin_size_w + roi_start_w); 60 | 61 | // Add roi offsets and clip to input boundaries 62 | hstart = min(max(hstart, 0), height); 63 | hend = min(max(hend, 0), height); 64 | wstart = min(max(wstart, 0), width); 65 | wend = min(max(wend, 0), width); 66 | bool is_empty = (hend <= hstart) || (wend <= wstart); 67 | 68 | int gw = pw; 69 | int gh = ph; 70 | int c = (ctop * group_size + gh) * group_size + gw; 71 | 72 | bottom_data += (roi_batch_ind * channels + c) * height * width; 73 | float out_sum = 0; 74 | for (int h = hstart; h < hend; ++h) { 75 | for (int w = wstart; w < wend; ++w) { 76 | int bottom_index = h * width + w; 77 | out_sum += bottom_data[bottom_index]; 78 | } 79 | } 80 | float bin_area = (hend - hstart) * (wend - wstart); 81 | top_data[index] = is_empty ? 0. : out_sum / bin_area; 82 | mapping_channel[index] = c; 83 | } 84 | } 85 | 86 | 87 | int PSROIPoolForwardLauncher( 88 | at::Tensor bottom_data, // (B, 490, H, W) 89 | const float spatial_scale, // 1./16. 
90 | const int num_rois, // B*K, K = 128 91 | const int height, // H 92 | const int width, // W 93 | const int channels, // 490 94 | const int pooled_height, // 7 95 | const int pooled_width, // 7 96 | at::Tensor bottom_rois, // (B*K, 5) 97 | const int group_size, // 7 98 | const int output_dim, // 10 99 | at::Tensor top_data, // (B*K, 10, 7, 7) 100 | at::Tensor mapping_channel, // (B*K, 10, 7, 7) 101 | cudaStream_t stream 102 | ) { 103 | 104 | const int kThreadsPerBlock = 1024; 105 | const int output_size = output_dim * pooled_height * pooled_width * num_rois; 106 | 107 | PSROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 108 | output_size, 109 | bottom_data.data(), 110 | spatial_scale, 111 | height, 112 | width, 113 | channels, 114 | pooled_height, 115 | pooled_width, 116 | group_size, 117 | output_dim, 118 | bottom_rois.data(), 119 | top_data.data(), 120 | mapping_channel.data()); 121 | 122 | cudaError_t err = cudaGetLastError(); 123 | if(cudaSuccess != err) { 124 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 125 | exit( -1 ); 126 | } 127 | return 1; 128 | } 129 | 130 | 131 | __global__ void PSROIPoolBackward(const int nthreads, const float* __restrict__ top_diff, 132 | const int* __restrict__ mapping_channel, const int num_rois, const float spatial_scale, 133 | const int height, const int width, const int channels, 134 | const int pooled_height, const int pooled_width, const int output_dim, float* __restrict__ bottom_diff, 135 | const float* __restrict__ bottom_rois) { 136 | CUDA_1D_KERNEL_LOOP(index, nthreads) 137 | { 138 | 139 | int pw = index % pooled_width; 140 | int ph = (index / pooled_width) % pooled_height; 141 | int n = index / pooled_width / pooled_height / output_dim; 142 | 143 | // [start, end) interval for spatial sampling 144 | bottom_rois += n * 5; 145 | int roi_batch_ind = bottom_rois[0]; 146 | float roi_start_w = 147 | static_cast(round(bottom_rois[1])) * spatial_scale; 148 | float roi_start_h = 149 | static_cast(round(bottom_rois[2])) * spatial_scale; 150 | float roi_end_w = 151 | static_cast(round(bottom_rois[3]) + 1.) * spatial_scale; 152 | float roi_end_h = 153 | static_cast(round(bottom_rois[4]) + 1.) * spatial_scale; 154 | 155 | // Force too small ROIs to be 1x1 156 | float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 157 | float roi_height = max(roi_end_h - roi_start_h, 0.1); 158 | 159 | // Compute w and h at bottom 160 | float bin_size_h = roi_height / static_cast(pooled_height); 161 | float bin_size_w = roi_width / static_cast(pooled_width); 162 | 163 | int hstart = floor(static_cast(ph)* bin_size_h 164 | + roi_start_h); 165 | int wstart = floor(static_cast(pw)* bin_size_w 166 | + roi_start_w); 167 | int hend = ceil(static_cast(ph + 1) * bin_size_h 168 | + roi_start_h); 169 | int wend = ceil(static_cast(pw + 1) * bin_size_w 170 | + roi_start_w); 171 | // Add roi offsets and clip to input boundaries 172 | hstart = min(max(hstart, 0), height); 173 | hend = min(max(hend, 0), height); 174 | wstart = min(max(wstart, 0), width); 175 | wend = min(max(wend, 0), width); 176 | bool is_empty = (hend <= hstart) || (wend <= wstart); 177 | 178 | // Compute c at bottom 179 | int c = mapping_channel[index]; 180 | float* offset_bottom_diff = bottom_diff + 181 | (roi_batch_ind * channels + c) * height * width; 182 | float bin_area = (hend - hstart)*(wend - wstart); 183 | float diff_val = is_empty ? 0. 
: top_diff[index] / bin_area; 184 | for (int h = hstart; h < hend; ++h) { 185 | for (int w = wstart; w < wend; ++w) { 186 | int bottom_index = h*width + w; 187 | //caffe_gpu_atomic_add(diff_val, offset_bottom_diff + bottom_index); 188 | atomicAdd(offset_bottom_diff + bottom_index, diff_val); 189 | } 190 | } 191 | } 192 | } 193 | 194 | int PSROIPoolBackwardLauncher(at::Tensor top_diff, 195 | at::Tensor mapping_channel, 196 | const int batch_size, 197 | const int num_rois, 198 | const float spatial_scale, 199 | const int channels, 200 | const int height, 201 | const int width, 202 | const int pooled_width, 203 | const int pooled_height, 204 | const int output_dim, 205 | at::Tensor bottom_diff, 206 | at::Tensor bottom_rois, 207 | cudaStream_t stream) { 208 | 209 | const int kThreadsPerBlock = 1024; 210 | //const int output_size = output_dim * height * width * channels; 211 | const int output_size = output_dim * pooled_height * pooled_width * num_rois; 212 | 213 | PSROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 214 | output_size, 215 | top_diff.data(), 216 | mapping_channel.data(), 217 | num_rois, 218 | spatial_scale, 219 | height, 220 | width, 221 | channels, 222 | pooled_height, 223 | pooled_width, 224 | output_dim, 225 | bottom_diff.data(), 226 | bottom_rois.data()); 227 | 228 | cudaError_t err = cudaGetLastError(); 229 | if(cudaSuccess != err) { 230 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 231 | exit( -1 ); 232 | } 233 | 234 | return 1; 235 | } 236 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | #include 5 | 6 | 7 | int PSROIPoolForwardLauncher(at::Tensor bottom_data, 8 | const float spatial_scale, 9 | const int num_rois, 10 | const int height, 11 | const int width, 12 | const int channels, 13 | const int pooled_height, 14 | const int pooled_width, 15 | at::Tensor bottom_rois, 16 | const int group_size, 17 | const int output_dim, 18 | at::Tensor top_data, 19 | at::Tensor mapping_channel, 20 | cudaStream_t stream); 21 | 22 | 23 | int PSROIPoolBackwardLauncher(at::Tensor top_diff, 24 | at::Tensor mapping_channel, 25 | const int batch_size, 26 | const int num_rois, 27 | const float spatial_scale, 28 | const int channels, 29 | const int height, 30 | const int width, 31 | const int pooled_width, 32 | const int pooled_height, 33 | const int output_dim, 34 | at::Tensor bottom_diff, 35 | at::Tensor bottom_rois, 36 | cudaStream_t stream); 37 | 38 | 39 | int PSROIAlignForwardLaucher(at::Tensor bottom_data, 40 | at::Tensor bottom_rois, 41 | at::Tensor top_data, 42 | at::Tensor argmax_data, 43 | float spatial_scale, 44 | int group_size, 45 | int sampling_ratio, 46 | cudaStream_t stream); 47 | 48 | 49 | int PSROIAlignBackwardLaucher(at::Tensor top_diff, 50 | at::Tensor argmax_data, 51 | at::Tensor bottom_rois, 52 | at::Tensor bottom_diff, 53 | float spatial_scale, 54 | int group_size, 55 | int sampling_ratio, 56 | cudaStream_t stream); -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "PSROIAlign.h" 3 | #include "PSROIPool.h" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 6 | m.def("ps_roi_align_forward", &PSROIAlign_forward, "PSROIAlign_forward"); 7 | m.def("ps_roi_align_backward", &PSROIAlign_backward, "PSROIAlign_backward"); 8 | m.def("ps_roi_pool_forward", &PSROIPool_forward, "PSROIPool_forward"); 9 | m.def("ps_roi_pool_backward", &PSROIPool_backward, "PSROIPool_backward"); 10 | } 11 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from roi_layers import PSROIAlign 4 | 5 | # 6 | # class PSROIPoolExample(nn.Module): 7 | # def __init__(self, 8 | # pooled_height=7, 9 | # pooled_width=7, 10 | # spatial_scale=1./16., 11 | # group_size=7, 12 | # output_dim=10): 13 | # 14 | # super(PSROIPoolExample, self).__init__() 15 | # self.psroipool = PSROIPool(pooled_height=pooled_height, 16 | # pooled_width=pooled_width, 17 | # spatial_scale=spatial_scale, 18 | # group_size=group_size, 19 | # output_dim=output_dim) 20 | # 21 | # def forward(self, feat, rois): 22 | # print("PSROIPool:") 23 | # print(f"feature.shape:\t{feat.shape}") 24 | # print(f"rois.shape:\t{rois.shape}") 25 | # pooled_feat = self.psroipool(feat, rois) 26 | # print(f"pooled feature: {pooled_feat.shape}\n{pooled_feat}\n") 27 | # return pooled_feat 28 | 29 | 30 | class PSROIAlignExample(nn.Module): 31 | def __init__(self, 32 | spatial_scale=1./16., 33 | roi_size=7, 34 | sample_ratio=2, 35 | pooled_dim=10): 36 | 37 | super(PSROIAlignExample, self).__init__() 38 | self.psroialign = PSROIAlign(spatial_scale=spatial_scale, 39 | roi_size=roi_size, 40 | sampling_ratio=sample_ratio, 41 | pooled_dim=pooled_dim) 42 | 43 | def forward(self, feat, rois): 44 | print("PSROIAlign:") 45 | print(f"feature.shape:\t{feat.shape}") 46 | print(f"rois.shape:\t{rois.shape}") 47 | pooled_feat = self.psroialign(feat, rois) 48 | print(f"pooled feature: {pooled_feat.shape}\n{pooled_feat}\n") 49 | return pooled_feat 50 | 51 | 52 | if __name__ == '__main__': 53 | if not torch.cuda.is_available(): 54 | exit('Only works with cuda') 55 | 56 | # psroipool_example = PSROIPoolExample() 57 | psroialign_example = PSROIAlignExample() 58 | 59 | # feature map to be pooled 60 | batch_size = 4 61 | feat_height = 30 62 | feat_width = 40 63 | roi_size = 7 64 | oup_dim = 10 65 | 66 | feature = torch.randn((batch_size, 67 | roi_size * roi_size * oup_dim, 68 | feat_height, 69 | feat_width), 70 | requires_grad=True).cuda() 71 | 72 | # RoI: (batch_index, x1, y1, x2, y2) 73 | rois = torch.tensor([ 74 | [0, 1., 1., 5., 5.], 75 | [0, 3., 3., 9., 9.], 76 | [1, 5., 5., 10., 10.], 77 | [1, 7., 7., 12., 12.] 
78 | ]).cuda() 79 | 80 | # PSROIPool and PSROIAlign 81 | # psroipool_pooled_feat = psroipool_example(feature, rois) 82 | psroialign_pooled_feat = psroialign_example(feature, rois) 83 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/roi_layers/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .ps_roi_pool import ps_roi_pool, PSROIPool 3 | from .ps_roi_align import ps_roi_align, PSROIAlign 4 | 5 | __all__ = [ 6 | "ps_roi_pool", 7 | "PSROIPool", 8 | "ps_roi_align", 9 | "PSROIAlign" 10 | ] 11 | 12 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/roi_layers/ps_roi_align.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | import torch 5 | import sys 6 | sys.path.insert(0,"/mnt/data1/yanghuiyu/myself/object_detect/thundernetbylightheadrcnn/lib") 7 | from psroialign.PSROIAlign.model import _C 8 | 9 | 10 | class _PSROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, bottom_data, bottom_rois, spatial_scale, roi_size, sampling_ratio, pooled_dim): 13 | ctx.spatial_scale = spatial_scale # 1./16. 14 | ctx.roi_size = roi_size # 7 15 | ctx.sampling_ratio = sampling_ratio # 2 16 | ctx.pooled_dim = pooled_dim # 10 17 | ctx.feature_size = bottom_data.size() # (B, 490, H, W) 18 | num_rois = bottom_rois.size(0) # B*K 19 | # (B*K, 10, 7, 7) 20 | top_data = torch.zeros([num_rois, pooled_dim, roi_size, roi_size], dtype=torch.float32).to(bottom_data.device) 21 | # (B*K, 10, 7, 7) 22 | argmax_data = torch.zeros([num_rois, pooled_dim, roi_size, roi_size], dtype=torch.int32).to(bottom_data.device) 23 | if bottom_data.is_cuda: 24 | _C.ps_roi_align_forward(bottom_data, # (B, 490, H, W) 25 | bottom_rois, # (B*K, 5), e.g. K = 128 26 | top_data, # (B*K, 10, 7, 7) 27 | argmax_data, # (B*K, 10, 7, 7) 28 | spatial_scale, # 1./16. 29 | roi_size, # 7 30 | sampling_ratio # 2 31 | ) 32 | ctx.save_for_backward(bottom_rois, argmax_data) 33 | else: 34 | raise NotImplementedError 35 | 36 | return top_data 37 | 38 | @staticmethod 39 | @once_differentiable 40 | def backward(ctx, top_diff): 41 | spatial_scale = ctx.spatial_scale # 1./16. 42 | roi_size = ctx.roi_size # 7 43 | sampling_ratio = ctx.sampling_ratio # 2 44 | batch_size, channels, height, width = ctx.feature_size 45 | [bottom_rois, argmax_data] = ctx.saved_tensors 46 | bottom_diff = None 47 | if ctx.needs_input_grad[0]: 48 | bottom_diff = torch.zeros([batch_size, channels, height, width], dtype=torch.float32).to(top_diff.device) 49 | _C.ps_roi_align_backward(top_diff, # (B*K, 10, 7, 7) 50 | argmax_data, # (B*K, 10, 7, 7) 51 | bottom_rois, # (B*K, 10, 7, 7) 52 | bottom_diff, # (B, 490, H, W) 53 | spatial_scale, # 1./16. 
54 | roi_size, # 7 55 | sampling_ratio # 2 56 | ) 57 | 58 | return bottom_diff, None, None, None, None, None 59 | 60 | 61 | ps_roi_align = _PSROIAlign.apply 62 | 63 | 64 | class PSROIAlign(nn.Module): 65 | def __init__(self, spatial_scale, roi_size, sampling_ratio, pooled_dim): 66 | super(PSROIAlign, self).__init__() 67 | self.spatial_scale = spatial_scale 68 | self.roi_size = roi_size 69 | self.sampling_ratio = sampling_ratio 70 | self.pooled_dim = pooled_dim 71 | 72 | def forward(self, bottom_data, bottom_rois): 73 | return ps_roi_align(bottom_data, # (B, 490, H, W) 74 | bottom_rois, # (B*K, 5) 75 | self.spatial_scale, # 1./16. 76 | self.roi_size, # 7 77 | self.sampling_ratio, # 2 78 | self.pooled_dim # 10 79 | ) 80 | 81 | def __repr__(self): 82 | tmpstr = self.__class__.__name__ + "(" 83 | tmpstr += "spatial_scale=" + str(self.spatial_scale) 84 | tmpstr += ", roi_size=" + str(self.roi_size) 85 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 86 | tmpstr += ", pooled_dim=" + str(self.pooled_dim) 87 | tmpstr += ")" 88 | return tmpstr 89 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/roi_layers/ps_roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from psroialign.PSROIAlign.model import _C 6 | 7 | 8 | class _PSROIPool(Function): 9 | @staticmethod 10 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 11 | ctx.pooled_height = int(pooled_height) 12 | ctx.pooled_width = int(pooled_width) 13 | ctx.spatial_scale = float(spatial_scale) 14 | ctx.group_size = int(group_size) 15 | ctx.output_dim = int(output_dim) 16 | num_rois = rois.size()[0] 17 | output = torch.zeros(num_rois, ctx.output_dim, ctx.pooled_height, ctx.pooled_width).to(features.device) 18 | mappingchannel = torch.IntTensor(num_rois, ctx.output_dim, ctx.pooled_height, ctx.pooled_width).zero_().to(features.device) 19 | _C.ps_roi_pool_forward(ctx.pooled_height, 20 | ctx.pooled_width, 21 | ctx.spatial_scale, 22 | ctx.group_size, 23 | ctx.output_dim, 24 | features, 25 | rois, 26 | output, 27 | mappingchannel) 28 | ctx.save_for_backward(rois, mappingchannel) 29 | # ctx.output = output 30 | # ctx.mappingchannel = mappingchannel 31 | # ctx.rois = rois 32 | ctx.feature_size = features.size() 33 | 34 | return output 35 | 36 | @staticmethod 37 | @once_differentiable 38 | def backward(ctx, grad_output): 39 | assert(ctx.feature_size is not None and grad_output.is_cuda) 40 | batch_size, num_channels, data_height, data_width = ctx.feature_size 41 | [rois, mappingchannel] = ctx.saved_tensors 42 | grad_input = None 43 | if ctx.needs_input_grad[0]: 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).to(grad_output.device) 45 | _C.ps_roi_pool_backward(ctx.pooled_height, 46 | ctx.pooled_width, 47 | ctx.spatial_scale, 48 | ctx.output_dim, 49 | grad_output, 50 | rois, 51 | grad_input, 52 | mappingchannel) 53 | return grad_input, None, None, None, None, None, None 54 | 55 | 56 | ps_roi_pool = _PSROIPool.apply 57 | 58 | 59 | class PSROIPool(nn.Module): 60 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 61 | super(PSROIPool, self).__init__() 62 | 63 | self.pooled_width = int(pooled_width) 64 | self.pooled_height = int(pooled_height) 65 | self.spatial_scale = 
float(spatial_scale) 66 | self.group_size = int(group_size) 67 | self.output_dim = int(output_dim) 68 | 69 | def forward(self, features, rois): 70 | return ps_roi_pool(features, 71 | rois, 72 | self.pooled_height, 73 | self.pooled_width, 74 | self.spatial_scale, 75 | self.group_size, 76 | self.output_dim) 77 | 78 | def __repr__(self): 79 | tmpstr = self.__class__.__name__ + "(" 80 | tmpstr += "pooled_width=" + str(self.pooled_width) 81 | tmpstr += ", pooled_height=" + str(self.pooled_height) 82 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 83 | tmpstr += ", group_size=" + str(self.group_size) 84 | tmpstr += ", output_dim=" + str(self.output_dim) 85 | tmpstr += ")" 86 | return tmpstr 87 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | import platform 7 | 8 | import torch 9 | from setuptools import find_packages 10 | from setuptools import setup 11 | from torch.utils.cpp_extension import CUDA_HOME 12 | from torch.utils.cpp_extension import CppExtension 13 | from torch.utils.cpp_extension import CUDAExtension 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | 18 | # 19 | # if torch.cuda.is_available(): 20 | # print('Including CUDA code.') 21 | # sources += ['src/psroi_pooling_cuda.c'] 22 | # headers += ['src/psroi_pooling_cuda.h'] 23 | # defines += [('WITH_CUDA', None)] 24 | # with_cuda = True 25 | # 26 | # this_file = os.path.dirname(os.path.realpath(__file__)) 27 | # print(this_file) 28 | # extra_objects = ['src/cuda/psroi_pooling.cu.o'] 29 | # extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | # 31 | # ffi = create_extension( 32 | # '_ext.psroi_pooling', 33 | # headers=headers, 34 | # sources=sources, 35 | # define_macros=defines, 36 | # relative_to=__file__, 37 | # with_cuda=with_cuda, 38 | # extra_objects=extra_objects 39 | # ) 40 | # 41 | # if __name__ == '__main__': 42 | # ffi.build() 43 | 44 | 45 | def get_extensions(): 46 | this_dir = os.path.dirname(os.path.abspath(__file__)) 47 | extensions_dir = os.path.join(this_dir, "model", "csrc") 48 | 49 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 50 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 51 | 52 | sources = main_file 53 | extension = CppExtension 54 | cxx_flags = [] 55 | if platform.system() == 'Darwin': 56 | cxx_flags = ["-g", "-stdlib=libc++", "-std=c++11", "-mmacosx-version-min=10.9"] 57 | platform.release() 58 | 59 | extra_compile_args = {"cxx": cxx_flags} 60 | define_macros = [] 61 | 62 | if torch.cuda.is_available() and CUDA_HOME is not None: 63 | extension = CUDAExtension 64 | sources += source_cuda 65 | define_macros += [("WITH_CUDA", None)] 66 | extra_compile_args["nvcc"] = [ 67 | "-DCUDA_HAS_FP16=1", 68 | "-D__CUDA_NO_HALF_OPERATORS__", 69 | "-D__CUDA_NO_HALF_CONVERSIONS__", 70 | "-D__CUDA_NO_HALF2_OPERATORS__", 71 | ] 72 | 73 | sources = [os.path.join(extensions_dir, s) for s in sources] 74 | 75 | include_dirs = [extensions_dir] 76 | 77 | ext_modules = [ 78 | extension( 79 | "model._C", 80 | sources=sources, 81 | include_dirs=include_dirs, 82 | define_macros=define_macros, 83 | extra_compile_args=extra_compile_args, 84 | ) 85 | ] 86 | 87 | return ext_modules 88 | 89 | 90 | setup( 91 | name="psroialign", 92 | version="1.0.0", 93 | 
description="psroialign with pytorch 1.x", 94 | author="Do Lin", 95 | author_email="mcdooooo@gmail.com", 96 | license="MIT", 97 | packages=find_packages(exclude=("configs", "tests",)), 98 | # install_requires=requirements, 99 | ext_modules=get_extensions(), 100 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 101 | ) 102 | -------------------------------------------------------------------------------- /lib/psroialign/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/__init__.py -------------------------------------------------------------------------------- /lib/psroialign/pollers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | from versions.psroialign.psroialign import PSROIAlignhandle , PSROIPoolhandle 7 | from config import Configs 8 | 9 | CEM_FILTER = Configs.get("CEM_FILTER") 10 | spatial_scale = Configs.get("spatial_scale") 11 | 12 | 13 | 14 | 15 | 16 | class PsRoIAlign(nn.Module): 17 | """ 18 | Multi-scale RoIAlign pooling, which is useful for detection with or without FPN. 19 | 20 | It infers the scale of the pooling via the heuristics present in the FPN paper. 21 | 22 | Arguments: 23 | featmap_names (List[str]): the names of the feature maps that will be used 24 | for the pooling. 25 | output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region 26 | sampling_ratio (int): sampling ratio for ROIAlign 27 | 28 | Examples:: 29 | 30 | """ 31 | 32 | def __init__(self, output_size, sampling_ratio): 33 | super(PsRoIAlign, self).__init__() 34 | if isinstance(output_size, int): 35 | output_size = (output_size, output_size) 36 | 37 | self.sampling_ratio = sampling_ratio 38 | self.output_size = tuple(output_size) 39 | self.scales = spatial_scale 40 | 41 | 42 | def convert_to_roi_format(self, boxes): 43 | concat_boxes = torch.cat(boxes, dim=0) 44 | device, dtype = concat_boxes.device, concat_boxes.dtype 45 | ids = torch.cat( 46 | [ 47 | torch.full((len(b), 1), i, dtype=dtype, device=device) 48 | for i, b in enumerate(boxes) 49 | ], 50 | dim=0, 51 | ) 52 | rois = torch.cat([ids, concat_boxes], dim=1) 53 | return rois 54 | 55 | 56 | def forward(self, x, boxes, image_shapes): 57 | """ 58 | Arguments: 59 | x (OrderedDict[Tensor]): feature maps for each level. They are assumed to have 60 | all the same number of channels, but they can have different sizes. 61 | boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in 62 | (x1, y1, x2, y2) format and in the image reference size, not the feature map 63 | reference. 64 | image_shapes (List[Tuple[height, width]]): the sizes of each image before they 65 | have been fed to a CNN to obtain feature maps. This allows us to infer the 66 | scale factor for each one of the levels to be pooled. 
67 | Returns: 68 | result (Tensor) 69 | """ 70 | 71 | 72 | rois = self.convert_to_roi_format(boxes) 73 | 74 | 75 | roi_align = PSROIAlignhandle(sampling_ratio=self.sampling_ratio, spatial_scale=self.scales, roi_size=7, 76 | pooled_dim=CEM_FILTER//(7*7)) 77 | 78 | 79 | return roi_align( 80 | x, rois 81 | ) 82 | 83 | 84 | -------------------------------------------------------------------------------- /lib/psroialign/psroialign.py: -------------------------------------------------------------------------------- 1 | from psroialign.PSROIAlign.model.roi_layers import PSROIAlign,PSROIPool 2 | from torch import nn 3 | 4 | class PSROIAlignhandle(nn.Module): 5 | def __init__(self, 6 | spatial_scale=1./16., 7 | roi_size=7, 8 | sampling_ratio=2, 9 | pooled_dim=5): 10 | 11 | super(PSROIAlignhandle, self).__init__() 12 | self.psroialign = PSROIAlign(spatial_scale=spatial_scale, 13 | roi_size=roi_size, 14 | sampling_ratio=sampling_ratio, 15 | pooled_dim=pooled_dim) 16 | 17 | def forward(self, feat, rois): 18 | # print(feat.shape) 19 | pooled_feat = self.psroialign(feat, rois) 20 | 21 | return pooled_feat 22 | 23 | 24 | 25 | class PSROIPoolhandle(nn.Module): 26 | def __init__(self, 27 | pooled_height=7, 28 | pooled_width=7, 29 | spatial_scale=1./16., 30 | group_size=7, 31 | output_dim=5): 32 | 33 | super(PSROIPoolhandle, self).__init__() 34 | self.psroipool = PSROIPool(pooled_height=pooled_height, 35 | pooled_width=pooled_width, 36 | spatial_scale=spatial_scale, 37 | group_size=group_size, 38 | output_dim=output_dim) 39 | 40 | def forward(self, feat, rois): 41 | pooled_feat = self.psroipool(feat, rois) 42 | return pooled_feat -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Xinlei Chen, Lichao Wang 6 | # -------------------------------------------------------- 7 | """Compute minibatch blobs for training a Fast R-CNN network.""" 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import numpy.random as npr 14 | # from scipy.misc import imread 15 | import imageio # ImportError: cannot import name 'imread' from 'scipy.misc' 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | 20 | 21 | def get_minibatch(roidb, num_classes): 22 | """Given a roidb, construct a minibatch sampled from it.""" 23 | num_images = len(roidb) 24 | 25 | # Sample random scales to use for each image in this batch 26 | random_scale_inds = npr.randint(0, 27 | high=len(cfg.TRAIN.SCALES), 28 | size=num_images) 29 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 30 | 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ 31 | format(num_images, cfg.TRAIN.BATCH_SIZE) 32 | 33 | # Get the input image blob, formatted for caffe 34 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 35 | 36 | blobs = {'data': im_blob} 37 | 38 | assert len(im_scales) == 1, "Single batch only" 39 | assert len(roidb) == 1, "Single batch only" 40 | 41 | # gt boxes: (x1, y1, x2, y2, cls) 42 | if cfg.TRAIN.USE_ALL_GT: 43 | # Include all ground truth boxes 44 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 45 | else: 46 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 47 | gt_inds = np.where( 48 | (roidb[0]['gt_classes'] != 0) 49 | & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 50 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 51 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 52 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 53 | blobs['gt_boxes'] = gt_boxes 54 | blobs['im_info'] = np.array( 55 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) 56 | 57 | blobs['img_id'] = roidb[0]['img_id'] 58 | 59 | return blobs 60 | 61 | 62 | def _get_image_blob(roidb, scale_inds): 63 | """Builds an input blob from the images in the roidb at the specified 64 | scales. 65 | """ 66 | num_images = len(roidb) 67 | 68 | processed_ims = [] 69 | im_scales = [] 70 | for i in range(num_images): 71 | #im = cv2.imread(roidb[i]['image']) 72 | # im = imread(roidb[i]['image']) 73 | im = imageio.imread(roidb[i]['image']) # ImportError: cannot import name 'imread' from 'scipy.misc' 74 | 75 | if len(im.shape) == 2: 76 | im = im[:, :, np.newaxis] 77 | im = np.concatenate((im, im, im), axis=2) 78 | # flip the channel, since the original one using cv2 79 | # rgb -> bgr 80 | im = im[:, :, ::-1] 81 | 82 | if roidb[i]['flipped']: 83 | im = im[:, ::-1, :] 84 | # if roidb[i]['ver_flipped']: 85 | # im = im[::-1, :, :] 86 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 87 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 88 | cfg.TRAIN.MAX_SIZE) 89 | im_scales.append(im_scale) 90 | processed_ims.append(im) 91 | 92 | # Create a blob to hold the input images 93 | blob = im_list_to_blob(processed_ims) 94 | 95 | return blob, im_scales 96 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roibatchLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import torchvision.transforms as transforms 7 | import cv2 8 | import numpy as np 9 | from roi_data_layer.augmentation import SSDAugmentation 10 | import model.utils.config as config 11 | from roi_data_layer.utils import BaseTransform 12 | 13 | cfg = config.cfg 14 | 15 | class Detection(data.Dataset): 16 | """`MS Coco Detection `_ Dataset. 17 | Args: 18 | root (string): Root directory where images are downloaded to. 19 | set_name (string): Name of the specific set of COCO images. 20 | transform (callable, optional): A function/transform that augments the 21 | raw images` 22 | target_transform (callable, optional): A function/transform that takes 23 | in the target (bbox) and transforms it. 
24 | """ 25 | 26 | def __init__(self, roidb, num_classes, training=True,transform=None): 27 | self._roidb = roidb 28 | self.training = training 29 | self.transform = transform 30 | self.num_classes = num_classes 31 | self.max_num_box = cfg.MAX_NUM_GT_BOXES 32 | 33 | 34 | 35 | def __len__(self): 36 | return len(self._roidb) 37 | 38 | def __getitem__(self, index): 39 | """ 40 | Args: 41 | index (int): Index 42 | Returns: 43 | tuple: Tuple (image, target, height, width). 44 | target is the object returned by ``coco.loadAnns``. 45 | """ 46 | 47 | if self.training: 48 | index , size = index 49 | self.transform = SSDAugmentation(size, cfg.PIXEL_MEANS) 50 | else: 51 | size = cfg.TEST.SIZE 52 | 53 | roidb = self._roidb[index] 54 | im = cv2.imread(roidb['image']) 55 | if len(im.shape) == 2: 56 | im = im[:, :, np.newaxis] 57 | im = np.concatenate((im, im, im), axis=2) 58 | # flip the channel, since the original one using cv2 59 | 60 | if roidb['flipped']: 61 | im = im[:, ::-1, :] 62 | height, width = im.shape[0], im.shape[1] 63 | 64 | boxes = roidb['boxes'] 65 | gt_classes = roidb['gt_classes'] 66 | 67 | 68 | 69 | boxes_all = [] 70 | for b,class_gt in zip(boxes,gt_classes): 71 | boxes_all.append([b[0]/width,b[1]/height,b[2]/width,b[3]/height,class_gt]) 72 | 73 | 74 | 75 | target = np.array(boxes_all) 76 | 77 | target_re = np.zeros([self.max_num_box,5]) 78 | 79 | if self.transform is not None: 80 | 81 | img, boxes, labels = self.transform(im, target[:,:4], 82 | target[:,4]) 83 | 84 | img = img.transpose(2, 0, 1) 85 | number_box = 0 86 | for box in boxes: 87 | if number_box>=20: 88 | break 89 | target_re[number_box] = np.array([box[0]*size ,box[1]*size,box[2]*size,box[3]*size,labels[number_box]]) 90 | 91 | number_box+=1 92 | # target = np.hstack((boxes, np.expand_dims(labels, axis=1))) 93 | # img_id, img, gt_boxes_padding, img_info, num_gt_boxes 94 | 95 | data = torch.as_tensor(img, dtype=torch.float32) 96 | im_info = torch.from_numpy(np.array([img.shape[1], img.shape[2], size/width ,size/height ])) 97 | im_info = im_info.view(4) 98 | gt_boxes = torch.as_tensor(target_re, dtype=torch.int16) 99 | 100 | 101 | if self.training: 102 | return data, im_info, gt_boxes, number_box 103 | else: 104 | 105 | # im_info = np.array([[im.shape[1], im.shape[2], ratio]], dtype=np.float32) 106 | im_info = np.array([[img.shape[1], img.shape[2], size/width ,size/height]], dtype=np.float32) 107 | im_info = torch.as_tensor(im_info, dtype=torch.float32) 108 | im_info = im_info.view(4) 109 | gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1]) 110 | 111 | return data, im_info, gt_boxes, number_box 112 | 113 | 114 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import datasets 7 | import numpy as np 8 | from model.utils.config import cfg 9 | from datasets.factory import get_imdb 10 | import PIL 11 | import pdb 12 | 13 | 14 | def prepare_roidb(imdb): 15 | """Enrich the imdb's roidb by adding some derived quantities that 16 | are useful for training. This function precomputes the maximum 17 | overlap, taken over ground-truth boxes, between each ROI and 18 | each ground-truth box. The class with maximum overlap is also 19 | recorded. 
20 | """ 21 | 22 | roidb = imdb.roidb 23 | if not (imdb.name.startswith('coco')): 24 | sizes = [ 25 | PIL.Image.open(imdb.image_path_at(i)).size 26 | for i in range(imdb.num_images) 27 | ] 28 | 29 | for i in range(len(imdb.image_index)): 30 | roidb[i]['img_id'] = imdb.image_id_at(i) 31 | roidb[i]['image'] = imdb.image_path_at(i) 32 | if not (imdb.name.startswith('coco')): 33 | roidb[i]['width'] = sizes[i][0] 34 | roidb[i]['height'] = sizes[i][1] 35 | # need gt_overlaps as a dense array for argmax 36 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 37 | # max overlap with gt over classes (columns) 38 | max_overlaps = gt_overlaps.max(axis=1) 39 | # gt class that had the max overlap 40 | max_classes = gt_overlaps.argmax(axis=1) 41 | roidb[i]['max_classes'] = max_classes 42 | roidb[i]['max_overlaps'] = max_overlaps 43 | # sanity checks 44 | # max overlap of 0 => class should be zero (background) 45 | zero_inds = np.where(max_overlaps == 0)[0] 46 | assert all(max_classes[zero_inds] == 0) 47 | # max overlap > 0 => class should not be zero (must be a fg class) 48 | nonzero_inds = np.where(max_overlaps > 0)[0] 49 | assert all(max_classes[nonzero_inds] != 0) 50 | 51 | 52 | def rank_roidb_ratio(roidb): 53 | # rank roidb based on the ratio between width and height. 54 | ratio_large = 2.0 # largest ratio to preserve. 55 | ratio_small = 0.5 # smallest ratio to preserve. 56 | 57 | ratio_list = [] 58 | for i in range(len(roidb)): 59 | width = roidb[i]['width'] 60 | height = roidb[i]['height'] 61 | ratio = width / float(height) 62 | 63 | if cfg.TRAIN.ASPECT_CROPPING: 64 | if ratio > ratio_large: 65 | roidb[i]['need_crop'] = 1 66 | ratio = ratio_large 67 | elif ratio < ratio_small: 68 | roidb[i]['need_crop'] = 1 69 | ratio = ratio_small 70 | else: 71 | roidb[i]['need_crop'] = 0 72 | else: 73 | roidb[i]['need_crop'] = 0 74 | 75 | ratio_list.append(ratio) 76 | 77 | ratio_list = np.array(ratio_list) 78 | ratio_index = np.argsort(ratio_list) 79 | return ratio_list[ratio_index], ratio_index 80 | 81 | 82 | def filter_roidb(roidb): 83 | # filter the image without bounding box. 84 | print('before filtering, there are %d images...' % (len(roidb))) 85 | i = 0 86 | while i < len(roidb): 87 | if len(roidb[i]['boxes']) == 0: 88 | del roidb[i] 89 | i -= 1 90 | i += 1 91 | 92 | print('after filtering, there are %d images...' 
% (len(roidb))) 93 | return roidb 94 | 95 | 96 | def combined_roidb(imdb_names, training=True): 97 | """ 98 | Combine multiple roidbs 99 | """ 100 | 101 | def get_training_roidb(imdb): 102 | """Returns a roidb (Region of Interest database) for use in training.""" 103 | if cfg.TRAIN.USE_FLIPPED: 104 | print('Appending horizontally-flipped training examples...') 105 | imdb.append_flipped_images() 106 | print('done') 107 | 108 | if cfg.TRAIN.USE_VER_FLIPPED: 109 | print('Appending vertically-flipped training examples...') 110 | imdb.append_vertical_flipped_images() 111 | print('done') 112 | 113 | print('Preparing training data...') 114 | 115 | prepare_roidb(imdb) 116 | # ratio_index = rank_roidb_ratio(imdb) 117 | print('done') 118 | 119 | return imdb.roidb 120 | 121 | def get_roidb(imdb_name): 122 | imdb = get_imdb(imdb_name) 123 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 124 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 125 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 126 | roidb = get_training_roidb(imdb) 127 | return roidb 128 | 129 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 130 | roidb = roidbs[0] 131 | 132 | if len(roidbs) > 1: 133 | for r in roidbs[1:]: 134 | roidb.extend(r) 135 | tmp = get_imdb(imdb_names.split('+')[1]) 136 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 137 | else: 138 | imdb = get_imdb(imdb_names) 139 | 140 | if training: 141 | roidb = filter_roidb(roidb) 142 | return imdb, roidb 143 | else: 144 | ratio_list, ratio_index = rank_roidb_ratio(roidb) 145 | 146 | return imdb, roidb, ratio_list, ratio_index 147 | 148 | -------------------------------------------------------------------------------- /lib/roi_data_layer/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | import random 8 | import torch 9 | 10 | def flip(img): 11 | return img[:, :, ::-1].copy() 12 | 13 | 14 | 15 | 16 | 17 | def base_transform(image, size, mean): 18 | x = cv2.resize(image, (size, size)).astype(np.float32) 19 | x -= mean 20 | x = x.astype(np.float32) 21 | return x 22 | 23 | 24 | class BaseTransform: 25 | def __init__(self, size, mean): 26 | self.size = size 27 | self.mean = np.array(mean, dtype=np.float32) 28 | 29 | def __call__(self, image, boxes=None, labels=None): 30 | return base_transform(image, self.size, self.mean), boxes, labels 31 | 32 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #!/usr/bin/env python
3 | 
4 | import glob
5 | import os
6 | 
7 | import torch
8 | from setuptools import find_packages
9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 | 
14 | requirements = ["torch", "torchvision"]
15 | 
16 | 
17 | def get_extensions():
18 |     this_dir = os.path.dirname(os.path.abspath(__file__))
19 |     extensions_dir = os.path.join(this_dir, "model", "csrc")
20 | 
21 |     main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
22 |     source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
23 |     source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
24 | 
25 |     sources = main_file + source_cpu
26 |     extension = CppExtension
27 | 
28 |     extra_compile_args = {"cxx": []}
29 |     define_macros = []
30 | 
31 |     if torch.cuda.is_available() and CUDA_HOME is not None:
32 |         extension = CUDAExtension
33 |         sources += source_cuda
34 |         define_macros += [("WITH_CUDA", None)]
35 |         extra_compile_args["nvcc"] = [
36 |             "-DCUDA_HAS_FP16=1",
37 |             "-D__CUDA_NO_HALF_OPERATORS__",
38 |             "-D__CUDA_NO_HALF_CONVERSIONS__",
39 |             "-D__CUDA_NO_HALF2_OPERATORS__",
40 |         ]
41 | 
42 |     sources = [os.path.join(extensions_dir, s) for s in sources]
43 | 
44 |     include_dirs = [extensions_dir]
45 | 
46 |     ext_modules = [
47 |         extension(
48 |             "model._C",
49 |             sources,
50 |             include_dirs=include_dirs,
51 |             define_macros=define_macros,
52 |             extra_compile_args=extra_compile_args,
53 |         )
54 |     ]
55 | 
56 |     return ext_modules
57 | 
58 | 
59 | setup(
60 |     name="faster_rcnn",
61 |     version="0.1",
62 |     description="object detection in pytorch",
63 |     packages=find_packages(exclude=("configs", "tests",)),
64 |     # install_requires=requirements,
65 |     ext_modules=get_extensions(),
66 |     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
67 | )
68 | 
--------------------------------------------------------------------------------
/onnx/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/onnx/__init__.py
--------------------------------------------------------------------------------
/onnx/onnx_infer.py:
--------------------------------------------------------------------------------
1 | """
2 | This script runs the exported ThunderNet RPN ONNX model (thundernet146_rpn.onnx) on a sample image and prints the raw RPN outputs.
3 | """ 4 | import os,sys 5 | import time 6 | 7 | import cv2 8 | import numpy as np 9 | import onnx 10 | 11 | 12 | from caffe2.python.onnx import backend 13 | 14 | # onnx runtime 15 | import onnxruntime as ort 16 | 17 | onnx_path = "thundernet146_rpn.onnx" 18 | 19 | 20 | predictor = onnx.load(onnx_path) 21 | onnx.checker.check_model(predictor) 22 | onnx.helper.printable_graph(predictor.graph) 23 | predictor = backend.prepare(predictor, device="CPU") # default CPU 24 | 25 | ort_session = ort.InferenceSession(onnx_path) 26 | input_name = ort_session.get_inputs()[0].name 27 | result_path = "./result" 28 | 29 | threshold = 0.7 30 | # path = "/mnt/data1/yanghuiyu/dlmodel/Fd/Face-Detector-1MB-with-landmark/images/input" 31 | path = "/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/voc_images/input/2008_000179.jpg" 32 | sum = 0 33 | if not os.path.exists(result_path): 34 | os.makedirs(result_path) 35 | # listdir = os.listdir(path) 36 | sum = 0 37 | # for file_path in listdir: 38 | img_path = os.path.join(path, path) 39 | orig_image = cv2.imread(img_path) 40 | print(orig_image.shape) 41 | image = cv2.resize(orig_image, (320, 320)) 42 | # image = image/255.0 43 | # image = cv2.resize(image, (640, 480)) 44 | 45 | # mean = np.array([0.40789654, 0.44719302, 0.47026115], 46 | # dtype=np.float32).reshape(1, 1, 3) 47 | # std = np.array([0.28863828, 0.27408164, 0.27809835], 48 | # dtype=np.float32).reshape(1, 1, 3) 49 | 50 | # print(image) 51 | mean = np.array([[[0.485 * 255, 0.456 * 255, 0.406 * 255]]]) 52 | 53 | image = (image - mean) 54 | image = np.transpose(image, [2, 0, 1]) 55 | image = np.expand_dims(image, axis=0) 56 | image = image.astype(np.float32) 57 | 58 | # confidences, boxes = predictor.run(image) 59 | time_time = time.time() 60 | # boxes , confidences, landmark = ort_session.run(None, {input_name: image}) 61 | rpn_cls_prob,rpn_bbox_pred,base_feat = predictor.run(image) 62 | base_feat = np.swapaxes(base_feat[0],1,2) 63 | print(rpn_bbox_pred) 64 | -------------------------------------------------------------------------------- /onnx/rcnn_head_to_onnx.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from utils import load_model 3 | import torch.nn.functional as F 4 | import torch 5 | class _fasterRCNN(nn.Module): 6 | """ faster RCNN """ 7 | def __init__(self,n_classes 8 | ): 9 | self.n_classes = n_classes 10 | super(_fasterRCNN, self).__init__() 11 | 12 | c_in = 1024 13 | 14 | self.RCNN_top = nn.Sequential(nn.Linear(5 * 7 * 7, c_in), 15 | nn.ReLU(inplace=True)) 16 | 17 | 18 | self.RCNN_cls_score = nn.Linear(c_in, self.n_classes) 19 | self.RCNN_bbox_pred = nn.Linear(c_in, 4 * self.n_classes) 20 | 21 | 22 | 23 | 24 | def forward(self, pool5): 25 | 26 | pool5_flat = pool5.view(pool5.size(0), -1) 27 | print(pool5_flat.shape) 28 | fc7 = self.RCNN_top(pool5_flat) 29 | 30 | 31 | RCNN_cls_score = self.RCNN_cls_score(fc7) 32 | 33 | cls_prob = F.softmax(RCNN_cls_score, 1) 34 | 35 | bbox_pred = self.RCNN_bbox_pred(fc7) 36 | 37 | 38 | return [cls_prob,bbox_pred] 39 | 40 | 41 | 42 | net = _fasterRCNN(21) 43 | 44 | net = load_model(net, "../snet_146_3/snet_146/pascal_voc_0712/thundernet_epoch_4.pth") 45 | net.eval() 46 | print('Finished loading model!') 47 | print(net) 48 | device = torch.device("cpu") 49 | net = net.to(device) 50 | 51 | ##################export############### 52 | output_onnx = 'thundernet146_rcnn_head.onnx' 53 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 54 | input_names = ["roi_pool"] 
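# Note: the dummy tensor exported below is (1, 5, 7, 7), matching RCNN_top's
# nn.Linear(5 * 7 * 7, 1024) above, i.e. a single 5-channel, 7x7 pooled feature
# from PS RoI Align; at inference time this head is presumably run once per RoI
# kept after the RPN stage exported by rpn_to_onnx.py.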
55 | # output_names = ["hm" , "wh" , "reg"] 56 | output_names = ["cls_prob" , "bbox_pred" ] 57 | inputs = torch.randn(1, 5 , 7 , 7).to(device) 58 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 59 | input_names=input_names, output_names=output_names) -------------------------------------------------------------------------------- /onnx/rpn_to_onnx.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0,"../lib") 3 | from torch import nn 4 | from model.faster_rcnn.modules import * 5 | from model.faster_rcnn.Snet import SnetExtractor 6 | from utils import load_model 7 | class _RPN(nn.Module): 8 | """ region proposal network """ 9 | def __init__(self, din): 10 | super(_RPN, self).__init__() 11 | 12 | self.din = din # get depth of input feature map, e.g., 512 13 | 14 | 15 | # define the convrelu layers processing input feature map 16 | # self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 17 | 18 | # define bg/fg classifcation score layer 19 | self.nc_score_out = 25*2 20 | self.RPN_cls_score = nn.Conv2d(self.din, self.nc_score_out, 1, 1, 0) 21 | 22 | # define anchor box offset prediction layer 23 | self.nc_bbox_out = 25 * 4 # 4(coords) * 9 (anchors) 24 | self.RPN_bbox_pred = nn.Conv2d(self.din, self.nc_bbox_out, 1, 1, 0) 25 | self.softmax = nn.Softmax(1) 26 | 27 | @staticmethod 28 | def reshape(x, d): 29 | input_shape = x.size() 30 | x = x.view(input_shape[0], int(d), 31 | int(float(input_shape[1] * input_shape[2]) / float(d)), 32 | input_shape[3]) 33 | return x 34 | 35 | def forward(self, base_feat): 36 | 37 | rpn_cls_score = self.RPN_cls_score(base_feat) 38 | 39 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 40 | rpn_cls_prob_reshape = self.softmax(rpn_cls_score_reshape) 41 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 42 | 43 | # get rpn offsets to the anchor boxes 44 | rpn_bbox_pred = self.RPN_bbox_pred(base_feat) 45 | 46 | 47 | return rpn_cls_prob, rpn_bbox_pred 48 | 49 | 50 | 51 | class _fasterRCNN(nn.Module): 52 | """ faster RCNN """ 53 | def __init__(self 54 | ): 55 | super(_fasterRCNN, self).__init__() 56 | 57 | 58 | self.RCNN_base = SnetExtractor(146) 59 | 60 | # loss 61 | self.RCNN_loss_cls = 0 62 | self.RCNN_loss_bbox = 0 63 | # self.focalloss_handle = FocalLossV4(num_class=21, alpha=0.25, gamma=2.0, balance_index=2) 64 | # define Large Separable Convolution Layer 65 | 66 | self.rpn = RPN(in_channels=245, f_channels=256) 67 | 68 | 69 | self.sam = SAM(256,245) 70 | # define rpn 71 | self.RCNN_rpn = _RPN(256) 72 | 73 | 74 | 75 | 76 | def forward(self, im_data): 77 | 78 | basefeat = self.RCNN_base(im_data) 79 | 80 | # feed base feature map tp RPN to obtain rois 81 | rpn_feat= self.rpn(basefeat) 82 | 83 | rpn_cls_prob, rpn_bbox_pred = self.RCNN_rpn(rpn_feat) 84 | 85 | base_feat = self.sam([basefeat,rpn_feat]) 86 | return [rpn_cls_prob, rpn_bbox_pred ,base_feat] 87 | 88 | net = _fasterRCNN() 89 | 90 | net = load_model(net, "../snet_146_3/snet_146/pascal_voc_0712/thundernet_epoch_4.pth") 91 | net.eval() 92 | print('Finished loading model!') 93 | print(net) 94 | device = torch.device("cpu") 95 | net = net.to(device) 96 | 97 | ##################export############### 98 | output_onnx = 'thundernet146_rpn.onnx' 99 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 100 | input_names = ["input"] 101 | # output_names = ["hm" , "wh" , "reg"] 102 | output_names = ["rpn_cls_prob" , "rpn_bbox_pred" , "base_feat" ] 103 | 
inputs = torch.randn(1, 3, 320, 320).to(device) 104 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 105 | input_names=input_names, output_names=output_names) -------------------------------------------------------------------------------- /onnx/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | def check_keys(model, pretrained_state_dict): 3 | ckpt_keys = set(pretrained_state_dict.keys()) 4 | model_keys = set(model.state_dict().keys()) 5 | used_pretrained_keys = model_keys & ckpt_keys 6 | unused_pretrained_keys = ckpt_keys - model_keys 7 | missing_keys = model_keys - ckpt_keys 8 | print('Missing keys:{}'.format(len(missing_keys))) 9 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 10 | print('Used keys:{}'.format(len(used_pretrained_keys))) 11 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 12 | return True 13 | 14 | 15 | def remove_prefix(state_dict, prefix): 16 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' 17 | print('remove prefix \'{}\''.format(prefix)) 18 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 19 | return {f(key): value for key, value in state_dict.items()} 20 | 21 | 22 | def load_model(model, model_path): 23 | start_epoch = 0 24 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 25 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 26 | state_dict_ = checkpoint['model'] 27 | state_dict = {} 28 | 29 | # convert data_parallal to model 30 | for k in state_dict_: 31 | if k.startswith('module') and not k.startswith('module_list'): 32 | state_dict[k[7:]] = state_dict_[k] 33 | else: 34 | state_dict[k] = state_dict_[k] 35 | model_state_dict = model.state_dict() 36 | 37 | # check loaded parameters and created model parameters 38 | msg = 'If you see this, your model does not fully load the ' + \ 39 | 'pre-trained weight. Please make sure ' + \ 40 | 'you have correctly specified --arch xxx ' + \ 41 | 'or set the correct --num_classes for your own dataset.' 42 | for k in state_dict: 43 | if k in model_state_dict: 44 | if state_dict[k].shape != model_state_dict[k].shape: 45 | print('Skip loading parameter {}, required shape{}, ' \ 46 | 'loaded shape{}. 
{}'.format(
47 |                     k, model_state_dict[k].shape, state_dict[k].shape, msg))
48 |                 state_dict[k] = model_state_dict[k]
49 |         else:
50 |             print('Drop parameter {}.'.format(k) + msg)
51 |     for k in model_state_dict:
52 |         if not (k in state_dict):
53 |             print('No param {}.'.format(k) + msg)
54 |             state_dict[k] = model_state_dict[k]
55 |     model.load_state_dict(state_dict, strict=False)
56 |     return model
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Thundernet_Pytorch
2 | 
3 | ## The project has been updated: [thundernet_mmdetection](https://github.com/ouyanghuiyu/thundernet_mmdetection)
4 | ## 20191222 update
5 | - add data augmentation
6 | - add multi-scale training
7 | - add ONNX export (in progress)
8 | 
9 | ## Pretrained model
10 | - training code: https://github.com/ouyanghuiyu/Snet
11 | 
12 | ## Requirements
13 | * pytorch 1.2.0
14 | * torchvision 0.4
15 | 
16 | 
17 | 
18 | ## Lib Prepare
19 | ```sh
20 | git clone https://github.com/ouyanghuiyu/Thundernet_Pytorch.git
21 | ```
22 | 
23 | ### Build
24 | ```sh
25 | cd lib && python setup.py build_ext --inplace
26 | cd psroialign/PSROIAlign && sh build.sh
27 | ```
28 | ## Data Prepare
29 | Download the VOC0712 datasets and link them into the project:
30 | ln -s "YOUR PATH" data
31 | 
32 | ## Train
33 | ```sh
34 | cd script
35 | sh train_49.sh
36 | sh train_146.sh
37 | sh train_535.sh
38 | ```
39 | 
40 | ## Demo
41 | ```sh
42 | cd script
43 | sh pre.sh
44 | 
45 | ```
46 | 
47 | ## TODO LIST
48 | 
49 | - add COCO train and test
50 | - add NCNN inference
51 | 
52 | ## Citation
53 | Please cite the paper in your publications if it helps your research:
54 | ```
55 | @article{zheng2019thundernet,
56 |   title={ThunderNet: Towards Real-time Generic Object Detection},
57 |   author={Zheng Qin and Zeming Li and Zhaoning Zhang and Yiping Bao and Gang Yu and Yuxing Peng and Jian Sun},
58 |   journal={arXiv preprint arXiv:1903.11752},
59 |   year={2019}
60 | }
61 | ```
62 | 
63 | ## VOC TEST EXAMPLE
64 | ![test](https://github.com/ouyanghuiyu/Thundernet_Pytorch/blob/master/voc_images/output/2008_000005.jpg)
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
--------------------------------------------------------------------------------
/script/pre.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | cd ..
4 | 
5 | CUDA_VISIBLE_DEVICES=0 python demo.py --dataset pascal_voc_0712 --net snet_146 --load_dir snet146_2 \
6 | --checkepoch 6 --cuda \
7 | --image_dir /mnt/data1/yanghuiyu/project/object_detect/thundernetbylightheadrcnn/voc_images/input
8 | 
--------------------------------------------------------------------------------
/script/train_146.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | cd ..
4 | 
5 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc_0712 --net snet_146 --bs 64 --nw 8 \
6 | --lr 1e-2 --epochs 150 --cuda --lr_decay_step 25,50,75 --use_tfboard True \
7 | --save_dir snet146 --eval_interval 2 --logdir snet146_log --pre ./weights/snet_146.tar \
8 | --r True --checkepoch 2
9 | 
--------------------------------------------------------------------------------
/script/train_49.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | cd ..
4 | 5 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc_0712 --net snet_49 --bs 16 --nw 8 \ 6 | --lr 1e-2 --epochs 150 --cuda --lr_decay_step 50,75,100 --use_tfboard True\ 7 | --save_dir snet_49 --eval_interval 5 \ 8 | --r True --checkepoch 4 9 | -------------------------------------------------------------------------------- /script/train_535.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | cd .. 4 | 5 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc_0712 --net snet_535 --bs 16 --nw 8 \ 6 | --lr 1e-2 --epochs 150 --cuda --lr_decay_step 50,75,100 --use_tfboard True\ 7 | --save_dir snet_49 --eval_interval 5 \ 8 | --r True --checkepoch 4 9 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | color_list = np.array( 3 | [ 4 | 1.000, 1.000, 1.000, 5 | 0.850, 0.325, 0.098, 6 | 0.929, 0.694, 0.125, 7 | 0.494, 0.184, 0.556, 8 | 0.466, 0.674, 0.188, 9 | 0.301, 0.745, 0.933, 10 | 0.635, 0.078, 0.184, 11 | 0.300, 0.300, 0.300, 12 | 0.600, 0.600, 0.600, 13 | 1.000, 0.000, 0.000, 14 | 1.000, 0.500, 0.000, 15 | 0.749, 0.749, 0.000, 16 | 0.000, 1.000, 0.000, 17 | 0.000, 0.000, 1.000, 18 | 0.667, 0.000, 1.000, 19 | 0.333, 0.333, 0.000, 20 | 0.333, 0.667, 0.000, 21 | 0.333, 1.000, 0.000, 22 | 0.667, 0.333, 0.000, 23 | 0.667, 0.667, 0.000, 24 | 0.667, 1.000, 0.000, 25 | 1.000, 0.333, 0.000, 26 | 1.000, 0.667, 0.000, 27 | 1.000, 1.000, 0.000, 28 | 0.000, 0.333, 0.500, 29 | 0.000, 0.667, 0.500, 30 | 0.000, 1.000, 0.500, 31 | 0.333, 0.000, 0.500, 32 | 0.333, 0.333, 0.500, 33 | 0.333, 0.667, 0.500, 34 | 0.333, 1.000, 0.500, 35 | 0.667, 0.000, 0.500, 36 | 0.667, 0.333, 0.500, 37 | 0.667, 0.667, 0.500, 38 | 0.667, 1.000, 0.500, 39 | 1.000, 0.000, 0.500, 40 | 1.000, 0.333, 0.500, 41 | 1.000, 0.667, 0.500, 42 | 1.000, 1.000, 0.500, 43 | 0.000, 0.333, 1.000, 44 | 0.000, 0.667, 1.000, 45 | 0.000, 1.000, 1.000, 46 | 0.333, 0.000, 1.000, 47 | 0.333, 0.333, 1.000, 48 | 0.333, 0.667, 1.000, 49 | 0.333, 1.000, 1.000, 50 | 0.667, 0.000, 1.000, 51 | 0.667, 0.333, 1.000, 52 | 0.667, 0.667, 1.000, 53 | 0.667, 1.000, 1.000, 54 | 1.000, 0.000, 1.000, 55 | 1.000, 0.333, 1.000, 56 | 1.000, 0.667, 1.000, 57 | 0.167, 0.000, 0.000, 58 | 0.333, 0.000, 0.000, 59 | 0.500, 0.000, 0.000, 60 | 0.667, 0.000, 0.000, 61 | 0.833, 0.000, 0.000, 62 | 1.000, 0.000, 0.000, 63 | 0.000, 0.167, 0.000, 64 | 0.000, 0.333, 0.000, 65 | 0.000, 0.500, 0.000, 66 | 0.000, 0.667, 0.000, 67 | 0.000, 0.833, 0.000, 68 | 0.000, 1.000, 0.000, 69 | 0.000, 0.000, 0.167, 70 | 0.000, 0.000, 0.333, 71 | 0.000, 0.000, 0.500, 72 | 0.000, 0.000, 0.667, 73 | 0.000, 0.000, 0.833, 74 | 0.000, 0.000, 1.000, 75 | 0.000, 0.000, 0.000, 76 | 0.143, 0.143, 0.143, 77 | 0.286, 0.286, 0.286, 78 | 0.429, 0.429, 0.429, 79 | 0.571, 0.571, 0.571, 80 | 0.714, 0.714, 0.714, 81 | 0.857, 0.857, 0.857, 82 | 0.000, 0.447, 0.741, 83 | 0.50, 0.5, 0 84 | ] 85 | ).astype(np.float32) 86 | color_list = color_list.reshape((-1, 3)) * 255 87 | 88 | -------------------------------------------------------------------------------- /voc_images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/.DS_Store -------------------------------------------------------------------------------- 
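A minimal sketch of how the color_list palette defined in utils.py above could be used when drawing detections on the VOC images below. It assumes the top-level utils.py is importable; the draw_detection helper and its box, class index, class name, and score arguments are hypothetical placeholders, not code from this repository.

import cv2
from utils import color_list  # 80-entry palette defined above, already scaled to 0-255

def draw_detection(img, box, class_idx, class_name, score):
    # pick a per-class color from the palette
    color = tuple(int(c) for c in color_list[class_idx % len(color_list)])
    x1, y1, x2, y2 = [int(v) for v in box]
    cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
    cv2.putText(img, "{} {:.2f}".format(class_name, score), (x1, max(y1 - 4, 0)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return img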
/voc_images/input/2008_000005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/input/2008_000005.jpg -------------------------------------------------------------------------------- /voc_images/input/2008_000038.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/input/2008_000038.jpg -------------------------------------------------------------------------------- /voc_images/input/2008_000175.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/input/2008_000175.jpg -------------------------------------------------------------------------------- /weights/snet_146.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/weights/snet_146.tar -------------------------------------------------------------------------------- /weights/thundernet146_voc_map67.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/weights/thundernet146_voc_map67.pth --------------------------------------------------------------------------------
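As a quick sanity check, the released checkpoint above can be inspected with plain torch.load. This sketch assumes the file follows the same {'epoch': ..., 'model': ...} layout that load_model in onnx/utils.py expects, and falls back to treating it as a bare state_dict otherwise.

import torch

# load onto CPU regardless of where the checkpoint was saved
checkpoint = torch.load("weights/thundernet146_voc_map67.pth",
                        map_location=lambda storage, loc: storage)
print("epoch:", checkpoint.get("epoch"))

# use the nested state_dict if present, otherwise the checkpoint itself
state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint
print("number of parameter tensors:", len(state_dict))
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape))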