├── .DS_Store ├── _init_paths.py ├── cfgs └── snet.yml ├── demo.py ├── lib ├── __init__.py ├── build │ ├── lib.linux-x86_64-3.6 │ │ └── model │ │ │ └── _C.cpython-36m-x86_64-linux-gnu.so │ └── temp.linux-x86_64-3.6 │ │ └── mnt │ │ └── data1 │ │ └── yanghuiyu │ │ └── project │ │ └── object_detect │ │ └── Thundernet_new │ │ └── lib │ │ └── model │ │ └── csrc │ │ ├── cpu │ │ ├── ROIAlign_cpu.o │ │ └── nms_cpu.o │ │ ├── cuda │ │ ├── ROIAlign_cuda.o │ │ ├── ROIPool_cuda.o │ │ └── nms.o │ │ └── vision.o ├── datasets │ ├── VOCdevkit-matlab-wrapper │ │ ├── get_voc_opts.m │ │ ├── voc_eval.m │ │ └── xVOCap.m │ ├── __init__.py │ ├── coco.py │ ├── ds_utils.py │ ├── factory.py │ ├── imagenet.py │ ├── imdb.py │ ├── pascal_voc.py │ ├── pascal_voc_rbg.py │ ├── tools │ │ └── mcg_munge.py │ ├── vg.py │ ├── vg_eval.py │ └── voc_eval.py ├── external │ ├── .gitignore │ ├── Makefile │ ├── __init__.py │ ├── nms.pyx │ └── setup.py ├── model │ ├── _C.cpython-36m-x86_64-linux-gnu.so │ ├── build │ │ └── lib.linux-x86_64-3.6 │ │ │ └── model │ │ │ └── _C.cpython-36m-x86_64-linux-gnu.so │ ├── csrc │ │ ├── ROIAlign.h │ │ ├── ROIPool.h │ │ ├── cpu │ │ │ ├── ROIAlign_cpu.cpp │ │ │ ├── nms_cpu.cpp │ │ │ └── vision.h │ │ ├── cuda │ │ │ ├── ROIAlign_cuda.cu │ │ │ ├── ROIPool_cuda.cu │ │ │ ├── nms.cu │ │ │ └── vision.h │ │ ├── nms.h │ │ └── vision.cpp │ ├── faster_rcnn │ │ ├── Snet.py │ │ ├── __init__.py │ │ ├── faster_rcnn.py │ │ └── modules.py │ ├── loss │ │ ├── __init__.py │ │ └── losses.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_target_layer.py │ │ ├── bbox_transform.py │ │ ├── centernet_rpn.py │ │ ├── generate_anchors.py │ │ ├── proposal_layer.py │ │ ├── proposal_target_layer_cascade.py │ │ └── rpn.py │ └── utils │ │ ├── .gitignore │ │ ├── __init__.py │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── cente_decode.py │ │ ├── config.py │ │ ├── layer_utils.py │ │ ├── logger.py │ │ └── net_utils.py ├── psroialign │ ├── PSROIAlign │ │ ├── .gitattributes │ │ ├── LICENSE │ │ ├── README.md │ │ ├── __init__.py │ │ ├── build.sh │ │ ├── build │ │ │ ├── lib.linux-x86_64-3.6 │ │ │ │ └── model │ │ │ │ │ └── _C.cpython-36m-x86_64-linux-gnu.so │ │ │ └── temp.linux-x86_64-3.6 │ │ │ │ └── mnt │ │ │ │ └── data1 │ │ │ │ └── yanghuiyu │ │ │ │ └── myself │ │ │ │ └── object_detect │ │ │ │ └── light_head_rcnn │ │ │ │ └── psroialign │ │ │ │ └── PSROIAlign │ │ │ │ └── model │ │ │ │ └── csrc │ │ │ │ ├── cuda │ │ │ │ ├── PSROIAlign_cuda.o │ │ │ │ └── PSROIPool_cuda.o │ │ │ │ └── vision.o │ │ ├── model │ │ │ ├── _C.cpython-36m-x86_64-linux-gnu.so │ │ │ ├── __init__.py │ │ │ ├── csrc │ │ │ │ ├── .DS_Store │ │ │ │ ├── PSROIAlign.h │ │ │ │ ├── PSROIPool.h │ │ │ │ ├── cuda │ │ │ │ │ ├── PSROIAlign_cuda.cpp │ │ │ │ │ ├── PSROIAlign_cuda.cu │ │ │ │ │ ├── PSROIPool_cuda.cu │ │ │ │ │ └── vision.h │ │ │ │ └── vision.cpp │ │ │ ├── example.py │ │ │ └── roi_layers │ │ │ │ ├── __init__.py │ │ │ │ ├── ps_roi_align.py │ │ │ │ └── ps_roi_pool.py │ │ └── setup.py │ ├── __init__.py │ ├── pollers.py │ └── psroialign.py ├── roi_data_layer │ ├── __init__.py │ ├── augmentation.py │ ├── minibatch.py │ ├── roibatchLoader.py │ ├── roidb.py │ └── utils.py └── setup.py ├── onnx ├── __init__.py ├── onnx_infer.py ├── rcnn_head_to_onnx.py ├── rpn_to_onnx.py └── utils.py ├── readme.md ├── script ├── pre.sh ├── train_146.sh ├── train_49.sh └── train_535.sh ├── test_net.py ├── trainval_net.py ├── utils.py ├── voc_images ├── .DS_Store └── input │ ├── 2008_000005.jpg │ ├── 2008_000038.jpg │ └── 2008_000175.jpg └── weights ├── snet_146.tar └── thundernet146_voc_map67.pth /.DS_Store: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/.DS_Store -------------------------------------------------------------------------------- /_init_paths.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | def add_path(path): 5 | if path not in sys.path: 6 | sys.path.insert(0, path) 7 | 8 | this_dir = osp.dirname(__file__) 9 | 10 | # Add lib to PYTHONPATH 11 | lib_path = osp.join(this_dir, 'lib') 12 | add_path(lib_path) 13 | 14 | coco_path = osp.join(this_dir, 'data', 'coco', 'PythonAPI') 15 | add_path(coco_path) 16 | -------------------------------------------------------------------------------- /cfgs/snet.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: snet 2 | TRAIN: 3 | HAS_RPN: True 4 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 5 | RPN_POSITIVE_OVERLAP: 0.7 6 | RPN_POST_NMS_TOP_N: 2000 7 | RPN_BATCHSIZE: 256 8 | PROPOSAL_METHOD: gt 9 | BG_THRESH_LO: 0.0 10 | BG_THRESH_HI: 0.3 11 | DISPLAY: 20 12 | BATCH_SIZE: 128 13 | WEIGHT_DECAY: 0.0001 14 | DOUBLE_BIAS: False 15 | SNAPSHOT_PREFIX: Snet 16 | LEARNING_RATE: 0.01 17 | SIZE: [240,320,480] 18 | TEST: 19 | HAS_RPN: True 20 | SIZE: 320 21 | RPN_POST_NMS_TOP_N: 200 22 | POOLING_SIZE: 7 23 | POOLING_MODE: align 24 | CROP_RESIZE_WITH_MAX_POOL: False 25 | FEAT_STRIDE: 16 26 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/__init__.py -------------------------------------------------------------------------------- /lib/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/ROIAlign_cpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/ROIAlign_cpu.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/nms_cpu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cpu/nms_cpu.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIAlign_cuda.o: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIAlign_cuda.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIPool_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/ROIPool_cuda.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/nms.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/cuda/nms.o -------------------------------------------------------------------------------- /lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/vision.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/lib/model/csrc/vision.o -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/get_voc_opts.m: -------------------------------------------------------------------------------- 1 | function VOCopts = get_voc_opts(path) 2 | 3 | tmp = pwd; 4 | cd(path); 5 | try 6 | addpath('VOCcode'); 7 | VOCinit; 8 | catch 9 | rmpath('VOCcode'); 10 | cd(tmp); 11 | error(sprintf('VOCcode directory not found under %s', path)); 12 | end 13 | rmpath('VOCcode'); 14 | cd(tmp); 15 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/voc_eval.m: -------------------------------------------------------------------------------- 1 | function res = voc_eval(path, comp_id, test_set, output_dir) 2 | 3 | VOCopts = get_voc_opts(path); 4 | VOCopts.testset = test_set; 5 | 6 | for i = 1:length(VOCopts.classes) 7 | cls = VOCopts.classes{i}; 8 | res(i) = voc_eval_cls(cls, VOCopts, comp_id, output_dir); 9 | end 10 | 11 | fprintf('\n~~~~~~~~~~~~~~~~~~~~\n'); 12 | fprintf('Results:\n'); 13 | aps = [res(:).ap]'; 14 | fprintf('%.1f\n', aps * 100); 15 | fprintf('%.1f\n', mean(aps) * 100); 16 | fprintf('~~~~~~~~~~~~~~~~~~~~\n'); 17 | 18 | function res = voc_eval_cls(cls, VOCopts, comp_id, output_dir) 19 | 20 | test_set = VOCopts.testset; 21 | year = VOCopts.dataset(4:end); 22 | 23 | addpath(fullfile(VOCopts.datadir, 'VOCcode')); 24 | 25 | res_fn = sprintf(VOCopts.detrespath, comp_id, cls); 26 | 27 | recall = []; 28 | prec = []; 29 | ap = 0; 30 | ap_auc = 0; 31 | 32 | do_eval = (str2num(year) <= 2007) | ~strcmp(test_set, 'test'); 33 | if do_eval 34 | % Bug in VOCevaldet requires that tic has been called first 35 | tic; 36 | [recall, prec, ap] = VOCevaldet(VOCopts, comp_id, cls, true); 37 | ap_auc = xVOCap(recall, prec); 38 
| 39 | % force plot limits 40 | ylim([0 1]); 41 | xlim([0 1]); 42 | 43 | print(gcf, '-djpeg', '-r0', ... 44 | [output_dir '/' cls '_pr.jpg']); 45 | end 46 | fprintf('!!! %s : %.4f %.4f\n', cls, ap, ap_auc); 47 | 48 | res.recall = recall; 49 | res.prec = prec; 50 | res.ap = ap; 51 | res.ap_auc = ap_auc; 52 | 53 | save([output_dir '/' cls '_pr.mat'], ... 54 | 'res', 'recall', 'prec', 'ap', 'ap_auc'); 55 | 56 | rmpath(fullfile(VOCopts.datadir, 'VOCcode')); 57 | -------------------------------------------------------------------------------- /lib/datasets/VOCdevkit-matlab-wrapper/xVOCap.m: -------------------------------------------------------------------------------- 1 | function ap = xVOCap(rec,prec) 2 | % From the PASCAL VOC 2011 devkit 3 | 4 | mrec=[0 ; rec ; 1]; 5 | mpre=[0 ; prec ; 0]; 6 | for i=numel(mpre)-1:-1:1 7 | mpre(i)=max(mpre(i),mpre(i+1)); 8 | end 9 | i=find(mrec(2:end)~=mrec(1:end-1))+1; 10 | ap=sum((mrec(i)-mrec(i-1)).*mpre(i)); 11 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/datasets/ds_utils.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Ross Girshick 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import numpy as np 11 | 12 | 13 | def unique_boxes(boxes, scale=1.0): 14 | """Return indices of unique boxes.""" 15 | v = np.array([1, 1e3, 1e6, 1e9]) 16 | hashes = np.round(boxes * scale).dot(v) 17 | _, index = np.unique(hashes, return_index=True) 18 | return np.sort(index) 19 | 20 | 21 | def xywh_to_xyxy(boxes): 22 | """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" 23 | return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) 24 | 25 | 26 | def xyxy_to_xywh(boxes): 27 | """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" 28 | return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) 29 | 30 | 31 | def validate_boxes(boxes, width=0, height=0): 32 | """Check that a set of boxes are valid.""" 33 | x1 = boxes[:, 0] 34 | y1 = boxes[:, 1] 35 | x2 = boxes[:, 2] 36 | y2 = boxes[:, 3] 37 | assert (x1 >= 0).all() 38 | assert (y1 >= 0).all() 39 | assert (x2 >= x1).all() 40 | assert (y2 >= y1).all() 41 | assert (x2 < width).all() 42 | assert (y2 < height).all() 43 | 44 | 45 | def filter_small_boxes(boxes, min_size): 46 | w = boxes[:, 2] - boxes[:, 0] 47 | h = boxes[:, 3] - boxes[:, 1] 48 | keep = np.where((w >= min_size) & (h > min_size))[0] 49 | return keep 50 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE 
for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Factory method for easily getting imdbs by name.""" 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | __sets = {} 14 | from datasets.pascal_voc import pascal_voc 15 | from datasets.coco import coco 16 | 17 | 18 | for year in ['2007', '2012']: 19 | for split in ['train', 'val', 'trainval', 'test']: 20 | name = 'voc_{}_{}'.format(year, split) 21 | __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) 22 | 23 | # Set up coco_2017_ 24 | 25 | for year in ['2017']: 26 | for split in ['train', 'val']: 27 | name = 'coco_{}_{}'.format(year, split) 28 | __sets[name] = (lambda split=split, year=year: coco(split, year)) 29 | 30 | # Set up vg_ 31 | # for version in ['1600-400-20']: 32 | # for split in ['minitrain', 'train', 'minival', 'val', 'test']: 33 | # name = 'vg_{}_{}'.format(version,split) 34 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 35 | # 36 | # for version in ['150-50-20', '150-50-50', '500-150-80', '750-250-150', '1750-700-450', '1600-400-20']: 37 | # for split in ['minitrain', 'smalltrain', 'train', 'minival', 'smallval', 'val', 'test']: 38 | # name = 'vg_{}_{}'.format(version,split) 39 | # __sets[name] = (lambda split=split, version=version: vg(version, split)) 40 | # 41 | # # set up image net. 42 | # for split in ['train', 'val', 'val1', 'val2', 'test']: 43 | # name = 'imagenet_{}'.format(split) 44 | # devkit_path = 'data/imagenet/ILSVRC/devkit' 45 | # data_path = 'data/imagenet/ILSVRC' 46 | # __sets[name] = (lambda split=split, devkit_path=devkit_path, data_path=data_path: imagenet(split,devkit_path,data_path)) 47 | 48 | def get_imdb(name): 49 | """Get an imdb (image database) by name.""" 50 | # if name not in __sets: 51 | # raise KeyError('Unknown dataset: {}'.format(name)) 52 | return __sets[name]() 53 | 54 | 55 | def list_imdbs(): 56 | """List all registered imdbs.""" 57 | return list(__sets.keys()) 58 | -------------------------------------------------------------------------------- /lib/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | # -------------------------------------------------------- 3 | # Fast R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick 7 | # -------------------------------------------------------- 8 | 9 | import datasets 10 | import datasets.imagenet 11 | import os, sys 12 | from datasets.imdb import imdb 13 | import xml.dom.minidom as minidom 14 | import numpy as np 15 | import scipy.sparse 16 | import scipy.io as sio 17 | import subprocess 18 | import pdb 19 | import pickle 20 | try: 21 | xrange # Python 2 22 | except NameError: 23 | xrange = range # Python 3 24 | 25 | 26 | class imagenet(imdb): 27 | def __init__(self, image_set, devkit_path, data_path): 28 | imdb.__init__(self, image_set) 29 | self._image_set = image_set 30 | self._devkit_path = devkit_path 31 | self._data_path = data_path 32 | synsets_image = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_det.mat')) 33 | synsets_video = sio.loadmat(os.path.join(self._devkit_path, 'data', 'meta_vid.mat')) 34 | self._classes_image = ('__background__',) 35 | self._wnid_image = (0,) 36 | 37 | self._classes = ('__background__',) 38 | self._wnid = (0,) 39 | 40 | for i in 
xrange(200): 41 | self._classes_image = self._classes_image + (synsets_image['synsets'][0][i][2][0],) 42 | self._wnid_image = self._wnid_image + (synsets_image['synsets'][0][i][1][0],) 43 | 44 | for i in xrange(30): 45 | self._classes = self._classes + (synsets_video['synsets'][0][i][2][0],) 46 | self._wnid = self._wnid + (synsets_video['synsets'][0][i][1][0],) 47 | 48 | self._wnid_to_ind_image = dict(zip(self._wnid_image, xrange(201))) 49 | self._class_to_ind_image = dict(zip(self._classes_image, xrange(201))) 50 | 51 | self._wnid_to_ind = dict(zip(self._wnid, xrange(31))) 52 | self._class_to_ind = dict(zip(self._classes, xrange(31))) 53 | 54 | #check for valid intersection between video and image classes 55 | self._valid_image_flag = [0]*201 56 | 57 | for i in range(1,201): 58 | if self._wnid_image[i] in self._wnid_to_ind: 59 | self._valid_image_flag[i] = 1 60 | 61 | self._image_ext = ['.JPEG'] 62 | 63 | self._image_index = self._load_image_set_index() 64 | # Default to roidb handler 65 | self._roidb_handler = self.gt_roidb 66 | 67 | # Specific config options 68 | self.config = {'cleanup' : True, 69 | 'use_salt' : True, 70 | 'top_k' : 2000} 71 | 72 | assert os.path.exists(self._devkit_path), 'Devkit path does not exist: {}'.format(self._devkit_path) 73 | assert os.path.exists(self._data_path), 'Path does not exist: {}'.format(self._data_path) 74 | 75 | def image_path_at(self, i): 76 | """ 77 | Return the absolute path to image i in the image sequence. 78 | """ 79 | return self.image_path_from_index(self._image_index[i]) 80 | 81 | def image_path_from_index(self, index): 82 | """ 83 | Construct an image path from the image's "index" identifier. 84 | """ 85 | image_path = os.path.join(self._data_path, 'Data', self._image_set, index + self._image_ext[0]) 86 | assert os.path.exists(image_path), 'path does not exist: {}'.format(image_path) 87 | return image_path 88 | 89 | def _load_image_set_index(self): 90 | """ 91 | Load the indexes listed in this dataset's image set file. 
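        For the 'train' split this first looks for a cached list in
        ImageSets/trainr.txt; if that file is missing it builds a roughly
        class-balanced list (2000 entries per class, repeating indices with
        wrap-around when a class has fewer images) and writes the cache.
        Every other split is read directly from ImageSets/val.txt, as in the
        implementation below.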
92 | """ 93 | # Example path to image set file: 94 | # self._data_path + /ImageSets/val.txt 95 | 96 | if self._image_set == 'train': 97 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'trainr.txt') 98 | image_index = [] 99 | if os.path.exists(image_set_file): 100 | f = open(image_set_file, 'r') 101 | data = f.read().split() 102 | for lines in data: 103 | if lines != '': 104 | image_index.append(lines) 105 | f.close() 106 | return image_index 107 | 108 | for i in range(1,200): 109 | print(i) 110 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'DET', 'train_' + str(i) + '.txt') 111 | with open(image_set_file) as f: 112 | tmp_index = [x.strip() for x in f.readlines()] 113 | vtmp_index = [] 114 | for line in tmp_index: 115 | line = line.split(' ') 116 | image_list = os.popen('ls ' + self._data_path + '/Data/DET/train/' + line[0] + '/*.JPEG').read().split() 117 | tmp_list = [] 118 | for imgs in image_list: 119 | tmp_list.append(imgs[:-5]) 120 | vtmp_index = vtmp_index + tmp_list 121 | 122 | num_lines = len(vtmp_index) 123 | ids = np.random.permutation(num_lines) 124 | count = 0 125 | while count < 2000: 126 | image_index.append(vtmp_index[ids[count % num_lines]]) 127 | count = count + 1 128 | 129 | for i in range(1,201): 130 | if self._valid_image_flag[i] == 1: 131 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'train_pos_' + str(i) + '.txt') 132 | with open(image_set_file) as f: 133 | tmp_index = [x.strip() for x in f.readlines()] 134 | num_lines = len(tmp_index) 135 | ids = np.random.permutation(num_lines) 136 | count = 0 137 | while count < 2000: 138 | image_index.append(tmp_index[ids[count % num_lines]]) 139 | count = count + 1 140 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'trainr.txt') 141 | f = open(image_set_file, 'w') 142 | for lines in image_index: 143 | f.write(lines + '\n') 144 | f.close() 145 | else: 146 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'val.txt') 147 | with open(image_set_file) as f: 148 | image_index = [x.strip() for x in f.readlines()] 149 | return image_index 150 | 151 | def gt_roidb(self): 152 | """ 153 | Return the database of ground-truth regions of interest. 154 | This function loads/saves from/to a cache file to speed up future calls. 155 | """ 156 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 157 | if os.path.exists(cache_file): 158 | with open(cache_file, 'rb') as fid: 159 | roidb = pickle.load(fid) 160 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 161 | return roidb 162 | 163 | gt_roidb = [self._load_imagenet_annotation(index) 164 | for index in self.image_index] 165 | with open(cache_file, 'wb') as fid: 166 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 167 | print('wrote gt roidb to {}'.format(cache_file)) 168 | 169 | return gt_roidb 170 | 171 | 172 | def _load_imagenet_annotation(self, index): 173 | """ 174 | Load image and bounding boxes info from txt files of imagenet. 
175 | """ 176 | filename = os.path.join(self._data_path, 'Annotations', self._image_set, index + '.xml') 177 | 178 | # print 'Loading: {}'.format(filename) 179 | def get_data_from_tag(node, tag): 180 | return node.getElementsByTagName(tag)[0].childNodes[0].data 181 | 182 | with open(filename) as f: 183 | data = minidom.parseString(f.read()) 184 | 185 | objs = data.getElementsByTagName('object') 186 | num_objs = len(objs) 187 | 188 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 189 | gt_classes = np.zeros((num_objs), dtype=np.int32) 190 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 191 | 192 | # Load object bounding boxes into a data frame. 193 | for ix, obj in enumerate(objs): 194 | x1 = float(get_data_from_tag(obj, 'xmin')) 195 | y1 = float(get_data_from_tag(obj, 'ymin')) 196 | x2 = float(get_data_from_tag(obj, 'xmax')) 197 | y2 = float(get_data_from_tag(obj, 'ymax')) 198 | cls = self._wnid_to_ind[ 199 | str(get_data_from_tag(obj, "name")).lower().strip()] 200 | boxes[ix, :] = [x1, y1, x2, y2] 201 | gt_classes[ix] = cls 202 | overlaps[ix, cls] = 1.0 203 | 204 | overlaps = scipy.sparse.csr_matrix(overlaps) 205 | 206 | return {'boxes' : boxes, 207 | 'gt_classes': gt_classes, 208 | 'gt_overlaps' : overlaps, 209 | 'flipped' : False} 210 | 211 | if __name__ == '__main__': 212 | d = datasets.imagenet('val', '') 213 | res = d.roidb 214 | from IPython import embed; embed() 215 | -------------------------------------------------------------------------------- /lib/datasets/tools/mcg_munge.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import sys 4 | 5 | """Hacky tool to convert file system layout of MCG boxes downloaded from 6 | http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/mcg/ 7 | so that it's consistent with those computed by Jan Hosang (see: 8 | http://www.mpi-inf.mpg.de/departments/computer-vision-and-multimodal- 9 | computing/research/object-recognition-and-scene-understanding/how- 10 | good-are-detection-proposals-really/) 11 | 12 | NB: Boxes from the MCG website are in (y1, x1, y2, x2) order. 13 | Boxes from Hosang et al. are in (x1, y1, x2, y2) order. 
14 | """ 15 | 16 | def munge(src_dir): 17 | # stored as: ./MCG-COCO-val2014-boxes/COCO_val2014_000000193401.mat 18 | # want: ./MCG/mat/COCO_val2014_0/COCO_val2014_000000141/COCO_val2014_000000141334.mat 19 | 20 | files = os.listdir(src_dir) 21 | for fn in files: 22 | base, ext = os.path.splitext(fn) 23 | # first 14 chars / first 22 chars / all chars + .mat 24 | # COCO_val2014_0/COCO_val2014_000000447/COCO_val2014_000000447991.mat 25 | first = base[:14] 26 | second = base[:22] 27 | dst_dir = os.path.join('MCG', 'mat', first, second) 28 | if not os.path.exists(dst_dir): 29 | os.makedirs(dst_dir) 30 | src = os.path.join(src_dir, fn) 31 | dst = os.path.join(dst_dir, fn) 32 | print('MV: {} -> {}'.format(src, dst)) 33 | os.rename(src, dst) 34 | 35 | if __name__ == '__main__': 36 | # src_dir should look something like: 37 | # src_dir = 'MCG-COCO-val2014-boxes' 38 | src_dir = sys.argv[1] 39 | munge(src_dir) 40 | -------------------------------------------------------------------------------- /lib/datasets/vg_eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Fast/er R-CNN 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Bharath Hariharan 6 | # -------------------------------------------------------- 7 | 8 | import xml.etree.ElementTree as ET 9 | import os 10 | import numpy as np 11 | from .voc_eval import voc_ap 12 | 13 | def vg_eval( detpath, 14 | gt_roidb, 15 | image_index, 16 | classindex, 17 | ovthresh=0.5, 18 | use_07_metric=False, 19 | eval_attributes=False): 20 | """rec, prec, ap, sorted_scores, npos = voc_eval( 21 | detpath, 22 | gt_roidb, 23 | image_index, 24 | classindex, 25 | [ovthresh], 26 | [use_07_metric]) 27 | 28 | Top level function that does the Visual Genome evaluation. 29 | 30 | detpath: Path to detections 31 | gt_roidb: List of ground truth structs. 32 | image_index: List of image ids. 
33 | classindex: Category index 34 | [ovthresh]: Overlap threshold (default = 0.5) 35 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 36 | (default False) 37 | """ 38 | # extract gt objects for this class 39 | class_recs = {} 40 | npos = 0 41 | for item,imagename in zip(gt_roidb,image_index): 42 | if eval_attributes: 43 | bbox = item['boxes'][np.where(np.any(item['gt_attributes'].toarray() == classindex, axis=1))[0], :] 44 | else: 45 | bbox = item['boxes'][np.where(item['gt_classes'] == classindex)[0], :] 46 | difficult = np.zeros((bbox.shape[0],)).astype(np.bool) 47 | det = [False] * bbox.shape[0] 48 | npos = npos + sum(~difficult) 49 | class_recs[str(imagename)] = {'bbox': bbox, 50 | 'difficult': difficult, 51 | 'det': det} 52 | if npos == 0: 53 | # No ground truth examples 54 | return 0,0,0,0,npos 55 | 56 | # read dets 57 | with open(detpath, 'r') as f: 58 | lines = f.readlines() 59 | if len(lines) == 0: 60 | # No detection examples 61 | return 0,0,0,0,npos 62 | 63 | splitlines = [x.strip().split(' ') for x in lines] 64 | image_ids = [x[0] for x in splitlines] 65 | confidence = np.array([float(x[1]) for x in splitlines]) 66 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 67 | 68 | # sort by confidence 69 | sorted_ind = np.argsort(-confidence) 70 | sorted_scores = -np.sort(-confidence) 71 | BB = BB[sorted_ind, :] 72 | image_ids = [image_ids[x] for x in sorted_ind] 73 | 74 | # go down dets and mark TPs and FPs 75 | nd = len(image_ids) 76 | tp = np.zeros(nd) 77 | fp = np.zeros(nd) 78 | for d in range(nd): 79 | R = class_recs[image_ids[d]] 80 | bb = BB[d, :].astype(float) 81 | ovmax = -np.inf 82 | BBGT = R['bbox'].astype(float) 83 | 84 | if BBGT.size > 0: 85 | # compute overlaps 86 | # intersection 87 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 88 | iymin = np.maximum(BBGT[:, 1], bb[1]) 89 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 90 | iymax = np.minimum(BBGT[:, 3], bb[3]) 91 | iw = np.maximum(ixmax - ixmin + 1., 0.) 92 | ih = np.maximum(iymax - iymin + 1., 0.) 93 | inters = iw * ih 94 | 95 | # union 96 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 97 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 98 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 99 | 100 | overlaps = inters / uni 101 | ovmax = np.max(overlaps) 102 | jmax = np.argmax(overlaps) 103 | 104 | if ovmax > ovthresh: 105 | if not R['difficult'][jmax]: 106 | if not R['det'][jmax]: 107 | tp[d] = 1. 108 | R['det'][jmax] = 1 109 | else: 110 | fp[d] = 1. 111 | else: 112 | fp[d] = 1. 
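    # Note: the loop above is the standard greedy matching used in
    # PASCAL-style evaluation -- detections are visited in descending
    # confidence order, and a detection is a true positive only if it overlaps
    # a not-yet-matched ground-truth box by more than ovthresh; any further
    # detection on the same ground truth (or a low-overlap one) is a false
    # positive.
    # Tiny illustrative example of the cumulative step below: with
    # tp = [1, 0, 1], fp = [0, 1, 0] and npos = 2, np.cumsum gives
    # tp = [1, 1, 2] and fp = [0, 1, 1], so rec = [0.5, 0.5, 1.0] and
    # prec = [1.0, 0.5, 0.667].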
113 | 114 | # compute precision recall 115 | fp = np.cumsum(fp) 116 | tp = np.cumsum(tp) 117 | rec = tp / float(npos) 118 | # avoid divide by zero in case the first detection matches a difficult 119 | # ground truth 120 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 121 | ap = voc_ap(rec, prec, use_07_metric) 122 | 123 | return rec, prec, ap, sorted_scores, npos 124 | -------------------------------------------------------------------------------- /lib/datasets/voc_eval.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast/er R-CNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Bharath Hariharan 5 | # -------------------------------------------------------- 6 | from __future__ import absolute_import 7 | from __future__ import division 8 | from __future__ import print_function 9 | 10 | import xml.etree.ElementTree as ET 11 | import os 12 | import pickle 13 | import numpy as np 14 | 15 | def parse_rec(filename): 16 | """ Parse a PASCAL VOC xml file """ 17 | tree = ET.parse(filename) 18 | objects = [] 19 | for obj in tree.findall('object'): 20 | obj_struct = {} 21 | obj_struct['name'] = obj.find('name').text 22 | obj_struct['pose'] = obj.find('pose').text 23 | obj_struct['truncated'] = int(obj.find('truncated').text) 24 | obj_struct['difficult'] = int(obj.find('difficult').text) 25 | bbox = obj.find('bndbox') 26 | obj_struct['bbox'] = [int(bbox.find('xmin').text), 27 | int(bbox.find('ymin').text), 28 | int(bbox.find('xmax').text), 29 | int(bbox.find('ymax').text)] 30 | objects.append(obj_struct) 31 | 32 | return objects 33 | 34 | 35 | def voc_ap(rec, prec, use_07_metric=False): 36 | """ ap = voc_ap(rec, prec, [use_07_metric]) 37 | Compute VOC AP given precision and recall. 38 | If use_07_metric is true, uses the 39 | VOC 07 11 point method (default:False). 40 | """ 41 | if use_07_metric: 42 | # 11 point metric 43 | ap = 0. 44 | for t in np.arange(0., 1.1, 0.1): 45 | if np.sum(rec >= t) == 0: 46 | p = 0 47 | else: 48 | p = np.max(prec[rec >= t]) 49 | ap = ap + p / 11. 50 | else: 51 | # correct AP calculation 52 | # first append sentinel values at the end 53 | mrec = np.concatenate(([0.], rec, [1.])) 54 | mpre = np.concatenate(([0.], prec, [0.])) 55 | 56 | # compute the precision envelope 57 | for i in range(mpre.size - 1, 0, -1): 58 | mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) 59 | 60 | # to calculate area under PR curve, look for points 61 | # where X axis (recall) changes value 62 | i = np.where(mrec[1:] != mrec[:-1])[0] 63 | 64 | # and sum (\Delta recall) * prec 65 | ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) 66 | return ap 67 | 68 | 69 | def voc_eval(detpath, 70 | annopath, 71 | imagesetfile, 72 | classname, 73 | cachedir, 74 | ovthresh=0.5, 75 | use_07_metric=False): 76 | """rec, prec, ap = voc_eval(detpath, 77 | annopath, 78 | imagesetfile, 79 | classname, 80 | [ovthresh], 81 | [use_07_metric]) 82 | 83 | Top level function that does the PASCAL VOC evaluation. 84 | 85 | detpath: Path to detections 86 | detpath.format(classname) should produce the detection results file. 87 | annopath: Path to annotations 88 | annopath.format(imagename) should be the xml annotations file. 89 | imagesetfile: Text file containing the list of images, one image per line. 
90 | classname: Category name (duh) 91 | cachedir: Directory for caching the annotations 92 | [ovthresh]: Overlap threshold (default = 0.5) 93 | [use_07_metric]: Whether to use VOC07's 11 point AP computation 94 | (default False) 95 | """ 96 | # assumes detections are in detpath.format(classname) 97 | # assumes annotations are in annopath.format(imagename) 98 | # assumes imagesetfile is a text file with each line an image name 99 | # cachedir caches the annotations in a pickle file 100 | 101 | # first load gt 102 | if not os.path.isdir(cachedir): 103 | os.mkdir(cachedir) 104 | cachefile = os.path.join(cachedir, '%s_annots.pkl' % imagesetfile) 105 | # read list of images 106 | with open(imagesetfile, 'r') as f: 107 | lines = f.readlines() 108 | imagenames = [x.strip() for x in lines] 109 | 110 | if not os.path.isfile(cachefile): 111 | # load annotations 112 | recs = {} 113 | for i, imagename in enumerate(imagenames): 114 | recs[imagename] = parse_rec(annopath.format(imagename)) 115 | if i % 100 == 0: 116 | print('Reading annotation for {:d}/{:d}'.format( 117 | i + 1, len(imagenames))) 118 | # save 119 | print('Saving cached annotations to {:s}'.format(cachefile)) 120 | with open(cachefile, 'wb') as f: 121 | pickle.dump(recs, f) 122 | else: 123 | # load 124 | with open(cachefile, 'rb') as f: 125 | try: 126 | recs = pickle.load(f) 127 | except: 128 | recs = pickle.load(f, encoding='bytes') 129 | 130 | # extract gt objects for this class 131 | class_recs = {} 132 | npos = 0 133 | for imagename in imagenames: 134 | R = [obj for obj in recs[imagename] if obj['name'] == classname] 135 | bbox = np.array([x['bbox'] for x in R]) 136 | difficult = np.array([x['difficult'] for x in R]).astype(np.bool) 137 | det = [False] * len(R) 138 | npos = npos + sum(~difficult) 139 | class_recs[imagename] = {'bbox': bbox, 140 | 'difficult': difficult, 141 | 'det': det} 142 | 143 | # read dets 144 | detfile = detpath.format(classname) 145 | with open(detfile, 'r') as f: 146 | lines = f.readlines() 147 | 148 | splitlines = [x.strip().split(' ') for x in lines] 149 | image_ids = [x[0] for x in splitlines] 150 | confidence = np.array([float(x[1]) for x in splitlines]) 151 | BB = np.array([[float(z) for z in x[2:]] for x in splitlines]) 152 | 153 | nd = len(image_ids) 154 | tp = np.zeros(nd) 155 | fp = np.zeros(nd) 156 | 157 | if BB.shape[0] > 0: 158 | # sort by confidence 159 | sorted_ind = np.argsort(-confidence) 160 | sorted_scores = np.sort(-confidence) 161 | BB = BB[sorted_ind, :] 162 | image_ids = [image_ids[x] for x in sorted_ind] 163 | 164 | # go down dets and mark TPs and FPs 165 | for d in range(nd): 166 | R = class_recs[image_ids[d]] 167 | bb = BB[d, :].astype(float) 168 | ovmax = -np.inf 169 | BBGT = R['bbox'].astype(float) 170 | 171 | if BBGT.size > 0: 172 | # compute overlaps 173 | # intersection 174 | ixmin = np.maximum(BBGT[:, 0], bb[0]) 175 | iymin = np.maximum(BBGT[:, 1], bb[1]) 176 | ixmax = np.minimum(BBGT[:, 2], bb[2]) 177 | iymax = np.minimum(BBGT[:, 3], bb[3]) 178 | iw = np.maximum(ixmax - ixmin + 1., 0.) 179 | ih = np.maximum(iymax - iymin + 1., 0.) 180 | inters = iw * ih 181 | 182 | # union 183 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) + 184 | (BBGT[:, 2] - BBGT[:, 0] + 1.) * 185 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters) 186 | 187 | overlaps = inters / uni 188 | ovmax = np.max(overlaps) 189 | jmax = np.argmax(overlaps) 190 | 191 | if ovmax > ovthresh: 192 | if not R['difficult'][jmax]: 193 | if not R['det'][jmax]: 194 | tp[d] = 1. 
195 | R['det'][jmax] = 1 196 | else: 197 | fp[d] = 1. 198 | else: 199 | fp[d] = 1. 200 | 201 | # compute precision recall 202 | fp = np.cumsum(fp) 203 | tp = np.cumsum(tp) 204 | rec = tp / float(npos) 205 | # avoid divide by zero in case the first detection matches a difficult 206 | # ground truth 207 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps) 208 | ap = voc_ap(rec, prec, use_07_metric) 209 | 210 | return rec, prec, ap 211 | -------------------------------------------------------------------------------- /lib/external/.gitignore: -------------------------------------------------------------------------------- 1 | bbox.c 2 | bbox.cpython-35m-x86_64-linux-gnu.so 3 | bbox.cpython-36m-x86_64-linux-gnu.so 4 | 5 | nms.c 6 | nms.cpython-35m-x86_64-linux-gnu.so 7 | nms.cpython-36m-x86_64-linux-gnu.so 8 | -------------------------------------------------------------------------------- /lib/external/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | -------------------------------------------------------------------------------- /lib/external/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/external/__init__.py -------------------------------------------------------------------------------- /lib/external/setup.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | extensions = [ 7 | Extension( 8 | "nms", 9 | ["nms.pyx"], 10 | extra_compile_args=["-Wno-cpp", "-Wno-unused-function"] 11 | ) 12 | ] 13 | 14 | setup( 15 | name="coco", 16 | ext_modules=cythonize(extensions), 17 | include_dirs=[numpy.get_include()] 18 | ) 19 | -------------------------------------------------------------------------------- /lib/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/model/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/model/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /lib/model/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /lib/model/csrc/cpu/ROIAlign_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "cpu/vision.h" 3 | 4 | // implementation taken from Caffe2 5 | template 6 | struct PreCalc { 7 | int pos1; 8 | int pos2; 9 | int pos3; 10 | int pos4; 11 | T w1; 12 | T w2; 13 | T w3; 14 | T w4; 15 | }; 16 | 17 | template 18 | void pre_calc_for_bilinear_interpolate( 19 | const int height, 20 | const int width, 21 | const int pooled_height, 22 | const int pooled_width, 23 | const int iy_upper, 24 | const int ix_upper, 25 | T roi_start_h, 26 | T roi_start_w, 27 | T bin_size_h, 28 | T bin_size_w, 29 | int roi_bin_grid_h, 30 | int roi_bin_grid_w, 31 | std::vector>& pre_calc) { 32 | int pre_calc_index = 0; 33 | for (int ph = 0; ph < pooled_height; ph++) { 34 | for (int pw = 0; pw < pooled_width; pw++) { 35 | for (int iy = 0; iy < iy_upper; iy++) { 36 | const T yy = roi_start_h + ph * bin_size_h + 37 | static_cast(iy + .5f) * bin_size_h / 38 | static_cast(roi_bin_grid_h); // e.g., 0.5, 1.5 39 | for (int ix = 0; ix < ix_upper; ix++) { 40 | const T xx = roi_start_w + pw * bin_size_w + 41 | static_cast(ix + .5f) * bin_size_w / 42 | static_cast(roi_bin_grid_w); 43 | 44 | T x = xx; 45 | T y = yy; 46 | // deal with: inverse elements are out of feature map boundary 47 | if (y < -1.0 || y > height || x < -1.0 || x > width) { 48 | // empty 49 | PreCalc pc; 50 | pc.pos1 = 0; 51 | pc.pos2 = 0; 52 | pc.pos3 = 0; 53 | pc.pos4 = 0; 54 | pc.w1 = 0; 55 | pc.w2 = 0; 56 | pc.w3 = 0; 57 | pc.w4 = 0; 58 | pre_calc[pre_calc_index] = pc; 59 | pre_calc_index += 1; 60 | continue; 61 | } 62 | 63 | if (y <= 0) { 64 | y = 0; 65 | } 66 | if (x <= 0) { 67 | x = 0; 68 | } 69 | 70 | int y_low = (int)y; 71 | int x_low = (int)x; 72 | int y_high; 73 | int x_high; 74 | 75 | if (y_low >= height - 1) { 76 | y_high = y_low = height - 1; 77 | y = (T)y_low; 78 | } else { 79 | y_high = y_low + 1; 80 | } 81 | 82 | if (x_low >= width - 1) { 83 | x_high = x_low = width - 1; 84 | x = (T)x_low; 85 | } else { 86 | x_high = x_low + 1; 87 | } 88 | 89 | T ly = y - y_low; 90 | T lx = x - x_low; 91 | T hy = 1. - ly, hx = 1. 
- lx; 92 | T w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx; 93 | 94 | // save weights and indeces 95 | PreCalc pc; 96 | pc.pos1 = y_low * width + x_low; 97 | pc.pos2 = y_low * width + x_high; 98 | pc.pos3 = y_high * width + x_low; 99 | pc.pos4 = y_high * width + x_high; 100 | pc.w1 = w1; 101 | pc.w2 = w2; 102 | pc.w3 = w3; 103 | pc.w4 = w4; 104 | pre_calc[pre_calc_index] = pc; 105 | 106 | pre_calc_index += 1; 107 | } 108 | } 109 | } 110 | } 111 | } 112 | 113 | template 114 | void ROIAlignForward_cpu_kernel( 115 | const int nthreads, 116 | const T* bottom_data, 117 | const T& spatial_scale, 118 | const int channels, 119 | const int height, 120 | const int width, 121 | const int pooled_height, 122 | const int pooled_width, 123 | const int sampling_ratio, 124 | const T* bottom_rois, 125 | //int roi_cols, 126 | T* top_data) { 127 | //AT_ASSERT(roi_cols == 4 || roi_cols == 5); 128 | int roi_cols = 5; 129 | 130 | int n_rois = nthreads / channels / pooled_width / pooled_height; 131 | // (n, c, ph, pw) is an element in the pooled output 132 | // can be parallelized using omp 133 | // #pragma omp parallel for num_threads(32) 134 | for (int n = 0; n < n_rois; n++) { 135 | int index_n = n * channels * pooled_width * pooled_height; 136 | 137 | // roi could have 4 or 5 columns 138 | const T* offset_bottom_rois = bottom_rois + n * roi_cols; 139 | int roi_batch_ind = 0; 140 | if (roi_cols == 5) { 141 | roi_batch_ind = offset_bottom_rois[0]; 142 | offset_bottom_rois++; 143 | } 144 | 145 | // Do not using rounding; this implementation detail is critical 146 | T roi_start_w = offset_bottom_rois[0] * spatial_scale; 147 | T roi_start_h = offset_bottom_rois[1] * spatial_scale; 148 | T roi_end_w = offset_bottom_rois[2] * spatial_scale; 149 | T roi_end_h = offset_bottom_rois[3] * spatial_scale; 150 | // T roi_start_w = round(offset_bottom_rois[0] * spatial_scale); 151 | // T roi_start_h = round(offset_bottom_rois[1] * spatial_scale); 152 | // T roi_end_w = round(offset_bottom_rois[2] * spatial_scale); 153 | // T roi_end_h = round(offset_bottom_rois[3] * spatial_scale); 154 | 155 | // Force malformed ROIs to be 1x1 156 | T roi_width = std::max(roi_end_w - roi_start_w, (T)1.); 157 | T roi_height = std::max(roi_end_h - roi_start_h, (T)1.); 158 | T bin_size_h = static_cast(roi_height) / static_cast(pooled_height); 159 | T bin_size_w = static_cast(roi_width) / static_cast(pooled_width); 160 | 161 | // We use roi_bin_grid to sample the grid and mimic integral 162 | int roi_bin_grid_h = (sampling_ratio > 0) 163 | ? sampling_ratio 164 | : ceil(roi_height / pooled_height); // e.g., = 2 165 | int roi_bin_grid_w = 166 | (sampling_ratio > 0) ? sampling_ratio : ceil(roi_width / pooled_width); 167 | 168 | // We do average (integral) pooling inside a bin 169 | const T count = roi_bin_grid_h * roi_bin_grid_w; // e.g. 
= 4 170 | 171 | // we want to precalculate indeces and weights shared by all chanels, 172 | // this is the key point of optimiation 173 | std::vector> pre_calc( 174 | roi_bin_grid_h * roi_bin_grid_w * pooled_width * pooled_height); 175 | pre_calc_for_bilinear_interpolate( 176 | height, 177 | width, 178 | pooled_height, 179 | pooled_width, 180 | roi_bin_grid_h, 181 | roi_bin_grid_w, 182 | roi_start_h, 183 | roi_start_w, 184 | bin_size_h, 185 | bin_size_w, 186 | roi_bin_grid_h, 187 | roi_bin_grid_w, 188 | pre_calc); 189 | 190 | for (int c = 0; c < channels; c++) { 191 | int index_n_c = index_n + c * pooled_width * pooled_height; 192 | const T* offset_bottom_data = 193 | bottom_data + (roi_batch_ind * channels + c) * height * width; 194 | int pre_calc_index = 0; 195 | 196 | for (int ph = 0; ph < pooled_height; ph++) { 197 | for (int pw = 0; pw < pooled_width; pw++) { 198 | int index = index_n_c + ph * pooled_width + pw; 199 | 200 | T output_val = 0.; 201 | for (int iy = 0; iy < roi_bin_grid_h; iy++) { 202 | for (int ix = 0; ix < roi_bin_grid_w; ix++) { 203 | PreCalc pc = pre_calc[pre_calc_index]; 204 | output_val += pc.w1 * offset_bottom_data[pc.pos1] + 205 | pc.w2 * offset_bottom_data[pc.pos2] + 206 | pc.w3 * offset_bottom_data[pc.pos3] + 207 | pc.w4 * offset_bottom_data[pc.pos4]; 208 | 209 | pre_calc_index += 1; 210 | } 211 | } 212 | output_val /= count; 213 | 214 | top_data[index] = output_val; 215 | } // for pw 216 | } // for ph 217 | } // for c 218 | } // for n 219 | } 220 | 221 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 222 | const at::Tensor& rois, 223 | const float spatial_scale, 224 | const int pooled_height, 225 | const int pooled_width, 226 | const int sampling_ratio) { 227 | AT_ASSERTM(!input.type().is_cuda(), "input must be a CPU tensor"); 228 | AT_ASSERTM(!rois.type().is_cuda(), "rois must be a CPU tensor"); 229 | 230 | auto num_rois = rois.size(0); 231 | auto channels = input.size(1); 232 | auto height = input.size(2); 233 | auto width = input.size(3); 234 | 235 | auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); 236 | auto output_size = num_rois * pooled_height * pooled_width * channels; 237 | 238 | if (output.numel() == 0) { 239 | return output; 240 | } 241 | 242 | AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIAlign_forward", [&] { 243 | ROIAlignForward_cpu_kernel( 244 | output_size, 245 | input.data(), 246 | spatial_scale, 247 | channels, 248 | height, 249 | width, 250 | pooled_height, 251 | pooled_width, 252 | sampling_ratio, 253 | rois.data(), 254 | output.data()); 255 | }); 256 | return output; 257 | } 258 | -------------------------------------------------------------------------------- /lib/model/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /lib/model/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /lib/model/csrc/cuda/ROIPool_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | // TODO make it in a common file 11 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 12 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 13 | i += blockDim.x * gridDim.x) 14 | 15 | 16 | template 17 | __global__ void RoIPoolFForward(const int nthreads, const T* bottom_data, 18 | const T spatial_scale, const int channels, const int height, 19 | const int width, const int pooled_height, const int pooled_width, 20 | const T* bottom_rois, T* top_data, int* argmax_data) { 21 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 22 | // (n, c, ph, pw) is an element in the pooled output 23 | int pw = index % pooled_width; 24 | int ph = (index / pooled_width) % pooled_height; 25 | int c = (index / pooled_width / pooled_height) % channels; 26 | int n = index / pooled_width / pooled_height / channels; 27 | 28 | const T* offset_bottom_rois = bottom_rois + n * 5; 29 | int roi_batch_ind = offset_bottom_rois[0]; 30 | int roi_start_w = round(offset_bottom_rois[1] * spatial_scale); 31 | int roi_start_h = round(offset_bottom_rois[2] * spatial_scale); 32 | int roi_end_w = round(offset_bottom_rois[3] * spatial_scale); 33 | int roi_end_h = round(offset_bottom_rois[4] * spatial_scale); 34 | 35 | // Force malformed ROIs to be 1x1 36 | int roi_width = max(roi_end_w - roi_start_w + 1, 1); 37 | int roi_height = max(roi_end_h - roi_start_h + 1, 1); 38 | T bin_size_h = static_cast(roi_height) 39 | / static_cast(pooled_height); 40 | T bin_size_w = static_cast(roi_width) 41 | / static_cast(pooled_width); 42 | 43 | int hstart = static_cast(floor(static_cast(ph) 44 | * bin_size_h)); 45 | int wstart = static_cast(floor(static_cast(pw) 46 | * bin_size_w)); 47 | int hend = static_cast(ceil(static_cast(ph + 1) 48 | * bin_size_h)); 49 | int wend = static_cast(ceil(static_cast(pw + 1) 50 | * bin_size_w)); 51 | 52 | // Add roi offsets and clip to input boundaries 53 | hstart = min(max(hstart + roi_start_h, 0), height); 54 | hend = min(max(hend + roi_start_h, 0), height); 55 | wstart = min(max(wstart + roi_start_w, 0), width); 56 | wend = min(max(wend + roi_start_w, 0), width); 57 | bool is_empty = (hend <= hstart) || (wend <= wstart); 58 | 59 | // Define an empty pooling region to be zero 60 | T maxval = is_empty ? 
0 : -FLT_MAX; 61 | // If nothing is pooled, argmax = -1 causes nothing to be backprop'd 62 | int maxidx = -1; 63 | const T* offset_bottom_data = 64 | bottom_data + (roi_batch_ind * channels + c) * height * width; 65 | for (int h = hstart; h < hend; ++h) { 66 | for (int w = wstart; w < wend; ++w) { 67 | int bottom_index = h * width + w; 68 | if (offset_bottom_data[bottom_index] > maxval) { 69 | maxval = offset_bottom_data[bottom_index]; 70 | maxidx = bottom_index; 71 | } 72 | } 73 | } 74 | top_data[index] = maxval; 75 | argmax_data[index] = maxidx; 76 | } 77 | } 78 | 79 | template 80 | __global__ void RoIPoolFBackward(const int nthreads, const T* top_diff, 81 | const int* argmax_data, const int num_rois, const T spatial_scale, 82 | const int channels, const int height, const int width, 83 | const int pooled_height, const int pooled_width, T* bottom_diff, 84 | const T* bottom_rois) { 85 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 86 | // (n, c, ph, pw) is an element in the pooled output 87 | int pw = index % pooled_width; 88 | int ph = (index / pooled_width) % pooled_height; 89 | int c = (index / pooled_width / pooled_height) % channels; 90 | int n = index / pooled_width / pooled_height / channels; 91 | 92 | const T* offset_bottom_rois = bottom_rois + n * 5; 93 | int roi_batch_ind = offset_bottom_rois[0]; 94 | int bottom_offset = (roi_batch_ind * channels + c) * height * width; 95 | int top_offset = (n * channels + c) * pooled_height * pooled_width; 96 | const T* offset_top_diff = top_diff + top_offset; 97 | T* offset_bottom_diff = bottom_diff + bottom_offset; 98 | const int* offset_argmax_data = argmax_data + top_offset; 99 | 100 | int argmax = offset_argmax_data[ph * pooled_width + pw]; 101 | if (argmax != -1) { 102 | atomicAdd( 103 | offset_bottom_diff + argmax, 104 | static_cast(offset_top_diff[ph * pooled_width + pw])); 105 | 106 | } 107 | } 108 | } 109 | 110 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 111 | const at::Tensor& rois, 112 | const float spatial_scale, 113 | const int pooled_height, 114 | const int pooled_width) { 115 | AT_ASSERTM(input.type().is_cuda(), "input must be a CUDA tensor"); 116 | AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); 117 | 118 | auto num_rois = rois.size(0); 119 | auto channels = input.size(1); 120 | auto height = input.size(2); 121 | auto width = input.size(3); 122 | 123 | auto output = at::empty({num_rois, channels, pooled_height, pooled_width}, input.options()); 124 | auto output_size = num_rois * pooled_height * pooled_width * channels; 125 | auto argmax = at::zeros({num_rois, channels, pooled_height, pooled_width}, input.options().dtype(at::kInt)); 126 | 127 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 128 | 129 | dim3 grid(std::min(THCCeilDiv(output_size, 512L), 4096L)); 130 | dim3 block(512); 131 | 132 | if (output.numel() == 0) { 133 | THCudaCheck(cudaGetLastError()); 134 | return std::make_tuple(output, argmax); 135 | } 136 | 137 | AT_DISPATCH_FLOATING_TYPES(input.type(), "ROIPool_forward", [&] { 138 | RoIPoolFForward<<>>( 139 | output_size, 140 | input.contiguous().data(), 141 | spatial_scale, 142 | channels, 143 | height, 144 | width, 145 | pooled_height, 146 | pooled_width, 147 | rois.contiguous().data(), 148 | output.data(), 149 | argmax.data()); 150 | }); 151 | THCudaCheck(cudaGetLastError()); 152 | return std::make_tuple(output, argmax); 153 | } 154 | 155 | // TODO remove the dependency on input and use instead its sizes -> save memory 156 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 
157 | const at::Tensor& input, 158 | const at::Tensor& rois, 159 | const at::Tensor& argmax, 160 | const float spatial_scale, 161 | const int pooled_height, 162 | const int pooled_width, 163 | const int batch_size, 164 | const int channels, 165 | const int height, 166 | const int width) { 167 | AT_ASSERTM(grad.type().is_cuda(), "grad must be a CUDA tensor"); 168 | AT_ASSERTM(rois.type().is_cuda(), "rois must be a CUDA tensor"); 169 | // TODO add more checks 170 | 171 | auto num_rois = rois.size(0); 172 | auto grad_input = at::zeros({batch_size, channels, height, width}, grad.options()); 173 | 174 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 175 | 176 | dim3 grid(std::min(THCCeilDiv(grad.numel(), 512L), 4096L)); 177 | dim3 block(512); 178 | 179 | // handle possibly empty gradients 180 | if (grad.numel() == 0) { 181 | THCudaCheck(cudaGetLastError()); 182 | return grad_input; 183 | } 184 | 185 | AT_DISPATCH_FLOATING_TYPES(grad.type(), "ROIPool_backward", [&] { 186 | RoIPoolFBackward<<>>( 187 | grad.numel(), 188 | grad.contiguous().data(), 189 | argmax.data(), 190 | num_rois, 191 | spatial_scale, 192 | channels, 193 | height, 194 | width, 195 | pooled_height, 196 | pooled_width, 197 | grad_input.data(), 198 | rois.contiguous().data()); 199 | }); 200 | THCudaCheck(cudaGetLastError()); 201 | return grad_input; 202 | } 203 | -------------------------------------------------------------------------------- /lib/model/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + 
cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num = boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /lib/model/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
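// Declarations for the CUDA kernels that back the compiled _C extension:
// RoIAlign and RoIPool forward/backward plus GPU NMS. Note that nms_cuda
// takes a single N x 5 tensor laid out as (x1, y1, x2, y2, score) that is
// already resident on the GPU and returns the indices of the boxes to keep;
// the concatenation of (dets, scores) into that layout is done by the nms()
// wrapper in nms.h below.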
2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 14 | const at::Tensor& rois, 15 | const float spatial_scale, 16 | const int pooled_height, 17 | const int pooled_width, 18 | const int batch_size, 19 | const int channels, 20 | const int height, 21 | const int width, 22 | const int sampling_ratio); 23 | 24 | 25 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cuda(const at::Tensor& input, 26 | const at::Tensor& rois, 27 | const float spatial_scale, 28 | const int pooled_height, 29 | const int pooled_width); 30 | 31 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 32 | const at::Tensor& input, 33 | const at::Tensor& rois, 34 | const at::Tensor& argmax, 35 | const float spatial_scale, 36 | const int pooled_height, 37 | const int pooled_width, 38 | const int batch_size, 39 | const int channels, 40 | const int height, 41 | const int width); 42 | 43 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 44 | 45 | 46 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 47 | const int height, 48 | const int width); 49 | -------------------------------------------------------------------------------- /lib/model/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /lib/model/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
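// vision.cpp is the pybind11 entry point for the compiled extension (the
// _C.cpython-36m-x86_64-linux-gnu.so shipped under lib/model/). It registers
// nms, roi_align_forward/backward and roi_pool_forward/backward on the module
// named by TORCH_EXTENSION_NAME. The nms() wrapper pulled in via nms.h
// dispatches on dets.type().is_cuda(), so the same Python-facing function
// covers both the CPU path (nms_cpu) and, when built WITH_CUDA, the GPU path.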
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | 6 | 7 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 8 | m.def("nms", &nms, "non-maximum suppression"); 9 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 10 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 11 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 12 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 13 | } 14 | -------------------------------------------------------------------------------- /lib/model/faster_rcnn/Snet.py: -------------------------------------------------------------------------------- 1 | from .modules import * 2 | 3 | from model.faster_rcnn.faster_rcnn import _fasterRCNN 4 | from model.utils.config import cfg 5 | 6 | class SnetExtractor(nn.Module): 7 | cfg = { 8 | 49: [24, 60, 120, 240, 512], 9 | 146: [24, 132, 264, 528], 10 | 535: [48, 248, 496, 992], 11 | } 12 | 13 | def __init__(self, version = 146 ,model_path=None , **kwargs): 14 | 15 | super(SnetExtractor,self).__init__() 16 | num_layers = [4, 8, 4] 17 | self.model_path = model_path 18 | 19 | self.num_layers = num_layers 20 | channels = self.cfg[version] 21 | self.channels = channels 22 | 23 | 24 | 25 | self.conv1 = conv_bn( 26 | 3, channels[0], kernel_size=3, stride=2,pad = 1 27 | ) 28 | self.maxpool = nn.MaxPool2d( 29 | kernel_size=3, stride=2, padding=1, 30 | ) 31 | 32 | 33 | self.stage1 = self._make_layer( 34 | num_layers[0], channels[0], channels[1], **kwargs) 35 | self.stage2 = self._make_layer( 36 | num_layers[1], channels[1], channels[2], **kwargs) 37 | self.stage3 = self._make_layer( 38 | num_layers[2], channels[2], channels[3], **kwargs) 39 | if len(self.channels) == 5: 40 | self.conv5 = conv_bn( 41 | channels[3], channels[4], kernel_size=1, stride=1 ,pad=0 ) 42 | 43 | 44 | 45 | if len(channels) == 5: 46 | self.cem = CEM(channels[-3], channels[-1], channels[-1] ,cfg.FEAT_STRIDE) 47 | else: 48 | self.cem = CEM(channels[-2], channels[-1], channels[-1],cfg.FEAT_STRIDE) 49 | self.avgpool = nn.AdaptiveAvgPool2d(1) 50 | self._initialize_weights() 51 | 52 | def _make_layer(self, num_layers, in_channels, out_channels, **kwargs): 53 | layers = [] 54 | for i in range(num_layers): 55 | if i == 0: 56 | layers.append(ShuffleV2Block(in_channels, out_channels, mid_channels=out_channels // 2, ksize=5, stride=2)) 57 | else: 58 | layers.append(ShuffleV2Block(in_channels // 2, out_channels, 59 | mid_channels=out_channels // 2, ksize=5, stride=1)) 60 | in_channels = out_channels 61 | return nn.Sequential(*layers) 62 | 63 | 64 | 65 | 66 | def _initialize_weights(self): 67 | 68 | def set_bn_fix(m): 69 | classname = m.__class__.__name__ 70 | if classname.find('BatchNorm') != -1: 71 | for p in m.parameters(): p.requires_grad = False 72 | 73 | if self.model_path is not None: 74 | 75 | print("Loading pretrained weights from %s" % (self.model_path)) 76 | if torch.cuda.is_available(): 77 | state_dict = torch.load(self.model_path)["state_dict"] 78 | else: 79 | state_dict = torch.load( 80 | self.model_path, map_location=lambda storage, loc: storage)["state_dict"] 81 | keys = [] 82 | for k, v in state_dict.items(): 83 | keys.append(k) 84 | for k in keys: 85 | state_dict[k.replace("module.", "")] = state_dict.pop(k) 86 | 87 | self.load_state_dict(state_dict,strict = False) 88 | 89 | for para in self.conv1.parameters(): 90 | para.requires_grad = False 91 | print('extractor conv1 freezed') 92 | for para in self.stage1.parameters(): 93 | para.requires_grad = False 94 | 
print('extractor stage1 freezed') 95 | # for para in self.stage2.parameters(): 96 | # para.requires_grad = False 97 | # print('extractor stage2 freezed') 98 | # for para in self.stage3.parameters(): 99 | # para.requires_grad = False 100 | # print('extractor stage3 freezed') 101 | set_bn_fix(self.conv1) 102 | set_bn_fix(self.stage1) 103 | set_bn_fix(self.stage2) 104 | set_bn_fix(self.stage3) 105 | 106 | else: 107 | for name, m in self.named_modules(): 108 | if isinstance(m, nn.Conv2d): 109 | if 'first' in name: 110 | nn.init.normal_(m.weight, 0, 0.01) 111 | else: 112 | nn.init.normal_(m.weight, 0, 1.0 / m.weight.shape[1]) 113 | if m.bias is not None: 114 | nn.init.constant_(m.bias, 0) 115 | elif isinstance(m, nn.BatchNorm2d): 116 | nn.init.constant_(m.weight, 1) 117 | if m.bias is not None: 118 | nn.init.constant_(m.bias, 0.0001) 119 | nn.init.constant_(m.running_mean, 0) 120 | elif isinstance(m, nn.BatchNorm1d): 121 | nn.init.constant_(m.weight, 1) 122 | if m.bias is not None: 123 | nn.init.constant_(m.bias, 0.0001) 124 | nn.init.constant_(m.running_mean, 0) 125 | elif isinstance(m, nn.Linear): 126 | nn.init.normal_(m.weight, 0, 0.01) 127 | if m.bias is not None: 128 | nn.init.constant_(m.bias, 0) 129 | 130 | def forward(self, x): 131 | 132 | x = self.conv1(x) 133 | x = self.maxpool(x) 134 | c3 = self.stage1(x) 135 | c4 = self.stage2(c3) 136 | c5 = self.stage3(c4) 137 | if len(self.channels) == 5: 138 | c5 = self.conv5(c5) 139 | 140 | Cglb_lat = self.avgpool(c5) 141 | 142 | if cfg.FEAT_STRIDE == 16: 143 | cem_out = self.cem([c4, c5, Cglb_lat]) 144 | elif cfg.FEAT_STRIDE == 8: 145 | cem_out = self.cem([c3,c4, c5, Cglb_lat]) 146 | 147 | return cem_out 148 | 149 | class snet(_fasterRCNN): 150 | def __init__(self, 151 | classes, 152 | layer , 153 | pretrained_path=None, 154 | class_agnostic=False, 155 | ): 156 | self.pretrained_path = pretrained_path 157 | 158 | self.class_agnostic = class_agnostic 159 | 160 | self.dout_base_model = 256 161 | self.layer = layer 162 | 163 | self.dout_lh_base_model = 245 164 | 165 | _fasterRCNN.__init__(self, 166 | classes, 167 | class_agnostic, 168 | compact_mode=True) 169 | 170 | def _init_modules(self): 171 | snet = SnetExtractor(self.layer, self.pretrained_path) 172 | 173 | 174 | 175 | 176 | # Build snet. 
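        # Note on the head dimensions: the SAM output fed to position-sensitive
        # RoI align has 245 channels, which PSRoIAlign splits into a 7x7 grid of
        # bins with 5 channels each, so every pooled RoI flattens to
        # 5 * 7 * 7 = 245 values. That is why dout_lh_base_model is 245 and why
        # RCNN_top below is a single nn.Linear(5 * 7 * 7, 1024).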
177 | self.RCNN_base = snet 178 | 179 | # Fix Layers 180 | # if self.pretrained: 181 | # for layer in self.RCNN_base: 182 | # print(layer) 183 | # for p in self.RCNN_base[layer].parameters(): 184 | # p.requires_grad = False 185 | 186 | 187 | self.RCNN_top = nn.Sequential(nn.Linear(5 * 7 * 7, 1024), 188 | nn.ReLU(inplace=True), 189 | 190 | ) 191 | 192 | 193 | c_in = 1024 194 | 195 | self.RCNN_cls_score = nn.Linear(c_in, self.n_classes) 196 | if self.class_agnostic: 197 | self.RCNN_bbox_pred = nn.Linear(c_in, 4) 198 | else: 199 | self.RCNN_bbox_pred = nn.Linear(c_in, 4 * self.n_classes) 200 | 201 | def train(self, mode=True): 202 | # Override train so that the training mode is set as we want 203 | nn.Module.train(self, mode) 204 | if mode: 205 | # Set fixed blocks to be in eval mode 206 | self.RCNN_base.conv1.eval() 207 | self.RCNN_base.stage1.eval() 208 | self.RCNN_base.stage2.train() 209 | self.RCNN_base.stage3.train() 210 | 211 | 212 | def set_bn_eval(m): 213 | classname = m.__class__.__name__ 214 | if classname.find('BatchNorm') != -1: 215 | m.eval() 216 | 217 | set_bn_eval(self.RCNN_base.conv1) 218 | set_bn_eval(self.RCNN_base.stage1) 219 | set_bn_eval(self.RCNN_base.stage2) 220 | set_bn_eval(self.RCNN_base.stage3) 221 | 222 | 223 | def _head_to_tail(self, pool5): 224 | pool5_flat = pool5.view(pool5.size(0), -1) 225 | fc7 = self.RCNN_top(pool5_flat) # or two large fully-connected layers 226 | 227 | return fc7 228 | 229 | -------------------------------------------------------------------------------- /lib/model/faster_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/faster_rcnn/__init__.py -------------------------------------------------------------------------------- /lib/model/faster_rcnn/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | import time 2 | import random 3 | import numpy as np 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | from torch.autograd import Variable 8 | import torchvision.models as models 9 | from torchvision.ops import RoIAlign, RoIPool 10 | 11 | from psroialign.psroialign import PSROIAlignhandle,PSROIPoolhandle 12 | 13 | # from psroi_pooling.modules.psroi_pool import PSRoIPool 14 | 15 | from .modules import RPN,SAM 16 | from model.utils.config import cfg 17 | # from model.rpn.rpn import _RPN 18 | from model.rpn.rpn import _RPN 19 | # from model.rpn.centernet_rpn import _RPN 20 | from model.rpn.proposal_target_layer_cascade import _ProposalTargetLayer 21 | from model.loss.losses import _smooth_l1_loss 22 | 23 | from model.loss.losses import OHEM_loss,hard_negative_mining 24 | 25 | 26 | 27 | 28 | 29 | class _fasterRCNN(nn.Module): 30 | """ faster RCNN """ 31 | def __init__(self, 32 | classes, 33 | class_agnostic, 34 | compact_mode=False): 35 | super(_fasterRCNN, self).__init__() 36 | self.classes = classes 37 | self.n_classes = len(classes) 38 | self.class_agnostic = class_agnostic 39 | 40 | 41 | # loss 42 | self.RCNN_loss_cls = 0 43 | self.RCNN_loss_bbox = 0 44 | # self.focalloss_handle = FocalLossV4(num_class=21, alpha=0.25, gamma=2.0, balance_index=2) 45 | # define Large Separable Convolution Layer 46 | 47 | self.rpn = RPN(in_channels=245, f_channels=256) 48 | 49 | 50 | self.sam = SAM(256,245) 51 | # define rpn 52 | self.RCNN_rpn = _RPN(self.dout_base_model) 53 | self.RCNN_proposal_target = 
_ProposalTargetLayer(self.n_classes) 54 | 55 | self.rpn_time = None 56 | self.pre_roi_time = None 57 | self.roi_pooling_time = None 58 | self.subnet_time = None 59 | self.psroiAlign = PSROIAlignhandle(1./cfg.FEAT_STRIDE, 7,2, 5) 60 | self.psroiPool = PSROIPoolhandle(7,7,1./cfg.FEAT_STRIDE,7,5) 61 | 62 | 63 | 64 | def _roi_pool_layer(self, bottom, rois): 65 | return self.psroiPool.forward(bottom, rois) 66 | 67 | def _roi_align_layer(self, bottom, rois): 68 | return self.psroiAlign.forward(bottom, rois) 69 | 70 | def forward(self, im_data, im_info, gt_boxes, num_boxes, 71 | # hm,reg_mask,wh,offset,ind 72 | ): 73 | batch_size = im_data.size(0) 74 | 75 | 76 | im_info = im_info.data 77 | gt_boxes = gt_boxes.data 78 | num_boxes = num_boxes.data 79 | # hm = hm.data 80 | # reg_mask = reg_mask.data 81 | # wh = wh.data 82 | # offset = offset.data 83 | # ind = ind.data 84 | 85 | # feed image data to base model to obtain base feature map 86 | start = time.time() 87 | basefeat = self.RCNN_base(im_data) 88 | 89 | # feed base feature map tp RPN to obtain rois 90 | rpn_feat= self.rpn(basefeat) 91 | 92 | 93 | # rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(rpn_feat, im_info, gt_boxes, num_boxes,hm,reg_mask,wh,offset,ind) 94 | rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(rpn_feat, im_info, gt_boxes, num_boxes) 95 | 96 | rpn_time = time.time() 97 | self.rpn_time = rpn_time - start 98 | # if it is training phrase, then use ground trubut bboxes for refining 99 | if self.training: 100 | roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes) 101 | rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data 102 | 103 | rois_label = Variable(rois_label.view(-1).long()) 104 | rois_target = Variable(rois_target.view(-1, rois_target.size(2))) 105 | rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2))) 106 | rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2))) 107 | else: 108 | rois_label = None 109 | rois_target = None 110 | rois_inside_ws = None 111 | rois_outside_ws = None 112 | rpn_loss_cls = 0 113 | rpn_loss_bbox = 0 114 | 115 | rois = Variable(rois) 116 | 117 | pre_roi_time = time.time() 118 | self.pre_roi_time = pre_roi_time - rpn_time 119 | 120 | base_feat = self.sam([basefeat,rpn_feat]) 121 | 122 | 123 | 124 | # do roi pooling based on predicted rois 125 | if cfg.POOLING_MODE == 'align': 126 | pooled_feat = self._roi_align_layer(base_feat, rois.view(-1, 5)) 127 | elif cfg.POOLING_MODE == 'pool': 128 | pooled_feat = self._roi_pool_layer(base_feat, rois.view(-1, 5)) 129 | 130 | roi_pool_time = time.time() 131 | self.roi_pooling_time = roi_pool_time - pre_roi_time 132 | 133 | # feed pooled features to top model 134 | pooled_feat = self._head_to_tail(pooled_feat) 135 | 136 | # compute bbox offset 137 | bbox_pred = self.RCNN_bbox_pred(pooled_feat) 138 | if self.training and not self.class_agnostic: 139 | # select the corresponding columns according to roi labels 140 | bbox_pred_view = bbox_pred.view(bbox_pred.size(0), 141 | int(bbox_pred.size(1) / 4), 4) 142 | bbox_pred_select = torch.gather( 143 | bbox_pred_view, 1, 144 | rois_label.view(rois_label.size(0), 1, 145 | 1).expand(rois_label.size(0), 1, 4)) 146 | 147 | bbox_pred = bbox_pred_select.squeeze(1) 148 | 149 | 150 | # compute object classification probability 151 | cls_score = self.RCNN_cls_score(pooled_feat) 152 | cls_prob = F.softmax(cls_score, 1) 153 | 154 | RCNN_loss_cls = 0 155 | RCNN_loss_bbox = 0 156 | 157 | 158 | if self.training: 159 | # classification loss 160 | # 
RCNN_loss_cls = OHEM_loss(cls_score,rois_label) 161 | # from collections import Counter 162 | # label = rois_label.cpu().numpy() 163 | # print(Counter(label)) 164 | 165 | loss = -F.log_softmax(cls_score, dim=1)[:, 0] 166 | mask, num_pos = hard_negative_mining(loss, rois_label) 167 | confidence = cls_score[mask, :] 168 | RCNN_loss_cls = F.cross_entropy(confidence, rois_label[mask], reduction='mean') 169 | 170 | 171 | # bounding box regression L1 loss 172 | 173 | RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws) 174 | RCNN_loss_bbox = RCNN_loss_bbox * 2 # "to balance multi-task training" 175 | 176 | cls_prob = cls_prob.view(batch_size, rois.size(1), -1) 177 | bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1) 178 | 179 | subnet_time = time.time() 180 | self.subnet_time = subnet_time - roi_pool_time 181 | time_measure = [ 182 | self.rpn_time, self.pre_roi_time, self.roi_pooling_time, 183 | self.subnet_time 184 | ] 185 | 186 | return time_measure, rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label 187 | 188 | def _init_weights(self): 189 | def normal_init(m, mean, stddev, truncated=False): 190 | """ 191 | weight initalizer: truncated normal and random normal. 192 | """ 193 | # x is a parameter 194 | if truncated: 195 | # not a perfect approximation 196 | m.weight.data.normal_().fmod_(2).mul_(stddev).add_(mean) 197 | else: 198 | m.weight.data.normal_(mean, stddev) 199 | m.bias.data.zero_() 200 | 201 | normal_init(self.RCNN_rpn.RPN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 202 | normal_init(self.RCNN_rpn.RPN_bbox_pred, 0, 0.01, cfg.TRAIN.TRUNCATED) 203 | 204 | # normal_init(self.RCNN_rpn.RPN_hm_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 205 | # normal_init(self.RCNN_rpn.PRN_wh_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 206 | # normal_init(self.RCNN_rpn.PRN_offset_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 207 | 208 | 209 | normal_init(self.RCNN_cls_score, 0, 0.01, cfg.TRAIN.TRUNCATED) 210 | normal_init(self.RCNN_bbox_pred, 0, 0.001, cfg.TRAIN.TRUNCATED) 211 | 212 | def create_architecture(self): 213 | self._init_modules() 214 | self._init_weights() 215 | -------------------------------------------------------------------------------- /lib/model/loss/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/loss/__init__.py -------------------------------------------------------------------------------- /lib/model/rpn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/rpn/__init__.py -------------------------------------------------------------------------------- /lib/model/rpn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import numpy as np 4 | import pdb 5 | 6 | def bbox_transform(ex_rois, gt_rois): 7 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 8 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 9 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 10 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 11 | 12 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 13 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 14 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 15 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 16 | 17 | targets_dx = (gt_ctr_x - 
ex_ctr_x) / ex_widths 18 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 19 | targets_dw = torch.log(gt_widths / ex_widths) 20 | targets_dh = torch.log(gt_heights / ex_heights) 21 | 22 | targets = torch.stack( 23 | (targets_dx, targets_dy, targets_dw, targets_dh),1) 24 | 25 | return targets 26 | 27 | def bbox_transform_batch(ex_rois, gt_rois): 28 | 29 | if ex_rois.dim() == 2: 30 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 31 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 32 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 33 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 34 | 35 | gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0 36 | gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0 37 | gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths 38 | gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights 39 | 40 | targets_dx = (gt_ctr_x - ex_ctr_x.view(1,-1).expand_as(gt_ctr_x)) / ex_widths 41 | targets_dy = (gt_ctr_y - ex_ctr_y.view(1,-1).expand_as(gt_ctr_y)) / ex_heights 42 | targets_dw = torch.log(gt_widths / ex_widths.view(1,-1).expand_as(gt_widths)) 43 | targets_dh = torch.log(gt_heights / ex_heights.view(1,-1).expand_as(gt_heights)) 44 | 45 | elif ex_rois.dim() == 3: 46 | ex_widths = ex_rois[:, :, 2] - ex_rois[:, :, 0] + 1.0 47 | ex_heights = ex_rois[:,:, 3] - ex_rois[:,:, 1] + 1.0 48 | ex_ctr_x = ex_rois[:, :, 0] + 0.5 * ex_widths 49 | ex_ctr_y = ex_rois[:, :, 1] + 0.5 * ex_heights 50 | 51 | gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0 52 | gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0 53 | gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths 54 | gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights 55 | 56 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 57 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 58 | targets_dw = torch.log(gt_widths / ex_widths) 59 | targets_dh = torch.log(gt_heights / ex_heights) 60 | else: 61 | raise ValueError('ex_roi input dimension is not correct.') 62 | 63 | targets = torch.stack( 64 | (targets_dx, targets_dy, targets_dw, targets_dh),2) 65 | 66 | return targets 67 | 68 | def bbox_transform_inv(boxes, deltas, batch_size): 69 | widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0 70 | heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0 71 | ctr_x = boxes[:, :, 0] + 0.5 * widths 72 | ctr_y = boxes[:, :, 1] + 0.5 * heights 73 | 74 | dx = deltas[:, :, 0::4] 75 | dy = deltas[:, :, 1::4] 76 | dw = deltas[:, :, 2::4] 77 | dh = deltas[:, :, 3::4] 78 | 79 | pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2) 80 | pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2) 81 | pred_w = torch.exp(dw) * widths.unsqueeze(2) 82 | pred_h = torch.exp(dh) * heights.unsqueeze(2) 83 | 84 | pred_boxes = deltas.clone() 85 | # x1 86 | pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w 87 | # y1 88 | pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h 89 | # x2 90 | pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w 91 | # y2 92 | pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h 93 | 94 | return pred_boxes 95 | 96 | def clip_boxes_batch(boxes, im_shape, batch_size): 97 | """ 98 | Clip boxes to image boundaries. 
99 | """ 100 | num_rois = boxes.size(1) 101 | 102 | boxes[boxes < 0] = 0 103 | # batch_x = (im_shape[:,0]-1).view(batch_size, 1).expand(batch_size, num_rois) 104 | # batch_y = (im_shape[:,1]-1).view(batch_size, 1).expand(batch_size, num_rois) 105 | 106 | batch_x = im_shape[:, 1] - 1 107 | batch_y = im_shape[:, 0] - 1 108 | 109 | boxes[:,:,0][boxes[:,:,0] > batch_x] = batch_x 110 | boxes[:,:,1][boxes[:,:,1] > batch_y] = batch_y 111 | boxes[:,:,2][boxes[:,:,2] > batch_x] = batch_x 112 | boxes[:,:,3][boxes[:,:,3] > batch_y] = batch_y 113 | 114 | return boxes 115 | 116 | def clip_boxes(boxes, im_shape, batch_size): 117 | 118 | for i in range(batch_size): 119 | boxes[i,:,0::4].clamp_(0, im_shape[i, 1]-1) 120 | boxes[i,:,1::4].clamp_(0, im_shape[i, 0]-1) 121 | boxes[i,:,2::4].clamp_(0, im_shape[i, 1]-1) 122 | boxes[i,:,3::4].clamp_(0, im_shape[i, 0]-1) 123 | 124 | return boxes 125 | 126 | 127 | def bbox_overlaps(anchors, gt_boxes): 128 | """ 129 | anchors: (N, 4) ndarray of float 130 | gt_boxes: (K, 4) ndarray of float 131 | 132 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 133 | """ 134 | N = anchors.size(0) 135 | K = gt_boxes.size(0) 136 | 137 | gt_boxes_area = ((gt_boxes[:,2] - gt_boxes[:,0] + 1) * 138 | (gt_boxes[:,3] - gt_boxes[:,1] + 1)).view(1, K) 139 | 140 | anchors_area = ((anchors[:,2] - anchors[:,0] + 1) * 141 | (anchors[:,3] - anchors[:,1] + 1)).view(N, 1) 142 | 143 | boxes = anchors.view(N, 1, 4).expand(N, K, 4) 144 | query_boxes = gt_boxes.view(1, K, 4).expand(N, K, 4) 145 | 146 | iw = (torch.min(boxes[:,:,2], query_boxes[:,:,2]) - 147 | torch.max(boxes[:,:,0], query_boxes[:,:,0]) + 1) 148 | iw[iw < 0] = 0 149 | 150 | ih = (torch.min(boxes[:,:,3], query_boxes[:,:,3]) - 151 | torch.max(boxes[:,:,1], query_boxes[:,:,1]) + 1) 152 | ih[ih < 0] = 0 153 | 154 | ua = anchors_area + gt_boxes_area - (iw * ih) 155 | overlaps = iw * ih / ua 156 | 157 | return overlaps 158 | 159 | def bbox_overlaps_batch(anchors, gt_boxes): 160 | """ 161 | anchors: (N, 4) ndarray of float 162 | gt_boxes: (b, K, 5) ndarray of float 163 | 164 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 165 | """ 166 | batch_size = gt_boxes.size(0) 167 | 168 | 169 | if anchors.dim() == 2: 170 | 171 | N = anchors.size(0) 172 | K = gt_boxes.size(1) 173 | 174 | anchors = anchors.view(1, N, 4).expand(batch_size, N, 4).contiguous() 175 | gt_boxes = gt_boxes[:,:,:4].contiguous() 176 | 177 | 178 | gt_boxes_x = (gt_boxes[:,:,2] - gt_boxes[:,:,0] + 1) 179 | gt_boxes_y = (gt_boxes[:,:,3] - gt_boxes[:,:,1] + 1) 180 | gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K) 181 | 182 | anchors_boxes_x = (anchors[:,:,2] - anchors[:,:,0] + 1) 183 | anchors_boxes_y = (anchors[:,:,3] - anchors[:,:,1] + 1) 184 | anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1) 185 | 186 | gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1) 187 | anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1) 188 | 189 | boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4) 190 | query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(batch_size, N, K, 4) 191 | 192 | iw = (torch.min(boxes[:,:,:,2], query_boxes[:,:,:,2]) - 193 | torch.max(boxes[:,:,:,0], query_boxes[:,:,:,0]) + 1) 194 | iw[iw < 0] = 0 195 | 196 | ih = (torch.min(boxes[:,:,:,3], query_boxes[:,:,:,3]) - 197 | torch.max(boxes[:,:,:,1], query_boxes[:,:,:,1]) + 1) 198 | ih[ih < 0] = 0 199 | ua = anchors_area + gt_boxes_area - (iw * ih) 200 | overlaps = iw * ih / ua 201 | 202 | # mask the overlap here. 
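        # Ground-truth rows used purely as padding are all zeros, so under the
        # "+ 1" width/height convention they come out with width == height == 1;
        # gt_area_zero flags them and their overlap columns are forced to 0 so no
        # anchor can be matched to a padding box. Degenerate anchors are filled
        # with -1 instead, letting downstream code tell an invalid anchor apart
        # from a real anchor that simply has zero overlap.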
203 | overlaps.masked_fill_(gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0) 204 | overlaps.masked_fill_(anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1) 205 | 206 | elif anchors.dim() == 3: 207 | N = anchors.size(1) 208 | K = gt_boxes.size(1) 209 | 210 | if anchors.size(2) == 4: 211 | anchors = anchors[:,:,:4].contiguous() 212 | else: 213 | anchors = anchors[:,:,1:5].contiguous() 214 | 215 | gt_boxes = gt_boxes[:,:,:4].contiguous() 216 | 217 | gt_boxes_x = (gt_boxes[:,:,2] - gt_boxes[:,:,0] + 1) 218 | gt_boxes_y = (gt_boxes[:,:,3] - gt_boxes[:,:,1] + 1) 219 | gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K) 220 | 221 | anchors_boxes_x = (anchors[:,:,2] - anchors[:,:,0] + 1) 222 | anchors_boxes_y = (anchors[:,:,3] - anchors[:,:,1] + 1) 223 | anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1) 224 | 225 | gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1) 226 | anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1) 227 | 228 | boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4) 229 | query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(batch_size, N, K, 4) 230 | 231 | iw = (torch.min(boxes[:,:,:,2], query_boxes[:,:,:,2]) - 232 | torch.max(boxes[:,:,:,0], query_boxes[:,:,:,0]) + 1) 233 | iw[iw < 0] = 0 234 | 235 | ih = (torch.min(boxes[:,:,:,3], query_boxes[:,:,:,3]) - 236 | torch.max(boxes[:,:,:,1], query_boxes[:,:,:,1]) + 1) 237 | ih[ih < 0] = 0 238 | ua = anchors_area + gt_boxes_area - (iw * ih) 239 | 240 | overlaps = iw * ih / ua 241 | 242 | # mask the overlap here. 243 | overlaps.masked_fill_(gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0) 244 | overlaps.masked_fill_(anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1) 245 | else: 246 | raise ValueError('anchors input dimension is not correct.') 247 | 248 | return overlaps 249 | -------------------------------------------------------------------------------- /lib/model/rpn/centernet_rpn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from model.utils.config import cfg 7 | from model.loss.losses import FocalLoss,RegL1Loss 8 | from torch.autograd import Variable 9 | from model.utils.cente_decode import ctdet_decode 10 | 11 | 12 | class _ProposalLayer(nn.Module): 13 | """ 14 | Outputs object detection proposals by applying estimated bounding-box 15 | transformations to a set of regular boxes (called "anchors"). 
16 | """ 17 | 18 | def __init__(self, feat_stride): 19 | super(_ProposalLayer, self).__init__() 20 | 21 | self._feat_stride = feat_stride 22 | 23 | 24 | # rois blob: holds R regions of interest, each is a 5-tuple 25 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 26 | # rectangle (x1, y1, x2, y2) 27 | # top[0].reshape(1, 5) 28 | # 29 | # # scores blob: holds scores for R regions of interest 30 | # if len(top) > 1: 31 | # top[1].reshape(1, 1, 1, 1) 32 | 33 | def forward(self, input): 34 | 35 | # Algorithm: 36 | # 37 | # for each (H, W) location i 38 | # generate A anchor boxes centered on cell i 39 | # apply predicted bbox deltas at cell i to each of the A anchors 40 | # clip predicted boxes to image 41 | # remove predicted boxes with either height or width < threshold 42 | # sort all (proposal, score) pairs by score from highest to lowest 43 | # take top pre_nms_topN proposals before NMS 44 | # apply NMS with threshold 0.7 to remaining proposals 45 | # take after_nms_topN proposals after NMS 46 | # return the top proposals (-> RoIs top, scores top) 47 | 48 | 49 | # the first set of _num_anchors channels are bg probs 50 | # the second set are the fg probs 51 | scores = input[0] 52 | wh_deltas = input[1] 53 | offset_deltas = input[2] 54 | im_info = input[3] 55 | cfg_key = input[4] 56 | 57 | 58 | 59 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 60 | 61 | detections = ctdet_decode(scores,wh_deltas,offset_deltas,K=post_nms_topN) 62 | 63 | 64 | 65 | 66 | detections[:, :, :4] *= self._feat_stride 67 | batch_size = scores.size(0) 68 | 69 | 70 | 71 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 72 | 73 | for i in range(batch_size): 74 | # # 3. remove predicted boxes with either height or width < threshold 75 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 76 | 77 | output[i,:,0] = i 78 | 79 | output[i,:,1:] = detections[i,:,:4] 80 | 81 | return output 82 | 83 | def backward(self, top, propagate_down, bottom): 84 | """This layer does not propagate gradients.""" 85 | pass 86 | 87 | def reshape(self, bottom, top): 88 | """Reshaping happens during the call to forward.""" 89 | pass 90 | 91 | def _filter_boxes(self, boxes, min_size): 92 | """Remove all boxes with any side smaller than min_size.""" 93 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 94 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 95 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 96 | return keep 97 | 98 | 99 | 100 | 101 | class _RPN(nn.Module): 102 | """ region proposal network """ 103 | def __init__(self, din): 104 | super(_RPN, self).__init__() 105 | 106 | self.din = din # get depth of input feature map, e.g., 512 107 | 108 | self.feat_stride = cfg.FEAT_STRIDE 109 | 110 | 111 | self.RPN_hm_score = nn.Conv2d(self.din, 1 , 1, 1, 0) 112 | self.PRN_wh_score = nn.Conv2d(self.din, 2 , 1, 1, 0) 113 | self.PRN_offset_score = nn.Conv2d(self.din, 2 , 1, 1, 0) 114 | self.RPN_proposal = _ProposalLayer(self.feat_stride) 115 | 116 | self.crit = FocalLoss() 117 | # self.crit =torch.nn.MSELoss() 118 | self.crit_offset = RegL1Loss() 119 | self.crit_wh = RegL1Loss() 120 | 121 | self.rpn_loss_hm = 0 122 | self.rpn_loss_wh = 0 123 | self.rpn_loss_offset = 0 124 | 125 | 126 | @staticmethod 127 | def reshape(x, d): 128 | input_shape = x.size() 129 | x = x.view(input_shape[0], int(d), 130 | int(float(input_shape[1] * input_shape[2]) / float(d)), 131 | input_shape[3]) 132 | return x 133 | 134 | def forward(self, base_feat, im_info, gt_boxes, 
num_boxes,hm,reg_mask,wh,offset,ind): 135 | 136 | batch_size = base_feat.size(0) 137 | 138 | rpn_hm_score = self.RPN_hm_score(base_feat) 139 | rpn_cls_prob = F.sigmoid(rpn_hm_score) 140 | rpn_wh_pred = self.PRN_wh_score(base_feat) 141 | rpn_offset_pred = self.PRN_offset_score(base_feat) 142 | 143 | cfg_key = 'TRAIN' if self.training else 'TEST' 144 | 145 | 146 | 147 | self.rpn_loss_cls = 0 148 | self.rpn_loss_box = 0 149 | 150 | # generating training labels and build the rpn loss 151 | if self.training: 152 | assert gt_boxes is not None 153 | 154 | hm_loss = self.crit(rpn_cls_prob, hm) 155 | 156 | offset_loss = self.crit_offset(rpn_offset_pred, reg_mask, 157 | ind, offset) 158 | 159 | wh_loss = self.crit_wh(rpn_wh_pred, reg_mask, 160 | ind, wh) 161 | 162 | self.rpn_loss_cls = hm_loss + offset_loss 163 | self.rpn_loss_box = wh_loss 164 | 165 | rois = self.RPN_proposal( 166 | (rpn_cls_prob, rpn_wh_pred, rpn_offset_pred, im_info, cfg_key)) 167 | return rois, self.rpn_loss_cls, self.rpn_loss_box 168 | -------------------------------------------------------------------------------- /lib/model/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import numpy as np 4 | import pdb 5 | 6 | # Verify that we compute the same anchors as Shaoqing's matlab implementation: 7 | # 8 | # >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat 9 | # >> anchors 10 | # 11 | # anchors = 12 | # 13 | # -83 -39 100 56 14 | # -175 -87 192 104 15 | # -359 -183 376 200 16 | # -55 -55 72 72 17 | # -119 -119 136 136 18 | # -247 -247 264 264 19 | # -35 -79 52 96 20 | # -79 -167 96 184 21 | # -167 -343 184 360 22 | 23 | #array([[ -83., -39., 100., 56.], 24 | # [-175., -87., 192., 104.], 25 | # [-359., -183., 376., 200.], 26 | # [ -55., -55., 72., 72.], 27 | # [-119., -119., 136., 136.], 28 | # [-247., -247., 264., 264.], 29 | # [ -35., -79., 52., 96.], 30 | # [ -79., -167., 96., 184.], 31 | # [-167., -343., 184., 360.]]) 32 | 33 | try: 34 | xrange # Python 2 35 | except NameError: 36 | xrange = range # Python 3 37 | 38 | 39 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 40 | scales=2**np.arange(3, 6)): 41 | """ 42 | Generate anchor (reference) windows by enumerating aspect ratios X 43 | scales wrt a reference (0, 0, 15, 15) window. 44 | """ 45 | 46 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 47 | ratio_anchors = _ratio_enum(base_anchor, ratios) 48 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 49 | for i in xrange(ratio_anchors.shape[0])]) 50 | return anchors 51 | 52 | def _whctrs(anchor): 53 | """ 54 | Return width, height, x center, and y center for an anchor (window). 55 | """ 56 | 57 | w = anchor[2] - anchor[0] + 1 58 | h = anchor[3] - anchor[1] + 1 59 | x_ctr = anchor[0] + 0.5 * (w - 1) 60 | y_ctr = anchor[1] + 0.5 * (h - 1) 61 | return w, h, x_ctr, y_ctr 62 | 63 | def _mkanchors(ws, hs, x_ctr, y_ctr): 64 | """ 65 | Given a vector of widths (ws) and heights (hs) around a center 66 | (x_ctr, y_ctr), output a set of anchors (windows). 67 | """ 68 | 69 | ws = ws[:, np.newaxis] 70 | hs = hs[:, np.newaxis] 71 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 72 | y_ctr - 0.5 * (hs - 1), 73 | x_ctr + 0.5 * (ws - 1), 74 | y_ctr + 0.5 * (hs - 1))) 75 | return anchors 76 | 77 | def _ratio_enum(anchor, ratios): 78 | """ 79 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 
80 | """ 81 | 82 | w, h, x_ctr, y_ctr = _whctrs(anchor) 83 | size = w * h 84 | size_ratios = size / ratios 85 | ws = np.round(np.sqrt(size_ratios)) 86 | hs = np.round(ws * ratios) 87 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 88 | return anchors 89 | 90 | def _scale_enum(anchor, scales): 91 | """ 92 | Enumerate a set of anchors for each scale wrt an anchor. 93 | """ 94 | 95 | w, h, x_ctr, y_ctr = _whctrs(anchor) 96 | ws = w * scales 97 | hs = h * scales 98 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 99 | return anchors 100 | 101 | if __name__ == '__main__': 102 | import time 103 | t = time.time() 104 | a = generate_anchors() 105 | print(time.time() - t) 106 | print(a) 107 | from IPython import embed; embed() 108 | -------------------------------------------------------------------------------- /lib/model/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | # -------------------------------------------------------- 3 | # Faster R-CNN 4 | # Copyright (c) 2015 Microsoft 5 | # Licensed under The MIT License [see LICENSE for details] 6 | # Written by Ross Girshick and Sean Bell 7 | # -------------------------------------------------------- 8 | # -------------------------------------------------------- 9 | # Reorganized and modified by Jianwei Yang and Jiasen Lu 10 | # -------------------------------------------------------- 11 | 12 | import torch 13 | import torch.nn as nn 14 | import numpy as np 15 | import math 16 | import yaml 17 | from model.utils.config import cfg 18 | from .generate_anchors import generate_anchors 19 | from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch 20 | from torchvision.ops import nms 21 | from external.nms import soft_nms 22 | import pdb 23 | 24 | DEBUG = False 25 | 26 | class _ProposalLayer(nn.Module): 27 | """ 28 | Outputs object detection proposals by applying estimated bounding-box 29 | transformations to a set of regular boxes (called "anchors"). 
30 | """ 31 | 32 | def __init__(self, feat_stride, scales, ratios): 33 | super(_ProposalLayer, self).__init__() 34 | 35 | self._feat_stride = feat_stride 36 | self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales), 37 | ratios=np.array(ratios))).float() 38 | self._num_anchors = self._anchors.size(0) 39 | 40 | # rois blob: holds R regions of interest, each is a 5-tuple 41 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 42 | # rectangle (x1, y1, x2, y2) 43 | # top[0].reshape(1, 5) 44 | # 45 | # # scores blob: holds scores for R regions of interest 46 | # if len(top) > 1: 47 | # top[1].reshape(1, 1, 1, 1) 48 | 49 | def forward(self, input): 50 | 51 | # Algorithm: 52 | # 53 | # for each (H, W) location i 54 | # generate A anchor boxes centered on cell i 55 | # apply predicted bbox deltas at cell i to each of the A anchors 56 | # clip predicted boxes to image 57 | # remove predicted boxes with either height or width < threshold 58 | # sort all (proposal, score) pairs by score from highest to lowest 59 | # take top pre_nms_topN proposals before NMS 60 | # apply NMS with threshold 0.7 to remaining proposals 61 | # take after_nms_topN proposals after NMS 62 | # return the top proposals (-> RoIs top, scores top) 63 | 64 | 65 | # the first set of _num_anchors channels are bg probs 66 | # the second set are the fg probs 67 | scores = input[0][:, self._num_anchors:, :, :] 68 | bbox_deltas = input[1] 69 | im_info = input[2] 70 | cfg_key = input[3] 71 | 72 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 73 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 74 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 75 | min_size = cfg[cfg_key].RPN_MIN_SIZE 76 | 77 | batch_size = bbox_deltas.size(0) 78 | 79 | feat_height, feat_width = scores.size(2), scores.size(3) 80 | shift_x = np.arange(0, feat_width) * self._feat_stride 81 | shift_y = np.arange(0, feat_height) * self._feat_stride 82 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 83 | shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(), 84 | shift_x.ravel(), shift_y.ravel())).transpose()) 85 | shifts = shifts.contiguous().type_as(scores).float() 86 | 87 | A = self._num_anchors 88 | K = shifts.size(0) 89 | 90 | self._anchors = self._anchors.type_as(scores) 91 | # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous() 92 | anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4) 93 | anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4) 94 | 95 | # Transpose and reshape predicted bbox transformations to get them 96 | # into the same order as the anchors: 97 | 98 | bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous() 99 | bbox_deltas = bbox_deltas.view(batch_size, -1, 4) 100 | 101 | # Same story for the scores: 102 | scores = scores.permute(0, 2, 3, 1).contiguous() 103 | scores = scores.view(batch_size, -1) 104 | 105 | # Convert anchors into proposals via bbox transformations 106 | proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size) 107 | 108 | # 2. clip predicted boxes to image 109 | proposals = clip_boxes(proposals, im_info, batch_size) 110 | # proposals = clip_boxes_batch(proposals, im_info, batch_size) 111 | 112 | # assign the score to 0 if it's non keep. 
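        # The min-size filtering and per-image index trimming from the original
        # implementation are left commented out below, so every decoded proposal
        # is kept, sorted by score and passed straight to NMS; re-enabling
        # _filter_boxes would drop boxes smaller than RPN_MIN_SIZE scaled by the
        # image scale stored in im_info[:, 2].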
113 | # keep = self._filter_boxes(proposals, min_size * im_info[:, 2]) 114 | 115 | # trim keep index to make it euqal over batch 116 | # keep_idx = torch.cat(tuple(keep_idx), 0) 117 | 118 | # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size) 119 | # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4) 120 | 121 | # _, order = torch.sort(scores_keep, 1, True) 122 | 123 | scores_keep = scores 124 | proposals_keep = proposals 125 | _, order = torch.sort(scores_keep, 1, True) 126 | 127 | output = scores.new(batch_size, post_nms_topN, 5).zero_() 128 | for i in range(batch_size): 129 | # # 3. remove predicted boxes with either height or width < threshold 130 | # # (NOTE: convert min_size to input image scale stored in im_info[2]) 131 | proposals_single = proposals_keep[i] 132 | scores_single = scores_keep[i] 133 | 134 | # # 4. sort all (proposal, score) pairs by score from highest to lowest 135 | # # 5. take top pre_nms_topN (e.g. 6000) 136 | order_single = order[i] 137 | 138 | if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel(): 139 | order_single = order_single[:pre_nms_topN] 140 | 141 | proposals_single = proposals_single[order_single, :] 142 | scores_single = scores_single[order_single].view(-1,1) 143 | 144 | # 6. apply nms (e.g. threshold = 0.7) 145 | # 7. take after_nms_topN (e.g. 300) 146 | # 8. return the top proposals (-> RoIs top) 147 | keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh) 148 | 149 | # keep_idx_i = soft_nms(torch.cat((proposals_single, scores_single), 1).cpu().numpy(), Nt=0.5, method=2) 150 | # keep_idx_i = torch.as_tensor(keep_idx_i, dtype=torch.long) 151 | # 152 | # keep_idx_i = keep_idx_i.long().view(-1) 153 | 154 | if post_nms_topN > 0: 155 | keep_idx_i = keep_idx_i[:post_nms_topN] 156 | proposals_single = proposals_single[keep_idx_i, :] 157 | scores_single = scores_single[keep_idx_i, :] 158 | 159 | # padding 0 at the end. 
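            # output was pre-zeroed with shape (batch_size, post_nms_topN, 5):
            # column 0 stores the image index within the batch and columns 1:5
            # the proposal coordinates. When NMS keeps fewer than post_nms_topN
            # boxes the trailing rows simply stay all-zero, acting as padding so
            # the RoI tensor has a fixed size per image.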
160 | num_proposal = proposals_single.size(0) 161 | output[i,:,0] = i 162 | output[i,:num_proposal,1:] = proposals_single 163 | 164 | return output 165 | 166 | def backward(self, top, propagate_down, bottom): 167 | """This layer does not propagate gradients.""" 168 | pass 169 | 170 | def reshape(self, bottom, top): 171 | """Reshaping happens during the call to forward.""" 172 | pass 173 | 174 | def _filter_boxes(self, boxes, min_size): 175 | """Remove all boxes with any side smaller than min_size.""" 176 | ws = boxes[:, :, 2] - boxes[:, :, 0] + 1 177 | hs = boxes[:, :, 3] - boxes[:, :, 1] + 1 178 | keep = ((ws >= min_size.view(-1,1).expand_as(ws)) & (hs >= min_size.view(-1,1).expand_as(hs))) 179 | return keep 180 | -------------------------------------------------------------------------------- /lib/model/rpn/rpn.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from model.utils.config import cfg 7 | from model.loss.losses import _smooth_l1_loss 8 | from torch.autograd import Variable 9 | 10 | from .anchor_target_layer import _AnchorTargetLayer 11 | from .proposal_layer import _ProposalLayer 12 | from model.loss.losses import hard_negative_mining 13 | 14 | 15 | 16 | 17 | class _RPN(nn.Module): 18 | """ region proposal network """ 19 | def __init__(self, din): 20 | super(_RPN, self).__init__() 21 | 22 | self.din = din # get depth of input feature map, e.g., 512 23 | self.anchor_scales = cfg.ANCHOR_SCALES 24 | self.anchor_ratios = cfg.ANCHOR_RATIOS 25 | self.feat_stride = cfg.FEAT_STRIDE 26 | 27 | # define the convrelu layers processing input feature map 28 | # self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 29 | 30 | # define bg/fg classifcation score layer 31 | self.nc_score_out = len(self.anchor_scales) * len( 32 | self.anchor_ratios) * 2 # 2(bg/fg) * 9 (anchors) 33 | 34 | self.RPN_cls_score = nn.Conv2d(self.din, self.nc_score_out, 1, 1, 0) 35 | 36 | # define anchor box offset prediction layer 37 | self.nc_bbox_out = len(self.anchor_scales) * len( 38 | self.anchor_ratios) * 4 # 4(coords) * 9 (anchors) 39 | self.RPN_bbox_pred = nn.Conv2d(self.din, self.nc_bbox_out, 1, 1, 0) 40 | 41 | # define proposal layer 42 | self.RPN_proposal = _ProposalLayer(self.feat_stride, 43 | self.anchor_scales, 44 | self.anchor_ratios) 45 | 46 | # define anchor target layer 47 | self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, 48 | self.anchor_scales, 49 | self.anchor_ratios) 50 | 51 | self.rpn_loss_cls = 0 52 | self.rpn_loss_box = 0 53 | 54 | 55 | 56 | @staticmethod 57 | def reshape(x, d): 58 | input_shape = x.size() 59 | x = x.view(input_shape[0], int(d), 60 | int(float(input_shape[1] * input_shape[2]) / float(d)), 61 | input_shape[3]) 62 | return x 63 | 64 | def forward(self, base_feat, im_info, gt_boxes, num_boxes): 65 | batch_size = base_feat.size(0) 66 | 67 | # return feature map after convrelu layer 68 | # rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True) 69 | # get rpn classification score 70 | rpn_cls_score = self.RPN_cls_score(base_feat) 71 | 72 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 73 | rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1) 74 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 75 | 76 | # get rpn offsets to the anchor boxes 77 | rpn_bbox_pred = self.RPN_bbox_pred(base_feat) 78 | 79 | # proposal layer 80 | cfg_key = 'TRAIN' if self.training else 
'TEST' 81 | 82 | rois = self.RPN_proposal( 83 | (rpn_cls_prob.data, rpn_bbox_pred.data, im_info, cfg_key)) 84 | 85 | self.rpn_loss_cls = 0 86 | self.rpn_loss_box = 0 87 | 88 | # generating training labels and build the rpn loss 89 | if self.training: 90 | assert gt_boxes is not None 91 | 92 | rpn_data = self.RPN_anchor_target( 93 | (rpn_cls_score.data, gt_boxes, im_info, num_boxes)) 94 | 95 | # compute classification loss 96 | rpn_cls_score = rpn_cls_score_reshape.permute( 97 | 0, 2, 3, 1).contiguous().view(batch_size, -1, 2) 98 | rpn_label = rpn_data[0].view(batch_size, -1) 99 | 100 | rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1)) 101 | 102 | rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0, 103 | rpn_keep) 104 | 105 | rpn_label = torch.index_select(rpn_label.view(-1), 0, 106 | rpn_keep.data) 107 | rpn_label = Variable(rpn_label.long()) 108 | 109 | # from collections import Counter 110 | # label = rpn_label.cpu().numpy() 111 | # print(Counter(label)) 112 | 113 | loss = -F.log_softmax(rpn_cls_score, dim=1)[:, 0] 114 | mask ,num_pos = hard_negative_mining(loss, rpn_label) 115 | confidence = rpn_cls_score[mask, :] 116 | self.rpn_loss_cls = F.cross_entropy(confidence.reshape(-1, 2), rpn_label[mask], reduction='mean') 117 | 118 | # self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label) 119 | # self.rpn_loss_cls = OHEM_loss(rpn_cls_score, rpn_label) 120 | 121 | 122 | 123 | 124 | fg_cnt = torch.sum(rpn_label.data.ne(0)) 125 | 126 | rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[ 127 | 1:] 128 | 129 | # compute bbox regression loss 130 | rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights) 131 | rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights) 132 | rpn_bbox_targets = Variable(rpn_bbox_targets) 133 | 134 | 135 | 136 | 137 | self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, 138 | rpn_bbox_targets, 139 | rpn_bbox_inside_weights, 140 | rpn_bbox_outside_weights, 141 | sigma=3, 142 | dim=[1, 2, 3], 143 | ) 144 | 145 | 146 | 147 | return rois, self.rpn_loss_cls, self.rpn_loss_box 148 | -------------------------------------------------------------------------------- /lib/model/utils/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /lib/model/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/model/utils/__init__.py -------------------------------------------------------------------------------- /lib/model/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps(np.ndarray[DTYPE_t, ndim=2] boxes, 16 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 17 | return bbox_overlaps_c(boxes, query_boxes) 18 | 19 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_overlaps_c( 20 | np.ndarray[DTYPE_t, ndim=2] boxes, 21 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 22 | 
""" 23 | Parameters 24 | ---------- 25 | boxes: (N, 4) ndarray of float 26 | query_boxes: (K, 4) ndarray of float 27 | Returns 28 | ------- 29 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 30 | """ 31 | cdef unsigned int N = boxes.shape[0] 32 | cdef unsigned int K = query_boxes.shape[0] 33 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 34 | cdef DTYPE_t iw, ih, box_area 35 | cdef DTYPE_t ua 36 | cdef unsigned int k, n 37 | for k in range(K): 38 | box_area = ( 39 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 40 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 41 | ) 42 | for n in range(N): 43 | iw = ( 44 | min(boxes[n, 2], query_boxes[k, 2]) - 45 | max(boxes[n, 0], query_boxes[k, 0]) + 1 46 | ) 47 | if iw > 0: 48 | ih = ( 49 | min(boxes[n, 3], query_boxes[k, 3]) - 50 | max(boxes[n, 1], query_boxes[k, 1]) + 1 51 | ) 52 | if ih > 0: 53 | ua = float( 54 | (boxes[n, 2] - boxes[n, 0] + 1) * 55 | (boxes[n, 3] - boxes[n, 1] + 1) + 56 | box_area - iw * ih 57 | ) 58 | overlaps[n, k] = iw * ih / ua 59 | return overlaps 60 | 61 | 62 | def bbox_intersections( 63 | np.ndarray[DTYPE_t, ndim=2] boxes, 64 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 65 | return bbox_intersections_c(boxes, query_boxes) 66 | 67 | 68 | cdef np.ndarray[DTYPE_t, ndim=2] bbox_intersections_c( 69 | np.ndarray[DTYPE_t, ndim=2] boxes, 70 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 71 | """ 72 | For each query box compute the intersection ratio covered by boxes 73 | ---------- 74 | Parameters 75 | ---------- 76 | boxes: (N, 4) ndarray of float 77 | query_boxes: (K, 4) ndarray of float 78 | Returns 79 | ------- 80 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 81 | """ 82 | cdef unsigned int N = boxes.shape[0] 83 | cdef unsigned int K = query_boxes.shape[0] 84 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 85 | cdef DTYPE_t iw, ih, box_area 86 | cdef DTYPE_t ua 87 | cdef unsigned int k, n 88 | for k in range(K): 89 | box_area = ( 90 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 91 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 92 | ) 93 | for n in range(N): 94 | iw = ( 95 | min(boxes[n, 2], query_boxes[k, 2]) - 96 | max(boxes[n, 0], query_boxes[k, 0]) + 1 97 | ) 98 | if iw > 0: 99 | ih = ( 100 | min(boxes[n, 3], query_boxes[k, 3]) - 101 | max(boxes[n, 1], query_boxes[k, 1]) + 1 102 | ) 103 | if ih > 0: 104 | intersec[n, k] = iw * ih / box_area 105 | return intersec -------------------------------------------------------------------------------- /lib/model/utils/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | """Blob helper functions.""" 8 | 9 | import numpy as np 10 | # from scipy.misc import imread, imresize 11 | import cv2 12 | 13 | try: 14 | xrange # Python 2 15 | except NameError: 16 | xrange = range # Python 3 17 | 18 | 19 | def im_list_to_blob(ims): 20 | """Convert a list of images into a network input. 21 | 22 | Assumes images are already prepared (means subtracted, BGR order, ...). 
23 | """ 24 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 25 | num_images = len(ims) 26 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 27 | dtype=np.float32) 28 | for i in xrange(num_images): 29 | im = ims[i] 30 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 31 | 32 | return blob 33 | 34 | 35 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 36 | """Mean subtract and scale an image for use in a blob.""" 37 | 38 | im = im.astype(np.float32, copy=False) 39 | im -= pixel_means 40 | # im = im[:, :, ::-1] 41 | im_shape = im.shape 42 | # print(target_size) 43 | # print(im_shape) 44 | im_size_min = np.min(im_shape[0:2]) 45 | im_size_max = np.max(im_shape[0:2]) 46 | im_scale = float(target_size) / float(im_size_min) 47 | # Prevent the biggest axis from being more than MAX_SIZE 48 | # if np.round(im_scale * im_size_max) > max_size: 49 | # im_scale = float(max_size) / float(im_size_max) 50 | # im = imresize(im, im_scale) 51 | im = cv2.resize(im, 52 | None, 53 | None, 54 | fx=im_scale, 55 | fy=im_scale, 56 | interpolation=cv2.INTER_LINEAR) 57 | 58 | return im, im_scale 59 | -------------------------------------------------------------------------------- /lib/model/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514 2 | import tensorflow as tf 3 | import numpy as np 4 | import scipy.misc 5 | try: 6 | from StringIO import StringIO # Python 2.7 7 | except ImportError: 8 | from io import BytesIO # Python 3.x 9 | 10 | 11 | class Logger(object): 12 | def __init__(self, log_dir): 13 | """Create a summary writer logging to log_dir.""" 14 | self.writer = tf.summary.FileWriter(log_dir) 15 | 16 | def scalar_summary(self, tag, value, step): 17 | """Log a scalar variable.""" 18 | summary = tf.Summary( 19 | value=[tf.Summary.Value(tag=tag, simple_value=value)]) 20 | self.writer.add_summary(summary, step) 21 | 22 | def image_summary(self, tag, images, step): 23 | """Log a list of images.""" 24 | 25 | img_summaries = [] 26 | for i, img in enumerate(images): 27 | # Write the image to a string 28 | try: 29 | s = StringIO() 30 | except: 31 | s = BytesIO() 32 | scipy.misc.toimage(img).save(s, format="png") 33 | 34 | # Create an Image object 35 | img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(), 36 | height=img.shape[0], 37 | width=img.shape[1]) 38 | # Create a Summary value 39 | img_summaries.append( 40 | tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum)) 41 | 42 | # Create and write Summary 43 | summary = tf.Summary(value=img_summaries) 44 | self.writer.add_summary(summary, step) 45 | 46 | def histo_summary(self, tag, values, step, bins=1000): 47 | """Log a histogram of the tensor of values.""" 48 | 49 | # Create a histogram using numpy 50 | counts, bin_edges = np.histogram(values, bins=bins) 51 | 52 | # Fill the fields of the histogram proto 53 | hist = tf.HistogramProto() 54 | hist.min = float(np.min(values)) 55 | hist.max = float(np.max(values)) 56 | hist.num = int(np.prod(values.shape)) 57 | hist.sum = float(np.sum(values)) 58 | hist.sum_squares = float(np.sum(values**2)) 59 | 60 | # Drop the start of the first bin 61 | bin_edges = bin_edges[1:] 62 | 63 | # Add bin edges and counts 64 | for edge in bin_edges: 65 | hist.bucket_limit.append(edge) 66 | for c in counts: 67 | hist.bucket.append(c) 68 | 69 | # Create and write Summary 70 | summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)]) 71 | 
self.writer.add_summary(summary, step) 72 | self.writer.flush() 73 | -------------------------------------------------------------------------------- /lib/model/utils/net_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import torchvision.models as models 7 | from model.utils.config import cfg 8 | import cv2 9 | 10 | 11 | def save_net(fname, net): 12 | import h5py 13 | h5f = h5py.File(fname, mode='w') 14 | for k, v in net.state_dict().items(): 15 | h5f.create_dataset(k, data=v.cpu().numpy()) 16 | 17 | 18 | def load_net(fname, net): 19 | import h5py 20 | h5f = h5py.File(fname, mode='r') 21 | for k, v in net.state_dict().items(): 22 | param = torch.from_numpy(np.asarray(h5f[k])) 23 | v.copy_(param) 24 | 25 | 26 | def weights_normal_init(model, dev=0.01): 27 | if isinstance(model, list): 28 | for m in model: 29 | weights_normal_init(m, dev) 30 | else: 31 | for m in model.modules(): 32 | if isinstance(m, nn.Conv2d): 33 | m.weight.data.normal_(0.0, dev) 34 | elif isinstance(m, nn.Linear): 35 | m.weight.data.normal_(0.0, dev) 36 | 37 | 38 | def clip_gradient(model, clip_norm): 39 | """Computes a gradient clipping coefficient based on gradient norm.""" 40 | totalnorm = 0 41 | for p in model.parameters(): 42 | if p.requires_grad and p.grad is not None: 43 | modulenorm = p.grad.norm() 44 | totalnorm += modulenorm**2 45 | totalnorm = torch.sqrt(totalnorm).item() 46 | norm = (clip_norm / max(totalnorm, clip_norm)) 47 | for p in model.parameters(): 48 | if p.requires_grad and p.grad is not None: 49 | p.grad.mul_(norm) 50 | 51 | 52 | def vis_detections(img, class_name,c , dets, thresh=0.8): 53 | for i in range(np.minimum(10, dets.shape[0])): 54 | bbox = tuple(int(np.round(x)) for x in dets[i, :4]) 55 | score = dets[i, -1] 56 | if score < thresh: 57 | continue 58 | 59 | 60 | 61 | txt = '{}:{:.2f}'.format( class_name,score) 62 | font = cv2.FONT_HERSHEY_SIMPLEX 63 | cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0] 64 | cv2.rectangle( 65 | img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), c, 2) 66 | 67 | cv2.rectangle(img, 68 | (bbox[0], bbox[1] - cat_size[1] - 2), 69 | (bbox[0] + cat_size[0], bbox[1] - 2), c, -1) 70 | cv2.putText(img, txt, (bbox[0], bbox[1] - 2), 71 | font, 0.5, (0, 0, 0), thickness=1, lineType=cv2.LINE_AA) 72 | 73 | 74 | def adjust_learning_rate(optimizer, decay=0.1): 75 | """Sets the learning rate to the initial LR decayed by 0.5 every 20 epochs""" 76 | for param_group in optimizer.param_groups: 77 | param_group['lr'] = decay * param_group['lr'] 78 | 79 | 80 | def save_checkpoint(state, filename): 81 | torch.save(state, filename) 82 | 83 | 84 | 85 | 86 | def _crop_pool_layer(bottom, rois, max_pool=True): 87 | # code modified from 88 | # https://github.com/ruotianluo/pytorch-faster-rcnn 89 | # implement it using stn 90 | # box to affine 91 | # input (x1,y1,x2,y2) 92 | """ 93 | [ x2-x1 x1 + x2 - W + 1 ] 94 | [ ----- 0 --------------- ] 95 | [ W - 1 W - 1 ] 96 | [ ] 97 | [ y2-y1 y1 + y2 - H + 1 ] 98 | [ 0 ----- --------------- ] 99 | [ H - 1 H - 1 ] 100 | """ 101 | rois = rois.detach() 102 | batch_size = bottom.size(0) 103 | D = bottom.size(1) 104 | H = bottom.size(2) 105 | W = bottom.size(3) 106 | roi_per_batch = rois.size(0) / batch_size 107 | x1 = rois[:, 1::4] / 16.0 108 | y1 = rois[:, 2::4] / 16.0 109 | x2 = rois[:, 3::4] / 16.0 110 | y2 = rois[:, 4::4] / 16.0 111 | 112 | height = bottom.size(2) 113 | 
width = bottom.size(3) 114 | 115 | # affine theta 116 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 117 | theta = torch.cat([\ 118 | (x2 - x1) / (width - 1), 119 | zero, 120 | (x1 + x2 - width + 1) / (width - 1), 121 | zero, 122 | (y2 - y1) / (height - 1), 123 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 124 | 125 | if max_pool: 126 | pre_pool_size = cfg.POOLING_SIZE * 2 127 | grid = F.affine_grid( 128 | theta, torch.Size((rois.size(0), 1, pre_pool_size, pre_pool_size))) 129 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 130 | .contiguous().view(-1, D, H, W) 131 | crops = F.grid_sample(bottom, grid) 132 | crops = F.max_pool2d(crops, 2, 2) 133 | else: 134 | grid = F.affine_grid( 135 | theta, 136 | torch.Size((rois.size(0), 1, cfg.POOLING_SIZE, cfg.POOLING_SIZE))) 137 | bottom = bottom.view(1, batch_size, D, H, W).contiguous().expand(roi_per_batch, batch_size, D, H, W)\ 138 | .contiguous().view(-1, D, H, W) 139 | crops = F.grid_sample(bottom, grid) 140 | 141 | return crops, grid 142 | 143 | 144 | def _affine_grid_gen(rois, input_size, grid_size): 145 | 146 | rois = rois.detach() 147 | x1 = rois[:, 1::4] / 16.0 148 | y1 = rois[:, 2::4] / 16.0 149 | x2 = rois[:, 3::4] / 16.0 150 | y2 = rois[:, 4::4] / 16.0 151 | 152 | height = input_size[0] 153 | width = input_size[1] 154 | 155 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 156 | theta = torch.cat([\ 157 | (x2 - x1) / (width - 1), 158 | zero, 159 | (x1 + x2 - width + 1) / (width - 1), 160 | zero, 161 | (y2 - y1) / (height - 1), 162 | (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 163 | 164 | grid = F.affine_grid(theta, 165 | torch.Size((rois.size(0), 1, grid_size, grid_size))) 166 | 167 | return grid 168 | 169 | 170 | def _affine_theta(rois, input_size): 171 | 172 | rois = rois.detach() 173 | x1 = rois[:, 1::4] / 16.0 174 | y1 = rois[:, 2::4] / 16.0 175 | x2 = rois[:, 3::4] / 16.0 176 | y2 = rois[:, 4::4] / 16.0 177 | 178 | height = input_size[0] 179 | width = input_size[1] 180 | 181 | zero = Variable(rois.data.new(rois.size(0), 1).zero_()) 182 | 183 | # theta = torch.cat([\ 184 | # (x2 - x1) / (width - 1), 185 | # zero, 186 | # (x1 + x2 - width + 1) / (width - 1), 187 | # zero, 188 | # (y2 - y1) / (height - 1), 189 | # (y1 + y2 - height + 1) / (height - 1)], 1).view(-1, 2, 3) 190 | 191 | theta = torch.cat([\ 192 | (y2 - y1) / (height - 1), 193 | zero, 194 | (y1 + y2 - height + 1) / (height - 1), 195 | zero, 196 | (x2 - x1) / (width - 1), 197 | (x1 + x2 - width + 1) / (width - 1)], 1).view(-1, 2, 3) 198 | 199 | return theta 200 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Do Lin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software 
is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/README.md: -------------------------------------------------------------------------------- 1 | # PSROIAlign with multi-batch training support - PyTorch 2 | **Position-Sensitive-Region-of-Interests-Alignment** has been widely used throughout numerous well known deep object detectors, s.t. [R-FCN](https://arxiv.org/pdf/1605.06409.pdf), [LightHead R-CNN](https://arxiv.org/pdf/1711.07264.pdf), etc. However there are not that much implementations support ***multi-batch training*** in the world of PyTorch. With just one image per GPU, models would hardly be aware of the statistical information of the training data especially in cases that rarely one or two GPUs at hand. 3 | 4 | This CUDA based implementation fully supports multi-batch training, and can be easily integrated into your PyTorch object detectors. 5 | 6 | 7 | ## Prerequisite 8 | ``` 9 | python3 10 | pytorch >= 1.0 with CUDA support 11 | ``` 12 | 13 | 14 | ## Build the module 15 | ```bash 16 | sh build.sh 17 | ``` 18 | 19 | 20 | ## Use Case 21 | ```python 22 | import torch 23 | import torch.nn as nn 24 | from model.roi_layers import PSROIAlign 25 | ``` 26 | 27 | ```python 28 | class PSROIAlignExample(nn.Module): 29 | """ 30 | :spatial_scale: stride of the backbone 31 | :roi_size: output size of the pooled feature 32 | :sample_ratio: sample ratio of bilinear interpolation 33 | :pooled_dim: output channel of the pooled feature 34 | """ 35 | def __init__(self, 36 | spatial_scale=1./16., 37 | roi_size=7, 38 | sample_ratio=2, 39 | pooled_dim=10): 40 | 41 | super(PSROIAlignExample, self).__init__() 42 | self.psroialign = PSROIAlign(spatial_scale=spatial_scale, 43 | roi_size=roi_size, 44 | sampling_ratio=sample_ratio, 45 | pooled_dim=pooled_dim) 46 | 47 | def forward(self, feat, rois): 48 | return self.psroialign(feat, rois) 49 | ``` 50 | 51 | #### Feature Map to be pooled 52 | ```python 53 | batch_size = 4 54 | feat_height = 30 55 | feat_width = 40 56 | roi_size = 7 57 | oup_dim = 10 58 | 59 | feature = torch.randn((batch_size, 60 | roi_size * roi_size * oup_dim, 61 | feat_height, 62 | feat_width), 63 | requires_grad=True).cuda() 64 | ``` 65 | 66 | #### RoIs should be formatted as **(batch_index, x1, y1, x2, y2)** 67 | ```python 68 | rois = torch.tensor([ 69 | [0, 1., 1., 5., 5.], 70 | [0, 3., 3., 9., 9.], 71 | [1, 5., 5., 10., 10.], 72 | [1, 7., 7., 12., 12.] 73 | ]).cuda() 74 | ``` 75 | 76 | #### Essential Job 77 | ```python 78 | psroialign_pooled_feat = psroialign_example(feature, rois) 79 | ``` 80 | 81 | 82 | Play with ***example.py*** to get more details. 
83 | 84 | 85 | ## License 86 | [MIT](LICENSE) -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/__init__.py -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | rm -rf ./build/ ./model/_C* 4 | python setup.py build_ext --inplace -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/lib.linux-x86_64-3.6/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIAlign_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIAlign_cuda.o -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIPool_cuda.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/cuda/PSROIPool_cuda.o -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/vision.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/build/temp.linux-x86_64-3.6/mnt/data1/yanghuiyu/myself/object_detect/light_head_rcnn/psroialign/PSROIAlign/model/csrc/vision.o -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/_C.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/model/_C.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/model/__init__.py -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/PSROIAlign/model/csrc/.DS_Store -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/PSROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | 7 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 8 | #define CHECK_CONTIGUOUS(x) \ 9 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 10 | #define CHECK_INPUT(x) \ 11 | CHECK_CUDA(x); \ 12 | CHECK_CONTIGUOUS(x) 13 | 14 | extern THCState* state; 15 | 16 | #endif 17 | 18 | 19 | int PSROIAlign_forward( 20 | at::Tensor bottom_data, 21 | at::Tensor bottom_rois, 22 | at::Tensor top_data, 23 | at::Tensor argmax_data, 24 | float spatial_scale, 25 | int group_size, 26 | int sampling_ratio) { 27 | 28 | #ifdef WITH_CUDA 29 | CHECK_INPUT(bottom_data); 30 | CHECK_INPUT(bottom_rois); 31 | CHECK_INPUT(top_data); 32 | CHECK_INPUT(argmax_data); 33 | 34 | int size_rois = bottom_rois.size(1); 35 | 36 | if (size_rois != 5) { 37 | printf("wrong roi size. (roi size should be 5)\n"); 38 | return 0; 39 | } 40 | 41 | cudaStream_t stream = THCState_getCurrentStream(state); 42 | 43 | PSROIAlignForwardLaucher(bottom_data, 44 | bottom_rois, 45 | top_data, 46 | argmax_data, 47 | spatial_scale, 48 | group_size, 49 | sampling_ratio, 50 | stream); 51 | #endif 52 | return 1; 53 | } 54 | 55 | int PSROIAlign_backward( 56 | at::Tensor top_diff, 57 | at::Tensor argmax_data, 58 | at::Tensor bottom_rois, 59 | at::Tensor bottom_diff, 60 | float spatial_scale, 61 | int group_size, 62 | int sampling_ratio) { 63 | 64 | #ifdef WITH_CUDA 65 | CHECK_INPUT(top_diff); 66 | CHECK_INPUT(bottom_rois); 67 | CHECK_INPUT(bottom_diff); 68 | CHECK_INPUT(argmax_data); 69 | 70 | int size_rois = bottom_rois.size(1); 71 | 72 | if (size_rois != 5) { 73 | printf("wrong roi size. (roi size should be 5)\n"); 74 | return 0; 75 | } 76 | 77 | cudaStream_t stream = THCState_getCurrentStream(state); 78 | 79 | PSROIAlignBackwardLaucher(top_diff, 80 | argmax_data, 81 | bottom_rois, 82 | bottom_diff, 83 | spatial_scale, 84 | group_size, 85 | sampling_ratio, 86 | stream); 87 | #endif 88 | return 1; 89 | } 90 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/PSROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | 8 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 9 | #define CHECK_CONTIGUOUS(x) \ 10 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 11 | #define CHECK_INPUT(x) \ 12 | CHECK_CUDA(x); \ 13 | CHECK_CONTIGUOUS(x) 14 | 15 | extern THCState* state; 16 | 17 | #endif 18 | 19 | 20 | int PSROIPool_forward(int pooled_height, 21 | int pooled_width, 22 | float spatial_scale, 23 | int group_size, 24 | int output_dim, 25 | at::Tensor features, 26 | at::Tensor rois, 27 | at::Tensor output, 28 | at::Tensor mappingchannel) { 29 | #ifdef WITH_CUDA 30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(mappingchannel); 34 | 35 | // Get # of Rois 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | if (size_rois != 5) { 39 | printf("wrong roi size\n"); 40 | return 0; 41 | } 42 | 43 | int data_height = features.size(2); 44 | int data_width = features.size(3); 45 | int num_channels = features.size(1); 46 | 47 | cudaStream_t stream = THCState_getCurrentStream(state); 48 | 49 | // call the gpu kernel for psroi_pooling 50 | PSROIPoolForwardLauncher(features, 51 | spatial_scale, 52 | num_rois, 53 | data_height, 54 | data_width, 55 | num_channels, 56 | pooled_height, 57 | pooled_width, 58 | rois, 59 | group_size, 60 | output_dim, 61 | output, 62 | mappingchannel, 63 | stream); 64 | #endif 65 | return 1; 66 | } 67 | 68 | 69 | int PSROIPool_backward(int pooled_height, 70 | int pooled_width, 71 | float spatial_scale, 72 | int output_dim, 73 | at::Tensor top_grad, 74 | at::Tensor rois, 75 | at::Tensor bottom_grad, 76 | at::Tensor mappingchannel) { 77 | #ifdef WITH_CUDA 78 | CHECK_INPUT(top_grad); 79 | CHECK_INPUT(rois); 80 | CHECK_INPUT(bottom_grad); 81 | CHECK_INPUT(mappingchannel); 82 | 83 | int batch_size = bottom_grad.size(0); 84 | 85 | // Number of ROIs 86 | int num_rois = rois.size(0); 87 | int size_rois = rois.size(1); 88 | if (size_rois != 5) { 89 | return 0; 90 | } 91 | 92 | // data height 93 | int data_height = bottom_grad.size(2); 94 | // data width 95 | int data_width = bottom_grad.size(3); 96 | // Number of channels 97 | int num_channels = bottom_grad.size(1); 98 | 99 | cudaStream_t stream = THCState_getCurrentStream(state); 100 | 101 | PSROIPoolBackwardLauncher(top_grad, 102 | mappingchannel, 103 | batch_size, 104 | num_rois, 105 | spatial_scale, 106 | num_channels, 107 | data_height, 108 | data_width, 109 | pooled_width, 110 | pooled_height, 111 | output_dim, 112 | bottom_grad, 113 | rois, 114 | stream); 115 | #endif 116 | return 1; 117 | } 118 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/cuda/PSROIPool_cuda.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 10 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 11 | i += blockDim.x * gridDim.x) 12 | 13 | 14 | __global__ void PSROIPoolForward( 15 | const int nthreads, // (B*K) * 10 * 7 * 7 16 | const float* __restrict__ bottom_data, // (B, 490, H, W) 17 | const float spatial_scale, // 1./16. 
18 | const int height, // H 19 | const int width, // W 20 | const int channels, // 490 21 | const int pooled_height, // 7 22 | const int pooled_width, // 7 23 | const int group_size, // 7 24 | const int output_dim, // 10 25 | const float* __restrict__ bottom_rois, // (B*K, 5) 26 | float* __restrict__ top_data, // (B*K, 10, 7, 7) 27 | int* __restrict__ mapping_channel // (B*K, 10, 7, 7) 28 | ) { 29 | CUDA_1D_KERNEL_LOOP(index, nthreads) { 30 | /* (n, ctop, ph, pw) is an element in the pooled output. 31 | * Whole size is up to (B*K, 10, 7, 7), where 32 | * n is up to B*K, e.g. K = 128, 33 | * ctop is up to 10, 34 | * ph is up to 7 35 | * pw is up to 7 36 | */ 37 | int pw = index % pooled_width; 38 | int ph = (index / pooled_width) % pooled_height; 39 | int ctop = (index / pooled_width / pooled_height) % output_dim; 40 | int n = index / pooled_width / pooled_height / output_dim; 41 | 42 | bottom_rois += n * 5; 43 | int roi_batch_ind = bottom_rois[0]; 44 | float roi_start_w = static_cast(round(bottom_rois[1])) * spatial_scale; 45 | float roi_start_h = static_cast(round(bottom_rois[2])) * spatial_scale; 46 | float roi_end_w = static_cast(round(bottom_rois[3]) + 1.) * spatial_scale; 47 | float roi_end_h = static_cast(round(bottom_rois[4]) + 1.) * spatial_scale; 48 | 49 | // Force malformed ROIs to be 1x1 50 | float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 51 | float roi_height = max(roi_end_h - roi_start_h, 0.1); 52 | 53 | float bin_size_h = (float)(roi_height) / (float)(pooled_height); 54 | float bin_size_w = (float)(roi_width) / (float)(pooled_width); 55 | 56 | int hstart = floor(static_cast(ph) * bin_size_h + roi_start_h); 57 | int wstart = floor(static_cast(pw)* bin_size_w + roi_start_w); 58 | int hend = ceil(static_cast(ph + 1) * bin_size_h + roi_start_h); 59 | int wend = ceil(static_cast(pw + 1) * bin_size_w + roi_start_w); 60 | 61 | // Add roi offsets and clip to input boundaries 62 | hstart = min(max(hstart, 0), height); 63 | hend = min(max(hend, 0), height); 64 | wstart = min(max(wstart, 0), width); 65 | wend = min(max(wend, 0), width); 66 | bool is_empty = (hend <= hstart) || (wend <= wstart); 67 | 68 | int gw = pw; 69 | int gh = ph; 70 | int c = (ctop * group_size + gh) * group_size + gw; 71 | 72 | bottom_data += (roi_batch_ind * channels + c) * height * width; 73 | float out_sum = 0; 74 | for (int h = hstart; h < hend; ++h) { 75 | for (int w = wstart; w < wend; ++w) { 76 | int bottom_index = h * width + w; 77 | out_sum += bottom_data[bottom_index]; 78 | } 79 | } 80 | float bin_area = (hend - hstart) * (wend - wstart); 81 | top_data[index] = is_empty ? 0. : out_sum / bin_area; 82 | mapping_channel[index] = c; 83 | } 84 | } 85 | 86 | 87 | int PSROIPoolForwardLauncher( 88 | at::Tensor bottom_data, // (B, 490, H, W) 89 | const float spatial_scale, // 1./16. 
90 | const int num_rois, // B*K, K = 128 91 | const int height, // H 92 | const int width, // W 93 | const int channels, // 490 94 | const int pooled_height, // 7 95 | const int pooled_width, // 7 96 | at::Tensor bottom_rois, // (B*K, 5) 97 | const int group_size, // 7 98 | const int output_dim, // 10 99 | at::Tensor top_data, // (B*K, 10, 7, 7) 100 | at::Tensor mapping_channel, // (B*K, 10, 7, 7) 101 | cudaStream_t stream 102 | ) { 103 | 104 | const int kThreadsPerBlock = 1024; 105 | const int output_size = output_dim * pooled_height * pooled_width * num_rois; 106 | 107 | PSROIPoolForward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 108 | output_size, 109 | bottom_data.data(), 110 | spatial_scale, 111 | height, 112 | width, 113 | channels, 114 | pooled_height, 115 | pooled_width, 116 | group_size, 117 | output_dim, 118 | bottom_rois.data(), 119 | top_data.data(), 120 | mapping_channel.data()); 121 | 122 | cudaError_t err = cudaGetLastError(); 123 | if(cudaSuccess != err) { 124 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 125 | exit( -1 ); 126 | } 127 | return 1; 128 | } 129 | 130 | 131 | __global__ void PSROIPoolBackward(const int nthreads, const float* __restrict__ top_diff, 132 | const int* __restrict__ mapping_channel, const int num_rois, const float spatial_scale, 133 | const int height, const int width, const int channels, 134 | const int pooled_height, const int pooled_width, const int output_dim, float* __restrict__ bottom_diff, 135 | const float* __restrict__ bottom_rois) { 136 | CUDA_1D_KERNEL_LOOP(index, nthreads) 137 | { 138 | 139 | int pw = index % pooled_width; 140 | int ph = (index / pooled_width) % pooled_height; 141 | int n = index / pooled_width / pooled_height / output_dim; 142 | 143 | // [start, end) interval for spatial sampling 144 | bottom_rois += n * 5; 145 | int roi_batch_ind = bottom_rois[0]; 146 | float roi_start_w = 147 | static_cast(round(bottom_rois[1])) * spatial_scale; 148 | float roi_start_h = 149 | static_cast(round(bottom_rois[2])) * spatial_scale; 150 | float roi_end_w = 151 | static_cast(round(bottom_rois[3]) + 1.) * spatial_scale; 152 | float roi_end_h = 153 | static_cast(round(bottom_rois[4]) + 1.) * spatial_scale; 154 | 155 | // Force too small ROIs to be 1x1 156 | float roi_width = max(roi_end_w - roi_start_w, 0.1); // avoid 0 157 | float roi_height = max(roi_end_h - roi_start_h, 0.1); 158 | 159 | // Compute w and h at bottom 160 | float bin_size_h = roi_height / static_cast(pooled_height); 161 | float bin_size_w = roi_width / static_cast(pooled_width); 162 | 163 | int hstart = floor(static_cast(ph)* bin_size_h 164 | + roi_start_h); 165 | int wstart = floor(static_cast(pw)* bin_size_w 166 | + roi_start_w); 167 | int hend = ceil(static_cast(ph + 1) * bin_size_h 168 | + roi_start_h); 169 | int wend = ceil(static_cast(pw + 1) * bin_size_w 170 | + roi_start_w); 171 | // Add roi offsets and clip to input boundaries 172 | hstart = min(max(hstart, 0), height); 173 | hend = min(max(hend, 0), height); 174 | wstart = min(max(wstart, 0), width); 175 | wend = min(max(wend, 0), width); 176 | bool is_empty = (hend <= hstart) || (wend <= wstart); 177 | 178 | // Compute c at bottom 179 | int c = mapping_channel[index]; 180 | float* offset_bottom_diff = bottom_diff + 181 | (roi_batch_ind * channels + c) * height * width; 182 | float bin_area = (hend - hstart)*(wend - wstart); 183 | float diff_val = is_empty ? 0. 
: top_diff[index] / bin_area; 184 | for (int h = hstart; h < hend; ++h) { 185 | for (int w = wstart; w < wend; ++w) { 186 | int bottom_index = h*width + w; 187 | //caffe_gpu_atomic_add(diff_val, offset_bottom_diff + bottom_index); 188 | atomicAdd(offset_bottom_diff + bottom_index, diff_val); 189 | } 190 | } 191 | } 192 | } 193 | 194 | int PSROIPoolBackwardLauncher(at::Tensor top_diff, 195 | at::Tensor mapping_channel, 196 | const int batch_size, 197 | const int num_rois, 198 | const float spatial_scale, 199 | const int channels, 200 | const int height, 201 | const int width, 202 | const int pooled_width, 203 | const int pooled_height, 204 | const int output_dim, 205 | at::Tensor bottom_diff, 206 | at::Tensor bottom_rois, 207 | cudaStream_t stream) { 208 | 209 | const int kThreadsPerBlock = 1024; 210 | //const int output_size = output_dim * height * width * channels; 211 | const int output_size = output_dim * pooled_height * pooled_width * num_rois; 212 | 213 | PSROIPoolBackward<<<(output_size + kThreadsPerBlock - 1) / kThreadsPerBlock, kThreadsPerBlock, 0, stream>>>( 214 | output_size, 215 | top_diff.data(), 216 | mapping_channel.data(), 217 | num_rois, 218 | spatial_scale, 219 | height, 220 | width, 221 | channels, 222 | pooled_height, 223 | pooled_width, 224 | output_dim, 225 | bottom_diff.data(), 226 | bottom_rois.data()); 227 | 228 | cudaError_t err = cudaGetLastError(); 229 | if(cudaSuccess != err) { 230 | fprintf( stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString( err ) ); 231 | exit( -1 ); 232 | } 233 | 234 | return 1; 235 | } 236 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | #include 5 | 6 | 7 | int PSROIPoolForwardLauncher(at::Tensor bottom_data, 8 | const float spatial_scale, 9 | const int num_rois, 10 | const int height, 11 | const int width, 12 | const int channels, 13 | const int pooled_height, 14 | const int pooled_width, 15 | at::Tensor bottom_rois, 16 | const int group_size, 17 | const int output_dim, 18 | at::Tensor top_data, 19 | at::Tensor mapping_channel, 20 | cudaStream_t stream); 21 | 22 | 23 | int PSROIPoolBackwardLauncher(at::Tensor top_diff, 24 | at::Tensor mapping_channel, 25 | const int batch_size, 26 | const int num_rois, 27 | const float spatial_scale, 28 | const int channels, 29 | const int height, 30 | const int width, 31 | const int pooled_width, 32 | const int pooled_height, 33 | const int output_dim, 34 | at::Tensor bottom_diff, 35 | at::Tensor bottom_rois, 36 | cudaStream_t stream); 37 | 38 | 39 | int PSROIAlignForwardLaucher(at::Tensor bottom_data, 40 | at::Tensor bottom_rois, 41 | at::Tensor top_data, 42 | at::Tensor argmax_data, 43 | float spatial_scale, 44 | int group_size, 45 | int sampling_ratio, 46 | cudaStream_t stream); 47 | 48 | 49 | int PSROIAlignBackwardLaucher(at::Tensor top_diff, 50 | at::Tensor argmax_data, 51 | at::Tensor bottom_rois, 52 | at::Tensor bottom_diff, 53 | float spatial_scale, 54 | int group_size, 55 | int sampling_ratio, 56 | cudaStream_t stream); -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "PSROIAlign.h" 3 | #include "PSROIPool.h" 4 | 5 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 6 | m.def("ps_roi_align_forward", &PSROIAlign_forward, "PSROIAlign_forward"); 7 | m.def("ps_roi_align_backward", &PSROIAlign_backward, "PSROIAlign_backward"); 8 | m.def("ps_roi_pool_forward", &PSROIPool_forward, "PSROIPool_forward"); 9 | m.def("ps_roi_pool_backward", &PSROIPool_backward, "PSROIPool_backward"); 10 | } 11 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/example.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from roi_layers import PSROIAlign 4 | 5 | # 6 | # class PSROIPoolExample(nn.Module): 7 | # def __init__(self, 8 | # pooled_height=7, 9 | # pooled_width=7, 10 | # spatial_scale=1./16., 11 | # group_size=7, 12 | # output_dim=10): 13 | # 14 | # super(PSROIPoolExample, self).__init__() 15 | # self.psroipool = PSROIPool(pooled_height=pooled_height, 16 | # pooled_width=pooled_width, 17 | # spatial_scale=spatial_scale, 18 | # group_size=group_size, 19 | # output_dim=output_dim) 20 | # 21 | # def forward(self, feat, rois): 22 | # print("PSROIPool:") 23 | # print(f"feature.shape:\t{feat.shape}") 24 | # print(f"rois.shape:\t{rois.shape}") 25 | # pooled_feat = self.psroipool(feat, rois) 26 | # print(f"pooled feature: {pooled_feat.shape}\n{pooled_feat}\n") 27 | # return pooled_feat 28 | 29 | 30 | class PSROIAlignExample(nn.Module): 31 | def __init__(self, 32 | spatial_scale=1./16., 33 | roi_size=7, 34 | sample_ratio=2, 35 | pooled_dim=10): 36 | 37 | super(PSROIAlignExample, self).__init__() 38 | self.psroialign = PSROIAlign(spatial_scale=spatial_scale, 39 | roi_size=roi_size, 40 | sampling_ratio=sample_ratio, 41 | pooled_dim=pooled_dim) 42 | 43 | def forward(self, feat, rois): 44 | print("PSROIAlign:") 45 | print(f"feature.shape:\t{feat.shape}") 46 | print(f"rois.shape:\t{rois.shape}") 47 | pooled_feat = self.psroialign(feat, rois) 48 | print(f"pooled feature: {pooled_feat.shape}\n{pooled_feat}\n") 49 | return pooled_feat 50 | 51 | 52 | if __name__ == '__main__': 53 | if not torch.cuda.is_available(): 54 | exit('Only works with cuda') 55 | 56 | # psroipool_example = PSROIPoolExample() 57 | psroialign_example = PSROIAlignExample() 58 | 59 | # feature map to be pooled 60 | batch_size = 4 61 | feat_height = 30 62 | feat_width = 40 63 | roi_size = 7 64 | oup_dim = 10 65 | 66 | feature = torch.randn((batch_size, 67 | roi_size * roi_size * oup_dim, 68 | feat_height, 69 | feat_width), 70 | requires_grad=True).cuda() 71 | 72 | # RoI: (batch_index, x1, y1, x2, y2) 73 | rois = torch.tensor([ 74 | [0, 1., 1., 5., 5.], 75 | [0, 3., 3., 9., 9.], 76 | [1, 5., 5., 10., 10.], 77 | [1, 7., 7., 12., 12.] 
78 | ]).cuda() 79 | 80 | # PSROIPool and PSROIAlign 81 | # psroipool_pooled_feat = psroipool_example(feature, rois) 82 | psroialign_pooled_feat = psroialign_example(feature, rois) 83 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/roi_layers/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .ps_roi_pool import ps_roi_pool, PSROIPool 3 | from .ps_roi_align import ps_roi_align, PSROIAlign 4 | 5 | __all__ = [ 6 | "ps_roi_pool", 7 | "PSROIPool", 8 | "ps_roi_align", 9 | "PSROIAlign" 10 | ] 11 | 12 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/roi_layers/ps_roi_align.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | import torch 5 | import sys 6 | sys.path.insert(0,"/mnt/data1/yanghuiyu/myself/object_detect/thundernetbylightheadrcnn/lib") 7 | from psroialign.PSROIAlign.model import _C 8 | 9 | 10 | class _PSROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, bottom_data, bottom_rois, spatial_scale, roi_size, sampling_ratio, pooled_dim): 13 | ctx.spatial_scale = spatial_scale # 1./16. 14 | ctx.roi_size = roi_size # 7 15 | ctx.sampling_ratio = sampling_ratio # 2 16 | ctx.pooled_dim = pooled_dim # 10 17 | ctx.feature_size = bottom_data.size() # (B, 490, H, W) 18 | num_rois = bottom_rois.size(0) # B*K 19 | # (B*K, 10, 7, 7) 20 | top_data = torch.zeros([num_rois, pooled_dim, roi_size, roi_size], dtype=torch.float32).to(bottom_data.device) 21 | # (B*K, 10, 7, 7) 22 | argmax_data = torch.zeros([num_rois, pooled_dim, roi_size, roi_size], dtype=torch.int32).to(bottom_data.device) 23 | if bottom_data.is_cuda: 24 | _C.ps_roi_align_forward(bottom_data, # (B, 490, H, W) 25 | bottom_rois, # (B*K, 5), e.g. K = 128 26 | top_data, # (B*K, 10, 7, 7) 27 | argmax_data, # (B*K, 10, 7, 7) 28 | spatial_scale, # 1./16. 29 | roi_size, # 7 30 | sampling_ratio # 2 31 | ) 32 | ctx.save_for_backward(bottom_rois, argmax_data) 33 | else: 34 | raise NotImplementedError 35 | 36 | return top_data 37 | 38 | @staticmethod 39 | @once_differentiable 40 | def backward(ctx, top_diff): 41 | spatial_scale = ctx.spatial_scale # 1./16. 42 | roi_size = ctx.roi_size # 7 43 | sampling_ratio = ctx.sampling_ratio # 2 44 | batch_size, channels, height, width = ctx.feature_size 45 | [bottom_rois, argmax_data] = ctx.saved_tensors 46 | bottom_diff = None 47 | if ctx.needs_input_grad[0]: 48 | bottom_diff = torch.zeros([batch_size, channels, height, width], dtype=torch.float32).to(top_diff.device) 49 | _C.ps_roi_align_backward(top_diff, # (B*K, 10, 7, 7) 50 | argmax_data, # (B*K, 10, 7, 7) 51 | bottom_rois, # (B*K, 10, 7, 7) 52 | bottom_diff, # (B, 490, H, W) 53 | spatial_scale, # 1./16. 
54 | roi_size, # 7 55 | sampling_ratio # 2 56 | ) 57 | 58 | return bottom_diff, None, None, None, None, None 59 | 60 | 61 | ps_roi_align = _PSROIAlign.apply 62 | 63 | 64 | class PSROIAlign(nn.Module): 65 | def __init__(self, spatial_scale, roi_size, sampling_ratio, pooled_dim): 66 | super(PSROIAlign, self).__init__() 67 | self.spatial_scale = spatial_scale 68 | self.roi_size = roi_size 69 | self.sampling_ratio = sampling_ratio 70 | self.pooled_dim = pooled_dim 71 | 72 | def forward(self, bottom_data, bottom_rois): 73 | return ps_roi_align(bottom_data, # (B, 490, H, W) 74 | bottom_rois, # (B*K, 5) 75 | self.spatial_scale, # 1./16. 76 | self.roi_size, # 7 77 | self.sampling_ratio, # 2 78 | self.pooled_dim # 10 79 | ) 80 | 81 | def __repr__(self): 82 | tmpstr = self.__class__.__name__ + "(" 83 | tmpstr += "spatial_scale=" + str(self.spatial_scale) 84 | tmpstr += ", roi_size=" + str(self.roi_size) 85 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 86 | tmpstr += ", pooled_dim=" + str(self.pooled_dim) 87 | tmpstr += ")" 88 | return tmpstr 89 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/model/roi_layers/ps_roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from psroialign.PSROIAlign.model import _C 6 | 7 | 8 | class _PSROIPool(Function): 9 | @staticmethod 10 | def forward(ctx, features, rois, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 11 | ctx.pooled_height = int(pooled_height) 12 | ctx.pooled_width = int(pooled_width) 13 | ctx.spatial_scale = float(spatial_scale) 14 | ctx.group_size = int(group_size) 15 | ctx.output_dim = int(output_dim) 16 | num_rois = rois.size()[0] 17 | output = torch.zeros(num_rois, ctx.output_dim, ctx.pooled_height, ctx.pooled_width).to(features.device) 18 | mappingchannel = torch.IntTensor(num_rois, ctx.output_dim, ctx.pooled_height, ctx.pooled_width).zero_().to(features.device) 19 | _C.ps_roi_pool_forward(ctx.pooled_height, 20 | ctx.pooled_width, 21 | ctx.spatial_scale, 22 | ctx.group_size, 23 | ctx.output_dim, 24 | features, 25 | rois, 26 | output, 27 | mappingchannel) 28 | ctx.save_for_backward(rois, mappingchannel) 29 | # ctx.output = output 30 | # ctx.mappingchannel = mappingchannel 31 | # ctx.rois = rois 32 | ctx.feature_size = features.size() 33 | 34 | return output 35 | 36 | @staticmethod 37 | @once_differentiable 38 | def backward(ctx, grad_output): 39 | assert(ctx.feature_size is not None and grad_output.is_cuda) 40 | batch_size, num_channels, data_height, data_width = ctx.feature_size 41 | [rois, mappingchannel] = ctx.saved_tensors 42 | grad_input = None 43 | if ctx.needs_input_grad[0]: 44 | grad_input = torch.zeros(batch_size, num_channels, data_height, data_width).to(grad_output.device) 45 | _C.ps_roi_pool_backward(ctx.pooled_height, 46 | ctx.pooled_width, 47 | ctx.spatial_scale, 48 | ctx.output_dim, 49 | grad_output, 50 | rois, 51 | grad_input, 52 | mappingchannel) 53 | return grad_input, None, None, None, None, None, None 54 | 55 | 56 | ps_roi_pool = _PSROIPool.apply 57 | 58 | 59 | class PSROIPool(nn.Module): 60 | def __init__(self, pooled_height, pooled_width, spatial_scale, group_size, output_dim): 61 | super(PSROIPool, self).__init__() 62 | 63 | self.pooled_width = int(pooled_width) 64 | self.pooled_height = int(pooled_height) 65 | self.spatial_scale = 
float(spatial_scale) 66 | self.group_size = int(group_size) 67 | self.output_dim = int(output_dim) 68 | 69 | def forward(self, features, rois): 70 | return ps_roi_pool(features, 71 | rois, 72 | self.pooled_height, 73 | self.pooled_width, 74 | self.spatial_scale, 75 | self.group_size, 76 | self.output_dim) 77 | 78 | def __repr__(self): 79 | tmpstr = self.__class__.__name__ + "(" 80 | tmpstr += "pooled_width=" + str(self.pooled_width) 81 | tmpstr += ", pooled_height=" + str(self.pooled_height) 82 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 83 | tmpstr += ", group_size=" + str(self.group_size) 84 | tmpstr += ", output_dim=" + str(self.output_dim) 85 | tmpstr += ")" 86 | return tmpstr 87 | -------------------------------------------------------------------------------- /lib/psroialign/PSROIAlign/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | import platform 7 | 8 | import torch 9 | from setuptools import find_packages 10 | from setuptools import setup 11 | from torch.utils.cpp_extension import CUDA_HOME 12 | from torch.utils.cpp_extension import CppExtension 13 | from torch.utils.cpp_extension import CUDAExtension 14 | 15 | requirements = ["torch", "torchvision"] 16 | 17 | 18 | # 19 | # if torch.cuda.is_available(): 20 | # print('Including CUDA code.') 21 | # sources += ['src/psroi_pooling_cuda.c'] 22 | # headers += ['src/psroi_pooling_cuda.h'] 23 | # defines += [('WITH_CUDA', None)] 24 | # with_cuda = True 25 | # 26 | # this_file = os.path.dirname(os.path.realpath(__file__)) 27 | # print(this_file) 28 | # extra_objects = ['src/cuda/psroi_pooling.cu.o'] 29 | # extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | # 31 | # ffi = create_extension( 32 | # '_ext.psroi_pooling', 33 | # headers=headers, 34 | # sources=sources, 35 | # define_macros=defines, 36 | # relative_to=__file__, 37 | # with_cuda=with_cuda, 38 | # extra_objects=extra_objects 39 | # ) 40 | # 41 | # if __name__ == '__main__': 42 | # ffi.build() 43 | 44 | 45 | def get_extensions(): 46 | this_dir = os.path.dirname(os.path.abspath(__file__)) 47 | extensions_dir = os.path.join(this_dir, "model", "csrc") 48 | 49 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 50 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 51 | 52 | sources = main_file 53 | extension = CppExtension 54 | cxx_flags = [] 55 | if platform.system() == 'Darwin': 56 | cxx_flags = ["-g", "-stdlib=libc++", "-std=c++11", "-mmacosx-version-min=10.9"] 57 | platform.release() 58 | 59 | extra_compile_args = {"cxx": cxx_flags} 60 | define_macros = [] 61 | 62 | if torch.cuda.is_available() and CUDA_HOME is not None: 63 | extension = CUDAExtension 64 | sources += source_cuda 65 | define_macros += [("WITH_CUDA", None)] 66 | extra_compile_args["nvcc"] = [ 67 | "-DCUDA_HAS_FP16=1", 68 | "-D__CUDA_NO_HALF_OPERATORS__", 69 | "-D__CUDA_NO_HALF_CONVERSIONS__", 70 | "-D__CUDA_NO_HALF2_OPERATORS__", 71 | ] 72 | 73 | sources = [os.path.join(extensions_dir, s) for s in sources] 74 | 75 | include_dirs = [extensions_dir] 76 | 77 | ext_modules = [ 78 | extension( 79 | "model._C", 80 | sources=sources, 81 | include_dirs=include_dirs, 82 | define_macros=define_macros, 83 | extra_compile_args=extra_compile_args, 84 | ) 85 | ] 86 | 87 | return ext_modules 88 | 89 | 90 | setup( 91 | name="psroialign", 92 | version="1.0.0", 93 | 
description="psroialign with pytorch 1.x", 94 | author="Do Lin", 95 | author_email="mcdooooo@gmail.com", 96 | license="MIT", 97 | packages=find_packages(exclude=("configs", "tests",)), 98 | # install_requires=requirements, 99 | ext_modules=get_extensions(), 100 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 101 | ) 102 | -------------------------------------------------------------------------------- /lib/psroialign/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/lib/psroialign/__init__.py -------------------------------------------------------------------------------- /lib/psroialign/pollers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | 6 | from versions.psroialign.psroialign import PSROIAlignhandle , PSROIPoolhandle 7 | from config import Configs 8 | 9 | CEM_FILTER = Configs.get("CEM_FILTER") 10 | spatial_scale = Configs.get("spatial_scale") 11 | 12 | 13 | 14 | 15 | 16 | class PsRoIAlign(nn.Module): 17 | """ 18 | Multi-scale RoIAlign pooling, which is useful for detection with or without FPN. 19 | 20 | It infers the scale of the pooling via the heuristics present in the FPN paper. 21 | 22 | Arguments: 23 | featmap_names (List[str]): the names of the feature maps that will be used 24 | for the pooling. 25 | output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region 26 | sampling_ratio (int): sampling ratio for ROIAlign 27 | 28 | Examples:: 29 | 30 | """ 31 | 32 | def __init__(self, output_size, sampling_ratio): 33 | super(PsRoIAlign, self).__init__() 34 | if isinstance(output_size, int): 35 | output_size = (output_size, output_size) 36 | 37 | self.sampling_ratio = sampling_ratio 38 | self.output_size = tuple(output_size) 39 | self.scales = spatial_scale 40 | 41 | 42 | def convert_to_roi_format(self, boxes): 43 | concat_boxes = torch.cat(boxes, dim=0) 44 | device, dtype = concat_boxes.device, concat_boxes.dtype 45 | ids = torch.cat( 46 | [ 47 | torch.full((len(b), 1), i, dtype=dtype, device=device) 48 | for i, b in enumerate(boxes) 49 | ], 50 | dim=0, 51 | ) 52 | rois = torch.cat([ids, concat_boxes], dim=1) 53 | return rois 54 | 55 | 56 | def forward(self, x, boxes, image_shapes): 57 | """ 58 | Arguments: 59 | x (OrderedDict[Tensor]): feature maps for each level. They are assumed to have 60 | all the same number of channels, but they can have different sizes. 61 | boxes (List[Tensor[N, 4]]): boxes to be used to perform the pooling operation, in 62 | (x1, y1, x2, y2) format and in the image reference size, not the feature map 63 | reference. 64 | image_shapes (List[Tuple[height, width]]): the sizes of each image before they 65 | have been fed to a CNN to obtain feature maps. This allows us to infer the 66 | scale factor for each one of the levels to be pooled. 
67 | Returns: 68 | result (Tensor) 69 | """ 70 | 71 | 72 | rois = self.convert_to_roi_format(boxes) 73 | 74 | 75 | roi_align = PSROIAlignhandle(sampling_ratio=self.sampling_ratio, spatial_scale=self.scales, roi_size=7, 76 | pooled_dim=CEM_FILTER//(7*7)) 77 | 78 | 79 | return roi_align( 80 | x, rois 81 | ) 82 | 83 | 84 | -------------------------------------------------------------------------------- /lib/psroialign/psroialign.py: -------------------------------------------------------------------------------- 1 | from psroialign.PSROIAlign.model.roi_layers import PSROIAlign,PSROIPool 2 | from torch import nn 3 | 4 | class PSROIAlignhandle(nn.Module): 5 | def __init__(self, 6 | spatial_scale=1./16., 7 | roi_size=7, 8 | sampling_ratio=2, 9 | pooled_dim=5): 10 | 11 | super(PSROIAlignhandle, self).__init__() 12 | self.psroialign = PSROIAlign(spatial_scale=spatial_scale, 13 | roi_size=roi_size, 14 | sampling_ratio=sampling_ratio, 15 | pooled_dim=pooled_dim) 16 | 17 | def forward(self, feat, rois): 18 | # print(feat.shape) 19 | pooled_feat = self.psroialign(feat, rois) 20 | 21 | return pooled_feat 22 | 23 | 24 | 25 | class PSROIPoolhandle(nn.Module): 26 | def __init__(self, 27 | pooled_height=7, 28 | pooled_width=7, 29 | spatial_scale=1./16., 30 | group_size=7, 31 | output_dim=5): 32 | 33 | super(PSROIPoolhandle, self).__init__() 34 | self.psroipool = PSROIPool(pooled_height=pooled_height, 35 | pooled_width=pooled_width, 36 | spatial_scale=spatial_scale, 37 | group_size=group_size, 38 | output_dim=output_dim) 39 | 40 | def forward(self, feat, rois): 41 | pooled_feat = self.psroipool(feat, rois) 42 | return pooled_feat -------------------------------------------------------------------------------- /lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | -------------------------------------------------------------------------------- /lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick, Xinlei Chen, Lichao Wang 6 | # -------------------------------------------------------- 7 | """Compute minibatch blobs for training a Fast R-CNN network.""" 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | import numpy.random as npr 14 | # from scipy.misc import imread 15 | import imageio # ImportError: cannot import name 'imread' from 'scipy.misc' 16 | from model.utils.config import cfg 17 | from model.utils.blob import prep_im_for_blob, im_list_to_blob 18 | import pdb 19 | 20 | 21 | def get_minibatch(roidb, num_classes): 22 | """Given a roidb, construct a minibatch sampled from it.""" 23 | num_images = len(roidb) 24 | 25 | # Sample random scales to use for each image in this batch 26 | random_scale_inds = npr.randint(0, 27 | high=len(cfg.TRAIN.SCALES), 28 | size=num_images) 29 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 30 | 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ 31 | format(num_images, cfg.TRAIN.BATCH_SIZE) 32 | 33 | # Get the input image blob, formatted for caffe 34 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 35 | 36 | blobs = {'data': im_blob} 37 | 38 | assert len(im_scales) == 1, "Single batch only" 39 | assert len(roidb) == 1, "Single batch only" 40 | 41 | # gt boxes: (x1, y1, x2, y2, cls) 42 | if cfg.TRAIN.USE_ALL_GT: 43 | # Include all ground truth boxes 44 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 45 | else: 46 | # For the COCO ground truth boxes, exclude the ones that are ''iscrowd'' 47 | gt_inds = np.where( 48 | (roidb[0]['gt_classes'] != 0) 49 | & np.all(roidb[0]['gt_overlaps'].toarray() > -1.0, axis=1))[0] 50 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 51 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 52 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 53 | blobs['gt_boxes'] = gt_boxes 54 | blobs['im_info'] = np.array( 55 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], dtype=np.float32) 56 | 57 | blobs['img_id'] = roidb[0]['img_id'] 58 | 59 | return blobs 60 | 61 | 62 | def _get_image_blob(roidb, scale_inds): 63 | """Builds an input blob from the images in the roidb at the specified 64 | scales. 65 | """ 66 | num_images = len(roidb) 67 | 68 | processed_ims = [] 69 | im_scales = [] 70 | for i in range(num_images): 71 | #im = cv2.imread(roidb[i]['image']) 72 | # im = imread(roidb[i]['image']) 73 | im = imageio.imread(roidb[i]['image']) # ImportError: cannot import name 'imread' from 'scipy.misc' 74 | 75 | if len(im.shape) == 2: 76 | im = im[:, :, np.newaxis] 77 | im = np.concatenate((im, im, im), axis=2) 78 | # flip the channel, since the original one using cv2 79 | # rgb -> bgr 80 | im = im[:, :, ::-1] 81 | 82 | if roidb[i]['flipped']: 83 | im = im[:, ::-1, :] 84 | # if roidb[i]['ver_flipped']: 85 | # im = im[::-1, :, :] 86 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 87 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 88 | cfg.TRAIN.MAX_SIZE) 89 | im_scales.append(im_scale) 90 | processed_ims.append(im) 91 | 92 | # Create a blob to hold the input images 93 | blob = im_list_to_blob(processed_ims) 94 | 95 | return blob, im_scales 96 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roibatchLoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import sys 4 | import torch 5 | import torch.utils.data as data 6 | import torchvision.transforms as transforms 7 | import cv2 8 | import numpy as np 9 | from roi_data_layer.augmentation import SSDAugmentation 10 | import model.utils.config as config 11 | from roi_data_layer.utils import BaseTransform 12 | 13 | cfg = config.cfg 14 | 15 | class Detection(data.Dataset): 16 | """`MS Coco Detection `_ Dataset. 17 | Args: 18 | root (string): Root directory where images are downloaded to. 19 | set_name (string): Name of the specific set of COCO images. 20 | transform (callable, optional): A function/transform that augments the 21 | raw images` 22 | target_transform (callable, optional): A function/transform that takes 23 | in the target (bbox) and transforms it. 
24 | """ 25 | 26 | def __init__(self, roidb, num_classes, training=True,transform=None): 27 | self._roidb = roidb 28 | self.training = training 29 | self.transform = transform 30 | self.num_classes = num_classes 31 | self.max_num_box = cfg.MAX_NUM_GT_BOXES 32 | 33 | 34 | 35 | def __len__(self): 36 | return len(self._roidb) 37 | 38 | def __getitem__(self, index): 39 | """ 40 | Args: 41 | index (int): Index 42 | Returns: 43 | tuple: Tuple (image, target, height, width). 44 | target is the object returned by ``coco.loadAnns``. 45 | """ 46 | 47 | if self.training: 48 | index , size = index 49 | self.transform = SSDAugmentation(size, cfg.PIXEL_MEANS) 50 | else: 51 | size = cfg.TEST.SIZE 52 | 53 | roidb = self._roidb[index] 54 | im = cv2.imread(roidb['image']) 55 | if len(im.shape) == 2: 56 | im = im[:, :, np.newaxis] 57 | im = np.concatenate((im, im, im), axis=2) 58 | # flip the channel, since the original one using cv2 59 | 60 | if roidb['flipped']: 61 | im = im[:, ::-1, :] 62 | height, width = im.shape[0], im.shape[1] 63 | 64 | boxes = roidb['boxes'] 65 | gt_classes = roidb['gt_classes'] 66 | 67 | 68 | 69 | boxes_all = [] 70 | for b,class_gt in zip(boxes,gt_classes): 71 | boxes_all.append([b[0]/width,b[1]/height,b[2]/width,b[3]/height,class_gt]) 72 | 73 | 74 | 75 | target = np.array(boxes_all) 76 | 77 | target_re = np.zeros([self.max_num_box,5]) 78 | 79 | if self.transform is not None: 80 | 81 | img, boxes, labels = self.transform(im, target[:,:4], 82 | target[:,4]) 83 | 84 | img = img.transpose(2, 0, 1) 85 | number_box = 0 86 | for box in boxes: 87 | if number_box>=20: 88 | break 89 | target_re[number_box] = np.array([box[0]*size ,box[1]*size,box[2]*size,box[3]*size,labels[number_box]]) 90 | 91 | number_box+=1 92 | # target = np.hstack((boxes, np.expand_dims(labels, axis=1))) 93 | # img_id, img, gt_boxes_padding, img_info, num_gt_boxes 94 | 95 | data = torch.as_tensor(img, dtype=torch.float32) 96 | im_info = torch.from_numpy(np.array([img.shape[1], img.shape[2], size/width ,size/height ])) 97 | im_info = im_info.view(4) 98 | gt_boxes = torch.as_tensor(target_re, dtype=torch.int16) 99 | 100 | 101 | if self.training: 102 | return data, im_info, gt_boxes, number_box 103 | else: 104 | 105 | # im_info = np.array([[im.shape[1], im.shape[2], ratio]], dtype=np.float32) 106 | im_info = np.array([[img.shape[1], img.shape[2], size/width ,size/height]], dtype=np.float32) 107 | im_info = torch.as_tensor(im_info, dtype=torch.float32) 108 | im_info = im_info.view(4) 109 | gt_boxes = torch.FloatTensor([1, 1, 1, 1, 1]) 110 | 111 | return data, im_info, gt_boxes, number_box 112 | 113 | 114 | -------------------------------------------------------------------------------- /lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | """Transform a roidb into a trainable roidb by adding a bunch of metadata.""" 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | import datasets 7 | import numpy as np 8 | from model.utils.config import cfg 9 | from datasets.factory import get_imdb 10 | import PIL 11 | import pdb 12 | 13 | 14 | def prepare_roidb(imdb): 15 | """Enrich the imdb's roidb by adding some derived quantities that 16 | are useful for training. This function precomputes the maximum 17 | overlap, taken over ground-truth boxes, between each ROI and 18 | each ground-truth box. The class with maximum overlap is also 19 | recorded. 
20 | """ 21 | 22 | roidb = imdb.roidb 23 | if not (imdb.name.startswith('coco')): 24 | sizes = [ 25 | PIL.Image.open(imdb.image_path_at(i)).size 26 | for i in range(imdb.num_images) 27 | ] 28 | 29 | for i in range(len(imdb.image_index)): 30 | roidb[i]['img_id'] = imdb.image_id_at(i) 31 | roidb[i]['image'] = imdb.image_path_at(i) 32 | if not (imdb.name.startswith('coco')): 33 | roidb[i]['width'] = sizes[i][0] 34 | roidb[i]['height'] = sizes[i][1] 35 | # need gt_overlaps as a dense array for argmax 36 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 37 | # max overlap with gt over classes (columns) 38 | max_overlaps = gt_overlaps.max(axis=1) 39 | # gt class that had the max overlap 40 | max_classes = gt_overlaps.argmax(axis=1) 41 | roidb[i]['max_classes'] = max_classes 42 | roidb[i]['max_overlaps'] = max_overlaps 43 | # sanity checks 44 | # max overlap of 0 => class should be zero (background) 45 | zero_inds = np.where(max_overlaps == 0)[0] 46 | assert all(max_classes[zero_inds] == 0) 47 | # max overlap > 0 => class should not be zero (must be a fg class) 48 | nonzero_inds = np.where(max_overlaps > 0)[0] 49 | assert all(max_classes[nonzero_inds] != 0) 50 | 51 | 52 | def rank_roidb_ratio(roidb): 53 | # rank roidb based on the ratio between width and height. 54 | ratio_large = 2.0 # largest ratio to preserve. 55 | ratio_small = 0.5 # smallest ratio to preserve. 56 | 57 | ratio_list = [] 58 | for i in range(len(roidb)): 59 | width = roidb[i]['width'] 60 | height = roidb[i]['height'] 61 | ratio = width / float(height) 62 | 63 | if cfg.TRAIN.ASPECT_CROPPING: 64 | if ratio > ratio_large: 65 | roidb[i]['need_crop'] = 1 66 | ratio = ratio_large 67 | elif ratio < ratio_small: 68 | roidb[i]['need_crop'] = 1 69 | ratio = ratio_small 70 | else: 71 | roidb[i]['need_crop'] = 0 72 | else: 73 | roidb[i]['need_crop'] = 0 74 | 75 | ratio_list.append(ratio) 76 | 77 | ratio_list = np.array(ratio_list) 78 | ratio_index = np.argsort(ratio_list) 79 | return ratio_list[ratio_index], ratio_index 80 | 81 | 82 | def filter_roidb(roidb): 83 | # filter the image without bounding box. 84 | print('before filtering, there are %d images...' % (len(roidb))) 85 | i = 0 86 | while i < len(roidb): 87 | if len(roidb[i]['boxes']) == 0: 88 | del roidb[i] 89 | i -= 1 90 | i += 1 91 | 92 | print('after filtering, there are %d images...' 
% (len(roidb))) 93 | return roidb 94 | 95 | 96 | def combined_roidb(imdb_names, training=True): 97 | """ 98 | Combine multiple roidbs 99 | """ 100 | 101 | def get_training_roidb(imdb): 102 | """Returns a roidb (Region of Interest database) for use in training.""" 103 | if cfg.TRAIN.USE_FLIPPED: 104 | print('Appending horizontally-flipped training examples...') 105 | imdb.append_flipped_images() 106 | print('done') 107 | 108 | if cfg.TRAIN.USE_VER_FLIPPED: 109 | print('Appending vertically-flipped training examples...') 110 | imdb.append_vertical_flipped_images() 111 | print('done') 112 | 113 | print('Preparing training data...') 114 | 115 | prepare_roidb(imdb) 116 | # ratio_index = rank_roidb_ratio(imdb) 117 | print('done') 118 | 119 | return imdb.roidb 120 | 121 | def get_roidb(imdb_name): 122 | imdb = get_imdb(imdb_name) 123 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 124 | imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD) 125 | print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD)) 126 | roidb = get_training_roidb(imdb) 127 | return roidb 128 | 129 | roidbs = [get_roidb(s) for s in imdb_names.split('+')] 130 | roidb = roidbs[0] 131 | 132 | if len(roidbs) > 1: 133 | for r in roidbs[1:]: 134 | roidb.extend(r) 135 | tmp = get_imdb(imdb_names.split('+')[1]) 136 | imdb = datasets.imdb.imdb(imdb_names, tmp.classes) 137 | else: 138 | imdb = get_imdb(imdb_names) 139 | 140 | if training: 141 | roidb = filter_roidb(roidb) 142 | return imdb, roidb 143 | else: 144 | ratio_list, ratio_index = rank_roidb_ratio(roidb) 145 | 146 | return imdb, roidb, ratio_list, ratio_index 147 | 148 | -------------------------------------------------------------------------------- /lib/roi_data_layer/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | import cv2 7 | import random 8 | import torch 9 | 10 | def flip(img): 11 | return img[:, :, ::-1].copy() 12 | 13 | 14 | 15 | 16 | 17 | def base_transform(image, size, mean): 18 | x = cv2.resize(image, (size, size)).astype(np.float32) 19 | x -= mean 20 | x = x.astype(np.float32) 21 | return x 22 | 23 | 24 | class BaseTransform: 25 | def __init__(self, size, mean): 26 | self.size = size 27 | self.mean = np.array(mean, dtype=np.float32) 28 | 29 | def __call__(self, image, boxes=None, labels=None): 30 | return base_transform(image, self.size, self.mean), boxes, labels 31 | 32 | -------------------------------------------------------------------------------- /lib/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #!/usr/bin/env python
3 | 
4 | import glob
5 | import os
6 | 
7 | import torch
8 | from setuptools import find_packages
9 | from setuptools import setup
10 | from torch.utils.cpp_extension import CUDA_HOME
11 | from torch.utils.cpp_extension import CppExtension
12 | from torch.utils.cpp_extension import CUDAExtension
13 | 
14 | requirements = ["torch", "torchvision"]
15 | 
16 | 
17 | def get_extensions():
18 |     this_dir = os.path.dirname(os.path.abspath(__file__))
19 |     extensions_dir = os.path.join(this_dir, "model", "csrc")
20 | 
21 |     main_file = glob.glob(os.path.join(extensions_dir, "*.cpp"))
22 |     source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp"))
23 |     source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu"))
24 | 
25 |     sources = main_file + source_cpu
26 |     extension = CppExtension
27 | 
28 |     extra_compile_args = {"cxx": []}
29 |     define_macros = []
30 | 
31 |     if torch.cuda.is_available() and CUDA_HOME is not None:
32 |         extension = CUDAExtension
33 |         sources += source_cuda
34 |         define_macros += [("WITH_CUDA", None)]
35 |         extra_compile_args["nvcc"] = [
36 |             "-DCUDA_HAS_FP16=1",
37 |             "-D__CUDA_NO_HALF_OPERATORS__",
38 |             "-D__CUDA_NO_HALF_CONVERSIONS__",
39 |             "-D__CUDA_NO_HALF2_OPERATORS__",
40 |         ]
41 | 
42 |     sources = [os.path.join(extensions_dir, s) for s in sources]
43 | 
44 |     include_dirs = [extensions_dir]
45 | 
46 |     ext_modules = [
47 |         extension(
48 |             "model._C",
49 |             sources,
50 |             include_dirs=include_dirs,
51 |             define_macros=define_macros,
52 |             extra_compile_args=extra_compile_args,
53 |         )
54 |     ]
55 | 
56 |     return ext_modules
57 | 
58 | 
59 | setup(
60 |     name="faster_rcnn",
61 |     version="0.1",
62 |     description="object detection in pytorch",
63 |     packages=find_packages(exclude=("configs", "tests",)),
64 |     # install_requires=requirements,
65 |     ext_modules=get_extensions(),
66 |     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
67 | )
68 | 
--------------------------------------------------------------------------------
/onnx/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/onnx/__init__.py
--------------------------------------------------------------------------------
/onnx/onnx_infer.py:
--------------------------------------------------------------------------------
1 | """
2 | This script runs the exported ThunderNet RPN ONNX model (thundernet146_rpn.onnx) on a sample image and prints the raw RPN outputs.
3 | """ 4 | import os,sys 5 | import time 6 | 7 | import cv2 8 | import numpy as np 9 | import onnx 10 | 11 | 12 | from caffe2.python.onnx import backend 13 | 14 | # onnx runtime 15 | import onnxruntime as ort 16 | 17 | onnx_path = "thundernet146_rpn.onnx" 18 | 19 | 20 | predictor = onnx.load(onnx_path) 21 | onnx.checker.check_model(predictor) 22 | onnx.helper.printable_graph(predictor.graph) 23 | predictor = backend.prepare(predictor, device="CPU") # default CPU 24 | 25 | ort_session = ort.InferenceSession(onnx_path) 26 | input_name = ort_session.get_inputs()[0].name 27 | result_path = "./result" 28 | 29 | threshold = 0.7 30 | # path = "/mnt/data1/yanghuiyu/dlmodel/Fd/Face-Detector-1MB-with-landmark/images/input" 31 | path = "/mnt/data1/yanghuiyu/project/object_detect/Thundernet_new/voc_images/input/2008_000179.jpg" 32 | sum = 0 33 | if not os.path.exists(result_path): 34 | os.makedirs(result_path) 35 | # listdir = os.listdir(path) 36 | sum = 0 37 | # for file_path in listdir: 38 | img_path = os.path.join(path, path) 39 | orig_image = cv2.imread(img_path) 40 | print(orig_image.shape) 41 | image = cv2.resize(orig_image, (320, 320)) 42 | # image = image/255.0 43 | # image = cv2.resize(image, (640, 480)) 44 | 45 | # mean = np.array([0.40789654, 0.44719302, 0.47026115], 46 | # dtype=np.float32).reshape(1, 1, 3) 47 | # std = np.array([0.28863828, 0.27408164, 0.27809835], 48 | # dtype=np.float32).reshape(1, 1, 3) 49 | 50 | # print(image) 51 | mean = np.array([[[0.485 * 255, 0.456 * 255, 0.406 * 255]]]) 52 | 53 | image = (image - mean) 54 | image = np.transpose(image, [2, 0, 1]) 55 | image = np.expand_dims(image, axis=0) 56 | image = image.astype(np.float32) 57 | 58 | # confidences, boxes = predictor.run(image) 59 | time_time = time.time() 60 | # boxes , confidences, landmark = ort_session.run(None, {input_name: image}) 61 | rpn_cls_prob,rpn_bbox_pred,base_feat = predictor.run(image) 62 | base_feat = np.swapaxes(base_feat[0],1,2) 63 | print(rpn_bbox_pred) 64 | -------------------------------------------------------------------------------- /onnx/rcnn_head_to_onnx.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from utils import load_model 3 | import torch.nn.functional as F 4 | import torch 5 | class _fasterRCNN(nn.Module): 6 | """ faster RCNN """ 7 | def __init__(self,n_classes 8 | ): 9 | self.n_classes = n_classes 10 | super(_fasterRCNN, self).__init__() 11 | 12 | c_in = 1024 13 | 14 | self.RCNN_top = nn.Sequential(nn.Linear(5 * 7 * 7, c_in), 15 | nn.ReLU(inplace=True)) 16 | 17 | 18 | self.RCNN_cls_score = nn.Linear(c_in, self.n_classes) 19 | self.RCNN_bbox_pred = nn.Linear(c_in, 4 * self.n_classes) 20 | 21 | 22 | 23 | 24 | def forward(self, pool5): 25 | 26 | pool5_flat = pool5.view(pool5.size(0), -1) 27 | print(pool5_flat.shape) 28 | fc7 = self.RCNN_top(pool5_flat) 29 | 30 | 31 | RCNN_cls_score = self.RCNN_cls_score(fc7) 32 | 33 | cls_prob = F.softmax(RCNN_cls_score, 1) 34 | 35 | bbox_pred = self.RCNN_bbox_pred(fc7) 36 | 37 | 38 | return [cls_prob,bbox_pred] 39 | 40 | 41 | 42 | net = _fasterRCNN(21) 43 | 44 | net = load_model(net, "../snet_146_3/snet_146/pascal_voc_0712/thundernet_epoch_4.pth") 45 | net.eval() 46 | print('Finished loading model!') 47 | print(net) 48 | device = torch.device("cpu") 49 | net = net.to(device) 50 | 51 | ##################export############### 52 | output_onnx = 'thundernet146_rcnn_head.onnx' 53 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 54 | input_names = ["roi_pool"] 
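# Note: the dummy tensor exported below is (1, 5, 7, 7), matching RCNN_top's
# nn.Linear(5 * 7 * 7, 1024) above, i.e. a single 5-channel, 7x7 pooled feature
# from PS RoI Align; at inference time this head is presumably run once per RoI
# kept after the RPN stage exported by rpn_to_onnx.py.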
55 | # output_names = ["hm" , "wh" , "reg"] 56 | output_names = ["cls_prob" , "bbox_pred" ] 57 | inputs = torch.randn(1, 5 , 7 , 7).to(device) 58 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 59 | input_names=input_names, output_names=output_names) -------------------------------------------------------------------------------- /onnx/rpn_to_onnx.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0,"../lib") 3 | from torch import nn 4 | from model.faster_rcnn.modules import * 5 | from model.faster_rcnn.Snet import SnetExtractor 6 | from utils import load_model 7 | class _RPN(nn.Module): 8 | """ region proposal network """ 9 | def __init__(self, din): 10 | super(_RPN, self).__init__() 11 | 12 | self.din = din # get depth of input feature map, e.g., 512 13 | 14 | 15 | # define the convrelu layers processing input feature map 16 | # self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True) 17 | 18 | # define bg/fg classifcation score layer 19 | self.nc_score_out = 25*2 20 | self.RPN_cls_score = nn.Conv2d(self.din, self.nc_score_out, 1, 1, 0) 21 | 22 | # define anchor box offset prediction layer 23 | self.nc_bbox_out = 25 * 4 # 4(coords) * 9 (anchors) 24 | self.RPN_bbox_pred = nn.Conv2d(self.din, self.nc_bbox_out, 1, 1, 0) 25 | self.softmax = nn.Softmax(1) 26 | 27 | @staticmethod 28 | def reshape(x, d): 29 | input_shape = x.size() 30 | x = x.view(input_shape[0], int(d), 31 | int(float(input_shape[1] * input_shape[2]) / float(d)), 32 | input_shape[3]) 33 | return x 34 | 35 | def forward(self, base_feat): 36 | 37 | rpn_cls_score = self.RPN_cls_score(base_feat) 38 | 39 | rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2) 40 | rpn_cls_prob_reshape = self.softmax(rpn_cls_score_reshape) 41 | rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out) 42 | 43 | # get rpn offsets to the anchor boxes 44 | rpn_bbox_pred = self.RPN_bbox_pred(base_feat) 45 | 46 | 47 | return rpn_cls_prob, rpn_bbox_pred 48 | 49 | 50 | 51 | class _fasterRCNN(nn.Module): 52 | """ faster RCNN """ 53 | def __init__(self 54 | ): 55 | super(_fasterRCNN, self).__init__() 56 | 57 | 58 | self.RCNN_base = SnetExtractor(146) 59 | 60 | # loss 61 | self.RCNN_loss_cls = 0 62 | self.RCNN_loss_bbox = 0 63 | # self.focalloss_handle = FocalLossV4(num_class=21, alpha=0.25, gamma=2.0, balance_index=2) 64 | # define Large Separable Convolution Layer 65 | 66 | self.rpn = RPN(in_channels=245, f_channels=256) 67 | 68 | 69 | self.sam = SAM(256,245) 70 | # define rpn 71 | self.RCNN_rpn = _RPN(256) 72 | 73 | 74 | 75 | 76 | def forward(self, im_data): 77 | 78 | basefeat = self.RCNN_base(im_data) 79 | 80 | # feed base feature map tp RPN to obtain rois 81 | rpn_feat= self.rpn(basefeat) 82 | 83 | rpn_cls_prob, rpn_bbox_pred = self.RCNN_rpn(rpn_feat) 84 | 85 | base_feat = self.sam([basefeat,rpn_feat]) 86 | return [rpn_cls_prob, rpn_bbox_pred ,base_feat] 87 | 88 | net = _fasterRCNN() 89 | 90 | net = load_model(net, "../snet_146_3/snet_146/pascal_voc_0712/thundernet_epoch_4.pth") 91 | net.eval() 92 | print('Finished loading model!') 93 | print(net) 94 | device = torch.device("cpu") 95 | net = net.to(device) 96 | 97 | ##################export############### 98 | output_onnx = 'thundernet146_rpn.onnx' 99 | print("==> Exporting model to ONNX format at '{}'".format(output_onnx)) 100 | input_names = ["input"] 101 | # output_names = ["hm" , "wh" , "reg"] 102 | output_names = ["rpn_cls_prob" , "rpn_bbox_pred" , "base_feat" ] 103 | 
inputs = torch.randn(1, 3, 320, 320).to(device) 104 | torch_out = torch.onnx._export(net, inputs, output_onnx, export_params=True, verbose=False, 105 | input_names=input_names, output_names=output_names) -------------------------------------------------------------------------------- /onnx/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | def check_keys(model, pretrained_state_dict): 3 | ckpt_keys = set(pretrained_state_dict.keys()) 4 | model_keys = set(model.state_dict().keys()) 5 | used_pretrained_keys = model_keys & ckpt_keys 6 | unused_pretrained_keys = ckpt_keys - model_keys 7 | missing_keys = model_keys - ckpt_keys 8 | print('Missing keys:{}'.format(len(missing_keys))) 9 | print('Unused checkpoint keys:{}'.format(len(unused_pretrained_keys))) 10 | print('Used keys:{}'.format(len(used_pretrained_keys))) 11 | assert len(used_pretrained_keys) > 0, 'load NONE from pretrained checkpoint' 12 | return True 13 | 14 | 15 | def remove_prefix(state_dict, prefix): 16 | ''' Old style model is stored with all names of parameters sharing common prefix 'module.' ''' 17 | print('remove prefix \'{}\''.format(prefix)) 18 | f = lambda x: x.split(prefix, 1)[-1] if x.startswith(prefix) else x 19 | return {f(key): value for key, value in state_dict.items()} 20 | 21 | 22 | def load_model(model, model_path): 23 | start_epoch = 0 24 | checkpoint = torch.load(model_path, map_location=lambda storage, loc: storage) 25 | print('loaded {}, epoch {}'.format(model_path, checkpoint['epoch'])) 26 | state_dict_ = checkpoint['model'] 27 | state_dict = {} 28 | 29 | # convert data_parallal to model 30 | for k in state_dict_: 31 | if k.startswith('module') and not k.startswith('module_list'): 32 | state_dict[k[7:]] = state_dict_[k] 33 | else: 34 | state_dict[k] = state_dict_[k] 35 | model_state_dict = model.state_dict() 36 | 37 | # check loaded parameters and created model parameters 38 | msg = 'If you see this, your model does not fully load the ' + \ 39 | 'pre-trained weight. Please make sure ' + \ 40 | 'you have correctly specified --arch xxx ' + \ 41 | 'or set the correct --num_classes for your own dataset.' 42 | for k in state_dict: 43 | if k in model_state_dict: 44 | if state_dict[k].shape != model_state_dict[k].shape: 45 | print('Skip loading parameter {}, required shape{}, ' \ 46 | 'loaded shape{}. 
{}'.format(
47 |                     k, model_state_dict[k].shape, state_dict[k].shape, msg))
48 |                 state_dict[k] = model_state_dict[k]
49 |         else:
50 |             print('Drop parameter {}.'.format(k) + msg)
51 |     for k in model_state_dict:
52 |         if not (k in state_dict):
53 |             print('No param {}.'.format(k) + msg)
54 |             state_dict[k] = model_state_dict[k]
55 |     model.load_state_dict(state_dict, strict=False)
56 |     return model
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Thundernet_Pytorch
2 | 
3 | ## The project has been updated: [thundernet_mmdetection](https://github.com/ouyanghuiyu/thundernet_mmdetection)
4 | ## 20191222 update
5 | - add data augmentation
6 | - add multi-scale training
7 | - add ONNX export (in progress)
8 | 
9 | ## Pretrained model
10 | - training code: https://github.com/ouyanghuiyu/Snet
11 | 
12 | ## Requirements
13 | * pytorch 1.2.0
14 | * torchvision 0.4
15 | 
16 | 
17 | 
18 | ## Lib Prepare
19 | ```sh
20 | git clone https://github.com/ouyanghuiyu/Thundernet_Pytorch.git
21 | ```
22 | 
23 | ### Build
24 | ```sh
25 | cd lib && python setup.py build_ext --inplace
26 | cd psroialign/PSROIAlign && sh build.sh
27 | ```
28 | ## Data Prepare
29 | Download the VOC0712 datasets and link them into the project:
30 | ln -s "YOUR PATH" data
31 | 
32 | ## Train
33 | ```sh
34 | cd script
35 | sh train_49.sh
36 | sh train_146.sh
37 | sh train_535.sh
38 | ```
39 | 
40 | ## Demo
41 | ```sh
42 | cd script
43 | sh pre.sh
44 | 
45 | ```
46 | 
47 | ## TODO LIST
48 | 
49 | - add COCO train and test
50 | - add NCNN inference
51 | 
52 | ## Citation
53 | Please cite the paper in your publications if it helps your research:
54 | ```
55 | @article{zheng2019thundernet,
56 |   title={ThunderNet: Towards Real-time Generic Object Detection},
57 |   author={Zheng Qin and Zeming Li and Zhaoning Zhang and Yiping Bao and Gang Yu and Yuxing Peng and Jian Sun},
58 |   journal={arXiv preprint arXiv:1903.11752},
59 |   year={2019}
60 | }
61 | ```
62 | 
63 | ## VOC TEST EXAMPLE
64 | ![test](https://github.com/ouyanghuiyu/Thundernet_Pytorch/blob/master/voc_images/output/2008_000005.jpg)
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 
--------------------------------------------------------------------------------
/script/pre.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | cd ..
4 | 
5 | CUDA_VISIBLE_DEVICES=0 python demo.py --dataset pascal_voc_0712 --net snet_146 --load_dir snet146_2 \
6 | --checkepoch 6 --cuda \
7 | --image_dir /mnt/data1/yanghuiyu/project/object_detect/thundernetbylightheadrcnn/voc_images/input
8 | 
--------------------------------------------------------------------------------
/script/train_146.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | cd ..
4 | 
5 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc_0712 --net snet_146 --bs 64 --nw 8 \
6 | --lr 1e-2 --epochs 150 --cuda --lr_decay_step 25,50,75 --use_tfboard True \
7 | --save_dir snet146 --eval_interval 2 --logdir snet146_log --pre ./weights/snet_146.tar \
8 | --r True --checkepoch 2
9 | 
--------------------------------------------------------------------------------
/script/train_49.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | cd ..
4 | 5 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc_0712 --net snet_49 --bs 16 --nw 8 \ 6 | --lr 1e-2 --epochs 150 --cuda --lr_decay_step 50,75,100 --use_tfboard True\ 7 | --save_dir snet_49 --eval_interval 5 \ 8 | --r True --checkepoch 4 9 | -------------------------------------------------------------------------------- /script/train_535.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | cd .. 4 | 5 | CUDA_VISIBLE_DEVICES=0 python trainval_net.py --dataset pascal_voc_0712 --net snet_535 --bs 16 --nw 8 \ 6 | --lr 1e-2 --epochs 150 --cuda --lr_decay_step 50,75,100 --use_tfboard True\ 7 | --save_dir snet_49 --eval_interval 5 \ 8 | --r True --checkepoch 4 9 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | color_list = np.array( 3 | [ 4 | 1.000, 1.000, 1.000, 5 | 0.850, 0.325, 0.098, 6 | 0.929, 0.694, 0.125, 7 | 0.494, 0.184, 0.556, 8 | 0.466, 0.674, 0.188, 9 | 0.301, 0.745, 0.933, 10 | 0.635, 0.078, 0.184, 11 | 0.300, 0.300, 0.300, 12 | 0.600, 0.600, 0.600, 13 | 1.000, 0.000, 0.000, 14 | 1.000, 0.500, 0.000, 15 | 0.749, 0.749, 0.000, 16 | 0.000, 1.000, 0.000, 17 | 0.000, 0.000, 1.000, 18 | 0.667, 0.000, 1.000, 19 | 0.333, 0.333, 0.000, 20 | 0.333, 0.667, 0.000, 21 | 0.333, 1.000, 0.000, 22 | 0.667, 0.333, 0.000, 23 | 0.667, 0.667, 0.000, 24 | 0.667, 1.000, 0.000, 25 | 1.000, 0.333, 0.000, 26 | 1.000, 0.667, 0.000, 27 | 1.000, 1.000, 0.000, 28 | 0.000, 0.333, 0.500, 29 | 0.000, 0.667, 0.500, 30 | 0.000, 1.000, 0.500, 31 | 0.333, 0.000, 0.500, 32 | 0.333, 0.333, 0.500, 33 | 0.333, 0.667, 0.500, 34 | 0.333, 1.000, 0.500, 35 | 0.667, 0.000, 0.500, 36 | 0.667, 0.333, 0.500, 37 | 0.667, 0.667, 0.500, 38 | 0.667, 1.000, 0.500, 39 | 1.000, 0.000, 0.500, 40 | 1.000, 0.333, 0.500, 41 | 1.000, 0.667, 0.500, 42 | 1.000, 1.000, 0.500, 43 | 0.000, 0.333, 1.000, 44 | 0.000, 0.667, 1.000, 45 | 0.000, 1.000, 1.000, 46 | 0.333, 0.000, 1.000, 47 | 0.333, 0.333, 1.000, 48 | 0.333, 0.667, 1.000, 49 | 0.333, 1.000, 1.000, 50 | 0.667, 0.000, 1.000, 51 | 0.667, 0.333, 1.000, 52 | 0.667, 0.667, 1.000, 53 | 0.667, 1.000, 1.000, 54 | 1.000, 0.000, 1.000, 55 | 1.000, 0.333, 1.000, 56 | 1.000, 0.667, 1.000, 57 | 0.167, 0.000, 0.000, 58 | 0.333, 0.000, 0.000, 59 | 0.500, 0.000, 0.000, 60 | 0.667, 0.000, 0.000, 61 | 0.833, 0.000, 0.000, 62 | 1.000, 0.000, 0.000, 63 | 0.000, 0.167, 0.000, 64 | 0.000, 0.333, 0.000, 65 | 0.000, 0.500, 0.000, 66 | 0.000, 0.667, 0.000, 67 | 0.000, 0.833, 0.000, 68 | 0.000, 1.000, 0.000, 69 | 0.000, 0.000, 0.167, 70 | 0.000, 0.000, 0.333, 71 | 0.000, 0.000, 0.500, 72 | 0.000, 0.000, 0.667, 73 | 0.000, 0.000, 0.833, 74 | 0.000, 0.000, 1.000, 75 | 0.000, 0.000, 0.000, 76 | 0.143, 0.143, 0.143, 77 | 0.286, 0.286, 0.286, 78 | 0.429, 0.429, 0.429, 79 | 0.571, 0.571, 0.571, 80 | 0.714, 0.714, 0.714, 81 | 0.857, 0.857, 0.857, 82 | 0.000, 0.447, 0.741, 83 | 0.50, 0.5, 0 84 | ] 85 | ).astype(np.float32) 86 | color_list = color_list.reshape((-1, 3)) * 255 87 | 88 | -------------------------------------------------------------------------------- /voc_images/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/.DS_Store -------------------------------------------------------------------------------- 
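A minimal sketch of how the color_list palette defined in utils.py above could be used when drawing detections on the VOC images below. It assumes the top-level utils.py is importable; the draw_detection helper and its box, class index, class name, and score arguments are hypothetical placeholders, not code from this repository.

import cv2
from utils import color_list  # 80-entry palette defined above, already scaled to 0-255

def draw_detection(img, box, class_idx, class_name, score):
    # pick a per-class color from the palette
    color = tuple(int(c) for c in color_list[class_idx % len(color_list)])
    x1, y1, x2, y2 = [int(v) for v in box]
    cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
    cv2.putText(img, "{} {:.2f}".format(class_name, score), (x1, max(y1 - 4, 0)),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 1)
    return img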
/voc_images/input/2008_000005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/input/2008_000005.jpg -------------------------------------------------------------------------------- /voc_images/input/2008_000038.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/input/2008_000038.jpg -------------------------------------------------------------------------------- /voc_images/input/2008_000175.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/voc_images/input/2008_000175.jpg -------------------------------------------------------------------------------- /weights/snet_146.tar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/weights/snet_146.tar -------------------------------------------------------------------------------- /weights/thundernet146_voc_map67.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DayBreak-u/Thundernet_Pytorch/ac359d128a44e566ba5852a830c0a2154e10edb2/weights/thundernet146_voc_map67.pth --------------------------------------------------------------------------------
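As a quick sanity check, the released checkpoint above can be inspected with plain torch.load. This sketch assumes the file follows the same {'epoch': ..., 'model': ...} layout that load_model in onnx/utils.py expects, and falls back to treating it as a bare state_dict otherwise.

import torch

# load onto CPU regardless of where the checkpoint was saved
checkpoint = torch.load("weights/thundernet146_voc_map67.pth",
                        map_location=lambda storage, loc: storage)
print("epoch:", checkpoint.get("epoch"))

# use the nested state_dict if present, otherwise the checkpoint itself
state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint
print("number of parameter tensors:", len(state_dict))
for name, tensor in list(state_dict.items())[:5]:
    print(name, tuple(tensor.shape))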