├── .gitignore
├── LICENSE
├── README.md
├── cls_video_map.py
├── configs
    ├── faster_rcnn_r101_hrnmp_c5.py
    └── faster_rcnn_r101_selsa_c5.py
├── frames2videos.py
├── inference_vis.py
├── mmdet
    ├── __init__.py
    ├── apis
    │   ├── __init__.py
    │   ├── env.py
    │   ├── inference.py
    │   ├── train copy.py
    │   └── train.py
    ├── core
    │   ├── __init__.py
    │   ├── anchor
    │   │   ├── __init__.py
    │   │   ├── anchor_generator.py
    │   │   ├── anchor_target.py
    │   │   ├── guided_anchor_target.py
    │   │   ├── point_generator.py
    │   │   └── point_target.py
    │   ├── bbox
    │   │   ├── __init__.py
    │   │   ├── assign_sampling.py
    │   │   ├── assigners
    │   │   │   ├── __init__.py
    │   │   │   ├── approx_max_iou_assigner.py
    │   │   │   ├── assign_result.py
    │   │   │   ├── base_assigner.py
    │   │   │   ├── max_iou_assigner.py
    │   │   │   └── point_assigner.py
    │   │   ├── bbox_target.py
    │   │   ├── geometry.py
    │   │   ├── samplers
    │   │   │   ├── __init__.py
    │   │   │   ├── base_sampler.py
    │   │   │   ├── combined_sampler.py
    │   │   │   ├── instance_balanced_pos_sampler.py
    │   │   │   ├── iou_balanced_neg_sampler.py
    │   │   │   ├── ohem_hnl_sampler.py
    │   │   │   ├── ohem_sampler.py
    │   │   │   ├── pseudo_sampler.py
    │   │   │   ├── random_sampler.py
    │   │   │   └── sampling_result.py
    │   │   └── transforms.py
    │   ├── evaluation
    │   │   ├── __init__.py
    │   │   ├── bbox_overlaps.py
    │   │   ├── class_names.py
    │   │   ├── coco_utils.py
    │   │   ├── eval_hooks.py
    │   │   ├── mean_ap.py
    │   │   └── recall.py
    │   ├── fp16
    │   │   ├── __init__.py
    │   │   ├── decorators.py
    │   │   ├── hooks.py
    │   │   └── utils.py
    │   ├── mask
    │   │   ├── __init__.py
    │   │   ├── mask_target.py
    │   │   └── utils.py
    │   ├── post_processing
    │   │   ├── __init__.py
    │   │   ├── bbox_nms.py
    │   │   └── merge_augs.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── dist_utils.py
    │   │   └── misc.py
    ├── datasets
    │   ├── __init__.py
    │   ├── builder.py
    │   ├── cityscapes.py
    │   ├── coco.py
    │   ├── custom.py
    │   ├── dataset_wrappers.py
    │   ├── imagenet_det_img.py
    │   ├── imagenet_det_sequence.py
    │   ├── imagenet_vid.py
    │   ├── imagenet_vid_sequence.py
    │   ├── loader
    │   │   ├── __init__.py
    │   │   ├── build_loader.py
    │   │   └── sampler.py
    │   ├── pipelines
    │   │   ├── __init__.py
    │   │   ├── compose.py
    │   │   ├── formating.py
    │   │   ├── loading.py
    │   │   ├── test_aug.py
    │   │   └── transforms.py
    │   ├── registry.py
    │   ├── voc.py
    │   ├── wider_face.py
    │   └── xml_style.py
    ├── models
    │   ├── __init__.py
    │   ├── anchor_heads
    │   │   ├── __init__.py
    │   │   ├── anchor_head.py
    │   │   ├── fcos_head.py
    │   │   ├── fovea_head.py
    │   │   ├── free_anchor_retina_head.py
    │   │   ├── ga_retina_head.py
    │   │   ├── ga_rpn_head.py
    │   │   ├── guided_anchor_head.py
    │   │   ├── reppoints_head.py
    │   │   ├── retina_head.py
    │   │   ├── rpn_head.py
    │   │   └── ssd_head.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   ├── hrnet.py
    │   │   ├── res2net_v1b.py
    │   │   ├── resnet.py
    │   │   ├── resnext.py
    │   │   └── ssd_vgg.py
    │   ├── bbox_heads
    │   │   ├── __init__.py
    │   │   ├── bbox_head.py
    │   │   ├── convfc_bbox_head.py
    │   │   ├── double_bbox_head.py
    │   │   ├── hrnmp_bbox_head.py
    │   │   └── selsa_bbox_head.py
    │   ├── builder.py
    │   ├── detectors
    │   │   ├── __init__.py
    │   │   ├── base.py
    │   │   ├── cascade_rcnn.py
    │   │   ├── double_head_rcnn.py
    │   │   ├── fast_rcnn.py
    │   │   ├── faster_rcnn.py
    │   │   ├── fcos.py
    │   │   ├── fovea.py
    │   │   ├── grid_rcnn.py
    │   │   ├── hnl_rcnn.py
    │   │   ├── hnmb_rcnn.py
    │   │   ├── htc.py
    │   │   ├── mask_rcnn.py
    │   │   ├── mask_scoring_rcnn.py
    │   │   ├── reppoints_detector.py
    │   │   ├── retinanet.py
    │   │   ├── rpn.py
    │   │   ├── selsa_rcnn.py
    │   │   ├── single_stage.py
    │   │   ├── test_mixins.py
    │   │   └── two_stage.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── accuracy.py
    │   │   ├── balanced_l1_loss.py
    │   │   ├── cross_entropy_loss.py
    │   │   ├── focal_loss.py
    │   │   ├── ghm_loss.py
    │   │   ├── iou_loss.py
    │   │   ├── mse_loss.py
    │   │   ├── smooth_l1_loss.py
    │   │   └── utils.py
    │   ├── mask_heads
    │   │   ├── __init__.py
    │   │   ├── fcn_mask_head.py
    │   │   ├── fused_semantic_head.py
    │   │   ├── grid_head.py
    │   │   ├── htc_mask_head.py
    │   │   └── maskiou_head.py
    │   ├── necks
    │   │   ├── __init__.py
    │   │   ├── bfp.py
    │   │   ├── fpn.py
    │   │   └── hrfpn.py
    │   ├── plugins
    │   │   ├── __init__.py
    │   │   ├── generalized_attention.py
    │   │   └── non_local.py
    │   ├── registry.py
    │   ├── roi_extractors
    │   │   ├── __init__.py
    │   │   └── single_level.py
    │   ├── shared_heads
    │   │   ├── __init__.py
    │   │   ├── res2_layer.py
    │   │   ├── res_layer.py
    │   │   └── resx_layer.py
    │   └── utils
    │   │   ├── __init__.py
    │   │   ├── conv_module.py
    │   │   ├── conv_ws.py
    │   │   ├── norm.py
    │   │   ├── scale.py
    │   │   └── weight_init.py
    ├── ops
    │   ├── __init__.py
    │   ├── context_block.py
    │   ├── dcn
    │   │   ├── __init__.py
    │   │   ├── deform_conv.py
    │   │   ├── deform_pool.py
    │   │   └── src
    │   │   │   ├── deform_conv_cuda.cpp
    │   │   │   ├── deform_conv_cuda_kernel.cu
    │   │   │   ├── deform_pool_cuda.cpp
    │   │   │   └── deform_pool_cuda_kernel.cu
    │   ├── masked_conv
    │   │   ├── __init__.py
    │   │   ├── masked_conv.py
    │   │   └── src
    │   │   │   ├── masked_conv2d_cuda.cpp
    │   │   │   └── masked_conv2d_kernel.cu
    │   ├── nms
    │   │   ├── __init__.py
    │   │   ├── nms_wrapper.py
    │   │   └── src
    │   │   │   ├── nms_cpu.cpp
    │   │   │   ├── nms_cuda.cpp
    │   │   │   ├── nms_kernel.cu
    │   │   │   └── soft_nms_cpu.pyx
    │   ├── roi_align
    │   │   ├── __init__.py
    │   │   ├── gradcheck.py
    │   │   ├── roi_align.py
    │   │   └── src
    │   │   │   ├── roi_align_cuda.cpp
    │   │   │   └── roi_align_kernel.cu
    │   ├── roi_pool
    │   │   ├── __init__.py
    │   │   ├── gradcheck.py
    │   │   ├── roi_pool.py
    │   │   └── src
    │   │   │   ├── roi_pool_cuda.cpp
    │   │   │   └── roi_pool_kernel.cu
    │   ├── sigmoid_focal_loss
    │   │   ├── __init__.py
    │   │   ├── sigmoid_focal_loss.py
    │   │   └── src
    │   │   │   ├── sigmoid_focal_loss.cpp
    │   │   │   └── sigmoid_focal_loss_cuda.cu
    │   └── utils
    │   │   ├── __init__.py
    │   │   └── src
    │   │       └── compiling_info.cpp
    └── utils
    │   ├── __init__.py
    │   ├── flops_counter.py
    │   └── registry.py
├── setup.py
├── test.sh
├── tools
    ├── analyze_logs.py
    ├── coco_error_analysis.py
    ├── coco_eval.py
    ├── collect_env.py
    ├── convert_datasets
    │   └── pascal_voc.py
    ├── detectron2pytorch.py
    ├── dist_hnl_test.sh
    ├── dist_test.sh
    ├── dist_train.sh
    ├── dive_into_arch.py
    ├── get_flops.py
    ├── gpu_device_test.py
    ├── hnl_test.py
    ├── plot_PR_curve.py
    ├── publish_model.py
    ├── robustness_eval.py
    ├── selsa_test.py
    ├── slurm_test.sh
    ├── slurm_train.sh
    ├── test.py
    ├── test_robustness.py
    ├── train.py
    ├── upgrade_model_version.py
    ├── vid_eval.py
    └── voc_eval.py
└── train.sh


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | output/
  7 | mmcv-source/
  8 | metric_learning
  9 | 
 10 | # C extensions
 11 | *.so
 12 | 
 13 | # Distribution / packaging
 14 | .Python
 15 | build/
 16 | develop-eggs/
 17 | dist/
 18 | downloads/
 19 | eggs/
 20 | .eggs/
 21 | lib/
 22 | lib64/
 23 | parts/
 24 | sdist/
 25 | var/
 26 | wheels/
 27 | *.egg-info/
 28 | .installed.cfg
 29 | *.egg
 30 | MANIFEST
 31 | 
 32 | # PyInstaller
 33 | #  Usually these files are written by a python script from a template
 34 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 35 | *.manifest
 36 | *.spec
 37 | 
 38 | # Installer logs
 39 | pip-log.txt
 40 | pip-delete-this-directory.txt
 41 | 
 42 | # Unit test / coverage reports
 43 | htmlcov/
 44 | .tox/
 45 | .coverage
 46 | .coverage.*
 47 | .cache
 48 | nosetests.xml
 49 | coverage.xml
 50 | *.cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | 
 63 | # Flask stuff:
 64 | instance/
 65 | .webassets-cache
 66 | 
 67 | # Scrapy stuff:
 68 | .scrapy
 69 | 
 70 | # Sphinx documentation
 71 | docs/_build/
 72 | 
 73 | # PyBuilder
 74 | target/
 75 | 
 76 | # Jupyter Notebook
 77 | .ipynb_checkpoints
 78 | 
 79 | # pyenv
 80 | .python-version
 81 | 
 82 | # celery beat schedule file
 83 | celerybeat-schedule
 84 | 
 85 | # SageMath parsed files
 86 | *.sage.py
 87 | 
 88 | # Environments
 89 | .env
 90 | .venv
 91 | env/
 92 | venv/
 93 | ENV/
 94 | env.bak/
 95 | venv.bak/
 96 | 
 97 | # Spyder project settings
 98 | .spyderproject
 99 | .spyproject
100 | 
101 | # Rope project settings
102 | .ropeproject
103 | 
104 | # mkdocs documentation
105 | /site
106 | 
107 | # mypy
108 | .mypy_cache/
109 | 
110 | # cython generated cpp
111 | mmdet/ops/nms/src/soft_nms_cpu.cpp
112 | mmdet/version.py
113 | data
114 | .vscode
115 | .idea
116 | 
117 | # custom
118 | *.pkl
119 | *.pkl.json
120 | *.log.json
121 | work_dirs/
122 | 
123 | # Pytorch
124 | *.pth
125 | distillation_r101_train_nohup.sh
126 | caffe_r101_train_nohup.sh
127 | hi
128 | history
129 | train_nohup_not_dist.sh
130 | mmdet/models/bbox_heads/new.py
131 | mmdet/models/detectors/selsa_rcnn copy.py
132 | 
133 | *.txt
134 | *.pdf
135 | *.pkl
136 | *.json
137 | *.csv
138 | *.out
139 | work_dirs
140 | CIFAR100_Dataset/cifar-100-python.tar.gz
141 | CIFAR100_Dataset/cifar-100-python/file.txt~
142 | CIFAR100_Dataset/cifar-100-python/meta
143 | CIFAR100_Dataset/cifar-100-python/test
144 | CIFAR100_Dataset/cifar-100-python/train
145 | mmdet/models/losses/center_loss.py
146 | train_nohup.sh
147 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # HVRNet for Video Object Detection
 2 | 
 3 | [![License](https://img.shields.io/badge/license-Apache-blue.svg)](LICENSE)
 4 | 
 5 | Official code for Mining Inter-Video Proposal Relations for Video Object Detection, ECCV 2020
 6 | 
 7 | [Paper](http://www.ecva.net/papers/eccv_2020/papers_ECCV/papers/123660426.pdf)
 8 | 
 9 | By Mingfei Han, Yali Wang, [Xiaojun Chang](http://xiaojun.ai/), Yu Qiao.
10 | 
11 | 
12 | ## Citing HVRNet
13 | Please cite our paper in your publications if it helps your research:
14 | ```
15 | @inproceedings{han20hvrnet,
16 |     Author = {Han, Mingfei and Wang, Yali and Chang, Xiaojun and Qiao, Yu},
17 |     Title = {Mining Inter-Video Proposal Relations for Video Object Detection},
18 |     Conference = {ECCV},
19 |     Year = {2020}
20 | }
21 | ```
22 | 


--------------------------------------------------------------------------------
/cls_video_map.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | import mmcv
 4 | import numpy as np
 5 | import os.path as osp
 6 | 
 7 | import xml.etree.ElementTree as ET
 8 | 
# Synset-style identifiers (e.g. 'n02691156'); these are the strings looked up
# from the <name> element of each annotated object in load_annotations().
# The tuple is index-aligned with `class_name`: CLASSES[i] <-> class_name[i].
CLASSES = ('n02691156', 'n02419796', 'n02131653', 'n02834778',
           'n01503061', 'n02924116', 'n02958343', 'n02402425',
           'n02084071', 'n02121808', 'n02503517', 'n02118333',
           'n02510455', 'n02342885', 'n02374451', 'n02129165',
           'n01674464', 'n02484322', 'n03790512', 'n02324045',
           'n02509815', 'n02411705', 'n01726692', 'n02355227',
           'n02129604', 'n04468005', 'n01662784', 'n04530566',
           'n02062744', 'n02391049')
# Human-readable label for each entry of CLASSES (same order, same length).
class_name = ('airplane', 'antelope', 'bear', 'bicycle',
              'bird', 'bus', 'car', 'cattle',
              'dog', 'domestic_cat', 'elephant', 'fox',
              'giant_panda', 'hamster', 'horse', 'lion',
              'lizard', 'monkey', 'motorcycle', 'rabbit',
              'red_panda', 'sheep', 'snake', 'squirrel',
              'tiger', 'train', 'turtle', 'watercraft',
              'whale', 'zebra')
25 | 
26 | 
def load_annotations(ann_file, img_prefix):
    """Collect, for every class, the set of videos in which it is annotated.

    Args:
        ann_file (str): Frame list file. Only the first space-separated
            field of each line (the frame id) is used; its second
            '/'-separated component is taken as the video id.
        img_prefix (str): Dataset root. Annotations are read from
            ``<img_prefix>/Annotations/<frame id>.xml``.

    Returns:
        dict[str, set[str]]: Maps every entry of ``class_name`` to the set of
            video ids having at least one annotated object of that class.

    Raises:
        KeyError: If an object's ``<name>`` is not one of ``CLASSES``.
    """
    cls_name_map = dict(zip(CLASSES, class_name))
    cls_video_map = {name: set() for name in class_name}
    for line in mmcv.list_from_file(ann_file):
        img_id = line.strip().split(' ')[0]
        video_id = img_id.strip().split('/')[1]
        xml_path = osp.join(img_prefix, 'Annotations',
                            '{}.xml'.format(img_id))
        root = ET.parse(xml_path).getroot()
        # Frames without any <object> simply contribute nothing; the image
        # size is irrelevant for this mapping, so it is not parsed at all.
        for obj in root.findall('object'):
            name = cls_name_map[obj.find('name').text]
            cls_video_map[name].add(video_id)
    return cls_video_map
48 | 
49 | 
def main():
    """Pretty-print the class -> video-id mapping of the VID val frame list."""
    from pprint import pprint

    cls_to_videos = load_annotations(
        './data/VID/ImageSets/VID_val_frames.txt',
        './data/VID')
    pprint(cls_to_videos)


if __name__ == '__main__':
    main()
60 | 


--------------------------------------------------------------------------------
/frames2videos.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | import os.path as osp
 4 | from mmcv.video import frames2video
 5 | 
 6 | frames_dir='/home/mfhan/mmdetection/data/VID/vis/val/'
 7 | video_dir='/home/mfhan/mmdetection/data/VID/vis_video/'
 8 | 
 9 | for f_vid in os.listdir(frames_dir):
10 |     video_name="{}.mp4".format(f_vid)
11 |     video_path=osp.join(video_dir, video_name)
12 |     frames2video(osp.join(frames_dir + f_vid), video_path, filename_tmpl='{:06d}.JPEG')


--------------------------------------------------------------------------------
/inference_vis.py:
--------------------------------------------------------------------------------
 1 | from os import path as osp
 2 | import os
 3 | 
 4 | from mmcv.video import frames2video
 5 | from mmdet.apis import init_detector, inference_detector, show_result
 6 | import mmcv
 7 | 
 8 | classes_names = ['airplane', 'antelope', 'bear', 'bicycle',
 9 | 					'bird', 'bus', 'car', 'cattle',
10 | 					'dog', 'domestic_cat', 'elephant', 'fox',
11 | 					'giant_panda', 'hamster', 'horse', 'lion',
12 | 					'lizard', 'monkey', 'motorcycle', 'rabbit',
13 | 					'red_panda', 'sheep', 'snake', 'squirrel',
14 | 					'tiger', 'train', 'turtle', 'watercraft',
15 | 					'whale', 'zebra']
16 | classes_map = ['n02691156', 'n02419796', 'n02131653', 'n02834778',
17 | 						'n01503061', 'n02924116', 'n02958343', 'n02402425',
18 | 						'n02084071', 'n02121808', 'n02503517', 'n02118333',
19 | 						'n02510455', 'n02342885', 'n02374451', 'n02129165',
20 | 						'n01674464', 'n02484322', 'n03790512', 'n02324045',
21 | 						'n02509815', 'n02411705', 'n01726692', 'n02355227',
22 | 						'n02129604', 'n04468005', 'n01662784', 'n04530566',
23 | 						'n02062744', 'n02391049']
24 | name_to_class = {classes_map[i]: classes_names[i] for i in range(len(classes_names))}
25 | class_to_name = {classes_names[i]: classes_map[i] for i in range(len(classes_map))}
26 | 
# Detector configuration and trained weights used to build `model`.
# NOTE(review): the directory listing for this repo shows only the two
# faster_rcnn_* files under configs/ -- confirm this config path exists.
config_file = './configs/mask_rcnn_r101_fpn_1x_vid_finetune.py'
checkpoint_file = './work_dirs/mask_rcnn_r101_fpn_1x_vid_det/epoch_8.pth'

# build the model from a config file and a checkpoint file
# NOTE(review): within this script `model` is referenced only by the
# commented-out per-frame inference loop further down; the active code path
# only converts already-rendered frames into videos.
model = init_detector(config_file, checkpoint_file, device='cuda:0')

# data_root = './data/VID/JPEGImages/'
# out_dir = './output/VID/thresh_05/'
# # test a video and show the results
# # with open('./data/VID/Imagesets/VID_val_videos.txt') as h:
# #     test_videos_frame_lists= h.readlines()
# Read the list of videos to process; each line is later split on whitespace
# into four fields, of which the first is the frame folder path.
with open('/home/mfhan/sda2/Sequence-Level-Semantics-Aggregation/data/ILSVRC2015/ImageSets/VID_val_sampled_videos_2.txt','r') as h:
    test_videos_frame_lists=h.readlines()
40 | # for video_frames_line in test_videos_frame_lists:
41 | #     frames_path, start_from, _, frames_num = video_frames_line.strip().split()
42 | #     print("test: {}".format(frames_path))
43 | #     frames_output_dir = osp.join(out_dir, frames_path)
44 | #     if not osp.isdir(frames_output_dir):
45 | #         os.makedirs(frames_output_dir)
46 | #     else:
47 | #         print("exists: {}".format(frames_path))
48 | #         continue
49 | #     for frame_id in range(int(frames_num)):
50 | #         frame_name = '{:06}.JPEG'.format(frame_id)
51 | #         frame = osp.join(data_root, frames_path, frame_name)
52 | #         result = inference_detector(model, frame)
53 | #         show_result(frame, result, classes_names, 
54 | #                     score_thr=0.5,
55 | #                     wait_time=0,
56 | #                     thickness=2,
57 | #                     font_scale=1.1,
58 | #                     show=False, 
59 | #                     out_file=osp.join(frames_output_dir, frame_name))
60 | 
# video_dir = './output/VID/videos/'
# Destination directory for the generated .mp4 files.
video_dir = '/home/mfhan/sda2/Sequence-Level-Semantics-Aggregation/output/selsa_rcnn/imagenet_vid/resnet_v1_101_rcnn_selsa_aug/VID_val_sampled_videos_2/videos/'
# Root directory under which the rendered frame folders live.
out_dir = '/home/mfhan/sda2/Sequence-Level-Semantics-Aggregation/output/selsa_rcnn/imagenet_vid/resnet_v1_101_rcnn_selsa_aug/VID_val_sampled_videos_2/'
frame_dir = out_dir
# Create the destination in one step (no separate isdir check needed).
os.makedirs(video_dir, exist_ok=True)
for video_frames_line in test_videos_frame_lists:
    fields = video_frames_line.split()
    if not fields:
        # Tolerate blank lines (e.g. a trailing newline) in the list file.
        continue
    # Each entry has four whitespace-separated fields; only the frame folder
    # path is needed for the conversion.
    frames_path, start_from, _, frames_num = fields
    # Drop the first four characters of the path to form the video name.
    # NOTE(review): presumably a 'val/' style prefix -- confirm.
    frames_name = frames_path[4:]
    video_path = osp.join(video_dir, '{}.mp4'.format(frames_name))
    # Join path components instead of concatenating strings so the result
    # does not rely on `frame_dir` ending with a separator.
    frames2video(osp.join(frame_dir, frames_path), video_path, filename_tmpl='{:06d}.JPEG')


--------------------------------------------------------------------------------
/mmdet/__init__.py:
--------------------------------------------------------------------------------
1 | from .version import __version__, short_version
2 | 
3 | __all__ = ['__version__', 'short_version']
4 | 


--------------------------------------------------------------------------------
/mmdet/apis/__init__.py:
--------------------------------------------------------------------------------
 1 | from .env import get_root_logger, init_dist, set_random_seed
 2 | from .inference import (inference_detector, init_detector, show_result,
 3 |                         show_result_pyplot)
 4 | from .train import train_detector
 5 | 
 6 | __all__ = [
 7 |     'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector',
 8 |     'init_detector', 'inference_detector', 'show_result', 'show_result_pyplot'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/mmdet/apis/env.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import random
 4 | import subprocess
 5 | 
 6 | import numpy as np
 7 | import torch
 8 | import torch.distributed as dist
 9 | import torch.multiprocessing as mp
10 | from mmcv.runner import get_dist_info
11 | 
12 | 
def init_dist(launcher, backend='nccl', **kwargs):
    """Initialise distributed execution for the given job launcher.

    Args:
        launcher (str): One of ``'pytorch'``, ``'mpi'`` or ``'slurm'``.
        backend (str): Backend name handed to the launcher-specific
            initialiser.
        **kwargs: Forwarded unchanged to the launcher-specific initialiser.

    Raises:
        ValueError: If ``launcher`` is none of the supported values.
    """
    # Only choose a start method when none has been fixed yet.
    current_method = mp.get_start_method(allow_none=True)
    if current_method is None:
        mp.set_start_method('spawn')

    if launcher == 'pytorch':
        _init_dist_pytorch(backend, **kwargs)
        return
    if launcher == 'mpi':
        _init_dist_mpi(backend, **kwargs)
        return
    if launcher == 'slurm':
        _init_dist_slurm(backend, **kwargs)
        return
    raise ValueError('Invalid launcher type: {}'.format(launcher))
24 | 
25 | 
def _init_dist_pytorch(backend, **kwargs):
    """Bind this process to a GPU and join the default process group.

    The global rank is read from the ``RANK`` environment variable and the
    device index is that rank modulo the number of visible CUDA devices.

    Args:
        backend (str): Backend passed to ``dist.init_process_group``.
        **kwargs: Extra keyword arguments for ``dist.init_process_group``.
    """
    # TODO: use local_rank instead of rank % num_gpus
    global_rank = int(os.environ['RANK'])
    device_index = global_rank % torch.cuda.device_count()
    torch.cuda.set_device(device_index)
    dist.init_process_group(backend=backend, **kwargs)
32 | 
33 | 
def _init_dist_mpi(backend, **kwargs):
    """Placeholder for MPI-launched jobs; always raises NotImplementedError."""
    raise NotImplementedError()
36 | 
37 | 
def _init_dist_slurm(backend, port=29500, **kwargs):
    """Set up the process group for a job started through SLURM.

    Reads ``SLURM_PROCID``, ``SLURM_NTASKS`` and ``SLURM_NODELIST`` from the
    environment, selects a CUDA device, exports ``MASTER_PORT``,
    ``MASTER_ADDR``, ``WORLD_SIZE`` and ``RANK``, then initialises the
    default process group.

    Args:
        backend (str): Backend passed to ``dist.init_process_group``.
        port (int): Value exported as ``MASTER_PORT``.
        **kwargs: Accepted for signature compatibility; not used here.
    """
    env = os.environ
    proc_id = int(env['SLURM_PROCID'])
    ntasks = int(env['SLURM_NTASKS'])
    node_list = env['SLURM_NODELIST']

    torch.cuda.set_device(proc_id % torch.cuda.device_count())

    # The first hostname reported for the node list is used as master.
    master_addr = subprocess.getoutput(
        'scontrol show hostname {} | head -n1'.format(node_list))

    exported = (
        ('MASTER_PORT', str(port)),
        ('MASTER_ADDR', master_addr),
        ('WORLD_SIZE', str(ntasks)),
        ('RANK', str(proc_id)),
    )
    for key, value in exported:
        env[key] = value

    dist.init_process_group(backend=backend)
51 | 
52 | 
def set_random_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices).

    Args:
        seed (int): Seed value handed to every generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
58 | 
59 | 
def get_root_logger(log_level=logging.INFO):
    """Return the root logger, configuring it on first use.

    If the root logger has no handlers yet, ``logging.basicConfig`` is called
    with ``log_level`` and a timestamped format. On every process whose rank
    (as reported by ``get_dist_info``) is not 0 the level is set to ERROR.

    Args:
        log_level (int): Level used when the logger is configured here.

    Returns:
        logging.Logger: The root logger.
    """
    root = logging.getLogger()
    needs_setup = not root.hasHandlers()
    if needs_setup:
        logging.basicConfig(
            level=log_level,
            format='%(asctime)s - %(levelname)s - %(message)s')
    rank, _world_size = get_dist_info()
    if rank != 0:
        root.setLevel('ERROR')
    return root
70 | 


--------------------------------------------------------------------------------
/mmdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .anchor import *  # noqa: F401, F403
2 | from .bbox import *  # noqa: F401, F403
3 | from .evaluation import *  # noqa: F401, F403
4 | from .fp16 import *  # noqa: F401, F403
5 | from .mask import *  # noqa: F401, F403
6 | from .post_processing import *  # noqa: F401, F403
7 | from .utils import *  # noqa: F401, F403
8 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
 1 | from .anchor_generator import AnchorGenerator
 2 | from .anchor_target import anchor_inside_flags, anchor_target
 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target
 4 | from .point_generator import PointGenerator
 5 | from .point_target import point_target
 6 | 
 7 | __all__ = [
 8 |     'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target',
 9 |     'ga_shape_target', 'PointGenerator', 'point_target'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/anchor_generator.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class AnchorGenerator(object):
 5 |     """
 6 |     Examples:
 7 |         >>> from mmdet.core import AnchorGenerator
 8 |         >>> self = AnchorGenerator(9, [1.], [1.])
 9 |         >>> all_anchors = self.grid_anchors((2, 2), device='cpu')
10 |         >>> print(all_anchors)
11 |         tensor([[ 0.,  0.,  8.,  8.],
12 |                 [16.,  0., 24.,  8.],
13 |                 [ 0., 16.,  8., 24.],
14 |                 [16., 16., 24., 24.]])
15 |     """
16 | 
17 |     def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None):
18 |         self.base_size = base_size
19 |         self.scales = torch.Tensor(scales)
20 |         self.ratios = torch.Tensor(ratios)
21 |         self.scale_major = scale_major
22 |         self.ctr = ctr
23 |         self.base_anchors = self.gen_base_anchors()
24 | 
25 |     @property
26 |     def num_base_anchors(self):
27 |         return self.base_anchors.size(0)
28 | 
29 |     def gen_base_anchors(self):
30 |         w = self.base_size
31 |         h = self.base_size
32 |         if self.ctr is None:
33 |             x_ctr = 0.5 * (w - 1)
34 |             y_ctr = 0.5 * (h - 1)
35 |         else:
36 |             x_ctr, y_ctr = self.ctr
37 | 
38 |         h_ratios = torch.sqrt(self.ratios)
39 |         w_ratios = 1 / h_ratios
40 |         if self.scale_major:
41 |             ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1)
42 |             hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1)
43 |         else:
44 |             ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1)
45 |             hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1)
46 | 
47 |         # yapf: disable
48 |         base_anchors = torch.stack(
49 |             [
50 |                 x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1),
51 |                 x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1)
52 |             ],
53 |             dim=-1).round()
54 |         # yapf: enable
55 | 
56 |         return base_anchors
57 | 
58 |     def _meshgrid(self, x, y, row_major=True):
59 |         xx = x.repeat(len(y))
60 |         yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
61 |         if row_major:
62 |             return xx, yy
63 |         else:
64 |             return yy, xx
65 | 
66 |     def grid_anchors(self, featmap_size, stride=16, device='cuda'):
67 |         base_anchors = self.base_anchors.to(device)
68 | 
69 |         feat_h, feat_w = featmap_size
70 |         shift_x = torch.arange(0, feat_w, device=device) * stride
71 |         shift_y = torch.arange(0, feat_h, device=device) * stride
72 |         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
73 |         shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
74 |         shifts = shifts.type_as(base_anchors)
75 |         # first feat_w elements correspond to the first row of shifts
76 |         # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
77 |         # shifted anchors (K, A, 4), reshape to (K*A, 4)
78 | 
79 |         all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
80 |         all_anchors = all_anchors.view(-1, 4)
81 |         # first A rows correspond to A anchors of (0, 0) in feature map,
82 |         # then (0, 1), (0, 2), ...
83 |         return all_anchors
84 | 
85 |     def valid_flags(self, featmap_size, valid_size, device='cuda'):
86 |         feat_h, feat_w = featmap_size
87 |         valid_h, valid_w = valid_size
88 |         assert valid_h <= feat_h and valid_w <= feat_w
89 |         valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
90 |         valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
91 |         valid_x[:valid_w] = 1
92 |         valid_y[:valid_h] = 1
93 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
94 |         valid = valid_xx & valid_yy
95 |         valid = valid[:,
96 |                       None].expand(valid.size(0),
97 |                                    self.num_base_anchors).contiguous().view(-1)
98 |         return valid
99 | 


--------------------------------------------------------------------------------
/mmdet/core/anchor/point_generator.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class PointGenerator(object):
 5 | 
 6 |     def _meshgrid(self, x, y, row_major=True):
 7 |         xx = x.repeat(len(y))
 8 |         yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
 9 |         if row_major:
10 |             return xx, yy
11 |         else:
12 |             return yy, xx
13 | 
14 |     def grid_points(self, featmap_size, stride=16, device='cuda'):
15 |         feat_h, feat_w = featmap_size
16 |         shift_x = torch.arange(0., feat_w, device=device) * stride
17 |         shift_y = torch.arange(0., feat_h, device=device) * stride
18 |         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
19 |         stride = shift_x.new_full((shift_xx.shape[0], ), stride)
20 |         shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
21 |         all_points = shifts.to(device)
22 |         return all_points
23 | 
24 |     def valid_flags(self, featmap_size, valid_size, device='cuda'):
25 |         feat_h, feat_w = featmap_size
26 |         valid_h, valid_w = valid_size
27 |         assert valid_h <= feat_h and valid_w <= feat_w
28 |         valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device)
29 |         valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device)
30 |         valid_x[:valid_w] = 1
31 |         valid_y[:valid_h] = 1
32 |         valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
33 |         valid = valid_xx & valid_yy
34 |         return valid
35 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
 2 | from .bbox_target import bbox_target
 3 | from .geometry import bbox_overlaps
 4 | from .samplers import (BaseSampler, CombinedSampler,
 5 |                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
 6 |                        PseudoSampler, RandomSampler, SamplingResult)
 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip,
 8 |                          bbox_mapping, bbox_mapping_back, delta2bbox,
 9 |                          distance2bbox, roi2bbox)
10 | 
11 | from .assign_sampling import (  # isort:skip, avoid recursive imports
12 |     assign_and_sample, build_assigner, build_sampler)
13 | 
14 | __all__ = [
15 |     'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult',
16 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
17 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
18 |     'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample',
19 |     'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping',
20 |     'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
21 |     'distance2bbox', 'bbox_target'
22 | ]
23 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assign_sampling.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from . import assigners, samplers
 4 | 
 5 | 
 6 | def build_assigner(cfg, **kwargs):
 7 |     if isinstance(cfg, assigners.BaseAssigner):
 8 |         return cfg
 9 |     elif isinstance(cfg, dict):
10 |         return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs)
11 |     else:
12 |         raise TypeError('Invalid type {} for building a sampler'.format(
13 |             type(cfg)))
14 | 
15 | 
def build_sampler(cfg, **kwargs):
    """Return a bbox sampler, or a list of samplers, for ``cfg``.

    Args:
        cfg (samplers.BaseSampler | dict | list[dict]): An existing sampler
            (returned unchanged), a config dict, or a list of config dicts.
            Dicts are instantiated from the ``samplers`` module via
            ``mmcv.runner.obj_from_dict``.
        **kwargs: Default arguments used when building from dict configs.

    Returns:
        A sampler instance, or a list with one sampler per list entry.

    Raises:
        TypeError: If ``cfg`` has an unsupported type, or a list contains a
            non-dict element.
    """
    if isinstance(cfg, samplers.BaseSampler):
        return cfg
    if isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs)
    if not isinstance(cfg, list):
        raise TypeError('Invalid type {} for building a sampler'.format(
            type(cfg)))

    # A list is accepted only when every entry is a config dict.
    if not all(isinstance(item, dict) for item in cfg):
        raise TypeError('Invalid element in `list` type configs for sampler builder')
    built = []
    for item in cfg:
        built.append(
            mmcv.runner.obj_from_dict(item, samplers, default_args=kwargs))
    return built
31 | 
32 | 
def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
    """Assign boxes to ground truths, then sample from the assignment.

    The assigner and the sampler are built from ``cfg.assigner`` and
    ``cfg.sampler`` respectively.

    Returns:
        tuple: ``(assign_result, sampling_result)``.
    """
    assigner = build_assigner(cfg.assigner)
    sampler = build_sampler(cfg.sampler)
    assigned = assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels)
    sampled = sampler.sample(assigned, bboxes, gt_bboxes, gt_labels)
    return assigned, sampled
41 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner
 2 | from .assign_result import AssignResult
 3 | from .base_assigner import BaseAssigner
 4 | from .max_iou_assigner import MaxIoUAssigner
 5 | from .point_assigner import PointAssigner
 6 | 
 7 | __all__ = [
 8 |     'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
 9 |     'PointAssigner'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/assign_result.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class AssignResult(object):
 5 | 
 6 |     def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):
 7 |         self.num_gts = num_gts
 8 |         self.gt_inds = gt_inds
 9 |         self.max_overlaps = max_overlaps
10 |         self.labels = labels
11 | 
12 |     def add_gt_(self, gt_labels):
13 |         self_inds = torch.arange(
14 |             1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)
15 |         self.gt_inds = torch.cat([self_inds, self.gt_inds])
16 |         self.max_overlaps = torch.cat(
17 |             [self.max_overlaps.new_ones(self.num_gts), self.max_overlaps])
18 |         if self.labels is not None:
19 |             self.labels = torch.cat([gt_labels, self.labels])
20 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
1 | from abc import ABCMeta, abstractmethod
2 | 
3 | 
class BaseAssigner(metaclass=ABCMeta):
    """Abstract interface that every bbox assigner implements."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign ``bboxes`` to ``gt_bboxes``; subclasses must override."""
9 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/bbox_target.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..utils import multi_apply
 4 | from .transforms import bbox2delta
 5 | 
 6 | 
 7 | def bbox_target(pos_bboxes_list,
 8 |                 neg_bboxes_list,
 9 |                 pos_gt_bboxes_list,
10 |                 pos_gt_labels_list,
11 |                 cfg,
12 |                 reg_classes=1,
13 |                 target_means=[.0, .0, .0, .0],
14 |                 target_stds=[1.0, 1.0, 1.0, 1.0],
15 |                 concat=True):
16 |     labels, label_weights, bbox_targets, bbox_weights = multi_apply(
17 |         bbox_target_single,
18 |         pos_bboxes_list,
19 |         neg_bboxes_list,
20 |         pos_gt_bboxes_list,
21 |         pos_gt_labels_list,
22 |         cfg=cfg,
23 |         reg_classes=reg_classes,
24 |         target_means=target_means,
25 |         target_stds=target_stds)
26 | 
27 |     if concat:
28 |         labels = torch.cat(labels, 0)
29 |         label_weights = torch.cat(label_weights, 0)
30 |         bbox_targets = torch.cat(bbox_targets, 0)
31 |         bbox_weights = torch.cat(bbox_weights, 0)
32 |     return labels, label_weights, bbox_targets, bbox_weights
33 | 
34 | 
def bbox_target_single(pos_bboxes,
                       neg_bboxes,
                       pos_gt_bboxes,
                       pos_gt_labels,
                       cfg,
                       reg_classes=1,
                       target_means=[.0, .0, .0, .0],
                       target_stds=[1.0, 1.0, 1.0, 1.0]):
    """Compute targets for one image, positives first then negatives.

    Args:
        pos_bboxes (Tensor): Positive boxes; also the template for the
            device and dtype of the outputs.
        neg_bboxes (Tensor): Negative boxes (only their count is used).
        pos_gt_bboxes (Tensor): Ground-truth box for each positive box.
        pos_gt_labels (Tensor): Label for each positive box.
        cfg: Config providing ``pos_weight``; a value ``<= 0`` means 1.0.
        reg_classes (int): Not used in this function.
        target_means, target_stds: Passed to ``bbox2delta``.

    Returns:
        tuple: ``(labels, label_weights, bbox_targets, bbox_weights)`` with
        ``num_pos + num_neg`` rows each.
    """
    num_pos, num_neg = pos_bboxes.size(0), neg_bboxes.size(0)
    total = num_pos + num_neg

    # Negatives keep label 0 and zero regression targets/weights.
    labels = pos_bboxes.new_zeros(total, dtype=torch.long)
    label_weights = pos_bboxes.new_zeros(total)
    bbox_targets = pos_bboxes.new_zeros(total, 4)
    bbox_weights = pos_bboxes.new_zeros(total, 4)

    if num_pos > 0:
        if cfg.pos_weight <= 0:
            pos_weight = 1.0
        else:
            pos_weight = cfg.pos_weight
        labels[:num_pos] = pos_gt_labels
        label_weights[:num_pos] = pos_weight
        bbox_targets[:num_pos, :] = bbox2delta(pos_bboxes, pos_gt_bboxes,
                                               target_means, target_stds)
        bbox_weights[:num_pos, :] = 1
    if num_neg > 0:
        # Negatives occupy the trailing ``num_neg`` slots.
        label_weights[num_pos:] = 1.0

    return labels, label_weights, bbox_targets, bbox_weights
62 | 
63 | 
def expand_target(bbox_targets, bbox_weights, labels, num_classes):
    """Spread 4-column targets/weights into per-class column groups.

    For every row whose label is ``> 0`` the four values are copied into
    columns ``[4 * label, 4 * label + 4)``; all other entries stay zero.

    Returns:
        tuple: ``(bbox_targets_expand, bbox_weights_expand)``, each with
        ``4 * num_classes`` columns.
    """
    width = 4 * num_classes
    targets_out = bbox_targets.new_zeros((bbox_targets.size(0), width))
    weights_out = bbox_weights.new_zeros((bbox_weights.size(0), width))

    foreground_rows = torch.nonzero(labels > 0).squeeze(-1)
    for row in foreground_rows.tolist():
        col = int(labels[row]) * 4
        targets_out[row, col:col + 4] = bbox_targets[row, :]
        weights_out[row, col:col + 4] = bbox_weights[row, :]
    return targets_out, weights_out
74 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/geometry.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False):
 5 |     """Calculate overlap between two set of bboxes.
 6 | 
 7 |     If ``is_aligned`` is ``False``, then calculate the ious between each bbox
 8 |     of bboxes1 and bboxes2, otherwise the ious between each aligned pair of
 9 |     bboxes1 and bboxes2.
10 | 
11 |     Args:
12 |         bboxes1 (Tensor): shape (m, 4)
13 |         bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n
14 |             must be equal.
15 |         mode (str): "iou" (intersection over union) or iof (intersection over
16 |             foreground).
17 | 
18 |     Returns:
19 |         ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1)
20 |     """
21 | 
22 |     assert mode in ['iou', 'iof']
23 | 
24 |     rows = bboxes1.size(0)
25 |     cols = bboxes2.size(0)
26 |     if is_aligned:
27 |         assert rows == cols
28 | 
29 |     if rows * cols == 0:
30 |         return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols)
31 | 
32 |     if is_aligned:
33 |         lt = torch.max(bboxes1[:, :2], bboxes2[:, :2])  # [rows, 2]
34 |         rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:])  # [rows, 2]
35 | 
36 |         wh = (rb - lt + 1).clamp(min=0)  # [rows, 2]
37 |         overlap = wh[:, 0] * wh[:, 1]
38 |         area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
39 |             bboxes1[:, 3] - bboxes1[:, 1] + 1)
40 | 
41 |         if mode == 'iou':
42 |             area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
43 |                 bboxes2[:, 3] - bboxes2[:, 1] + 1)
44 |             ious = overlap / (area1 + area2 - overlap)
45 |         else:
46 |             ious = overlap / area1
47 |     else:
48 |         lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2])  # [rows, cols, 2]
49 |         rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:])  # [rows, cols, 2]
50 | 
51 |         wh = (rb - lt + 1).clamp(min=0)  # [rows, cols, 2]
52 |         overlap = wh[:, :, 0] * wh[:, :, 1]
53 |         area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
54 |             bboxes1[:, 3] - bboxes1[:, 1] + 1)
55 | 
56 |         if mode == 'iou':
57 |             area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
58 |                 bboxes2[:, 3] - bboxes2[:, 1] + 1)
59 |             ious = overlap / (area1[:, None] + area2 - overlap)
60 |         else:
61 |             ious = overlap / (area1[:, None])
62 | 
63 |     return ious
64 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base_sampler import BaseSampler
 2 | from .combined_sampler import CombinedSampler
 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler
 5 | from .ohem_sampler import OHEMSampler
 6 | from .ohem_hnl_sampler import OHEMHNLSampler
 7 | from .pseudo_sampler import PseudoSampler
 8 | from .random_sampler import RandomSampler
 9 | from .sampling_result import SamplingResult
10 | 
11 | __all__ = [
12 |     'BaseSampler', 'PseudoSampler', 'RandomSampler',
13 |     'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
14 |     'OHEMSampler', 'SamplingResult', 'OHEMHNLSampler'
15 | ]
16 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/base_sampler.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | import torch
 4 | 
 5 | from .sampling_result import SamplingResult
 6 | 
 7 | 
 8 | class BaseSampler(metaclass=ABCMeta):
 9 | 
10 |     def __init__(self,
11 |                  num,
12 |                  pos_fraction,
13 |                  neg_pos_ub=-1,
14 |                  add_gt_as_proposals=True,
15 |                  **kwargs):
16 |         self.num = num
17 |         self.pos_fraction = pos_fraction
18 |         self.neg_pos_ub = neg_pos_ub
19 |         self.add_gt_as_proposals = add_gt_as_proposals
20 |         self.pos_sampler = self
21 |         self.neg_sampler = self
22 | 
23 |     @abstractmethod
24 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
25 |         pass
26 | 
27 |     @abstractmethod
28 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
29 |         pass
30 | 
31 |     def sample(self,
32 |                assign_result,
33 |                bboxes,
34 |                gt_bboxes,
35 |                gt_labels=None,
36 |                **kwargs):
37 |         """Sample positive and negative bboxes.
38 | 
39 |         This is a simple implementation of bbox sampling given candidates,
40 |         assigning results and ground truth bboxes.
41 | 
42 |         Args:
43 |             assign_result (:obj:`AssignResult`): Bbox assigning results.
44 |             bboxes (Tensor): Boxes to be sampled from.
45 |             gt_bboxes (Tensor): Ground truth bboxes.
46 |             gt_labels (Tensor, optional): Class labels of ground truth bboxes.
47 | 
48 |         Returns:
49 |             :obj:`SamplingResult`: Sampling result.
50 |         """
51 |         bboxes = bboxes[:, :4]
52 | 
53 |         gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)
54 |         if self.add_gt_as_proposals:
55 |             bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
56 |             assign_result.add_gt_(gt_labels)
57 |             gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)
58 |             gt_flags = torch.cat([gt_ones, gt_flags])
59 | 
60 |         num_expected_pos = int(self.num * self.pos_fraction)
61 |         pos_inds = self.pos_sampler._sample_pos(
62 |             assign_result, num_expected_pos, bboxes=bboxes, **kwargs)
63 |         # We found that sampled indices have duplicated items occasionally.
64 |         # (may be a bug of PyTorch)
65 |         pos_inds = pos_inds.unique()
66 |         num_sampled_pos = pos_inds.numel()
67 |         num_expected_neg = self.num - num_sampled_pos
68 |         if self.neg_pos_ub >= 0:
69 |             _pos = max(1, num_sampled_pos)
70 |             neg_upper_bound = int(self.neg_pos_ub * _pos)
71 |             if num_expected_neg > neg_upper_bound:
72 |                 num_expected_neg = neg_upper_bound
73 |         neg_inds = self.neg_sampler._sample_neg(
74 |             assign_result, num_expected_neg, bboxes=bboxes, **kwargs)
75 |         neg_inds = neg_inds.unique()
76 | 
77 |         return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
78 |                               assign_result, gt_flags)
79 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/combined_sampler.py:
--------------------------------------------------------------------------------
 1 | from ..assign_sampling import build_sampler
 2 | from .base_sampler import BaseSampler
 3 | 
 4 | 
 5 | class CombinedSampler(BaseSampler):
 6 | 
 7 |     def __init__(self, pos_sampler, neg_sampler, **kwargs):
 8 |         super(CombinedSampler, self).__init__(**kwargs)
 9 |         self.pos_sampler = build_sampler(pos_sampler, **kwargs)
10 |         self.neg_sampler = build_sampler(neg_sampler, **kwargs)
11 | 
12 |     def _sample_pos(self, **kwargs):
13 |         raise NotImplementedError
14 | 
15 |     def _sample_neg(self, **kwargs):
16 |         raise NotImplementedError
17 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from .random_sampler import RandomSampler
 5 | 
 6 | 
 7 | class InstanceBalancedPosSampler(RandomSampler):
 8 | 
 9 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
10 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0)
11 |         if pos_inds.numel() != 0:
12 |             pos_inds = pos_inds.squeeze(1)
13 |         if pos_inds.numel() <= num_expected:
14 |             return pos_inds
15 |         else:
16 |             unique_gt_inds = assign_result.gt_inds[pos_inds].unique()
17 |             num_gts = len(unique_gt_inds)
18 |             num_per_gt = int(round(num_expected / float(num_gts)) + 1)
19 |             sampled_inds = []
20 |             for i in unique_gt_inds:
21 |                 inds = torch.nonzero(assign_result.gt_inds == i.item())
22 |                 if inds.numel() != 0:
23 |                     inds = inds.squeeze(1)
24 |                 else:
25 |                     continue
26 |                 if len(inds) > num_per_gt:
27 |                     inds = self.random_choice(inds, num_per_gt)
28 |                 sampled_inds.append(inds)
29 |             sampled_inds = torch.cat(sampled_inds)
30 |             if len(sampled_inds) < num_expected:
31 |                 num_extra = num_expected - len(sampled_inds)
32 |                 extra_inds = np.array(
33 |                     list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))
34 |                 if len(extra_inds) > num_extra:
35 |                     extra_inds = self.random_choice(extra_inds, num_extra)
36 |                 extra_inds = torch.from_numpy(extra_inds).to(
37 |                     assign_result.gt_inds.device).long()
38 |                 sampled_inds = torch.cat([sampled_inds, extra_inds])
39 |             elif len(sampled_inds) > num_expected:
40 |                 sampled_inds = self.random_choice(sampled_inds, num_expected)
41 |             return sampled_inds
42 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/ohem_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from ..transforms import bbox2roi
 4 | from .base_sampler import BaseSampler
 5 | 
 6 | 
 7 | class OHEMSampler(BaseSampler):
 8 |     """
 9 |     Online Hard Example Mining Sampler described in [1]_.
10 | 
11 |     References:
12 |         .. [1] https://arxiv.org/pdf/1604.03540.pdf
13 |     """
14 | 
15 |     def __init__(self,
16 |                  num,
17 |                  pos_fraction,
18 |                  context,
19 |                  neg_pos_ub=-1,
20 |                  add_gt_as_proposals=True,
21 |                  **kwargs):
22 |         super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,
23 |                                           add_gt_as_proposals)
24 |         if not hasattr(context, 'num_stages'):
25 |             self.bbox_roi_extractor = context.bbox_roi_extractor
26 |             self.bbox_head = context.bbox_head
27 |         else:
28 |             self.bbox_roi_extractor = context.bbox_roi_extractor[
29 |                 context.current_stage]
30 |             self.bbox_head = context.bbox_head[context.current_stage]
31 | 
32 |     def hard_mining(self, inds, num_expected, bboxes, labels, feats):
33 |         with torch.no_grad():
34 |             rois = bbox2roi([bboxes])
35 |             bbox_feats = self.bbox_roi_extractor(
36 |                 feats[:self.bbox_roi_extractor.num_inputs], rois)
37 |             cls_score, _ = self.bbox_head(bbox_feats)
38 |             loss = self.bbox_head.loss(
39 |                 cls_score=cls_score,
40 |                 bbox_pred=None,
41 |                 labels=labels,
42 |                 label_weights=cls_score.new_ones(cls_score.size(0)),
43 |                 bbox_targets=None,
44 |                 bbox_weights=None,
45 |                 reduction_override='none')['loss_cls']
46 |             _, topk_loss_inds = loss.topk(num_expected)
47 |         return inds[topk_loss_inds]
48 | 
49 |     def _sample_pos(self,
50 |                     assign_result,
51 |                     num_expected,
52 |                     bboxes=None,
53 |                     feats=None,
54 |                     **kwargs):
55 |         # Sample some hard positive samples
56 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0)
57 |         if pos_inds.numel() != 0:
58 |             pos_inds = pos_inds.squeeze(1)
59 |         if pos_inds.numel() <= num_expected:
60 |             return pos_inds
61 |         else:
62 |             return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],
63 |                                     assign_result.labels[pos_inds], feats)
64 | 
65 |     def _sample_neg(self,
66 |                     assign_result,
67 |                     num_expected,
68 |                     bboxes=None,
69 |                     feats=None,
70 |                     **kwargs):
71 |         # Sample some hard negative samples
72 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0)
73 |         if neg_inds.numel() != 0:
74 |             neg_inds = neg_inds.squeeze(1)
75 |         if len(neg_inds) <= num_expected:
76 |             return neg_inds
77 |         else:
78 |             return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],
79 |                                     assign_result.labels[neg_inds], feats)
80 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from .base_sampler import BaseSampler
 4 | from .sampling_result import SamplingResult
 5 | 
 6 | 
 7 | class PseudoSampler(BaseSampler):
 8 | 
 9 |     def __init__(self, **kwargs):
10 |         pass
11 | 
12 |     def _sample_pos(self, **kwargs):
13 |         raise NotImplementedError
14 | 
15 |     def _sample_neg(self, **kwargs):
16 |         raise NotImplementedError
17 | 
18 |     def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
19 |         pos_inds = torch.nonzero(
20 |             assign_result.gt_inds > 0).squeeze(-1).unique()
21 |         neg_inds = torch.nonzero(
22 |             assign_result.gt_inds == 0).squeeze(-1).unique()
23 |         gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
24 |         sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
25 |                                          assign_result, gt_flags)
26 |         return sampling_result
27 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/random_sampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | from .base_sampler import BaseSampler
 5 | 
 6 | 
 7 | class RandomSampler(BaseSampler):
 8 | 
 9 |     def __init__(self,
10 |                  num,
11 |                  pos_fraction,
12 |                  neg_pos_ub=-1,
13 |                  add_gt_as_proposals=True,
14 |                  **kwargs):
15 |         super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
16 |                                             add_gt_as_proposals)
17 | 
18 |     @staticmethod
19 |     def random_choice(gallery, num):
20 |         """Random select some elements from the gallery.
21 | 
22 |         It seems that Pytorch's implementation is slower than numpy so we use
23 |         numpy to randperm the indices.
24 |         """
25 |         assert len(gallery) >= num
26 |         if isinstance(gallery, list):
27 |             gallery = np.array(gallery)
28 |         cands = np.arange(len(gallery))
29 |         np.random.shuffle(cands)
30 |         rand_inds = cands[:num]
31 |         if not isinstance(gallery, np.ndarray):
32 |             rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
33 |         return gallery[rand_inds]
34 | 
35 |     def _sample_pos(self, assign_result, num_expected, **kwargs):
36 |         """Randomly sample some positive samples."""
37 |         pos_inds = torch.nonzero(assign_result.gt_inds > 0)
38 |         if pos_inds.numel() != 0:
39 |             pos_inds = pos_inds.squeeze(1)
40 |         if pos_inds.numel() <= num_expected:
41 |             return pos_inds
42 |         else:
43 |             return self.random_choice(pos_inds, num_expected)
44 | 
45 |     def _sample_neg(self, assign_result, num_expected, **kwargs):
46 |         """Randomly sample some negative samples."""
47 |         neg_inds = torch.nonzero(assign_result.gt_inds == 0)
48 |         if neg_inds.numel() != 0:
49 |             neg_inds = neg_inds.squeeze(1)
50 |         if len(neg_inds) <= num_expected:
51 |             return neg_inds
52 |         else:
53 |             return self.random_choice(neg_inds, num_expected)
54 | 


--------------------------------------------------------------------------------
/mmdet/core/bbox/samplers/sampling_result.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class SamplingResult(object):
 5 | 
 6 |     def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,
 7 |                  gt_flags):
 8 |         self.pos_inds = pos_inds
 9 |         self.neg_inds = neg_inds
10 |         self.pos_bboxes = bboxes[pos_inds]
11 |         self.neg_bboxes = bboxes[neg_inds]
12 |         self.pos_is_gt = gt_flags[pos_inds]
13 | 
14 |         self.num_gts = gt_bboxes.shape[0]
15 |         self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1
16 |         self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :]
17 |         if assign_result.labels is not None:
18 |             self.pos_gt_labels = assign_result.labels[pos_inds]
19 |         else:
20 |             self.pos_gt_labels = None
21 | 
22 |     @property
23 |     def bboxes(self):
24 |         return torch.cat([self.pos_bboxes, self.neg_bboxes])
25 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | from .class_names import (coco_classes, dataset_aliases, get_classes,
 2 |                           imagenet_det_classes, imagenet_vid_classes,
 3 |                           voc_classes)
 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json
 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook,
 6 |                          DistEvalHook, DistEvalmAPHook)
 7 | from .mean_ap import average_precision, eval_map, print_map_summary, analysis_map
 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall,
 9 |                      print_recall_summary)
10 | 
11 | __all__ = [
12 |     'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
13 |     'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
14 |     'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook',
15 |     'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
16 |     'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
17 |     'plot_num_recall', 'plot_iou_recall', 'analysis_map'
18 | ]
19 | 


--------------------------------------------------------------------------------
/mmdet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'):
 5 |     """Calculate the ious between each bbox of bboxes1 and bboxes2.
 6 | 
 7 |     Args:
 8 |         bboxes1(ndarray): shape (n, 4)
 9 |         bboxes2(ndarray): shape (k, 4)
10 |         mode(str): iou (intersection over union) or iof (intersection
11 |             over foreground)
12 | 
13 |     Returns:
14 |         ious(ndarray): shape (n, k)
15 |     """
16 | 
17 |     assert mode in ['iou', 'iof']
18 | 
19 |     bboxes1 = bboxes1.astype(np.float32)
20 |     bboxes2 = bboxes2.astype(np.float32)
21 |     rows = bboxes1.shape[0]
22 |     cols = bboxes2.shape[0]
23 |     ious = np.zeros((rows, cols), dtype=np.float32)
24 |     if rows * cols == 0:
25 |         return ious
26 |     exchange = False
27 |     if bboxes1.shape[0] > bboxes2.shape[0]:
28 |         bboxes1, bboxes2 = bboxes2, bboxes1
29 |         ious = np.zeros((cols, rows), dtype=np.float32)
30 |         exchange = True
31 |     area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (
32 |         bboxes1[:, 3] - bboxes1[:, 1] + 1)
33 |     area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (
34 |         bboxes2[:, 3] - bboxes2[:, 1] + 1)
35 |     for i in range(bboxes1.shape[0]):
36 |         x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
37 |         y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
38 |         x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
39 |         y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
40 |         overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum(
41 |             y_end - y_start + 1, 0)
42 |         if mode == 'iou':
43 |             union = area1[i] + area2 - overlap
44 |         else:
45 |             union = area1[i] if not exchange else area2
46 |         ious[i, :] = overlap / union
47 |     if exchange:
48 |         ious = ious.T
49 |     return ious
50 | 


--------------------------------------------------------------------------------
/mmdet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
1 | from .decorators import auto_fp16, force_fp32
2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model
3 | 
4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/fp16/utils.py:
--------------------------------------------------------------------------------
 1 | from collections import abc
 2 | 
 3 | import numpy as np
 4 | import torch
 5 | 
 6 | 
 7 | def cast_tensor_type(inputs, src_type, dst_type):
 8 |     if isinstance(inputs, torch.Tensor):
 9 |         return inputs.to(dst_type)
10 |     elif isinstance(inputs, str):
11 |         return inputs
12 |     elif isinstance(inputs, np.ndarray):
13 |         return inputs
14 |     elif isinstance(inputs, abc.Mapping):
15 |         return type(inputs)({
16 |             k: cast_tensor_type(v, src_type, dst_type)
17 |             for k, v in inputs.items()
18 |         })
19 |     elif isinstance(inputs, abc.Iterable):
20 |         return type(inputs)(
21 |             cast_tensor_type(item, src_type, dst_type) for item in inputs)
22 |     else:
23 |         return inputs
24 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/__init__.py:
--------------------------------------------------------------------------------
1 | from .mask_target import mask_target
2 | from .utils import split_combined_polys
3 | 
4 | __all__ = ['split_combined_polys', 'mask_target']
5 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/mask_target.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | import numpy as np
 3 | import torch
 4 | from torch.nn.modules.utils import _pair
 5 | 
 6 | 
 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
 8 |                 cfg):
 9 |     cfg_list = [cfg for _ in range(len(pos_proposals_list))]
10 |     mask_targets = map(mask_target_single, pos_proposals_list,
11 |                        pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
12 |     mask_targets = torch.cat(list(mask_targets))
13 |     return mask_targets
14 | 
15 | 
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
    """Crop and resize the assigned gt mask for each positive proposal.

    Args:
        pos_proposals (Tensor): Positive proposals; the first dimension is
            the number of proposals and each row is unpacked as
            ``x1, y1, x2, y2``. The tensor is not modified.
        pos_assigned_gt_inds (Tensor): Index into ``gt_masks`` for each
            proposal.
        gt_masks (ndarray): Ground-truth masks with three dimensions, the
            last two being height and width.
        cfg: Config providing ``mask_size`` (int or pair).

    Returns:
        Tensor: Float mask targets on ``pos_proposals.device``; an empty
        tensor of shape ``(0,) + mask_size`` when there are no proposals.
    """
    mask_size = _pair(cfg.mask_size)
    num_pos = pos_proposals.size(0)
    mask_targets = []
    if num_pos > 0:
        # ``.numpy()`` shares memory with a CPU tensor, so copy before the
        # in-place clipping to avoid silently altering the caller's boxes.
        proposals_np = pos_proposals.cpu().numpy().copy()
        _, maxh, maxw = gt_masks.shape
        proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1)
        proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1)
        pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
        for i in range(num_pos):
            gt_mask = gt_masks[pos_assigned_gt_inds[i]]
            bbox = proposals_np[i, :].astype(np.int32)
            x1, y1, x2, y2 = bbox
            # Guarantee a crop of at least one pixel in each direction.
            w = np.maximum(x2 - x1 + 1, 1)
            h = np.maximum(y2 - y1 + 1, 1)
            # mask is uint8 both before and after resizing
            # mask_size (h, w) to (w, h)
            target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w],
                                   mask_size[::-1])
            mask_targets.append(target)
        mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to(
            pos_proposals.device)
    else:
        mask_targets = pos_proposals.new_zeros((0, ) + mask_size)
    return mask_targets
42 | 


--------------------------------------------------------------------------------
/mmdet/core/mask/utils.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | 
 4 | def split_combined_polys(polys, poly_lens, polys_per_mask):
 5 |     """Split the combined 1-D polys into masks.
 6 | 
 7 |     A mask is represented as a list of polys, and a poly is represented as
 8 |     a 1-D array. In dataset, all masks are concatenated into a single 1-D
 9 |     tensor. Here we need to split the tensor into original representations.
10 | 
11 |     Args:
12 |         polys (list): a list (length = image num) of 1-D tensors
13 |         poly_lens (list): a list (length = image num) of poly length
14 |         polys_per_mask (list): a list (length = image num) of poly number
15 |             of each mask
16 | 
17 |     Returns:
18 |         list: a list (length = image num) of list (length = mask num) of
19 |             list (length = poly num) of numpy array
20 |     """
21 |     mask_polys_list = []
22 |     for img_id in range(len(polys)):
23 |         polys_single = polys[img_id]
24 |         polys_lens_single = poly_lens[img_id].tolist()
25 |         polys_per_mask_single = polys_per_mask[img_id].tolist()
26 | 
27 |         split_polys = mmcv.slice_list(polys_single, polys_lens_single)
28 |         mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
29 |         mask_polys_list.append(mask_polys)
30 |     return mask_polys_list
31 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
1 | from .bbox_nms import multiclass_nms
2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
3 |                          merge_aug_proposals, merge_aug_scores)
4 | 
5 | __all__ = [
6 |     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
7 |     'merge_aug_scores', 'merge_aug_masks'
8 | ]
9 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/bbox_nms.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from mmdet.ops.nms import nms_wrapper
 4 | 
 5 | 
 6 | def multiclass_nms(multi_bboxes,
 7 |                    multi_scores,
 8 |                    score_thr,
 9 |                    nms_cfg,
10 |                    max_num=-1,
11 |                    score_factors=None):
12 |     """NMS for multi-class bboxes.
13 | 
14 |     Args:
15 |         multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)
16 |         multi_scores (Tensor): shape (n, #class), where the 0th column
17 |             contains scores of the background class, but this will be ignored.
18 |         score_thr (float): bbox threshold, bboxes with scores lower than it
19 |             will not be considered.
20 |         nms_thr (float): NMS IoU threshold
21 |         max_num (int): if there are more than max_num bboxes after NMS,
22 |             only top max_num will be kept.
23 |         score_factors (Tensor): The factors multiplied to scores before
24 |             applying NMS
25 | 
26 |     Returns:
27 |         tuple: (bboxes, labels), tensors of shape (k, 5) and (k, 1). Labels
28 |             are 0-based.
29 |     """
30 |     num_classes = multi_scores.shape[1]
31 |     bboxes, labels = [], []
32 |     nms_cfg_ = nms_cfg.copy()
33 |     nms_type = nms_cfg_.pop('type', 'nms')
34 |     nms_op = getattr(nms_wrapper, nms_type)
35 |     for i in range(1, num_classes):
36 |         cls_inds = multi_scores[:, i] > score_thr
37 |         if not cls_inds.any():
38 |             continue
39 |         # get bboxes and scores of this class
40 |         if multi_bboxes.shape[1] == 4:
41 |             _bboxes = multi_bboxes[cls_inds, :]
42 |         else:
43 |             _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4]
44 |         _scores = multi_scores[cls_inds, i]
45 |         if score_factors is not None:
46 |             _scores *= score_factors[cls_inds]
47 |         cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1)
48 |         cls_dets, _ = nms_op(cls_dets, **nms_cfg_)
49 |         cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ),
50 |                                            i - 1,
51 |                                            dtype=torch.long)
52 |         bboxes.append(cls_dets)
53 |         labels.append(cls_labels)
54 |     if bboxes:
55 |         bboxes = torch.cat(bboxes)
56 |         labels = torch.cat(labels)
57 |         if bboxes.shape[0] > max_num:
58 |             _, inds = bboxes[:, -1].sort(descending=True)
59 |             inds = inds[:max_num]
60 |             bboxes = bboxes[inds]
61 |             labels = labels[inds]
62 |     else:
63 |         bboxes = multi_bboxes.new_zeros((0, 5))
64 |         labels = multi_bboxes.new_zeros((0, ), dtype=torch.long)
65 | 
66 |     return bboxes, labels
67 | 


--------------------------------------------------------------------------------
/mmdet/core/post_processing/merge_augs.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | 
  4 | from mmdet.ops import nms
  5 | from ..bbox import bbox_mapping_back
  6 | 
  7 | 
  8 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg):
  9 |     """Merge augmented proposals (multiscale, flip, etc.)
 10 | 
 11 |     Args:
 12 |         aug_proposals (list[Tensor]): proposals from different testing
 13 |             schemes, shape (n, 5). Note that they are not rescaled to the
 14 |             original image size.
 15 | 
 16 |         img_metas (list[dict]): list of image info dict where each dict has:
 17 |             'img_shape', 'scale_factor', 'flip', and my also contain
 18 |             'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
 19 |             For details on the values of these keys see
 20 |             `mmdet/datasets/pipelines/formatting.py:Collect`.
 21 | 
 22 |         rpn_test_cfg (dict): rpn test config.
 23 | 
 24 |     Returns:
 25 |         Tensor: shape (n, 4), proposals corresponding to original image scale.
 26 |     """
 27 |     recovered_proposals = []
 28 |     for proposals, img_info in zip(aug_proposals, img_metas):
 29 |         img_shape = img_info['img_shape']
 30 |         scale_factor = img_info['scale_factor']
 31 |         flip = img_info['flip']
 32 |         _proposals = proposals.clone()
 33 |         _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,
 34 |                                               scale_factor, flip)
 35 |         recovered_proposals.append(_proposals)
 36 |     aug_proposals = torch.cat(recovered_proposals, dim=0)
 37 |     merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr)
 38 |     scores = merged_proposals[:, 4]
 39 |     _, order = scores.sort(0, descending=True)
 40 |     num = min(rpn_test_cfg.max_num, merged_proposals.shape[0])
 41 |     order = order[:num]
 42 |     merged_proposals = merged_proposals[order, :]
 43 |     return merged_proposals
 44 | 
 45 | 
 46 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
 47 |     """Merge augmented detection bboxes and scores.
 48 | 
 49 |     Args:
 50 |         aug_bboxes (list[Tensor]): shape (n, 4*#class)
 51 |         aug_scores (list[Tensor] or None): shape (n, #class)
 52 |         img_shapes (list[Tensor]): shape (3, ).
 53 |         rcnn_test_cfg (dict): rcnn test config.
 54 | 
 55 |     Returns:
 56 |         tuple: (bboxes, scores)
 57 |     """
 58 |     recovered_bboxes = []
 59 |     for bboxes, img_info in zip(aug_bboxes, img_metas):
 60 |         img_shape = img_info[0]['img_shape']
 61 |         scale_factor = img_info[0]['scale_factor']
 62 |         flip = img_info[0]['flip']
 63 |         bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
 64 |         recovered_bboxes.append(bboxes)
 65 |     bboxes = torch.stack(recovered_bboxes).mean(dim=0)
 66 |     if aug_scores is None:
 67 |         return bboxes
 68 |     else:
 69 |         scores = torch.stack(aug_scores).mean(dim=0)
 70 |         return bboxes, scores
 71 | 
 72 | 
 73 | def merge_aug_scores(aug_scores):
 74 |     """Merge augmented bbox scores."""
 75 |     if isinstance(aug_scores[0], torch.Tensor):
 76 |         return torch.mean(torch.stack(aug_scores), dim=0)
 77 |     else:
 78 |         return np.mean(aug_scores, axis=0)
 79 | 
 80 | 
 81 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
 82 |     """Merge augmented mask prediction.
 83 | 
 84 |     Args:
 85 |         aug_masks (list[ndarray]): shape (n, #class, h, w)
 86 |         img_shapes (list[ndarray]): shape (3, ).
 87 |         rcnn_test_cfg (dict): rcnn test config.
 88 | 
 89 |     Returns:
 90 |         tuple: (bboxes, scores)
 91 |     """
 92 |     recovered_masks = [
 93 |         mask if not img_info[0]['flip'] else mask[..., ::-1]
 94 |         for mask, img_info in zip(aug_masks, img_metas)
 95 |     ]
 96 |     if weights is None:
 97 |         merged_masks = np.mean(recovered_masks, axis=0)
 98 |     else:
 99 |         merged_masks = np.average(
100 |             np.array(recovered_masks), axis=0, weights=np.array(weights))
101 |     return merged_masks
102 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .dist_utils import DistOptimizerHook, allreduce_grads
2 | from .misc import multi_apply, tensor2imgs, unmap
3 | 
4 | __all__ = [
5 |     'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap',
6 |     'multi_apply'
7 | ]
8 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/dist_utils.py:
--------------------------------------------------------------------------------
 1 | from collections import OrderedDict
 2 | 
 3 | import torch.distributed as dist
 4 | from mmcv.runner import OptimizerHook
 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors,
 6 |                           _unflatten_dense_tensors)
 7 | 
 8 | 
 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):
10 |     if bucket_size_mb > 0:
11 |         bucket_size_bytes = bucket_size_mb * 1024 * 1024
12 |         buckets = _take_tensors(tensors, bucket_size_bytes)
13 |     else:
14 |         buckets = OrderedDict()
15 |         for tensor in tensors:
16 |             tp = tensor.type()
17 |             if tp not in buckets:
18 |                 buckets[tp] = []
19 |             buckets[tp].append(tensor)
20 |         buckets = buckets.values()
21 | 
22 |     for bucket in buckets:
23 |         flat_tensors = _flatten_dense_tensors(bucket)
24 |         dist.all_reduce(flat_tensors)
25 |         flat_tensors.div_(world_size)
26 |         for tensor, synced in zip(
27 |                 bucket, _unflatten_dense_tensors(flat_tensors, bucket)):
28 |             tensor.copy_(synced)
29 | 
30 | 
def allreduce_grads(params, coalesce=True, bucket_size_mb=-1):
    """Average the gradients of ``params`` across all processes.

    Only parameters that require grad and currently hold a gradient take
    part. With ``coalesce`` the gradients are reduced in flattened buckets,
    otherwise each gradient is divided by the world size and all-reduced
    on its own.
    """
    grads = []
    for param in params:
        if param.requires_grad and param.grad is not None:
            grads.append(param.grad.data)
    world_size = dist.get_world_size()
    if not coalesce:
        for grad in grads:
            dist.all_reduce(grad.div_(world_size))
        return
    _allreduce_coalesced(grads, world_size, bucket_size_mb)
42 | 
43 | 
class DistOptimizerHook(OptimizerHook):
    """Optimizer hook that all-reduces gradients before the optimizer step.

    Args:
        grad_clip: stored as ``self.grad_clip``; when not None,
            ``self.clip_grads`` is called after the all-reduce
            (presumably inherited from ``OptimizerHook`` - confirm).
        coalesce (bool): forwarded to ``allreduce_grads``.
        bucket_size_mb (int): forwarded to ``allreduce_grads``.
    """

    def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1):
        # NOTE(review): the parent ``__init__`` is not called here.
        self.grad_clip = grad_clip
        self.coalesce = coalesce
        self.bucket_size_mb = bucket_size_mb

    def after_train_iter(self, runner):
        """Backpropagate the loss, sync gradients and update the weights."""
        runner.optimizer.zero_grad()
        runner.outputs['loss'].backward()
        # average gradients over all processes before clipping / stepping
        allreduce_grads(runner.model.parameters(), self.coalesce,
                        self.bucket_size_mb)
        if self.grad_clip is not None:
            self.clip_grads(runner.model.parameters())
        runner.optimizer.step()
59 | 


--------------------------------------------------------------------------------
/mmdet/core/utils/misc.py:
--------------------------------------------------------------------------------
 1 | from functools import partial
 2 | 
 3 | import mmcv
 4 | import numpy as np
 5 | from six.moves import map, zip
 6 | 
 7 | 
 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
 9 |     num_imgs = tensor.size(0)
10 |     mean = np.array(mean, dtype=np.float32)
11 |     std = np.array(std, dtype=np.float32)
12 |     imgs = []
13 |     for img_id in range(num_imgs):
14 |         img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0)
15 |         img = mmcv.imdenormalize(
16 |             img, mean, std, to_bgr=to_rgb).astype(np.uint8)
17 |         imgs.append(np.ascontiguousarray(img))
18 |     return imgs
19 | 
20 | 
def multi_apply(func, *args, **kwargs):
    """Map ``func`` over the zipped ``args`` and transpose the results.

    ``func`` is called once per position with one item from every iterable
    in ``args`` (plus the fixed ``kwargs``) and must return a sequence. The
    return value is a tuple holding one list per returned field.
    """
    target = func
    if kwargs:
        target = partial(func, **kwargs)
    per_call_results = map(target, *args)
    return tuple(list(field) for field in zip(*per_call_results))
25 | 
26 | 
def unmap(data, count, inds, fill=0):
    """Scatter a subset of items back into a full-size tensor.

    A tensor with ``count`` entries along the first axis is created and
    filled with ``fill``; ``data`` is then written at the positions given
    by ``inds``.
    """
    if data.dim() == 1:
        full = data.new_full((count, ), fill)
        full[inds] = data
        return full
    full_shape = (count, ) + data.size()[1:]
    full = data.new_full(full_shape, fill)
    full[inds, :] = data
    return full
38 | 


--------------------------------------------------------------------------------
/mmdet/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | from .builder import build_dataset
 2 | from .cityscapes import CityscapesDataset
 3 | from .coco import CocoDataset
 4 | from .custom import CustomDataset
 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset
 6 | from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 7 | from .registry import DATASETS
 8 | from .voc import VOCDataset
 9 | from .imagenet_vid import VIDDataset
10 | from .imagenet_vid_sequence import VIDSeqDataset
11 | from .imagenet_det_img import DETIMGDataset
12 | from .imagenet_det_sequence import DETSeqDataset
13 | from .wider_face import WIDERFaceDataset
14 | from .xml_style import XMLDataset
15 | 
16 | __all__ = [
17 |     'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'VIDDataset', 'DETIMGDataset',
18 |     'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler',
19 |     'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset',
20 |     'DATASETS', 'build_dataset','VIDSeqDataset','DETSeqDataset'
21 | ]
22 | 


--------------------------------------------------------------------------------
/mmdet/datasets/builder.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | 
 3 | from mmdet.utils import build_from_cfg
 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset
 5 | from .registry import DATASETS
 6 | 
 7 | 
 8 | def _concat_dataset(cfg, default_args=None):
 9 |     ann_files = cfg['ann_file']
10 |     img_prefixes = cfg.get('img_prefix', None)
11 |     seg_prefixes = cfg.get('seg_prefixes', None)
12 |     proposal_files = cfg.get('proposal_file', None)
13 | 
14 |     datasets = []
15 |     num_dset = len(ann_files)
16 |     for i in range(num_dset):
17 |         data_cfg = copy.deepcopy(cfg)
18 |         data_cfg['ann_file'] = ann_files[i]
19 |         if isinstance(img_prefixes, (list, tuple)):
20 |             data_cfg['img_prefix'] = img_prefixes[i]
21 |         if isinstance(seg_prefixes, (list, tuple)):
22 |             data_cfg['seg_prefix'] = seg_prefixes[i]
23 |         if isinstance(proposal_files, (list, tuple)):
24 |             data_cfg['proposal_file'] = proposal_files[i]
25 |         datasets.append(build_dataset(data_cfg, default_args))
26 | 
27 |     return ConcatDataset(datasets)
28 | 
29 | 
def build_dataset(cfg, default_args=None):
    """Build a dataset (or a wrapper around datasets) from a config.

    * a list/tuple of configs becomes a ``ConcatDataset`` of the built items;
    * ``type == 'RepeatDataset'`` wraps the dataset built from
      ``cfg['dataset']`` and repeats it ``cfg['times']`` times;
    * a list/tuple ``ann_file`` is expanded by ``_concat_dataset``;
    * anything else is built from the ``DATASETS`` registry.
    """
    if isinstance(cfg, (list, tuple)):
        return ConcatDataset([build_dataset(c, default_args) for c in cfg])
    if cfg['type'] == 'RepeatDataset':
        inner = build_dataset(cfg['dataset'], default_args)
        return RepeatDataset(inner, cfg['times'])
    if isinstance(cfg['ann_file'], (list, tuple)):
        return _concat_dataset(cfg, default_args)
    return build_from_cfg(cfg, DATASETS, default_args)
42 | 


--------------------------------------------------------------------------------
/mmdet/datasets/cityscapes.py:
--------------------------------------------------------------------------------
 1 | from .coco import CocoDataset
 2 | from .registry import DATASETS
 3 | 
 4 | 
@DATASETS.register_module
class CityscapesDataset(CocoDataset):
    """``CocoDataset`` subclass that only overrides the class names."""

    # the eight category names used by this dataset
    CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',
               'bicycle')
10 | 


--------------------------------------------------------------------------------
/mmdet/datasets/dataset_wrappers.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
 3 | 
 4 | from .registry import DATASETS
 5 | 
 6 | 
 7 | @DATASETS.register_module
 8 | class ConcatDataset(_ConcatDataset):
 9 |     """A wrapper of concatenated dataset.
10 | 
11 |     Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but
12 |     concat the group flag for image aspect ratio.
13 | 
14 |     Args:
15 |         datasets (list[:obj:`Dataset`]): A list of datasets.
16 |     """
17 | 
18 |     def __init__(self, datasets):
19 |         super(ConcatDataset, self).__init__(datasets)
20 |         self.CLASSES = datasets[0].CLASSES
21 |         if hasattr(datasets[0], 'flag'):
22 |             flags = []
23 |             for i in range(0, len(datasets)):
24 |                 flags.append(datasets[i].flag)
25 |             self.flag = np.concatenate(flags)
26 | 
27 | 
@DATASETS.register_module
class RepeatDataset(object):
    """Wrapper that presents a dataset as if it were repeated several times.

    The reported length is ``times`` multiplied by the original length and
    indices wrap around the original dataset. Handy for small datasets with
    expensive loading, because fewer epoch boundaries are crossed.

    Args:
        dataset (:obj:`Dataset`): The dataset to be repeated.
        times (int): Repeat times.
    """

    def __init__(self, dataset, times):
        self.dataset = dataset
        self.times = times
        self.CLASSES = dataset.CLASSES
        if hasattr(dataset, 'flag'):
            # repeat the group flags so they match the repeated length
            self.flag = np.tile(dataset.flag, times)

        self._ori_len = len(dataset)

    def __getitem__(self, idx):
        wrapped_idx = idx % self._ori_len
        return self.dataset[wrapped_idx]

    def __len__(self):
        return self._ori_len * self.times
56 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/__init__.py:
--------------------------------------------------------------------------------
1 | from .build_loader import build_dataloader
2 | from .sampler import DistributedGroupSampler, GroupSampler
3 | 
4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader']
5 | 


--------------------------------------------------------------------------------
/mmdet/datasets/loader/build_loader.py:
--------------------------------------------------------------------------------
 1 | import platform
 2 | from functools import partial
 3 | 
 4 | from mmcv.parallel import collate
 5 | from mmcv.runner import get_dist_info
 6 | from torch.utils.data import DataLoader
 7 | 
 8 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler
 9 | 
10 | if platform.system() != 'Windows':
11 |     # https://github.com/pytorch/pytorch/issues/973
12 |     import resource
13 |     rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
14 |     resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1]))
15 | 
16 | 
17 | #By default, selsa_imgs (for dataloader build) is set same to imgs_per_gpu
def build_dataloader(dataset,
                     imgs_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     selsa_imgs=1,
                     **kwargs):
    """Create a ``DataLoader`` for distributed or non-distributed use.

    Args:
        dataset: dataset handed to the sampler and the ``DataLoader``.
        imgs_per_gpu (int): batch size per GPU.
        workers_per_gpu (int): number of loader workers per GPU.
        num_gpus (int): only used when ``dist`` is False, to scale the batch
            size and the worker count.
        dist (bool): build a distributed sampler using the rank and world
            size returned by ``get_dist_info``.
        shuffle (bool): selects the group sampler variants; without shuffle
            the distributed case uses ``DistributedSampler(shuffle=False)``
            and the non-distributed case uses no sampler.
        selsa_imgs (int): passed to ``collate`` as ``samples_per_gpu``.
            NOTE(review): the default is 1, not ``imgs_per_gpu`` - callers
            that want them equal must pass it explicitly.
        **kwargs: forwarded to ``DataLoader``.

    Returns:
        DataLoader: the configured loader (``pin_memory`` is always False).
    """
    # debug trace of the branch taken (printed on every call)
    print("entering build_dataloader")
    if dist:
        rank, world_size = get_dist_info()
        if shuffle:
            print("entering build_dataloader dist and shuffle")
            sampler = DistributedGroupSampler(dataset, imgs_per_gpu,
                                              world_size, rank)
        else:
            print("entering build_dataloader dist and not shuffle")
            sampler = DistributedSampler(
                dataset, world_size, rank, shuffle=False)
        # each process drives a single GPU
        batch_size = imgs_per_gpu
        num_workers = workers_per_gpu
    else:
        sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None
        # one process feeds all GPUs
        batch_size = num_gpus * imgs_per_gpu
        num_workers = num_gpus * workers_per_gpu

    data_loader = DataLoader(
        dataset,
        batch_size=batch_size,
        sampler=sampler,
        num_workers=num_workers,
        collate_fn=partial(collate, samples_per_gpu=selsa_imgs),
        pin_memory=False,
        **kwargs)

    return data_loader
56 | 


--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/__init__.py:
--------------------------------------------------------------------------------
 1 | from .compose import Compose
 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor,
 3 |                         Transpose, to_tensor)
 4 | from .loading import LoadAnnotations, LoadImageFromFile, LoadProposals
 5 | from .test_aug import MultiScaleFlipAug
 6 | from .transforms import (Albu, Expand, MinIoURandomCrop, Normalize, Pad,
 7 |                          PhotoMetricDistortion, RandomCrop, RandomFlip, Resize,
 8 |                          SegResizeFlipPadRescale)
 9 | 
10 | __all__ = [
11 |     'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer',
12 |     'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile',
13 |     'LoadProposals', 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad',
14 |     'RandomCrop', 'Normalize', 'SegResizeFlipPadRescale', 'MinIoURandomCrop',
15 |     'Expand', 'PhotoMetricDistortion', 'Albu'
16 | ]
17 | 


--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
 1 | import collections
 2 | 
 3 | from mmdet.utils import build_from_cfg
 4 | from ..registry import PIPELINES
 5 | 
 6 | 
 7 | @PIPELINES.register_module
 8 | class Compose(object):
 9 | 
10 |     def __init__(self, transforms):
11 |         assert isinstance(transforms, collections.abc.Sequence)
12 |         self.transforms = []
13 |         for transform in transforms:
14 |             if isinstance(transform, dict):
15 |                 transform = build_from_cfg(transform, PIPELINES)
16 |                 self.transforms.append(transform)
17 |             elif callable(transform):
18 |                 self.transforms.append(transform)
19 |             else:
20 |                 raise TypeError('transform must be callable or a dict')
21 | 
22 |     def __call__(self, data):
23 |         for t in self.transforms:
24 |             data = t(data)
25 |             if data is None:
26 |                 return None
27 |         return data
28 | 
29 |     def __repr__(self):
30 |         format_string = self.__class__.__name__ + '('
31 |         for t in self.transforms:
32 |             format_string += '\n'
33 |             format_string += '    {0}'.format(t)
34 |         format_string += '\n)'
35 |         return format_string
36 | 


--------------------------------------------------------------------------------
/mmdet/datasets/pipelines/test_aug.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from ..registry import PIPELINES
 4 | from .compose import Compose
 5 | 
 6 | 
 7 | @PIPELINES.register_module
 8 | class MultiScaleFlipAug(object):
 9 | 
10 |     def __init__(self, transforms, img_scale, flip=False):
11 |         self.transforms = Compose(transforms)
12 |         self.img_scale = img_scale if isinstance(img_scale,
13 |                                                  list) else [img_scale]
14 |         assert mmcv.is_list_of(self.img_scale, tuple)
15 |         self.flip = flip
16 | 
17 |     def __call__(self, results):
18 |         aug_data = []
19 |         flip_aug = [False, True] if self.flip else [False]
20 |         for scale in self.img_scale:
21 |             for flip in flip_aug:
22 |                 _results = results.copy()
23 |                 _results['scale'] = scale
24 |                 _results['flip'] = flip
25 |                 data = self.transforms(_results)
26 |                 aug_data.append(data)
27 |         # list of dict to dict of list
28 |         aug_data_dict = {key: [] for key in aug_data[0]}
29 |         for data in aug_data:
30 |             for key, val in data.items():
31 |                 aug_data_dict[key].append(val)
32 |         return aug_data_dict
33 | 
34 |     def __repr__(self):
35 |         repr_str = self.__class__.__name__
36 |         repr_str += '(transforms={}, img_scale={}, flip={})'.format(
37 |             self.transforms, self.img_scale, self.flip)
38 |         return repr_str
39 | 


--------------------------------------------------------------------------------
/mmdet/datasets/registry.py:
--------------------------------------------------------------------------------
1 | from mmdet.utils import Registry
2 | 
3 | DATASETS = Registry('dataset')
4 | PIPELINES = Registry('pipeline')
5 | 


--------------------------------------------------------------------------------
/mmdet/datasets/voc.py:
--------------------------------------------------------------------------------
 1 | from .registry import DATASETS
 2 | from .xml_style import XMLDataset
 3 | 
 4 | 
 5 | @DATASETS.register_module
 6 | class VOCDataset(XMLDataset):
 7 | 
 8 |     CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',
 9 |                'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse',
10 |                'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train',
11 |                'tvmonitor')
12 | 
13 |     def __init__(self, **kwargs):
14 |         super(VOCDataset, self).__init__(**kwargs)
15 |         if 'VOC2007' in self.img_prefix:
16 |             self.year = 2007
17 |         elif 'VOC2012' in self.img_prefix:
18 |             self.year = 2012
19 |         else:
20 |             raise ValueError('Cannot infer dataset year from img_prefix')
21 | 


--------------------------------------------------------------------------------
/mmdet/datasets/wider_face.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | 
 6 | from .registry import DATASETS
 7 | from .xml_style import XMLDataset
 8 | 
 9 | 
@DATASETS.register_module
class WIDERFaceDataset(XMLDataset):
    """
    Reader for the WIDER Face dataset in PASCAL VOC format.
    Conversion scripts can be found in
    https://github.com/sovrasov/wider-face-pascal-voc-annotations
    """
    CLASSES = ('face', )

    def __init__(self, **kwargs):
        super(WIDERFaceDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Read image ids from ``ann_file`` and collect size / path info.

        For every id the matching XML file under
        ``<img_prefix>/Annotations`` is parsed for the image width, height
        and folder; the folder is joined with ``<id>.jpg`` to form the
        filename.
        """
        annotation_dir = osp.join(self.img_prefix, 'Annotations')
        infos = []
        for img_id in mmcv.list_from_file(ann_file):
            jpg_name = '{}.jpg'.format(img_id)
            xml_root = ET.parse(
                osp.join(annotation_dir, '{}.xml'.format(img_id))).getroot()
            size_node = xml_root.find('size')
            img_width = int(size_node.find('width').text)
            img_height = int(size_node.find('height').text)
            folder_name = xml_root.find('folder').text
            infos.append({
                'id': img_id,
                'filename': osp.join(folder_name, jpg_name),
                'width': img_width,
                'height': img_height,
            })

        return infos
43 | 


--------------------------------------------------------------------------------
/mmdet/datasets/xml_style.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import xml.etree.ElementTree as ET
 3 | 
 4 | import mmcv
 5 | import numpy as np
 6 | 
 7 | from .custom import CustomDataset
 8 | from .registry import DATASETS
 9 | 
10 | 
@DATASETS.register_module
class XMLDataset(CustomDataset):
    """Dataset whose annotations are stored as one XML file per image.

    Args:
        min_size (int, optional): boxes whose width or height is below this
            value are moved to the ignore lists. Only allowed outside test
            mode (enforced by an assert in ``get_ann_info``).
        **kwargs: forwarded to ``CustomDataset``.
    """

    def __init__(self, min_size=None, **kwargs):
        super(XMLDataset, self).__init__(**kwargs)
        # labels are 1-based: class i of CLASSES maps to label i + 1
        self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)}
        self.min_size = min_size

    def load_annotations(self, ann_file):
        """Read image ids from ``ann_file`` and return basic image info.

        Returns:
            list[dict]: one dict per image with ``id``, ``filename``
                (``JPEGImages/<id>.jpg``), ``width`` and ``height`` taken
                from the XML ``size`` element.
        """
        img_infos = []
        img_ids = mmcv.list_from_file(ann_file)
        for img_id in img_ids:
            filename = 'JPEGImages/{}.jpg'.format(img_id)
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                '{}.xml'.format(img_id))
            tree = ET.parse(xml_path)
            root = tree.getroot()
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            img_infos.append(
                dict(id=img_id, filename=filename, width=width, height=height))
        return img_infos

    def get_ann_info(self, idx):
        """Parse the XML annotation of image ``idx``.

        Returns:
            dict: ``bboxes`` / ``labels`` for regular objects and
                ``bboxes_ignore`` / ``labels_ignore`` for objects flagged
                as difficult or smaller than ``min_size``. Boxes are float32
                arrays with four columns, labels are int64 arrays.
        """
        img_id = self.img_infos[idx]['id']
        xml_path = osp.join(self.img_prefix, 'Annotations',
                            '{}.xml'.format(img_id))
        tree = ET.parse(xml_path)
        root = tree.getroot()
        bboxes = []
        labels = []
        bboxes_ignore = []
        labels_ignore = []
        for obj in root.findall('object'):
            name = obj.find('name').text
            label = self.cat2label[name]
            difficult = int(obj.find('difficult').text)
            bnd_box = obj.find('bndbox')
            bbox = [
                int(bnd_box.find('xmin').text),
                int(bnd_box.find('ymin').text),
                int(bnd_box.find('xmax').text),
                int(bnd_box.find('ymax').text)
            ]
            ignore = False
            if self.min_size:
                # size filtering is a training-only option
                assert not self.test_mode
                w = bbox[2] - bbox[0]
                h = bbox[3] - bbox[1]
                if w < self.min_size or h < self.min_size:
                    ignore = True
            if difficult or ignore:
                bboxes_ignore.append(bbox)
                labels_ignore.append(label)
            else:
                bboxes.append(bbox)
                labels.append(label)
        if not bboxes:
            bboxes = np.zeros((0, 4))
            labels = np.zeros((0, ))
        else:
            # shift all coordinates by one
            # (presumably 1-based XML -> 0-based pixels; confirm)
            bboxes = np.array(bboxes, ndmin=2) - 1
            labels = np.array(labels)
        if not bboxes_ignore:
            bboxes_ignore = np.zeros((0, 4))
            labels_ignore = np.zeros((0, ))
        else:
            bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
            labels_ignore = np.array(labels_ignore)
        ann = dict(
            bboxes=bboxes.astype(np.float32),
            labels=labels.astype(np.int64),
            bboxes_ignore=bboxes_ignore.astype(np.float32),
            labels_ignore=labels_ignore.astype(np.int64))
        return ann
87 | 


--------------------------------------------------------------------------------
/mmdet/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .anchor_heads import *  # noqa: F401,F403
 2 | from .backbones import *  # noqa: F401,F403
 3 | from .bbox_heads import *  # noqa: F401,F403
 4 | from .builder import (build_backbone, build_detector, build_head, build_loss,
 5 |                       build_neck, build_roi_extractor, build_shared_head)
 6 | from .detectors import *  # noqa: F401,F403
 7 | from .losses import *  # noqa: F401,F403
 8 | from .mask_heads import *  # noqa: F401,F403
 9 | from .necks import *  # noqa: F401,F403
10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
11 |                        ROI_EXTRACTORS, SHARED_HEADS)
12 | from .roi_extractors import *  # noqa: F401,F403
13 | from .shared_heads import *  # noqa: F401,F403
14 | 
15 | __all__ = [
16 |     'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',
17 |     'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',
18 |     'build_shared_head', 'build_head', 'build_loss', 'build_detector'
19 | ]
20 | 


--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .anchor_head import AnchorHead
 2 | from .fcos_head import FCOSHead
 3 | from .fovea_head import FoveaHead
 4 | from .free_anchor_retina_head import FreeAnchorRetinaHead
 5 | from .ga_retina_head import GARetinaHead
 6 | from .ga_rpn_head import GARPNHead
 7 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead
 8 | from .reppoints_head import RepPointsHead
 9 | from .retina_head import RetinaHead
10 | from .rpn_head import RPNHead
11 | from .ssd_head import SSDHead
12 | 
13 | __all__ = [
14 |     'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead',
15 |     'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead',
16 |     'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead'
17 | ]
18 | 


--------------------------------------------------------------------------------
/mmdet/models/anchor_heads/retina_head.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch.nn as nn
  3 | from mmcv.cnn import normal_init
  4 | 
  5 | from ..registry import HEADS
  6 | from ..utils import ConvModule, bias_init_with_prob
  7 | from .anchor_head import AnchorHead
  8 | 
  9 | 
 10 | @HEADS.register_module
 11 | class RetinaHead(AnchorHead):
 12 |     """
 13 |     An anchor-based head used in [1]_.
 14 | 
 15 |     The head contains two subnetworks. The first classifies anchor boxes and
 16 |     the second regresses deltas for the anchors.
 17 | 
 18 |     References:
 19 |         .. [1]  https://arxiv.org/pdf/1708.02002.pdf
 20 | 
 21 |     Example:
 22 |         >>> import torch
 23 |         >>> self = RetinaHead(11, 7)
 24 |         >>> x = torch.rand(1, 7, 32, 32)
 25 |         >>> cls_score, bbox_pred = self.forward_single(x)
 26 |         >>> # Each anchor predicts a score for each class except background
 27 |         >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors
 28 |         >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors
 29 |         >>> assert cls_per_anchor == (self.num_classes - 1)
 30 |         >>> assert box_per_anchor == 4
 31 |     """
 32 | 
 33 |     def __init__(self,
 34 |                  num_classes,
 35 |                  in_channels,
 36 |                  stacked_convs=4,
 37 |                  octave_base_scale=4,
 38 |                  scales_per_octave=3,
 39 |                  conv_cfg=None,
 40 |                  norm_cfg=None,
 41 |                  **kwargs):
 42 |         self.stacked_convs = stacked_convs
 43 |         self.octave_base_scale = octave_base_scale
 44 |         self.scales_per_octave = scales_per_octave
 45 |         self.conv_cfg = conv_cfg
 46 |         self.norm_cfg = norm_cfg
 47 |         octave_scales = np.array(
 48 |             [2**(i / scales_per_octave) for i in range(scales_per_octave)])
 49 |         anchor_scales = octave_scales * octave_base_scale
 50 |         super(RetinaHead, self).__init__(
 51 |             num_classes, in_channels, anchor_scales=anchor_scales, **kwargs)
 52 | 
 53 |     def _init_layers(self):
 54 |         self.relu = nn.ReLU(inplace=True)
 55 |         self.cls_convs = nn.ModuleList()
 56 |         self.reg_convs = nn.ModuleList()
 57 |         for i in range(self.stacked_convs):
 58 |             chn = self.in_channels if i == 0 else self.feat_channels
 59 |             self.cls_convs.append(
 60 |                 ConvModule(
 61 |                     chn,
 62 |                     self.feat_channels,
 63 |                     3,
 64 |                     stride=1,
 65 |                     padding=1,
 66 |                     conv_cfg=self.conv_cfg,
 67 |                     norm_cfg=self.norm_cfg))
 68 |             self.reg_convs.append(
 69 |                 ConvModule(
 70 |                     chn,
 71 |                     self.feat_channels,
 72 |                     3,
 73 |                     stride=1,
 74 |                     padding=1,
 75 |                     conv_cfg=self.conv_cfg,
 76 |                     norm_cfg=self.norm_cfg))
 77 |         self.retina_cls = nn.Conv2d(
 78 |             self.feat_channels,
 79 |             self.num_anchors * self.cls_out_channels,
 80 |             3,
 81 |             padding=1)
 82 |         self.retina_reg = nn.Conv2d(
 83 |             self.feat_channels, self.num_anchors * 4, 3, padding=1)
 84 | 
 85 |     def init_weights(self):
 86 |         for m in self.cls_convs:
 87 |             normal_init(m.conv, std=0.01)
 88 |         for m in self.reg_convs:
 89 |             normal_init(m.conv, std=0.01)
 90 |         bias_cls = bias_init_with_prob(0.01)
 91 |         normal_init(self.retina_cls, std=0.01, bias=bias_cls)
 92 |         normal_init(self.retina_reg, std=0.01)
 93 | 
 94 |     def forward_single(self, x):
 95 |         cls_feat = x
 96 |         reg_feat = x
 97 |         for cls_conv in self.cls_convs:
 98 |             cls_feat = cls_conv(cls_feat)
 99 |         for reg_conv in self.reg_convs:
100 |             reg_feat = reg_conv(reg_feat)
101 |         cls_score = self.retina_cls(cls_feat)
102 |         bbox_pred = self.retina_reg(reg_feat)
103 |         return cls_score, bbox_pred
104 | 


--------------------------------------------------------------------------------
/mmdet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
 1 | from .hrnet import HRNet
 2 | from .resnet import ResNet, make_res_layer
 3 | from .resnext import ResNeXt
 4 | from .resnext import make_res_layer as make_resx_layer
 5 | from .res2net_v1b import Res2Net
 6 | from .res2net_v1b import make_res2_layer
 7 | from .ssd_vgg import SSDVGG
 8 | 
 9 | __all__ = ['ResNet', 'make_res_layer', 'make_resx_layer', 'make_res2_layer', 'Res2Net', 'ResNeXt', 'SSDVGG', 'HRNet']
10 | 


--------------------------------------------------------------------------------
/mmdet/models/bbox_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .bbox_head import BBoxHead
 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead
 3 | from .double_bbox_head import DoubleConvFCBBoxHead
 4 | from .selsa_bbox_head import SelsaBBoxHead
 5 | from .hnonlocal_bbox_head import HNLBBoxHead
 6 | from .hnmb_bbox_head import HNMBBBoxHead
 7 | from .hmp_bbox_head import HMPBBoxHead
 8 | from .hrnmp_bbox_head import HRNMPBBoxHead
 9 | 
10 | __all__ = [
11 |     'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead', 'SelsaBBoxHead',
12 |     'HNLBBoxHead', 'HNMBBBoxHead', 'HMPBBoxHead', 'HRNMPBBoxHead'
13 | ]
14 | 


--------------------------------------------------------------------------------
/mmdet/models/builder.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from mmdet.utils import build_from_cfg
 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
 5 |                        ROI_EXTRACTORS, SHARED_HEADS)
 6 | 
 7 | 
 8 | def build(cfg, registry, default_args=None):
 9 |     if isinstance(cfg, list):
10 |         modules = [
11 |             build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
12 |         ]
13 |         return nn.Sequential(*modules)
14 |     else:
15 |         return build_from_cfg(cfg, registry, default_args)
16 | 
17 | 
def build_backbone(cfg):
    """Build a backbone from ``cfg`` using the ``BACKBONES`` registry."""
    return build(cfg, registry=BACKBONES)
20 | 
21 | 
def build_neck(cfg):
    """Build a neck from ``cfg`` using the ``NECKS`` registry."""
    return build(cfg, registry=NECKS)
24 | 
25 | 
def build_roi_extractor(cfg):
    """Build a RoI extractor from ``cfg`` using ``ROI_EXTRACTORS``."""
    return build(cfg, registry=ROI_EXTRACTORS)
28 | 
29 | 
def build_shared_head(cfg):
    """Build a shared head from ``cfg`` using ``SHARED_HEADS``."""
    return build(cfg, registry=SHARED_HEADS)
32 | 
33 | 
def build_head(cfg):
    """Build a head from ``cfg`` using the ``HEADS`` registry."""
    return build(cfg, registry=HEADS)
36 | 
37 | 
def build_loss(cfg):
    """Build a loss from ``cfg`` using the ``LOSSES`` registry."""
    return build(cfg, registry=LOSSES)
40 | 
41 | 
def build_detector(cfg, train_cfg=None, test_cfg=None):
    """Build a detector from ``cfg`` using the ``DETECTORS`` registry.

    ``train_cfg`` and ``test_cfg`` are passed along as default arguments.
    """
    default_args = dict(train_cfg=train_cfg, test_cfg=test_cfg)
    return build(cfg, DETECTORS, default_args)
44 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
 1 | from .base import BaseDetector
 2 | from .cascade_rcnn import CascadeRCNN
 3 | from .double_head_rcnn import DoubleHeadRCNN
 4 | from .fast_rcnn import FastRCNN
 5 | from .faster_rcnn import FasterRCNN
 6 | from .selsa_rcnn import SelsaRCNN
 7 | from .hnl_rcnn import HNLRCNN
 8 | from .hnmb_rcnn import HNMBRCNN
 9 | from .fcos import FCOS
10 | from .fovea import FOVEA
11 | from .grid_rcnn import GridRCNN
12 | from .htc import HybridTaskCascade
13 | from .mask_rcnn import MaskRCNN
14 | from .mask_scoring_rcnn import MaskScoringRCNN
15 | from .reppoints_detector import RepPointsDetector
16 | from .retinanet import RetinaNet
17 | from .rpn import RPN
18 | from .single_stage import SingleStageDetector
19 | from .two_stage import TwoStageDetector
20 | 
21 | __all__ = [
22 |     'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
23 |     'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade',
24 |     'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN',
25 |     'RepPointsDetector', 'FOVEA', 'SelsaRCNN', 'HNLRCNN', 'HNMBRCNN'
26 | ]
27 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/fast_rcnn.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FastRCNN(TwoStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  bbox_roi_extractor,
11 |                  bbox_head,
12 |                  train_cfg,
13 |                  test_cfg,
14 |                  neck=None,
15 |                  shared_head=None,
16 |                  mask_roi_extractor=None,
17 |                  mask_head=None,
18 |                  pretrained=None):
19 |         super(FastRCNN, self).__init__(
20 |             backbone=backbone,
21 |             neck=neck,
22 |             shared_head=shared_head,
23 |             bbox_roi_extractor=bbox_roi_extractor,
24 |             bbox_head=bbox_head,
25 |             train_cfg=train_cfg,
26 |             test_cfg=test_cfg,
27 |             mask_roi_extractor=mask_roi_extractor,
28 |             mask_head=mask_head,
29 |             pretrained=pretrained)
30 | 
31 |     def forward_test(self, imgs, img_metas, proposals, **kwargs):
32 |         """
33 |         Args:
34 |             imgs (List[Tensor]): the outer list indicates test-time
35 |                 augmentations and inner Tensor should have a shape NxCxHxW,
36 |                 which contains all images in the batch.
37 |             img_meta (List[List[dict]]): the outer list indicates test-time
38 |                 augs (multiscale, flip, etc.) and the inner list indicates
39 |                 images in a batch
40 |             proposals (List[List[Tensor | None]]): predefiend proposals for
41 |                 each test-time augmentation and each item.
42 |         """
43 |         for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:
44 |             if not isinstance(var, list):
45 |                 raise TypeError('{} must be a list, but got {}'.format(
46 |                     name, type(var)))
47 | 
48 |         num_augs = len(imgs)
49 |         if num_augs != len(img_metas):
50 |             raise ValueError(
51 |                 'num of augmentations ({}) != num of image meta ({})'.format(
52 |                     len(imgs), len(img_metas)))
53 |         # TODO: remove the restriction of imgs_per_gpu == 1 when prepared
54 |         imgs_per_gpu = imgs[0].size(0)
55 |         assert imgs_per_gpu == 1
56 | 
57 |         if num_augs == 1:
58 |             return self.simple_test(imgs[0], img_metas[0], proposals[0],
59 |                                     **kwargs)
60 |         else:
61 |             return self.aug_test(imgs, img_metas, proposals, **kwargs)
62 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/faster_rcnn.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FasterRCNN(TwoStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  rpn_head,
11 |                  bbox_roi_extractor,
12 |                  bbox_head,
13 |                  train_cfg,
14 |                  test_cfg,
15 |                  neck=None,
16 |                  shared_head=None,
17 |                  pretrained=None):
18 |         super(FasterRCNN, self).__init__(
19 |             backbone=backbone,
20 |             neck=neck,
21 |             shared_head=shared_head,
22 |             rpn_head=rpn_head,
23 |             bbox_roi_extractor=bbox_roi_extractor,
24 |             bbox_head=bbox_head,
25 |             train_cfg=train_cfg,
26 |             test_cfg=test_cfg,
27 |             pretrained=pretrained)
28 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/fcos.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .single_stage import SingleStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FCOS(SingleStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  neck,
11 |                  bbox_head,
12 |                  train_cfg=None,
13 |                  test_cfg=None,
14 |                  pretrained=None):
15 |         super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,
16 |                                    test_cfg, pretrained)
17 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/fovea.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .single_stage import SingleStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class FOVEA(SingleStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  neck,
11 |                  bbox_head,
12 |                  train_cfg=None,
13 |                  test_cfg=None,
14 |                  pretrained=None):
15 |         super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg,
16 |                                     test_cfg, pretrained)
17 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/mask_rcnn.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .two_stage import TwoStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class MaskRCNN(TwoStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  rpn_head,
11 |                  bbox_roi_extractor,
12 |                  bbox_head,
13 |                  mask_roi_extractor,
14 |                  mask_head,
15 |                  train_cfg,
16 |                  test_cfg,
17 |                  neck=None,
18 |                  shared_head=None,
19 |                  pretrained=None):
20 |         super(MaskRCNN, self).__init__(
21 |             backbone=backbone,
22 |             neck=neck,
23 |             shared_head=shared_head,
24 |             rpn_head=rpn_head,
25 |             bbox_roi_extractor=bbox_roi_extractor,
26 |             bbox_head=bbox_head,
27 |             mask_roi_extractor=mask_roi_extractor,
28 |             mask_head=mask_head,
29 |             train_cfg=train_cfg,
30 |             test_cfg=test_cfg,
31 |             pretrained=pretrained)
32 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/reppoints_detector.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms
 4 | from ..registry import DETECTORS
 5 | from .single_stage import SingleStageDetector
 6 | 
 7 | 
 8 | @DETECTORS.register_module
 9 | class RepPointsDetector(SingleStageDetector):
10 |     """RepPoints: Point Set Representation for Object Detection.
11 | 
12 |         This detector is the implementation of:
13 |         - RepPoints detector (https://arxiv.org/pdf/1904.11490)
14 |     """
15 | 
16 |     def __init__(self,
17 |                  backbone,
18 |                  neck,
19 |                  bbox_head,
20 |                  train_cfg=None,
21 |                  test_cfg=None,
22 |                  pretrained=None):
23 |         super(RepPointsDetector,
24 |               self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg,
25 |                              pretrained)
26 | 
27 |     def merge_aug_results(self, aug_bboxes, aug_scores, img_metas):
28 |         """Merge augmented detection bboxes and scores.
29 | 
30 |         Args:
31 |             aug_bboxes (list[Tensor]): shape (n, 4*#class)
32 |             aug_scores (list[Tensor] or None): shape (n, #class)
33 |             img_shapes (list[Tensor]): shape (3, ).
34 | 
35 |         Returns:
36 |             tuple: (bboxes, scores)
37 |         """
38 |         recovered_bboxes = []
39 |         for bboxes, img_info in zip(aug_bboxes, img_metas):
40 |             img_shape = img_info[0]['img_shape']
41 |             scale_factor = img_info[0]['scale_factor']
42 |             flip = img_info[0]['flip']
43 |             bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
44 |             recovered_bboxes.append(bboxes)
45 |         bboxes = torch.cat(recovered_bboxes, dim=0)
46 |         if aug_scores is None:
47 |             return bboxes
48 |         else:
49 |             scores = torch.cat(aug_scores, dim=0)
50 |             return bboxes, scores
51 | 
52 |     def aug_test(self, imgs, img_metas, rescale=False):
53 |         # recompute feats to save memory
54 |         feats = self.extract_feats(imgs)
55 | 
56 |         aug_bboxes = []
57 |         aug_scores = []
58 |         for x, img_meta in zip(feats, img_metas):
59 |             # only one image in the batch
60 |             outs = self.bbox_head(x)
61 |             bbox_inputs = outs + (img_meta, self.test_cfg, False, False)
62 |             det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0]
63 |             aug_bboxes.append(det_bboxes)
64 |             aug_scores.append(det_scores)
65 | 
66 |         # after merging, bboxes will be rescaled to the original image size
67 |         merged_bboxes, merged_scores = self.merge_aug_results(
68 |             aug_bboxes, aug_scores, img_metas)
69 |         det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
70 |                                                 self.test_cfg.score_thr,
71 |                                                 self.test_cfg.nms,
72 |                                                 self.test_cfg.max_per_img)
73 | 
74 |         if rescale:
75 |             _det_bboxes = det_bboxes
76 |         else:
77 |             _det_bboxes = det_bboxes.clone()
78 |             _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
79 |         bbox_results = bbox2result(_det_bboxes, det_labels,
80 |                                    self.bbox_head.num_classes)
81 |         return bbox_results
82 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/retinanet.py:
--------------------------------------------------------------------------------
 1 | from ..registry import DETECTORS
 2 | from .single_stage import SingleStageDetector
 3 | 
 4 | 
 5 | @DETECTORS.register_module
 6 | class RetinaNet(SingleStageDetector):
 7 | 
 8 |     def __init__(self,
 9 |                  backbone,
10 |                  neck,
11 |                  bbox_head,
12 |                  train_cfg=None,
13 |                  test_cfg=None,
14 |                  pretrained=None):
15 |         super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
16 |                                         test_cfg, pretrained)
17 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/rpn.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | from mmdet.core import bbox_mapping, tensor2imgs
 4 | from .. import builder
 5 | from ..registry import DETECTORS
 6 | from .base import BaseDetector
 7 | from .test_mixins import RPNTestMixin
 8 | 
 9 | 
@DETECTORS.register_module
class RPN(BaseDetector, RPNTestMixin):
    """Detector made of a backbone, an optional neck and an RPN head.

    Its test methods return the proposals of the first image as a numpy
    array.
    """

    def __init__(self,
                 backbone,
                 neck,
                 rpn_head,
                 train_cfg,
                 test_cfg,
                 pretrained=None):
        super().__init__()
        self.backbone = builder.build_backbone(backbone)
        if neck is None:
            self.neck = None
        else:
            self.neck = builder.build_neck(neck)
        self.rpn_head = builder.build_head(rpn_head)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialise backbone, neck (if present) and RPN head weights."""
        super().init_weights(pretrained)
        self.backbone.init_weights(pretrained=pretrained)
        if self.with_neck:
            self.neck.init_weights()
        self.rpn_head.init_weights()

    def extract_feat(self, img):
        """Run the backbone, then the neck when there is one."""
        feats = self.backbone(img)
        return self.neck(feats) if self.with_neck else feats

    def forward_dummy(self, img):
        """Return the raw RPN head outputs for ``img``."""
        return self.rpn_head(self.extract_feat(img))

    def forward_train(self,
                      img,
                      img_meta,
                      gt_bboxes=None,
                      gt_bboxes_ignore=None):
        """Compute the RPN losses for one batch.

        When ``train_cfg.rpn`` has a truthy ``debug`` entry, the input images
        are converted with ``tensor2imgs`` and stored on the head as
        ``debug_imgs``.
        """
        rpn_cfg = self.train_cfg.rpn
        if rpn_cfg.get('debug', False):
            self.rpn_head.debug_imgs = tensor2imgs(img)

        rpn_outs = self.rpn_head(self.extract_feat(img))
        loss_args = rpn_outs + (gt_bboxes, img_meta, self.train_cfg.rpn)
        return self.rpn_head.loss(
            *loss_args, gt_bboxes_ignore=gt_bboxes_ignore)

    def simple_test(self, img, img_meta, rescale=False):
        """Return the proposals of the first image as a numpy array.

        With ``rescale`` set, the first four columns of every proposal
        tensor are divided in place by that image's ``scale_factor``.
        """
        feats = self.extract_feat(img)
        proposal_list = self.simple_test_rpn(feats, img_meta,
                                             self.test_cfg.rpn)
        if rescale:
            for props, meta in zip(proposal_list, img_meta):
                props[:, :4] /= meta['scale_factor']
        # TODO: remove this restriction
        first_img_props = proposal_list[0]
        return first_img_props.cpu().numpy()

    def aug_test(self, imgs, img_metas, rescale=False):
        """Return merged multi-augmentation proposals of the first image.

        Unless ``rescale`` is set, the proposals are passed through
        ``bbox_mapping`` with the meta data of the first augmentation.
        """
        feats = self.extract_feats(imgs)
        proposal_list = self.aug_test_rpn(feats, img_metas,
                                          self.test_cfg.rpn)
        if not rescale:
            for props, meta in zip(proposal_list, img_metas[0]):
                props[:, :4] = bbox_mapping(props[:, :4], meta['img_shape'],
                                            meta['scale_factor'],
                                            meta['flip'])
        # TODO: remove this restriction
        first_img_props = proposal_list[0]
        return first_img_props.cpu().numpy()

    def show_result(self, data, result, dataset=None, top_k=20):
        """Show RPN proposals on the image.

        Although we assume batch size is 1, this method supports arbitrary
        batch size.
        """
        img_metas = data['img_meta'][0].data[0]
        norm_cfg = img_metas[0]['img_norm_cfg']
        imgs = tensor2imgs(data['img'][0], **norm_cfg)
        assert len(imgs) == len(img_metas)
        for img, meta in zip(imgs, img_metas):
            height, width, _ = meta['img_shape']
            # Crop to ``img_shape`` before drawing.
            mmcv.imshow_bboxes(img[:height, :width, :], result, top_k=top_k)
98 | 


--------------------------------------------------------------------------------
/mmdet/models/detectors/single_stage.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | from mmdet.core import bbox2result
 4 | from .. import builder
 5 | from ..registry import DETECTORS
 6 | from .base import BaseDetector
 7 | 
 8 | 
 9 | @DETECTORS.register_module
10 | class SingleStageDetector(BaseDetector):
11 |     """Base class for single-stage detectors.
12 | 
13 |     Single-stage detectors directly and densely predict bounding boxes on the
14 |     output features of the backbone+neck.
15 |     """
16 | 
17 |     def __init__(self,
18 |                  backbone,
19 |                  neck=None,
20 |                  bbox_head=None,
21 |                  train_cfg=None,
22 |                  test_cfg=None,
23 |                  pretrained=None):
24 |         super(SingleStageDetector, self).__init__()
25 |         self.backbone = builder.build_backbone(backbone)
26 |         if neck is not None:
27 |             self.neck = builder.build_neck(neck)
28 |         self.bbox_head = builder.build_head(bbox_head)
29 |         self.train_cfg = train_cfg
30 |         self.test_cfg = test_cfg
31 |         self.init_weights(pretrained=pretrained)
32 | 
33 |     def init_weights(self, pretrained=None):
34 |         super(SingleStageDetector, self).init_weights(pretrained)
35 |         self.backbone.init_weights(pretrained=pretrained)
36 |         if self.with_neck:
37 |             if isinstance(self.neck, nn.Sequential):
38 |                 for m in self.neck:
39 |                     m.init_weights()
40 |             else:
41 |                 self.neck.init_weights()
42 |         self.bbox_head.init_weights()
43 | 
44 |     def extract_feat(self, img):
45 |         """Directly extract features from the backbone+neck
46 |         """
47 |         x = self.backbone(img)
48 |         if self.with_neck:
49 |             x = self.neck(x)
50 |         return x
51 | 
52 |     def forward_dummy(self, img):
53 |         """Used for computing network flops.
54 | 
55 |         See `mmedetection/tools/get_flops.py`
56 |         """
57 |         x = self.extract_feat(img)
58 |         outs = self.bbox_head(x)
59 |         return outs
60 | 
61 |     def forward_train(self,
62 |                       img,
63 |                       img_metas,
64 |                       gt_bboxes,
65 |                       gt_labels,
66 |                       gt_bboxes_ignore=None):
67 |         x = self.extract_feat(img)
68 |         outs = self.bbox_head(x)
69 |         loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
70 |         losses = self.bbox_head.loss(
71 |             *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
72 |         return losses
73 | 
74 |     def simple_test(self, img, img_meta, rescale=False):
75 |         x = self.extract_feat(img)
76 |         outs = self.bbox_head(x)
77 |         bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
78 |         bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
79 |         bbox_results = [
80 |             bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
81 |             for det_bboxes, det_labels in bbox_list
82 |         ]
83 |         return bbox_results[0]
84 | 
85 |     def aug_test(self, imgs, img_metas, rescale=False):
86 |         raise NotImplementedError
87 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/__init__.py:
--------------------------------------------------------------------------------
 1 | from .accuracy import Accuracy, accuracy
 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss
 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
 4 |                                  cross_entropy, mask_cross_entropy)
 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss
 6 | from .ghm_loss import GHMC, GHMR
 7 | from .iou_loss import BoundedIoULoss, IoULoss, bounded_iou_loss, iou_loss
 8 | from .mse_loss import MSELoss, mse_loss
 9 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss
10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss
11 | 
12 | __all__ = [
13 |     'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
14 |     'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
15 |     'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss',
16 |     'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss',
17 |     'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss',
18 |     'weight_reduce_loss', 'weighted_loss'
19 | ]
20 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/accuracy.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | 
 4 | def accuracy(pred, target, topk=1):
 5 |     assert isinstance(topk, (int, tuple))
 6 |     if isinstance(topk, int):
 7 |         topk = (topk, )
 8 |         return_single = True
 9 |     else:
10 |         return_single = False
11 | 
12 |     maxk = max(topk)
13 |     _, pred_label = pred.topk(maxk, dim=1)
14 |     pred_label = pred_label.t()
15 |     correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))
16 | 
17 |     res = []
18 |     for k in topk:
19 |         correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
20 |         res.append(correct_k.mul_(100.0 / pred.size(0)))
21 |     return res[0] if return_single else res
22 | 
23 | 
class Accuracy(nn.Module):
    """Module wrapper around :func:`accuracy` with a fixed ``topk``.

    Args:
        topk (int | tuple[int]): Forwarded unchanged to :func:`accuracy` on
            every call.
    """

    def __init__(self, topk=(1, )):
        super(Accuracy, self).__init__()
        self.topk = topk

    def forward(self, pred, target):
        """Return ``accuracy(pred, target, self.topk)``."""
        top_k = self.topk
        return accuracy(pred, target, top_k)
32 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/balanced_l1_loss.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import torch.nn as nn
 4 | 
 5 | from ..registry import LOSSES
 6 | from .utils import weighted_loss
 7 | 
 8 | 
 9 | @weighted_loss
10 | def balanced_l1_loss(pred,
11 |                      target,
12 |                      beta=1.0,
13 |                      alpha=0.5,
14 |                      gamma=1.5,
15 |                      reduction='mean'):
16 |     assert beta > 0
17 |     assert pred.size() == target.size() and target.numel() > 0
18 | 
19 |     diff = torch.abs(pred - target)
20 |     b = np.e**(gamma / alpha) - 1
21 |     loss = torch.where(
22 |         diff < beta, alpha / b *
23 |         (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,
24 |         gamma * diff + gamma / b - alpha * beta)
25 | 
26 |     return loss
27 | 
28 | 
@LOSSES.register_module
class BalancedL1Loss(nn.Module):
    """Balanced L1 Loss

    arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)

    Args:
        alpha (float): Forwarded to ``balanced_l1_loss``.
        gamma (float): Forwarded to ``balanced_l1_loss``.
        beta (float): Forwarded to ``balanced_l1_loss``.
        reduction (str): Default reduction, used unless overridden in
            ``forward``.
        loss_weight (float): Multiplier applied to the computed loss.
    """

    def __init__(self,
                 alpha=0.5,
                 gamma=1.5,
                 beta=1.0,
                 reduction='mean',
                 loss_weight=1.0):
        super(BalancedL1Loss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.beta = beta
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Compute the weighted, reduced Balanced L1 loss.

        ``reduction_override`` (one of None, 'none', 'mean', 'sum') takes
        precedence over the reduction given at construction time. Extra
        keyword arguments are passed through to ``balanced_l1_loss``.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        unscaled = balanced_l1_loss(
            pred,
            target,
            weight,
            alpha=self.alpha,
            gamma=self.gamma,
            beta=self.beta,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        return self.loss_weight * unscaled
70 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/cross_entropy_loss.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | from ..registry import LOSSES
  6 | from .utils import weight_reduce_loss
  7 | 
  8 | 
  9 | def cross_entropy(pred, label, weight=None, reduction='mean', avg_factor=None):
 10 |     # element-wise losses
 11 |     loss = F.cross_entropy(pred, label, reduction='none')
 12 | 
 13 |     # apply weights and do the reduction
 14 |     if weight is not None:
 15 |         weight = weight.float()
 16 |     loss = weight_reduce_loss(
 17 |         loss, weight=weight, reduction=reduction, avg_factor=avg_factor)
 18 | 
 19 |     return loss
 20 | 
 21 | 
 22 | def _expand_binary_labels(labels, label_weights, label_channels):
 23 |     bin_labels = labels.new_full((labels.size(0), label_channels), 0)
 24 |     inds = torch.nonzero(labels >= 1).squeeze()
 25 |     if inds.numel() > 0:
 26 |         bin_labels[inds, labels[inds] - 1] = 1
 27 |     if label_weights is None:
 28 |         bin_label_weights = None
 29 |     else:
 30 |         bin_label_weights = label_weights.view(-1, 1).expand(
 31 |             label_weights.size(0), label_channels)
 32 |     return bin_labels, bin_label_weights
 33 | 
 34 | 
 35 | def binary_cross_entropy(pred,
 36 |                          label,
 37 |                          weight=None,
 38 |                          reduction='mean',
 39 |                          avg_factor=None):
 40 |     if pred.dim() != label.dim():
 41 |         label, weight = _expand_binary_labels(label, weight, pred.size(-1))
 42 | 
 43 |     # weighted element-wise losses
 44 |     if weight is not None:
 45 |         weight = weight.float()
 46 |     loss = F.binary_cross_entropy_with_logits(
 47 |         pred, label.float(), weight, reduction='none')
 48 |     # do the reduction for the weighted loss
 49 |     loss = weight_reduce_loss(loss, reduction=reduction, avg_factor=avg_factor)
 50 | 
 51 |     return loss
 52 | 
 53 | 
 54 | def mask_cross_entropy(pred, target, label, reduction='mean', avg_factor=None):
 55 |     # TODO: handle these two reserved arguments
 56 |     assert reduction == 'mean' and avg_factor is None
 57 |     num_rois = pred.size()[0]
 58 |     inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
 59 |     pred_slice = pred[inds, label].squeeze(1)
 60 |     return F.binary_cross_entropy_with_logits(
 61 |         pred_slice, target, reduction='mean')[None]
 62 | 
 63 | 
 64 | @LOSSES.register_module
 65 | class CrossEntropyLoss(nn.Module):
 66 | 
 67 |     def __init__(self,
 68 |                  use_sigmoid=False,
 69 |                  use_mask=False,
 70 |                  reduction='mean',
 71 |                  loss_weight=1.0):
 72 |         super(CrossEntropyLoss, self).__init__()
 73 |         assert (use_sigmoid is False) or (use_mask is False)
 74 |         self.use_sigmoid = use_sigmoid
 75 |         self.use_mask = use_mask
 76 |         self.reduction = reduction
 77 |         self.loss_weight = loss_weight
 78 | 
 79 |         if self.use_sigmoid:
 80 |             self.cls_criterion = binary_cross_entropy
 81 |         elif self.use_mask:
 82 |             self.cls_criterion = mask_cross_entropy
 83 |         else:
 84 |             self.cls_criterion = cross_entropy
 85 | 
 86 |     def forward(self,
 87 |                 cls_score,
 88 |                 label,
 89 |                 weight=None,
 90 |                 avg_factor=None,
 91 |                 reduction_override=None,
 92 |                 **kwargs):
 93 |         assert reduction_override in (None, 'none', 'mean', 'sum')
 94 |         reduction = (
 95 |             reduction_override if reduction_override else self.reduction)
 96 |         loss_cls = self.loss_weight * self.cls_criterion(
 97 |             cls_score,
 98 |             label,
 99 |             weight,
100 |             reduction=reduction,
101 |             avg_factor=avg_factor,
102 |             **kwargs)
103 |         return loss_cls
104 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss
 5 | from ..registry import LOSSES
 6 | from .utils import weight_reduce_loss
 7 | 
 8 | 
 9 | # This method is only for debugging
10 | def py_sigmoid_focal_loss(pred,
11 |                           target,
12 |                           weight=None,
13 |                           gamma=2.0,
14 |                           alpha=0.25,
15 |                           reduction='mean',
16 |                           avg_factor=None):
17 |     pred_sigmoid = pred.sigmoid()
18 |     target = target.type_as(pred)
19 |     pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)
20 |     focal_weight = (alpha * target + (1 - alpha) *
21 |                     (1 - target)) * pt.pow(gamma)
22 |     loss = F.binary_cross_entropy_with_logits(
23 |         pred, target, reduction='none') * focal_weight
24 |     loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
25 |     return loss
26 | 
27 | 
def sigmoid_focal_loss(pred,
                       target,
                       weight=None,
                       gamma=2.0,
                       alpha=0.25,
                       reduction='mean',
                       avg_factor=None):
    """Sigmoid focal loss backed by the ``mmdet.ops`` implementation.

    Args:
        pred (Tensor): Logits.
        target (Tensor): Targets in the form the op expects.
        weight (Tensor, optional): Weights; reshaped to a single column
            before being multiplied onto the loss.
        gamma (float): Passed positionally to the op.
        alpha (float): Passed positionally to the op.
        reduction (str): "none", "mean" or "sum".
        avg_factor (float, optional): Denominator used in place of the
            element count when averaging.

    Returns:
        Tensor: The weighted and reduced loss.
    """
    # Function.apply does not accept keyword arguments, so the decorator
    # "weighted_loss" is not applicable
    raw_loss = _sigmoid_focal_loss(pred, target, gamma, alpha)
    # TODO: find a proper way to handle the shape of weight
    column_weight = weight.view(-1, 1) if weight is not None else weight
    return weight_reduce_loss(raw_loss, column_weight, reduction, avg_factor)
43 | 
44 | 
@LOSSES.register_module
class FocalLoss(nn.Module):
    """Focal loss module; only the sigmoid variant is available.

    Args:
        use_sigmoid (bool): Must be True.
        gamma (float): Forwarded to ``sigmoid_focal_loss``.
        alpha (float): Forwarded to ``sigmoid_focal_loss``.
        reduction (str): Default reduction, used unless overridden in
            ``forward``.
        loss_weight (float): Multiplier applied to the computed loss.
    """

    def __init__(self,
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 reduction='mean',
                 loss_weight=1.0):
        super(FocalLoss, self).__init__()
        assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'
        self.use_sigmoid = use_sigmoid
        self.gamma = gamma
        self.alpha = alpha
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None):
        """Compute the weighted, reduced focal loss.

        ``reduction_override`` (None, 'none', 'mean' or 'sum') takes
        precedence over the default reduction.

        Raises:
            NotImplementedError: If ``self.use_sigmoid`` is falsy.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        if not self.use_sigmoid:
            raise NotImplementedError
        unscaled = sigmoid_focal_loss(
            pred,
            target,
            weight,
            gamma=self.gamma,
            alpha=self.alpha,
            reduction=reduction,
            avg_factor=avg_factor)
        return self.loss_weight * unscaled
83 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/mse_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | from ..registry import LOSSES
 5 | from .utils import weighted_loss
 6 | 
 7 | mse_loss = weighted_loss(F.mse_loss)
 8 | 
 9 | 
@LOSSES.register_module
class MSELoss(nn.Module):
    """Mean squared error loss module.

    Args:
        reduction (str): Reduction passed to ``mse_loss``.
        loss_weight (float): Multiplier applied to the computed loss.
    """

    def __init__(self, reduction='mean', loss_weight=1.0):
        super(MSELoss, self).__init__()
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self, pred, target, weight=None, avg_factor=None):
        """Return ``loss_weight`` times the weighted, reduced MSE."""
        unscaled = mse_loss(
            pred,
            target,
            weight,
            reduction=self.reduction,
            avg_factor=avg_factor)
        return self.loss_weight * unscaled
26 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | from ..registry import LOSSES
 5 | from .utils import weighted_loss
 6 | 
 7 | 
 8 | @weighted_loss
 9 | def smooth_l1_loss(pred, target, beta=1.0):
10 |     assert beta > 0
11 |     if not target.numel() > 0:
12 |         print("debug")
13 |     assert pred.size() == target.size() and target.numel() > 0
14 |     diff = torch.abs(pred - target)
15 |     loss = torch.where(diff < beta, 0.5 * diff * diff / beta,
16 |                        diff - 0.5 * beta)
17 |     return loss
18 | 
19 | 
@LOSSES.register_module
class SmoothL1Loss(nn.Module):
    """Smooth L1 loss module.

    Args:
        beta (float): Forwarded to ``smooth_l1_loss``.
        reduction (str): Default reduction, used unless overridden in
            ``forward``.
        loss_weight (float): Multiplier applied to the computed loss.
    """

    def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0):
        super(SmoothL1Loss, self).__init__()
        self.beta = beta
        self.reduction = reduction
        self.loss_weight = loss_weight

    def forward(self,
                pred,
                target,
                weight=None,
                avg_factor=None,
                reduction_override=None,
                **kwargs):
        """Compute the weighted, reduced Smooth L1 loss.

        ``reduction_override`` (None, 'none', 'mean' or 'sum') takes
        precedence over the default reduction. Extra keyword arguments are
        passed through to ``smooth_l1_loss``.
        """
        assert reduction_override in (None, 'none', 'mean', 'sum')
        reduction = reduction_override or self.reduction
        unscaled = smooth_l1_loss(
            pred,
            target,
            weight,
            beta=self.beta,
            reduction=reduction,
            avg_factor=avg_factor,
            **kwargs)
        return self.loss_weight * unscaled
48 | 


--------------------------------------------------------------------------------
/mmdet/models/losses/utils.py:
--------------------------------------------------------------------------------
 1 | import functools
 2 | 
 3 | import torch.nn.functional as F
 4 | 
 5 | 
 6 | def reduce_loss(loss, reduction):
 7 |     """Reduce loss as specified.
 8 | 
 9 |     Args:
10 |         loss (Tensor): Elementwise loss tensor.
11 |         reduction (str): Options are "none", "mean" and "sum".
12 | 
13 |     Return:
14 |         Tensor: Reduced loss tensor.
15 |     """
16 |     reduction_enum = F._Reduction.get_enum(reduction)
17 |     # none: 0, elementwise_mean:1, sum: 2
18 |     if reduction_enum == 0:
19 |         return loss
20 |     elif reduction_enum == 1:
21 |         return loss.mean()
22 |     elif reduction_enum == 2:
23 |         return loss.sum()
24 | 
25 | 
def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
    """Apply element-wise weight and reduce loss.

    Args:
        loss (Tensor): Element-wise loss.
        weight (Tensor): Element-wise weights.
        reduction (str): Same as built-in losses of PyTorch.
        avg_factor (float): Average factor when computing the mean of losses.

    Returns:
        Tensor: Processed loss values.

    Raises:
        ValueError: If ``avg_factor`` is given together with a reduction
            other than "mean" or "none".
    """
    if weight is not None:
        loss = loss * weight

    # Without a custom denominator the standard reduction applies.
    if avg_factor is None:
        return reduce_loss(loss, reduction)

    if reduction == 'mean':
        # Average over avg_factor instead of the number of elements.
        return loss.sum() / avg_factor
    if reduction != 'none':
        raise ValueError('avg_factor can not be used with reduction="sum"')
    return loss
53 | 
54 | 
def weighted_loss(loss_func):
    """Decorator that adds weighting and reduction to an element-wise loss.

    The decorated function must be callable as
    `loss_func(pred, target, **kwargs)` and return the unreduced,
    element-wise loss. The returned wrapper has the signature
    `loss_func(pred, target, weight=None, reduction='mean', avg_factor=None,
    **kwargs)`; `weight`, `reduction` and `avg_factor` are consumed by the
    wrapper and are not forwarded to the decorated function.

    :Example:

    >>> import torch
    >>> @weighted_loss
    ... def l1_loss(pred, target):
    ...     return (pred - target).abs()

    >>> pred = torch.Tensor([0, 2, 3])
    >>> target = torch.Tensor([1, 1, 1])
    >>> weight = torch.Tensor([1, 0, 1])

    >>> l1_loss(pred, target)
    tensor(1.3333)
    >>> l1_loss(pred, target, weight)
    tensor(1.)
    >>> l1_loss(pred, target, reduction='none')
    tensor([1., 1., 2.])
    >>> l1_loss(pred, target, weight, avg_factor=2)
    tensor(1.5000)
    """

    @functools.wraps(loss_func)
    def wrapper(pred,
                target,
                weight=None,
                reduction='mean',
                avg_factor=None,
                **kwargs):
        elementwise = loss_func(pred, target, **kwargs)
        return weight_reduce_loss(elementwise, weight, reduction, avg_factor)

    return wrapper
99 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/__init__.py:
--------------------------------------------------------------------------------
 1 | from .fcn_mask_head import FCNMaskHead
 2 | from .fused_semantic_head import FusedSemanticHead
 3 | from .grid_head import GridHead
 4 | from .htc_mask_head import HTCMaskHead
 5 | from .maskiou_head import MaskIoUHead
 6 | 
 7 | __all__ = [
 8 |     'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',
 9 |     'MaskIoUHead'
10 | ]
11 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/fused_semantic_head.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.nn.functional as F
  3 | from mmcv.cnn import kaiming_init
  4 | 
  5 | from mmdet.core import auto_fp16, force_fp32
  6 | from ..registry import HEADS
  7 | from ..utils import ConvModule
  8 | 
  9 | 
 10 | @HEADS.register_module
 11 | class FusedSemanticHead(nn.Module):
 12 |     r"""Multi-level fused semantic segmentation head.
 13 | 
 14 |     in_1 -> 1x1 conv ---
 15 |                         |
 16 |     in_2 -> 1x1 conv -- |
 17 |                        ||
 18 |     in_3 -> 1x1 conv - ||
 19 |                       |||                  /-> 1x1 conv (mask prediction)
 20 |     in_4 -> 1x1 conv -----> 3x3 convs (*4)
 21 |                         |                  \-> 1x1 conv (feature)
 22 |     in_5 -> 1x1 conv ---
 23 |     """  # noqa: W605
 24 | 
 25 |     def __init__(self,
 26 |                  num_ins,
 27 |                  fusion_level,
 28 |                  num_convs=4,
 29 |                  in_channels=256,
 30 |                  conv_out_channels=256,
 31 |                  num_classes=183,
 32 |                  ignore_label=255,
 33 |                  loss_weight=0.2,
 34 |                  conv_cfg=None,
 35 |                  norm_cfg=None):
 36 |         super(FusedSemanticHead, self).__init__()
 37 |         self.num_ins = num_ins
 38 |         self.fusion_level = fusion_level
 39 |         self.num_convs = num_convs
 40 |         self.in_channels = in_channels
 41 |         self.conv_out_channels = conv_out_channels
 42 |         self.num_classes = num_classes
 43 |         self.ignore_label = ignore_label
 44 |         self.loss_weight = loss_weight
 45 |         self.conv_cfg = conv_cfg
 46 |         self.norm_cfg = norm_cfg
 47 |         self.fp16_enabled = False
 48 | 
 49 |         self.lateral_convs = nn.ModuleList()
 50 |         for i in range(self.num_ins):
 51 |             self.lateral_convs.append(
 52 |                 ConvModule(
 53 |                     self.in_channels,
 54 |                     self.in_channels,
 55 |                     1,
 56 |                     conv_cfg=self.conv_cfg,
 57 |                     norm_cfg=self.norm_cfg,
 58 |                     inplace=False))
 59 | 
 60 |         self.convs = nn.ModuleList()
 61 |         for i in range(self.num_convs):
 62 |             in_channels = self.in_channels if i == 0 else conv_out_channels
 63 |             self.convs.append(
 64 |                 ConvModule(
 65 |                     in_channels,
 66 |                     conv_out_channels,
 67 |                     3,
 68 |                     padding=1,
 69 |                     conv_cfg=self.conv_cfg,
 70 |                     norm_cfg=self.norm_cfg))
 71 |         self.conv_embedding = ConvModule(
 72 |             conv_out_channels,
 73 |             conv_out_channels,
 74 |             1,
 75 |             conv_cfg=self.conv_cfg,
 76 |             norm_cfg=self.norm_cfg)
 77 |         self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1)
 78 | 
 79 |         self.criterion = nn.CrossEntropyLoss(ignore_index=ignore_label)
 80 | 
 81 |     def init_weights(self):
 82 |         kaiming_init(self.conv_logits)
 83 | 
 84 |     @auto_fp16()
 85 |     def forward(self, feats):
 86 |         x = self.lateral_convs[self.fusion_level](feats[self.fusion_level])
 87 |         fused_size = tuple(x.shape[-2:])
 88 |         for i, feat in enumerate(feats):
 89 |             if i != self.fusion_level:
 90 |                 feat = F.interpolate(
 91 |                     feat, size=fused_size, mode='bilinear', align_corners=True)
 92 |                 x += self.lateral_convs[i](feat)
 93 | 
 94 |         for i in range(self.num_convs):
 95 |             x = self.convs[i](x)
 96 | 
 97 |         mask_pred = self.conv_logits(x)
 98 |         x = self.conv_embedding(x)
 99 |         return mask_pred, x
100 | 
101 |     @force_fp32(apply_to=('mask_pred', ))
102 |     def loss(self, mask_pred, labels):
103 |         labels = labels.squeeze(1).long()
104 |         loss_semantic_seg = self.criterion(mask_pred, labels)
105 |         loss_semantic_seg *= self.loss_weight
106 |         return loss_semantic_seg
107 | 


--------------------------------------------------------------------------------
/mmdet/models/mask_heads/htc_mask_head.py:
--------------------------------------------------------------------------------
 1 | from ..registry import HEADS
 2 | from ..utils import ConvModule
 3 | from .fcn_mask_head import FCNMaskHead
 4 | 
 5 | 
 6 | @HEADS.register_module
 7 | class HTCMaskHead(FCNMaskHead):
 8 | 
 9 |     def __init__(self, *args, **kwargs):
10 |         super(HTCMaskHead, self).__init__(*args, **kwargs)
11 |         self.conv_res = ConvModule(
12 |             self.conv_out_channels,
13 |             self.conv_out_channels,
14 |             1,
15 |             conv_cfg=self.conv_cfg,
16 |             norm_cfg=self.norm_cfg)
17 | 
18 |     def init_weights(self):
19 |         super(HTCMaskHead, self).init_weights()
20 |         self.conv_res.init_weights()
21 | 
22 |     def forward(self, x, res_feat=None, return_logits=True, return_feat=True):
23 |         if res_feat is not None:
24 |             res_feat = self.conv_res(res_feat)
25 |             x = x + res_feat
26 |         for conv in self.convs:
27 |             x = conv(x)
28 |         res_feat = x
29 |         outs = []
30 |         if return_logits:
31 |             x = self.upsample(x)
32 |             if self.upsample_method == 'deconv':
33 |                 x = self.relu(x)
34 |             mask_pred = self.conv_logits(x)
35 |             outs.append(mask_pred)
36 |         if return_feat:
37 |             outs.append(res_feat)
38 |         return outs if len(outs) > 1 else outs[0]
39 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .bfp import BFP
2 | from .fpn import FPN
3 | from .hrfpn import HRFPN
4 | 
5 | __all__ = ['FPN', 'BFP', 'HRFPN']
6 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/bfp.py:
--------------------------------------------------------------------------------
  1 | import torch.nn as nn
  2 | import torch.nn.functional as F
  3 | from mmcv.cnn import xavier_init
  4 | 
  5 | from ..plugins import NonLocal2D
  6 | from ..registry import NECKS
  7 | from ..utils import ConvModule
  8 | 
  9 | 
 10 | @NECKS.register_module
 11 | class BFP(nn.Module):
 12 |     """BFP (Balanced Feature Pyrmamids)
 13 | 
 14 |     BFP takes multi-level features as inputs and gather them into a single one,
 15 |     then refine the gathered feature and scatter the refined results to
 16 |     multi-level features. This module is used in Libra R-CNN (CVPR 2019), see
 17 |     https://arxiv.org/pdf/1904.02701.pdf for details.
 18 | 
 19 |     Args:
 20 |         in_channels (int): Number of input channels (feature maps of all levels
 21 |             should have the same channels).
 22 |         num_levels (int): Number of input feature levels.
 23 |         conv_cfg (dict): The config dict for convolution layers.
 24 |         norm_cfg (dict): The config dict for normalization layers.
 25 |         refine_level (int): Index of integration and refine level of BSF in
 26 |             multi-level features from bottom to top.
 27 |         refine_type (str): Type of the refine op, currently support
 28 |             [None, 'conv', 'non_local'].
 29 |     """
 30 | 
 31 |     def __init__(self,
 32 |                  in_channels,
 33 |                  num_levels,
 34 |                  refine_level=2,
 35 |                  refine_type=None,
 36 |                  conv_cfg=None,
 37 |                  norm_cfg=None):
 38 |         super(BFP, self).__init__()
 39 |         assert refine_type in [None, 'conv', 'non_local']
 40 | 
 41 |         self.in_channels = in_channels
 42 |         self.num_levels = num_levels
 43 |         self.conv_cfg = conv_cfg
 44 |         self.norm_cfg = norm_cfg
 45 | 
 46 |         self.refine_level = refine_level
 47 |         self.refine_type = refine_type
 48 |         assert 0 <= self.refine_level < self.num_levels
 49 | 
 50 |         if self.refine_type == 'conv':
 51 |             self.refine = ConvModule(
 52 |                 self.in_channels,
 53 |                 self.in_channels,
 54 |                 3,
 55 |                 padding=1,
 56 |                 conv_cfg=self.conv_cfg,
 57 |                 norm_cfg=self.norm_cfg)
 58 |         elif self.refine_type == 'non_local':
 59 |             self.refine = NonLocal2D(
 60 |                 self.in_channels,
 61 |                 reduction=1,
 62 |                 use_scale=False,
 63 |                 conv_cfg=self.conv_cfg,
 64 |                 norm_cfg=self.norm_cfg)
 65 | 
 66 |     def init_weights(self):
 67 |         for m in self.modules():
 68 |             if isinstance(m, nn.Conv2d):
 69 |                 xavier_init(m, distribution='uniform')
 70 | 
 71 |     def forward(self, inputs):
 72 |         assert len(inputs) == self.num_levels
 73 | 
 74 |         # step 1: gather multi-level features by resize and average
 75 |         feats = []
 76 |         gather_size = inputs[self.refine_level].size()[2:]
 77 |         for i in range(self.num_levels):
 78 |             if i < self.refine_level:
 79 |                 gathered = F.adaptive_max_pool2d(
 80 |                     inputs[i], output_size=gather_size)
 81 |             else:
 82 |                 gathered = F.interpolate(
 83 |                     inputs[i], size=gather_size, mode='nearest')
 84 |             feats.append(gathered)
 85 | 
 86 |         bsf = sum(feats) / len(feats)
 87 | 
 88 |         # step 2: refine gathered features
 89 |         if self.refine_type is not None:
 90 |             bsf = self.refine(bsf)
 91 | 
 92 |         # step 3: scatter refined features to multi-levels by a residual path
 93 |         outs = []
 94 |         for i in range(self.num_levels):
 95 |             out_size = inputs[i].size()[2:]
 96 |             if i < self.refine_level:
 97 |                 residual = F.interpolate(bsf, size=out_size, mode='nearest')
 98 |             else:
 99 |                 residual = F.adaptive_max_pool2d(bsf, output_size=out_size)
100 |             outs.append(residual + inputs[i])
101 | 
102 |         return tuple(outs)
103 | 


--------------------------------------------------------------------------------
/mmdet/models/necks/hrfpn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from mmcv.cnn.weight_init import caffe2_xavier_init
  5 | from torch.utils.checkpoint import checkpoint
  6 | 
  7 | from ..registry import NECKS
  8 | from ..utils import ConvModule
  9 | 
 10 | 
 11 | @NECKS.register_module
 12 | class HRFPN(nn.Module):
 13 |     """HRFPN (High Resolution Feature Pyrmamids)
 14 | 
 15 |     arXiv: https://arxiv.org/abs/1904.04514
 16 | 
 17 |     Args:
 18 |         in_channels (list): number of channels for each branch.
 19 |         out_channels (int): output channels of feature pyramids.
 20 |         num_outs (int): number of output stages.
 21 |         pooling_type (str): pooling for generating feature pyramids
 22 |             from {MAX, AVG}.
 23 |         conv_cfg (dict): dictionary to construct and config conv layer.
 24 |         norm_cfg (dict): dictionary to construct and config norm layer.
 25 |         with_cp  (bool): Use checkpoint or not. Using checkpoint will save some
 26 |             memory while slowing down the training speed.
 27 |         stride (int): stride of 3x3 convolutional layers
 28 |     """
 29 | 
 30 |     def __init__(self,
 31 |                  in_channels,
 32 |                  out_channels,
 33 |                  num_outs=5,
 34 |                  pooling_type='AVG',
 35 |                  conv_cfg=None,
 36 |                  norm_cfg=None,
 37 |                  with_cp=False,
 38 |                  stride=1):
 39 |         super(HRFPN, self).__init__()
 40 |         assert isinstance(in_channels, list)
 41 |         self.in_channels = in_channels
 42 |         self.out_channels = out_channels
 43 |         self.num_ins = len(in_channels)
 44 |         self.num_outs = num_outs
 45 |         self.with_cp = with_cp
 46 |         self.conv_cfg = conv_cfg
 47 |         self.norm_cfg = norm_cfg
 48 | 
 49 |         self.reduction_conv = ConvModule(
 50 |             sum(in_channels),
 51 |             out_channels,
 52 |             kernel_size=1,
 53 |             conv_cfg=self.conv_cfg,
 54 |             activation=None)
 55 | 
 56 |         self.fpn_convs = nn.ModuleList()
 57 |         for i in range(self.num_outs):
 58 |             self.fpn_convs.append(
 59 |                 ConvModule(
 60 |                     out_channels,
 61 |                     out_channels,
 62 |                     kernel_size=3,
 63 |                     padding=1,
 64 |                     stride=stride,
 65 |                     conv_cfg=self.conv_cfg,
 66 |                     activation=None))
 67 | 
 68 |         if pooling_type == 'MAX':
 69 |             self.pooling = F.max_pool2d
 70 |         else:
 71 |             self.pooling = F.avg_pool2d
 72 | 
 73 |     def init_weights(self):
 74 |         for m in self.modules():
 75 |             if isinstance(m, nn.Conv2d):
 76 |                 caffe2_xavier_init(m)
 77 | 
 78 |     def forward(self, inputs):
 79 |         assert len(inputs) == self.num_ins
 80 |         outs = [inputs[0]]
 81 |         for i in range(1, self.num_ins):
 82 |             outs.append(
 83 |                 F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))
 84 |         out = torch.cat(outs, dim=1)
 85 |         if out.requires_grad and self.with_cp:
 86 |             out = checkpoint(self.reduction_conv, out)
 87 |         else:
 88 |             out = self.reduction_conv(out)
 89 |         outs = [out]
 90 |         for i in range(1, self.num_outs):
 91 |             outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))
 92 |         outputs = []
 93 | 
 94 |         for i in range(self.num_outs):
 95 |             if outs[i].requires_grad and self.with_cp:
 96 |                 tmp_out = checkpoint(self.fpn_convs[i], outs[i])
 97 |             else:
 98 |                 tmp_out = self.fpn_convs[i](outs[i])
 99 |             outputs.append(tmp_out)
100 |         return tuple(outputs)
101 | 


--------------------------------------------------------------------------------
/mmdet/models/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | from .generalized_attention import GeneralizedAttention
2 | from .non_local import NonLocal2D
3 | 
4 | __all__ = ['NonLocal2D', 'GeneralizedAttention']
5 | 


--------------------------------------------------------------------------------
/mmdet/models/registry.py:
--------------------------------------------------------------------------------
 1 | from mmdet.utils import Registry
 2 | 
# Module-level registries, one per kind of model component. Classes are added
# to them with the ``register_module`` decorator (e.g.
# ``@SHARED_HEADS.register_module``). The string passed to ``Registry`` is the
# registry's name.
BACKBONES = Registry('backbone')
NECKS = Registry('neck')
ROI_EXTRACTORS = Registry('roi_extractor')
SHARED_HEADS = Registry('shared_head')
HEADS = Registry('head')
LOSSES = Registry('loss')
DETECTORS = Registry('detector')
10 | 


--------------------------------------------------------------------------------
/mmdet/models/roi_extractors/__init__.py:
--------------------------------------------------------------------------------
1 | from .single_level import SingleRoIExtractor
2 | 
3 | __all__ = ['SingleRoIExtractor']
4 | 


--------------------------------------------------------------------------------
/mmdet/models/shared_heads/__init__.py:
--------------------------------------------------------------------------------
1 | from .res_layer import ResLayer
2 | from .res2_layer import Res2Layer
3 | from .resx_layer import ResXLayer
4 | 
5 | __all__ = ['ResLayer', 'Res2Layer', 'ResXLayer']
6 | 


--------------------------------------------------------------------------------
/mmdet/models/shared_heads/res2_layer.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import torch.nn as nn
 4 | from mmcv.cnn import constant_init, kaiming_init
 5 | from mmcv.runner import load_checkpoint
 6 | 
 7 | from mmdet.core import auto_fp16
 8 | from ..backbones import Res2Net, make_res2_layer
 9 | from ..registry import SHARED_HEADS
10 | from ..utils import ConvModule
11 | 
12 | 
@SHARED_HEADS.register_module
class Res2Layer(nn.Module):
    """Shared head wrapping one Res2Net stage built by ``make_res2_layer``.

    Args:
        depth: Key into ``Res2Net.arch_settings``; selects the block class
            and the per-stage block counts.
        stage (int): Index of the stage to build. The stage is registered as
            the sub-module ``layer{stage + 1}``.
        stride (int): Forwarded to ``make_res2_layer``.
        dilation (int): Forwarded to ``make_res2_layer``.
        style (str): Accepted but not used by this class.
        norm_cfg (dict): Stored on the instance; not forwarded to
            ``make_res2_layer``.
        norm_eval (bool): When True, :meth:`train` keeps every
            ``nn.BatchNorm2d`` sub-module in eval mode.
        with_cp (bool): Accepted but not used by this class.
        external_conv (bool): When True, a ``ConvModule(2048, 256, 1)`` is
            registered as ``new_layer_1`` and applied after the stage.
        dcn: Accepted but not used by this class.
    """

    def __init__(self,
                 depth,
                 stage=3,
                 stride=2,
                 dilation=1,
                 style='pytorch',
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 with_cp=False,
                 external_conv=False,
                 dcn=None):
        super(Res2Layer, self).__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False
        self.external_conv = external_conv
        block, stage_blocks = Res2Net.arch_settings[depth]
        stage_block = stage_blocks[stage]
        planes = 64 * 2**stage
        inplanes = 64 * 2**(stage - 1) * block.expansion
        # Res2Net hyper-parameters are fixed here rather than configurable.
        baseWidth = 26
        scale = 4

        res_layer = make_res2_layer(
            block,
            inplanes,
            planes,
            stage_block,
            baseWidth=baseWidth,
            scale=scale,
            stride=stride,
            dilation=dilation)
        self.add_module('layer{}'.format(stage + 1), res_layer)
        if external_conv:
            # NOTE(review): channel sizes are hard-coded; presumably they
            # match the output of the default stage -- confirm before using
            # a non-default ``stage``/``depth``.
            new_layer = ConvModule(2048, 256, 1)
            self.add_module('new_layer_1', new_layer)

    def init_weights(self, pretrained=None):
        """Load a checkpoint or initialize conv / batch-norm layers.

        Args:
            pretrained (str, optional): Checkpoint passed to
                ``load_checkpoint`` (non-strict). When None, every
                ``nn.Conv2d`` gets ``kaiming_init`` and every
                ``nn.BatchNorm2d`` gets ``constant_init(m, 1)``.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    @auto_fp16()
    def forward(self, x):
        """Run the stage, then ``new_layer_1`` when ``external_conv`` is set."""
        res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
        out = res_layer(x)
        if self.external_conv:
            new_layer_1 = getattr(self, 'new_layer_1')
            out = new_layer_1(out)
        return out

    def train(self, mode=True):
        """Switch train/eval mode, keeping BN in eval mode if ``norm_eval``.

        Returns:
            Res2Layer: ``self``, matching ``nn.Module.train`` so calls can be
            chained.
        """
        super(Res2Layer, self).train(mode)
        if self.norm_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
        return self
82 | 


--------------------------------------------------------------------------------
/mmdet/models/shared_heads/res_layer.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import torch.nn as nn
 4 | from mmcv.cnn import constant_init, kaiming_init
 5 | from mmcv.runner import load_checkpoint
 6 | 
 7 | from mmdet.core import auto_fp16
 8 | from ..backbones import ResNet, make_res_layer
 9 | from ..registry import SHARED_HEADS
10 | from ..utils import ConvModule
11 | 
12 | 
@SHARED_HEADS.register_module
class ResLayer(nn.Module):
    """Shared head wrapping one ResNet stage built by ``make_res_layer``.

    Args:
        depth: Key into ``ResNet.arch_settings``; selects the block class
            and the per-stage block counts.
        stage (int): Index of the stage to build. The stage is registered as
            the sub-module ``layer{stage + 1}``.
        stride (int): Forwarded to ``make_res_layer``.
        dilation (int): Forwarded to ``make_res_layer``.
        style (str): Forwarded to ``make_res_layer``.
        norm_cfg (dict): Stored on the instance and forwarded to
            ``make_res_layer``.
        norm_eval (bool): When True, :meth:`train` keeps every
            ``nn.BatchNorm2d`` sub-module in eval mode.
        with_cp (bool): Forwarded to ``make_res_layer``.
        external_conv (bool): When True, a ``ConvModule(2048, 256, 1)`` is
            registered as ``new_layer_1`` and applied after the stage.
        dcn: Forwarded to ``make_res_layer``.
    """

    def __init__(self,
                 depth,
                 stage=3,
                 stride=2,
                 dilation=1,
                 style='pytorch',
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 with_cp=False,
                 external_conv=False,
                 dcn=None):
        super(ResLayer, self).__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False
        self.external_conv = external_conv
        block, stage_blocks = ResNet.arch_settings[depth]
        stage_block = stage_blocks[stage]
        planes = 64 * 2**stage
        inplanes = 64 * 2**(stage - 1) * block.expansion

        res_layer = make_res_layer(
            block,
            inplanes,
            planes,
            stage_block,
            stride=stride,
            dilation=dilation,
            style=style,
            with_cp=with_cp,
            norm_cfg=self.norm_cfg,
            dcn=dcn)
        self.add_module('layer{}'.format(stage + 1), res_layer)
        if external_conv:
            # NOTE(review): channel sizes are hard-coded; presumably they
            # match the output of the default stage -- confirm before using
            # a non-default ``stage``/``depth``.
            new_layer = ConvModule(2048, 256, 1)
            self.add_module('new_layer_1', new_layer)

    def init_weights(self, pretrained=None):
        """Load a checkpoint or initialize conv / batch-norm layers.

        Args:
            pretrained (str, optional): Checkpoint passed to
                ``load_checkpoint`` (non-strict). When None, every
                ``nn.Conv2d`` gets ``kaiming_init`` and every
                ``nn.BatchNorm2d`` gets ``constant_init(m, 1)``.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    @auto_fp16()
    def forward(self, x):
        """Run the stage, then ``new_layer_1`` when ``external_conv`` is set."""
        res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
        out = res_layer(x)
        if self.external_conv:
            new_layer_1 = getattr(self, 'new_layer_1')
            out = new_layer_1(out)
        return out

    def train(self, mode=True):
        """Switch train/eval mode, keeping BN in eval mode if ``norm_eval``.

        Returns:
            ResLayer: ``self``, matching ``nn.Module.train`` so calls can be
            chained.
        """
        super(ResLayer, self).train(mode)
        if self.norm_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
        return self
82 | 


--------------------------------------------------------------------------------
/mmdet/models/shared_heads/resx_layer.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import torch.nn as nn
 4 | from mmcv.cnn import constant_init, kaiming_init
 5 | from mmcv.runner import load_checkpoint
 6 | 
 7 | from mmdet.core import auto_fp16
 8 | from ..backbones import ResNeXt, make_resx_layer
 9 | from .res_layer import ResLayer
10 | from ..registry import SHARED_HEADS
11 | from ..utils import ConvModule
12 | 
13 | 
@SHARED_HEADS.register_module
class ResXLayer(nn.Module):
    """Shared head wrapping one ResNeXt stage built by ``make_resx_layer``.

    Args:
        depth: Key into ``ResNeXt.arch_settings``; selects the block class
            and the per-stage block counts.
        stage (int): Index of the stage to build. The stage is registered as
            the sub-module ``layer{stage + 1}``.
        stride (int): Forwarded to ``make_resx_layer``.
        dilation (int): Forwarded to ``make_resx_layer``.
        groups (int): Forwarded to ``make_resx_layer``.
        base_width (int): Forwarded to ``make_resx_layer``.
        style (str): Forwarded to ``make_resx_layer``.
        norm_cfg (dict): Stored on the instance and forwarded to
            ``make_resx_layer``.
        norm_eval (bool): When True, :meth:`train` keeps every
            ``nn.BatchNorm2d`` sub-module in eval mode.
        with_cp (bool): Forwarded to ``make_resx_layer``.
        external_conv (bool): When True, a ``ConvModule(2048, 256, 1)`` is
            registered as ``new_layer_1`` and applied after the stage.
        dcn: Forwarded to ``make_resx_layer``.
    """

    def __init__(self,
                 depth,
                 stage=3,
                 stride=2,
                 dilation=1,
                 groups=1,
                 base_width=4,
                 style='pytorch',
                 norm_cfg=dict(type='BN', requires_grad=True),
                 norm_eval=True,
                 with_cp=False,
                 external_conv=False,
                 dcn=None):
        super(ResXLayer, self).__init__()
        self.norm_eval = norm_eval
        self.norm_cfg = norm_cfg
        self.stage = stage
        self.fp16_enabled = False
        self.external_conv = external_conv
        block, stage_blocks = ResNeXt.arch_settings[depth]
        stage_block = stage_blocks[stage]
        planes = 64 * 2**stage
        inplanes = 64 * 2**(stage - 1) * block.expansion

        resx_layer = make_resx_layer(
            block,
            inplanes,
            planes,
            stage_block,
            stride=stride,
            dilation=dilation,
            groups=groups,
            base_width=base_width,
            style=style,
            with_cp=with_cp,
            norm_cfg=self.norm_cfg,
            dcn=dcn)
        self.add_module('layer{}'.format(stage + 1), resx_layer)
        if external_conv:
            # NOTE(review): channel sizes are hard-coded; presumably they
            # match the output of the default stage -- confirm before using
            # a non-default ``stage``/``depth``.
            new_layer = ConvModule(2048, 256, 1)
            self.add_module('new_layer_1', new_layer)

    def init_weights(self, pretrained=None):
        """Load a checkpoint or initialize conv / batch-norm layers.

        Args:
            pretrained (str, optional): Checkpoint passed to
                ``load_checkpoint`` (non-strict). When None, every
                ``nn.Conv2d`` gets ``kaiming_init`` and every
                ``nn.BatchNorm2d`` gets ``constant_init(m, 1)``.

        Raises:
            TypeError: If ``pretrained`` is neither a str nor None.
        """
        if isinstance(pretrained, str):
            logger = logging.getLogger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    kaiming_init(m)
                elif isinstance(m, nn.BatchNorm2d):
                    constant_init(m, 1)
        else:
            raise TypeError('pretrained must be a str or None')

    @auto_fp16()
    def forward(self, x):
        """Run the stage, then ``new_layer_1`` when ``external_conv`` is set."""
        res_layer = getattr(self, 'layer{}'.format(self.stage + 1))
        out = res_layer(x)
        if self.external_conv:
            new_layer_1 = getattr(self, 'new_layer_1')
            out = new_layer_1(out)
        return out

    def train(self, mode=True):
        """Switch train/eval mode, keeping BN in eval mode if ``norm_eval``.

        Returns:
            ResXLayer: ``self``, matching ``nn.Module.train`` so calls can be
            chained.
        """
        super(ResXLayer, self).train(mode)
        if self.norm_eval:
            for m in self.modules():
                if isinstance(m, nn.BatchNorm2d):
                    m.eval()
        return self
87 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from .conv_module import ConvModule, build_conv_layer
 2 | from .conv_ws import ConvWS2d, conv_ws_2d
 3 | from .norm import build_norm_layer
 4 | from .scale import Scale
 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init,
 6 |                           uniform_init, xavier_init)
 7 | 
 8 | __all__ = [
 9 |     'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule',
10 |     'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init',
11 |     'kaiming_init', 'bias_init_with_prob', 'Scale'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/conv_ws.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | 
 5 | def conv_ws_2d(input,
 6 |                weight,
 7 |                bias=None,
 8 |                stride=1,
 9 |                padding=0,
10 |                dilation=1,
11 |                groups=1,
12 |                eps=1e-5):
13 |     c_in = weight.size(0)
14 |     weight_flat = weight.view(c_in, -1)
15 |     mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1)
16 |     std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1)
17 |     weight = (weight - mean) / (std + eps)
18 |     return F.conv2d(input, weight, bias, stride, padding, dilation, groups)
19 | 
20 | 
class ConvWS2d(nn.Conv2d):
    """``nn.Conv2d`` variant whose forward pass goes through ``conv_ws_2d``.

    The constructor takes the same arguments as the ``nn.Conv2d`` call it
    wraps, plus ``eps``, which is stored and handed to ``conv_ws_2d`` on
    every forward call.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True,
                 eps=1e-5):
        conv_kwargs = dict(
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)
        super(ConvWS2d, self).__init__(in_channels, out_channels, kernel_size,
                                       **conv_kwargs)
        self.eps = eps

    def forward(self, x):
        """Apply ``conv_ws_2d`` with this layer's weight, bias and settings."""
        return conv_ws_2d(
            x,
            self.weight,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
            eps=self.eps)
47 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/norm.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | 
 3 | norm_cfg = {
 4 |     # format: layer_type: (abbreviation, module)
 5 |     'BN': ('bn', nn.BatchNorm2d),
 6 |     'SyncBN': ('bn', nn.SyncBatchNorm),
 7 |     'GN': ('gn', nn.GroupNorm),
 8 |     # and potentially 'SN'
 9 | }
10 | 
11 | 
12 | def build_norm_layer(cfg, num_features, postfix=''):
13 |     """ Build normalization layer
14 | 
15 |     Args:
16 |         cfg (dict): cfg should contain:
17 |             type (str): identify norm layer type.
18 |             layer args: args needed to instantiate a norm layer.
19 |             requires_grad (bool): [optional] whether stop gradient updates
20 |         num_features (int): number of channels from input.
21 |         postfix (int, str): appended into norm abbreviation to
22 |             create named layer.
23 | 
24 |     Returns:
25 |         name (str): abbreviation + postfix
26 |         layer (nn.Module): created norm layer
27 |     """
28 |     assert isinstance(cfg, dict) and 'type' in cfg
29 |     cfg_ = cfg.copy()
30 | 
31 |     layer_type = cfg_.pop('type')
32 |     if layer_type not in norm_cfg:
33 |         raise KeyError('Unrecognized norm type {}'.format(layer_type))
34 |     else:
35 |         abbr, norm_layer = norm_cfg[layer_type]
36 |         if norm_layer is None:
37 |             raise NotImplementedError
38 | 
39 |     assert isinstance(postfix, (int, str))
40 |     name = abbr + str(postfix)
41 | 
42 |     requires_grad = cfg_.pop('requires_grad', True)
43 |     cfg_.setdefault('eps', 1e-5)
44 |     if layer_type != 'GN':
45 |         layer = norm_layer(num_features, **cfg_)
46 |         if layer_type == 'SyncBN':
47 |             layer._specify_ddp_gpu_num(1)
48 |     else:
49 |         assert 'num_groups' in cfg_
50 |         layer = norm_layer(num_channels=num_features, **cfg_)
51 | 
52 |     for param in layer.parameters():
53 |         param.requires_grad = requires_grad
54 | 
55 |     return name, layer
56 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/scale.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Scale(nn.Module):
 6 |     """
 7 |     A learnable scale parameter
 8 |     """
 9 | 
10 |     def __init__(self, scale=1.0):
11 |         super(Scale, self).__init__()
12 |         self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float))
13 | 
14 |     def forward(self, x):
15 |         return x * self.scale
16 | 


--------------------------------------------------------------------------------
/mmdet/models/utils/weight_init.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'):
 6 |     assert distribution in ['uniform', 'normal']
 7 |     if distribution == 'uniform':
 8 |         nn.init.xavier_uniform_(module.weight, gain=gain)
 9 |     else:
10 |         nn.init.xavier_normal_(module.weight, gain=gain)
11 |     if hasattr(module, 'bias'):
12 |         nn.init.constant_(module.bias, bias)
13 | 
14 | 
def normal_init(module, mean=0, std=1, bias=0):
    """Fill ``module.weight`` from N(mean, std) and set a constant bias.

    Args:
        module (nn.Module): Layer with a ``weight`` attribute.
        mean (float): Mean passed to ``nn.init.normal_``.
        std (float): Standard deviation passed to ``nn.init.normal_``.
        bias (float): Constant written into ``module.bias`` when the layer
            has one.
    """
    nn.init.normal_(module.weight, mean, std)
    # ``bias`` is None (but present) on layers built with ``bias=False``.
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
19 | 
20 | 
def uniform_init(module, a=0, b=1, bias=0):
    """Fill ``module.weight`` from U(a, b) and set a constant bias.

    Args:
        module (nn.Module): Layer with a ``weight`` attribute.
        a (float): Lower bound passed to ``nn.init.uniform_``.
        b (float): Upper bound passed to ``nn.init.uniform_``.
        bias (float): Constant written into ``module.bias`` when the layer
            has one.
    """
    nn.init.uniform_(module.weight, a, b)
    # ``bias`` is None (but present) on layers built with ``bias=False``.
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
25 | 
26 | 
def kaiming_init(module,
                 mode='fan_out',
                 nonlinearity='relu',
                 bias=0,
                 distribution='normal'):
    """Kaiming-initialize ``module.weight`` and fill ``module.bias``.

    Args:
        module (nn.Module): Layer with a ``weight`` attribute.
        mode (str): Passed to the ``nn.init.kaiming_*`` call.
        nonlinearity (str): Passed to the ``nn.init.kaiming_*`` call.
        bias (float): Constant written into ``module.bias`` when the layer
            has one.
        distribution (str): ``'uniform'`` or ``'normal'``.
    """
    assert distribution in ['uniform', 'normal']
    if distribution == 'uniform':
        nn.init.kaiming_uniform_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    else:
        nn.init.kaiming_normal_(
            module.weight, mode=mode, nonlinearity=nonlinearity)
    # Layers built with ``bias=False`` still expose ``bias`` (set to None),
    # so an attribute check alone is not enough.
    if getattr(module, 'bias', None) is not None:
        nn.init.constant_(module.bias, bias)
41 | 
42 | 
def bias_init_with_prob(prior_prob):
    """Compute a conv/fc bias value from a given probability.

    Returns ``-log((1 - prior_prob) / prior_prob)`` as a Python float.
    """
    odds_against = (1 - prior_prob) / prior_prob
    return float(-np.log(odds_against))
47 | 


--------------------------------------------------------------------------------
/mmdet/ops/__init__.py:
--------------------------------------------------------------------------------
 1 | from .context_block import ContextBlock
 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling,
 3 |                   DeformRoIPoolingPack, ModulatedDeformConv,
 4 |                   ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack,
 5 |                   deform_conv, deform_roi_pooling, modulated_deform_conv)
 6 | from .masked_conv import MaskedConv2d
 7 | from .nms import nms, soft_nms
 8 | from .roi_align import RoIAlign, roi_align
 9 | from .roi_pool import RoIPool, roi_pool
10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
11 | from .utils import get_compiler_version, get_compiling_cuda_version
12 | 
13 | __all__ = [
14 |     'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool',
15 |     'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
16 |     'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv',
17 |     'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv',
18 |     'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss',
19 |     'MaskedConv2d', 'ContextBlock', 'get_compiler_version',
20 |     'get_compiling_cuda_version'
21 | ]
22 | 


--------------------------------------------------------------------------------
/mmdet/ops/dcn/__init__.py:
--------------------------------------------------------------------------------
 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv,
 2 |                           ModulatedDeformConvPack, deform_conv,
 3 |                           modulated_deform_conv)
 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack,
 5 |                           ModulatedDeformRoIPoolingPack, deform_roi_pooling)
 6 | 
 7 | __all__ = [
 8 |     'DeformConv', 'DeformConvPack', 'ModulatedDeformConv',
 9 |     'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack',
10 |     'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv',
11 |     'deform_roi_pooling'
12 | ]
13 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/__init__.py:
--------------------------------------------------------------------------------
1 | from .masked_conv import MaskedConv2d, masked_conv2d
2 | 
3 | __all__ = ['masked_conv2d', 'MaskedConv2d']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/masked_conv.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | import torch.nn as nn
 5 | from torch.autograd import Function
 6 | from torch.autograd.function import once_differentiable
 7 | from torch.nn.modules.utils import _pair
 8 | 
 9 | from . import masked_conv2d_cuda
10 | 
11 | 
class MaskedConv2dFunction(Function):
    """Forward-only convolution evaluated at the positive positions of a mask.

    Output positions where ``mask`` is not positive are left at zero. The
    backward pass returns no gradients.
    """

    @staticmethod
    def forward(ctx, features, mask, weight, bias, padding=0, stride=1):
        """Compute the masked convolution.

        Args:
            ctx: Autograd context (unused).
            features (Tensor): 4-D CUDA tensor with batch size 1.
            mask (Tensor): 3-D tensor with leading size 1 whose last two
                dims equal the last two dims of ``features``.
            weight (Tensor): 4-D weight unpacked as
                ``(out_channel, in_channel, kernel_h, kernel_w)``.
            bias (Tensor or None): Per-output-channel bias; skipped if None.
            padding (int or tuple): Expanded with ``_pair`` to (pad_h, pad_w).
            stride (int or tuple): Must be 1 in both directions.

        Returns:
            Tensor: ``(1, out_channel, out_h, out_w)`` output.

        Raises:
            ValueError: If the stride is not 1.
            NotImplementedError: If ``features`` is not a CUDA tensor.
        """
        assert mask.dim() == 3 and mask.size(0) == 1
        assert features.dim() == 4 and features.size(0) == 1
        assert features.size()[2:] == mask.size()[1:]
        pad_h, pad_w = _pair(padding)
        stride_h, stride_w = _pair(stride)
        if stride_h != 1 or stride_w != 1:
            raise ValueError(
                'Stride could only be 1 in masked_conv2d currently.')
        if not features.is_cuda:
            raise NotImplementedError

        out_channel, in_channel, kernel_h, kernel_w = weight.size()

        batch_size = features.size(0)
        out_h = int(
            math.floor((features.size(2) + 2 * pad_h -
                        (kernel_h - 1) - 1) / stride_h + 1))
        # The width must be derived from the kernel width, not its height,
        # otherwise non-square kernels get a wrong output size.
        out_w = int(
            math.floor((features.size(3) + 2 * pad_w -
                        (kernel_w - 1) - 1) / stride_w + 1))
        mask_inds = torch.nonzero(mask[0] > 0)
        output = features.new_zeros(batch_size, out_channel, out_h, out_w)
        if mask_inds.numel() > 0:
            mask_h_idx = mask_inds[:, 0].contiguous()
            mask_w_idx = mask_inds[:, 1].contiguous()
            data_col = features.new_zeros(in_channel * kernel_h * kernel_w,
                                          mask_inds.size(0))
            masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx,
                                                     mask_w_idx, kernel_h,
                                                     kernel_w, pad_h, pad_w,
                                                     data_col)

            flat_weight = weight.view(out_channel, -1)
            if bias is None:
                # Layers created with ``bias=False`` have nothing to add.
                masked_output = torch.mm(flat_weight, data_col)
            else:
                # Same as the old positional ``addmm(1, bias, 1, ...)`` form:
                # bias + flat_weight @ data_col.
                masked_output = torch.addmm(bias[:, None], flat_weight,
                                            data_col)
            masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx,
                                                     mask_w_idx, out_h, out_w,
                                                     out_channel, output)
        return output

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return no gradients: one ``None`` per non-ctx ``forward`` input."""
        # forward takes features, mask, weight, bias, padding, stride.
        return (None, ) * 6
59 | 
60 | 
# Functional entry point: ``masked_conv2d(...)`` is
# ``MaskedConv2dFunction.apply`` and takes the arguments of
# ``MaskedConv2dFunction.forward`` after ``ctx``.
masked_conv2d = MaskedConv2dFunction.apply
62 | 
63 | 
class MaskedConv2d(nn.Conv2d):
    """``nn.Conv2d`` with an optional masked forward path.

    Without a mask the layer behaves like a regular ``nn.Conv2d``. With a
    mask it dispatches to ``masked_conv2d``, which has no backward
    implementation and currently supports only a stride of 1.

    NOTE(review): the masked path passes ``self.padding`` but not
    ``self.stride``, so ``masked_conv2d`` always runs with its default
    stride.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias=True):
        super(MaskedConv2d, self).__init__(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias)

    def forward(self, input, mask=None):
        """Convolve ``input``; use the masked kernel when ``mask`` is given."""
        if mask is not None:
            return masked_conv2d(input, mask, self.weight, self.bias,
                                 self.padding)
        # No mask: fall back to the normal Conv2d.
        return super(MaskedConv2d, self).forward(input)
90 | 


--------------------------------------------------------------------------------
/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Launchers called by the wrappers below. They are only declared in this
// file; presumably they are defined in the accompanying CUDA source --
// TODO confirm.

// Called by masked_im2col_forward_cuda with the image tensor, its
// height/width/channel sizes, the kernel and padding sizes, the masked
// (h, w) index tensors, the number of masked positions and the output `col`.
int MaskedIm2colForwardLaucher(const at::Tensor im, const int height,
                               const int width, const int channels,
                               const int kernel_h, const int kernel_w,
                               const int pad_h, const int pad_w,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int mask_cnt,
                               at::Tensor col);

// Called by masked_col2im_forward_cuda with the column tensor, the output
// height/width/channel sizes, the masked (h, w) index tensors, the number of
// masked positions and the output `im`.
int MaskedCol2imForwardLaucher(const at::Tensor col, const int height,
                               const int width, const int channels,
                               const at::Tensor mask_h_idx,
                               const at::Tensor mask_w_idx, const int mask_cnt,
                               at::Tensor im);
19 | 
// Argument checks used by the wrappers below.
// NOTE(review): AT_CHECK and Tensor::type() are believed to be deprecated in
// newer PyTorch releases (TORCH_CHECK being the replacement) -- confirm
// against the PyTorch version this extension is built with.

// Fails unless `x` is a CUDA tensor.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
// Fails unless `x` is contiguous.
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
// Runs both checks. Expands to two statements, so only use it as a
// stand-alone statement (not as the unbraced body of an if/else).
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
26 | 
27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx,
28 |                                const at::Tensor mask_w_idx, const int kernel_h,
29 |                                const int kernel_w, const int pad_h,
30 |                                const int pad_w, at::Tensor col) {
31 |   CHECK_INPUT(im);
32 |   CHECK_INPUT(mask_h_idx);
33 |   CHECK_INPUT(mask_w_idx);
34 |   CHECK_INPUT(col);
35 |   // im: (n, ic, h, w), kernel size (kh, kw)
36 |   // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh)
37 | 
38 |   int channels = im.size(1);
39 |   int height = im.size(2);
40 |   int width = im.size(3);
41 |   int mask_cnt = mask_h_idx.size(0);
42 | 
43 |   MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w,
44 |                              pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt,
45 |                              col);
46 | 
47 |   return 1;
48 | }
49 | 
50 | int masked_col2im_forward_cuda(const at::Tensor col,
51 |                                const at::Tensor mask_h_idx,
52 |                                const at::Tensor mask_w_idx, int height,
53 |                                int width, int channels, at::Tensor im) {
54 |   CHECK_INPUT(col);
55 |   CHECK_INPUT(mask_h_idx);
56 |   CHECK_INPUT(mask_w_idx);
57 |   CHECK_INPUT(im);
58 |   // im: (n, ic, h, w), kernel size (kh, kw)
59 |   // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh)
60 | 
61 |   int mask_cnt = mask_h_idx.size(0);
62 | 
63 |   MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx,
64 |                              mask_w_idx, mask_cnt, im);
65 | 
66 |   return 1;
67 | }
68 | 
// Python bindings: expose the two wrappers above under the names
// `masked_im2col_forward` and `masked_col2im_forward`.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("masked_im2col_forward", &masked_im2col_forward_cuda,
        "masked_im2col forward (CUDA)");
  m.def("masked_col2im_forward", &masked_col2im_forward_cuda,
        "masked_col2im forward (CUDA)");
}


--------------------------------------------------------------------------------
/mmdet/ops/nms/__init__.py:
--------------------------------------------------------------------------------
1 | from .nms_wrapper import nms, soft_nms
2 | 
3 | __all__ = ['nms', 'soft_nms']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | 
  4 | from . import nms_cpu, nms_cuda
  5 | from .soft_nms_cpu import soft_nms_cpu
  6 | 
  7 | 
  8 | def nms(dets, iou_thr, device_id=None):
  9 |     """Dispatch to either CPU or GPU NMS implementations.
 10 | 
 11 |     The input can be either a torch tensor or numpy array. GPU NMS will be used
 12 |     if the input is a gpu tensor or device_id is specified, otherwise CPU NMS
 13 |     will be used. The returned type will always be the same as inputs.
 14 | 
 15 |     Arguments:
 16 |         dets (torch.Tensor or np.ndarray): bboxes with scores.
 17 |         iou_thr (float): IoU threshold for NMS.
 18 |         device_id (int, optional): when `dets` is a numpy array, if `device_id`
 19 |             is None, then cpu nms is used, otherwise gpu_nms will be used.
 20 | 
 21 |     Returns:
 22 |         tuple: kept bboxes and indice, which is always the same data type as
 23 |             the input.
 24 | 
 25 |     Example:
 26 |         >>> dets = np.array([[49.1, 32.4, 51.0, 35.9, 0.9],
 27 |         >>>                  [49.3, 32.9, 51.0, 35.3, 0.9],
 28 |         >>>                  [49.2, 31.8, 51.0, 35.4, 0.5],
 29 |         >>>                  [35.1, 11.5, 39.1, 15.7, 0.5],
 30 |         >>>                  [35.6, 11.8, 39.3, 14.2, 0.5],
 31 |         >>>                  [35.3, 11.5, 39.9, 14.5, 0.4],
 32 |         >>>                  [35.2, 11.7, 39.7, 15.7, 0.3]], dtype=np.float32)
 33 |         >>> iou_thr = 0.7
 34 |         >>> supressed, inds = nms(dets, iou_thr)
 35 |         >>> assert len(inds) == len(supressed) == 3
 36 |     """
 37 |     # convert dets (tensor or numpy array) to tensor
 38 |     if isinstance(dets, torch.Tensor):
 39 |         is_numpy = False
 40 |         dets_th = dets
 41 |     elif isinstance(dets, np.ndarray):
 42 |         is_numpy = True
 43 |         device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id)
 44 |         dets_th = torch.from_numpy(dets).to(device)
 45 |     else:
 46 |         raise TypeError(
 47 |             'dets must be either a Tensor or numpy array, but got {}'.format(
 48 |                 type(dets)))
 49 | 
 50 |     # execute cpu or cuda nms
 51 |     if dets_th.shape[0] == 0:
 52 |         inds = dets_th.new_zeros(0, dtype=torch.long)
 53 |     else:
 54 |         if dets_th.is_cuda:
 55 |             inds = nms_cuda.nms(dets_th, iou_thr)
 56 |         else:
 57 |             inds = nms_cpu.nms(dets_th, iou_thr)
 58 | 
 59 |     if is_numpy:
 60 |         inds = inds.cpu().numpy()
 61 |     return dets[inds, :], inds
 62 | 
 63 | 
 64 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3):
 65 |     """
 66 |     Example:
 67 |         >>> dets = np.array([[4., 3., 5., 3., 0.9],
 68 |         >>>                  [4., 3., 5., 4., 0.9],
 69 |         >>>                  [3., 1., 3., 1., 0.5],
 70 |         >>>                  [3., 1., 3., 1., 0.5],
 71 |         >>>                  [3., 1., 3., 1., 0.4],
 72 |         >>>                  [3., 1., 3., 1., 0.0]], dtype=np.float32)
 73 |         >>> iou_thr = 0.7
 74 |         >>> supressed, inds = soft_nms(dets, iou_thr, sigma=0.5)
 75 |         >>> assert len(inds) == len(supressed) == 3
 76 |     """
 77 |     if isinstance(dets, torch.Tensor):
 78 |         is_tensor = True
 79 |         dets_np = dets.detach().cpu().numpy()
 80 |     elif isinstance(dets, np.ndarray):
 81 |         is_tensor = False
 82 |         dets_np = dets
 83 |     else:
 84 |         raise TypeError(
 85 |             'dets must be either a Tensor or numpy array, but got {}'.format(
 86 |                 type(dets)))
 87 | 
 88 |     method_codes = {'linear': 1, 'gaussian': 2}
 89 |     if method not in method_codes:
 90 |         raise ValueError('Invalid method for SoftNMS: {}'.format(method))
 91 |     new_dets, inds = soft_nms_cpu(
 92 |         dets_np,
 93 |         iou_thr,
 94 |         method=method_codes[method],
 95 |         sigma=sigma,
 96 |         min_score=min_score)
 97 | 
 98 |     if is_tensor:
 99 |         return dets.new_tensor(new_dets), dets.new_tensor(
100 |             inds, dtype=torch.long)
101 |     else:
102 |         return new_dets.astype(np.float32), inds.astype(np.int64)
103 | 


--------------------------------------------------------------------------------
/mmdet/ops/nms/src/nms_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
// Greedy non-maximum suppression on a CPU tensor.
//
// `dets` is indexed along dim 1: columns 0..3 are read as x1, y1, x2, y2 and
// column 4 as the score. Boxes are visited in descending score order; every
// later box whose IoU with the current box is >= `threshold` is marked
// suppressed. Returns a 1-D int64 tensor with the row indices of `dets` that
// were not suppressed, as produced by at::nonzero on the suppression mask
// (i.e. not re-ordered by score).
template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) {
  AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");

  // Nothing to suppress: return an empty index tensor.
  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }

  // Split the columns into contiguous 1-D tensors so raw pointers can be used.
  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();
  auto scores = dets.select(1, 4).contiguous();

  // The +1 makes width/height count both end coordinates.
  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);

  // Indices of the boxes sorted by descending score.
  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  // One byte per box: 1 means suppressed, 0 means still kept.
  at::Tensor suppressed_t =
      at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  auto suppressed = suppressed_t.data<uint8_t>();
  auto order = order_t.data<int64_t>();
  auto x1 = x1_t.data<scalar_t>();
  auto y1 = y1_t.data<scalar_t>();
  auto x2 = x2_t.data<scalar_t>();
  auto y2 = y2_t.data<scalar_t>();
  auto areas = areas_t.data<scalar_t>();

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    // A box that was already suppressed cannot suppress others.
    if (suppressed[i] == 1) continue;
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    // Only boxes with a lower rank in the score ordering are candidates.
    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1) continue;
      // Corners of the intersection rectangle.
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      // Clamp to zero when the boxes do not overlap.
      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      // Intersection over union.
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= threshold) suppressed[j] = 1;
    }
  }
  // Indices whose mask entry is still 0.
  return at::nonzero(suppressed_t == 0).squeeze(1);
}
60 | 
// Entry point exposed to Python: dispatches on the floating-point scalar type
// of `dets` and runs the matching nms_cpu_kernel instantiation.
at::Tensor nms(const at::Tensor& dets, const float threshold) {
  at::Tensor result;
  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
    result = nms_cpu_kernel<scalar_t>(dets, threshold);
  });
  return result;
}
68 | 
// Python bindings: exposes the CPU `nms` function under the name "nms".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
}


--------------------------------------------------------------------------------
/mmdet/ops/nms/src/nms_cuda.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #include <torch/extension.h>
 3 | 
// Raises (via AT_CHECK) unless the tensor expression `x` lives on a CUDA
// device; the stringified expression is included in the message.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")

// Declared here, defined in another translation unit (not visible in this
// file).
at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
 7 | 
// Entry point exposed to Python: checks that `dets` is a CUDA tensor and
// forwards to nms_cuda.
// NOTE(review): for an empty input the returned index tensor is allocated on
// the CPU (at::kCPU) even though `dets` is a CUDA tensor - confirm this is
// intended.
at::Tensor nms(const at::Tensor& dets, const float threshold) {
  CHECK_CUDA(dets);
  if (dets.numel() == 0)
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  return nms_cuda(dets, threshold);
}
14 | 
// Python bindings: exposes the CUDA `nms` function under the name "nms".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
}


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_align import RoIAlign, roi_align
2 | 
3 | __all__ = ['roi_align', 'RoIAlign']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/gradcheck.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import numpy as np
 5 | import torch
 6 | from torch.autograd import gradcheck
 7 | 
 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 9 | from roi_align import RoIAlign  # noqa: E402, isort:skip
10 | 
# Numerical gradient check for RoIAlign on random inputs (requires a CUDA
# device: both tensors are created on / moved to the GPU).
feat_size = 15
spatial_scale = 1.0 / 8
# Image size that corresponds to the feature map at the given scale.
img_size = feat_size / spatial_scale
num_imgs = 2
num_rois = 20

# Random batch index per RoI in [0, num_imgs).
batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
# All four coordinates start in [0, img_size / 2); the last two are then
# shifted by img_size / 2 so they are never smaller than the first two.
rois = np.random.rand(num_rois, 4) * img_size * 0.5
rois[:, 2:] += img_size * 0.5
# Prepend the batch index, giving 5 columns per RoI.
rois = np.hstack((batch_ind, rois))

feat = torch.randn(
    num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0')
rois = torch.from_numpy(rois).float().cuda()
inputs = (feat, rois)
print('Gradcheck for roi align...')
# First with the default sample_num, then with sample_num=2.
test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)
print(test)
test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)
print(test)
31 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/roi_align.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.autograd import Function
 3 | from torch.autograd.function import once_differentiable
 4 | from torch.nn.modules.utils import _pair
 5 | 
 6 | from . import roi_align_cuda
 7 | 
 8 | 
 9 | class RoIAlignFunction(Function):
10 | 
11 |     @staticmethod
12 |     def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0):
13 |         out_h, out_w = _pair(out_size)
14 |         assert isinstance(out_h, int) and isinstance(out_w, int)
15 |         ctx.spatial_scale = spatial_scale
16 |         ctx.sample_num = sample_num
17 |         ctx.save_for_backward(rois)
18 |         ctx.feature_size = features.size()
19 | 
20 |         batch_size, num_channels, data_height, data_width = features.size()
21 |         num_rois = rois.size(0)
22 | 
23 |         output = features.new_zeros(num_rois, num_channels, out_h, out_w)
24 |         if features.is_cuda:
25 |             roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale,
26 |                                    sample_num, output)
27 |         else:
28 |             raise NotImplementedError
29 | 
30 |         return output
31 | 
32 |     @staticmethod
33 |     @once_differentiable
34 |     def backward(ctx, grad_output):
35 |         feature_size = ctx.feature_size
36 |         spatial_scale = ctx.spatial_scale
37 |         sample_num = ctx.sample_num
38 |         rois = ctx.saved_tensors[0]
39 |         assert (feature_size is not None and grad_output.is_cuda)
40 | 
41 |         batch_size, num_channels, data_height, data_width = feature_size
42 |         out_w = grad_output.size(3)
43 |         out_h = grad_output.size(2)
44 | 
45 |         grad_input = grad_rois = None
46 |         if ctx.needs_input_grad[0]:
47 |             grad_input = rois.new_zeros(batch_size, num_channels, data_height,
48 |                                         data_width)
49 |             roi_align_cuda.backward(grad_output.contiguous(), rois, out_h,
50 |                                     out_w, spatial_scale, sample_num,
51 |                                     grad_input)
52 | 
53 |         return grad_input, grad_rois, None, None, None
54 | 
55 | 
56 | roi_align = RoIAlignFunction.apply
57 | 
58 | 
class RoIAlign(nn.Module):
    """Module wrapper around ``roi_align`` with an optional torchvision path.

    Args:
        out_size (int or tuple): output size, stored as a pair via ``_pair``.
        spatial_scale (float): stored as ``float``.
        sample_num (int): stored as ``int``.
        use_torchvision (bool): when true, ``forward`` calls
            ``torchvision.ops.roi_align`` instead of the local op.
    """

    def __init__(self,
                 out_size,
                 spatial_scale,
                 sample_num=0,
                 use_torchvision=False):
        super(RoIAlign, self).__init__()

        self.use_torchvision = use_torchvision
        self.sample_num = int(sample_num)
        self.spatial_scale = float(spatial_scale)
        self.out_size = _pair(out_size)

    def forward(self, features, rois):
        """Run RoIAlign on ``features`` for the given ``rois``."""
        call_args = (features, rois, self.out_size, self.spatial_scale,
                     self.sample_num)
        if not self.use_torchvision:
            return roi_align(*call_args)
        # Imported lazily so torchvision is only needed when requested.
        from torchvision.ops import roi_align as tv_roi_align
        return tv_roi_align(*call_args)

    def __repr__(self):
        head = '(out_size={}, spatial_scale={}, sample_num={}'.format(
            self.out_size, self.spatial_scale, self.sample_num)
        tail = ', use_torchvision={})'.format(self.use_torchvision)
        return self.__class__.__name__ + head + tail
88 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_align/src/roi_align_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Launchers declared here and defined in another translation unit (not
// visible in this file). The forward launcher writes into `output`.
int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
                           const float spatial_scale, const int sample_num,
                           const int channels, const int height,
                           const int width, const int num_rois,
                           const int pooled_height, const int pooled_width,
                           at::Tensor output);

// The backward launcher writes into `bottom_grad`.
int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                            const float spatial_scale, const int sample_num,
                            const int channels, const int height,
                            const int width, const int num_rois,
                            const int pooled_height, const int pooled_width,
                            at::Tensor bottom_grad);

// Input validation helpers; each raises via AT_CHECK when the condition fails.
// NOTE(review): CHECK_INPUT expands to two statements, so it must not be used
// as the sole body of an unbraced if/else.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
26 | 
27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois,
28 |                            int pooled_height, int pooled_width,
29 |                            float spatial_scale, int sample_num,
30 |                            at::Tensor output) {
31 |   CHECK_INPUT(features);
32 |   CHECK_INPUT(rois);
33 |   CHECK_INPUT(output);
34 | 
35 |   // Number of ROIs
36 |   int num_rois = rois.size(0);
37 |   int size_rois = rois.size(1);
38 | 
39 |   if (size_rois != 5) {
40 |     printf("wrong roi size\n");
41 |     return 0;
42 |   }
43 | 
44 |   int num_channels = features.size(1);
45 |   int data_height = features.size(2);
46 |   int data_width = features.size(3);
47 | 
48 |   ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num,
49 |                          num_channels, data_height, data_width, num_rois,
50 |                          pooled_height, pooled_width, output);
51 | 
52 |   return 1;
53 | }
54 | 
55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois,
56 |                             int pooled_height, int pooled_width,
57 |                             float spatial_scale, int sample_num,
58 |                             at::Tensor bottom_grad) {
59 |   CHECK_INPUT(top_grad);
60 |   CHECK_INPUT(rois);
61 |   CHECK_INPUT(bottom_grad);
62 | 
63 |   // Number of ROIs
64 |   int num_rois = rois.size(0);
65 |   int size_rois = rois.size(1);
66 |   if (size_rois != 5) {
67 |     printf("wrong roi size\n");
68 |     return 0;
69 |   }
70 | 
71 |   int num_channels = bottom_grad.size(1);
72 |   int data_height = bottom_grad.size(2);
73 |   int data_width = bottom_grad.size(3);
74 | 
75 |   ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num,
76 |                           num_channels, data_height, data_width, num_rois,
77 |                           pooled_height, pooled_width, bottom_grad);
78 | 
79 |   return 1;
80 | }
81 | 
// Python bindings: exposes the two wrappers as "forward" and "backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)");
  m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)");
}
86 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/__init__.py:
--------------------------------------------------------------------------------
1 | from .roi_pool import RoIPool, roi_pool
2 | 
3 | __all__ = ['roi_pool', 'RoIPool']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/gradcheck.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import sys
 3 | 
 4 | import torch
 5 | from torch.autograd import gradcheck
 6 | 
 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../')))
 8 | from roi_pool import RoIPool  # noqa: E402, isort:skip
 9 | 
# Numerical gradient check for RoIPool on a random feature map and three
# fixed RoIs (requires a CUDA device: both tensors are moved to the GPU).
feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
# Five values per RoI; presumably (batch_index, x1, y1, x2, y2) - TODO confirm
# against the CUDA kernel.
rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
                     [1, 67, 40, 110, 120]]).cuda()
inputs = (feat, rois)
print('Gradcheck for roi pooling...')
test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
print(test)
17 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/roi_pool.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | from torch.nn.modules.utils import _pair
 6 | 
 7 | from . import roi_pool_cuda
 8 | 
 9 | 
class RoIPoolFunction(Function):
    """Autograd function wrapping the compiled ``roi_pool_cuda`` extension.

    Requires CUDA tensors; no gradient is produced for ``rois``.
    """

    @staticmethod
    def forward(ctx, features, rois, out_size, spatial_scale):
        """Pool each RoI to ``out_size`` and remember the argmax buffer.

        Returns a tensor of shape
        (rois.size(0), features.size(1), out_h, out_w).
        """
        assert features.is_cuda
        pooled_h, pooled_w = _pair(out_size)
        assert isinstance(pooled_h, int) and isinstance(pooled_w, int)
        ctx.save_for_backward(rois)

        channels = features.size(1)
        roi_count = rois.size(0)
        pooled_shape = (roi_count, channels, pooled_h, pooled_w)

        # Both buffers are filled in place by the extension.
        pooled = features.new_zeros(pooled_shape)
        max_idx = features.new_zeros(pooled_shape, dtype=torch.int)
        roi_pool_cuda.forward(features, rois, pooled_h, pooled_w,
                              spatial_scale, pooled, max_idx)

        # State needed by backward.
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = features.size()
        ctx.argmax = max_idx
        return pooled

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Return gradients for (features, rois, out_size, spatial_scale).

        Only the first entry can be non-``None``.
        """
        assert grad_output.is_cuda
        scale = ctx.spatial_scale
        input_shape = ctx.feature_size
        max_idx = ctx.argmax
        saved_rois = ctx.saved_tensors[0]
        assert input_shape is not None

        if not ctx.needs_input_grad[0]:
            return None, None, None, None

        grad_features = grad_output.new_zeros(input_shape)
        roi_pool_cuda.backward(grad_output.contiguous(), saved_rois, max_idx,
                               scale, grad_features)
        return grad_features, None, None, None


# Functional alias used by the RoIPool module.
roi_pool = RoIPoolFunction.apply
51 | 
52 | 
class RoIPool(nn.Module):
    """Module wrapper around ``roi_pool`` with an optional torchvision path.

    Args:
        out_size (int or tuple): output size, stored as a pair via ``_pair``.
        spatial_scale (float): stored as ``float``.
        use_torchvision (bool): when true, ``forward`` calls
            ``torchvision.ops.roi_pool`` instead of the local op.
    """

    def __init__(self, out_size, spatial_scale, use_torchvision=False):
        super(RoIPool, self).__init__()

        self.use_torchvision = use_torchvision
        self.spatial_scale = float(spatial_scale)
        self.out_size = _pair(out_size)

    def forward(self, features, rois):
        """Run RoI pooling on ``features`` for the given ``rois``."""
        if not self.use_torchvision:
            return roi_pool(features, rois, self.out_size, self.spatial_scale)
        # Imported lazily so torchvision is only needed when requested.
        from torchvision.ops import roi_pool as tv_roi_pool
        return tv_roi_pool(features, rois, self.out_size, self.spatial_scale)

    def __repr__(self):
        head = '(out_size={}, spatial_scale={}'.format(
            self.out_size, self.spatial_scale)
        tail = ', use_torchvision={})'.format(self.use_torchvision)
        return self.__class__.__name__ + head + tail
76 | 


--------------------------------------------------------------------------------
/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp:
--------------------------------------------------------------------------------
 1 | #include <torch/extension.h>
 2 | 
 3 | #include <cmath>
 4 | #include <vector>
 5 | 
// Launchers declared here and defined in another translation unit (not
// visible in this file). The forward launcher writes into `output` and
// `argmax`.
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width, const int num_rois,
                          const int pooled_h, const int pooled_w,
                          at::Tensor output, at::Tensor argmax);

// The backward launcher writes into `bottom_grad`.
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad);

// Input validation helpers; each raises via AT_CHECK when the condition fails.
// NOTE(review): CHECK_INPUT expands to two statements, so it must not be used
// as the sole body of an unbraced if/else.
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
  AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
  CHECK_CUDA(x);       \
  CHECK_CONTIGUOUS(x)
25 | 
26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
27 |                              int pooled_height, int pooled_width,
28 |                              float spatial_scale, at::Tensor output,
29 |                              at::Tensor argmax) {
30 |   CHECK_INPUT(features);
31 |   CHECK_INPUT(rois);
32 |   CHECK_INPUT(output);
33 |   CHECK_INPUT(argmax);
34 | 
35 |   // Number of ROIs
36 |   int num_rois = rois.size(0);
37 |   int size_rois = rois.size(1);
38 | 
39 |   if (size_rois != 5) {
40 |     printf("wrong roi size\n");
41 |     return 0;
42 |   }
43 | 
44 |   int channels = features.size(1);
45 |   int height = features.size(2);
46 |   int width = features.size(3);
47 | 
48 |   ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width,
49 |                         num_rois, pooled_height, pooled_width, output, argmax);
50 | 
51 |   return 1;
52 | }
53 | 
54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
55 |                               at::Tensor argmax, float spatial_scale,
56 |                               at::Tensor bottom_grad) {
57 |   CHECK_INPUT(top_grad);
58 |   CHECK_INPUT(rois);
59 |   CHECK_INPUT(argmax);
60 |   CHECK_INPUT(bottom_grad);
61 | 
62 |   int pooled_height = top_grad.size(2);
63 |   int pooled_width = top_grad.size(3);
64 |   int num_rois = rois.size(0);
65 |   int size_rois = rois.size(1);
66 | 
67 |   if (size_rois != 5) {
68 |     printf("wrong roi size\n");
69 |     return 0;
70 |   }
71 |   int batch_size = bottom_grad.size(0);
72 |   int channels = bottom_grad.size(1);
73 |   int height = bottom_grad.size(2);
74 |   int width = bottom_grad.size(3);
75 | 
76 |   ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
77 |                          channels, height, width, num_rois, pooled_height,
78 |                          pooled_width, bottom_grad);
79 | 
80 |   return 1;
81 | }
82 | 
// Python bindings: exposes the two wrappers as "forward" and "backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
  m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
}
87 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/__init__.py:
--------------------------------------------------------------------------------
1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss
2 | 
3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss']
4 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from torch.autograd import Function
 3 | from torch.autograd.function import once_differentiable
 4 | 
 5 | from . import sigmoid_focal_loss_cuda
 6 | 
 7 | 
 8 | class SigmoidFocalLossFunction(Function):
 9 | 
10 |     @staticmethod
11 |     def forward(ctx, input, target, gamma=2.0, alpha=0.25):
12 |         ctx.save_for_backward(input, target)
13 |         num_classes = input.shape[1]
14 |         ctx.num_classes = num_classes
15 |         ctx.gamma = gamma
16 |         ctx.alpha = alpha
17 | 
18 |         loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes,
19 |                                                gamma, alpha)
20 |         return loss
21 | 
22 |     @staticmethod
23 |     @once_differentiable
24 |     def backward(ctx, d_loss):
25 |         input, target = ctx.saved_tensors
26 |         num_classes = ctx.num_classes
27 |         gamma = ctx.gamma
28 |         alpha = ctx.alpha
29 |         d_loss = d_loss.contiguous()
30 |         d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss,
31 |                                                    num_classes, gamma, alpha)
32 |         return d_input, None, None, None, None
33 | 
34 | 
35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply
36 | 
37 | 
38 | # TODO: remove this module
class SigmoidFocalLoss(nn.Module):
    """Module form of ``sigmoid_focal_loss`` that returns the summed loss.

    Args:
        gamma: stored and passed to ``sigmoid_focal_loss``.
        alpha: stored and passed to ``sigmoid_focal_loss``.
    """

    def __init__(self, gamma, alpha):
        super(SigmoidFocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma

    def forward(self, logits, targets):
        """Return the sum of the focal loss; ``logits`` must be on CUDA."""
        assert logits.is_cuda
        return sigmoid_focal_loss(logits, targets, self.gamma,
                                  self.alpha).sum()

    def __repr__(self):
        params = '(gamma={}, alpha={})'.format(self.gamma, self.alpha)
        return self.__class__.__name__ + params
55 | 


--------------------------------------------------------------------------------
/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp:
--------------------------------------------------------------------------------
 1 | // modify from
 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h
 3 | #include <torch/extension.h>
 4 | 
// CUDA implementations, declared here and defined in another translation
// unit (not visible in this file).
at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
                                         const at::Tensor &targets,
                                         const int num_classes,
                                         const float gamma, const float alpha);

at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
                                          const at::Tensor &targets,
                                          const at::Tensor &d_losses,
                                          const int num_classes,
                                          const float gamma, const float alpha);
15 | 
16 | // Interface for Python
17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits,
18 |                                     const at::Tensor &targets,
19 |                                     const int num_classes, const float gamma,
20 |                                     const float alpha) {
21 |   if (logits.type().is_cuda()) {
22 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma,
23 |                                          alpha);
24 |   }
25 |   AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
26 | }
27 | 
28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits,
29 |                                      const at::Tensor &targets,
30 |                                      const at::Tensor &d_losses,
31 |                                      const int num_classes, const float gamma,
32 |                                      const float alpha) {
33 |   if (logits.type().is_cuda()) {
34 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses,
35 |                                           num_classes, gamma, alpha);
36 |   }
37 |   AT_ERROR("SigmoidFocalLoss is not implemented on the CPU");
38 | }
39 | 
// Python bindings: exposes the two dispatch wrappers as "forward" and
// "backward".
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &SigmoidFocalLoss_forward,
        "SigmoidFocalLoss forward (CUDA)");
  m.def("backward", &SigmoidFocalLoss_backward,
        "SigmoidFocalLoss backward (CUDA)");
}
46 | 


--------------------------------------------------------------------------------
/mmdet/ops/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # from . import compiling_info
2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version
3 | 
4 | # get_compiler_version = compiling_info.get_compiler_version
5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version
6 | 
7 | __all__ = ['get_compiler_version', 'get_compiling_cuda_version']
8 | 


--------------------------------------------------------------------------------
/mmdet/ops/utils/src/compiling_info.cpp:
--------------------------------------------------------------------------------
 1 | // modified from
 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
 3 | #include <cuda_runtime_api.h>
 4 | #include <torch/extension.h>
 5 | 
#ifdef WITH_CUDA
// Returns the CUDART_VERSION macro value seen at compile time. Only defined
// when WITH_CUDA is set.
int get_cudart_version() { return CUDART_VERSION; }
#endif
 9 | 
// Returns the CUDA runtime version this extension was compiled against,
// formatted as "major.minor" with ".patch" appended when the last decimal
// digit is non-zero, or "not available" when built without WITH_CUDA.
std::string get_compiling_cuda_version() {
#ifndef WITH_CUDA
  return std::string("not available");
#else
  // Same formatting as
  // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
  const int version = get_cudart_version();
  const int patch = version % 10;

  std::ostringstream out;
  out << (version / 1000) << "." << (version / 10 % 100);
  if (patch != 0) {
    out << "." << patch;
  }
  return out.str();
#endif
}
28 | 
29 | // similar to
30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
// Returns a description of the compiler used for this translation unit, built
// from predefined macros: "GCC major.minor" (GCC that is not clang),
// "clang major.minor.patch", and/or "MSVC <_MSC_FULL_VER>". Sections whose
// macros are not defined contribute nothing.
std::string get_compiler_version() {
  std::ostringstream description;

#if defined(__GNUC__) && !defined(__clang__)
  description << "GCC " << __GNUC__ << "." << __GNUC_MINOR__;
#endif

#if defined(__clang_major__)
  description << "clang " << __clang_major__ << "." << __clang_minor__ << "."
              << __clang_patchlevel__;
#endif

#if defined(_MSC_VER)
  description << "MSVC " << _MSC_FULL_VER;
#endif

  return description.str();
}
51 | 
// Python bindings: exposes both version helpers under their C++ names.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("get_compiler_version", &get_compiler_version, "get_compiler_version");
  m.def("get_compiling_cuda_version", &get_compiling_cuda_version,
        "get_compiling_cuda_version");
}
57 | 


--------------------------------------------------------------------------------
/mmdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .flops_counter import get_model_complexity_info
2 | from .registry import Registry, build_from_cfg
3 | 
4 | __all__ = ['Registry', 'build_from_cfg', 'get_model_complexity_info']
5 | 


--------------------------------------------------------------------------------
/mmdet/utils/registry.py:
--------------------------------------------------------------------------------
 1 | import inspect
 2 | 
 3 | import mmcv
 4 | 
 5 | 
 6 | class Registry(object):
 7 | 
 8 |     def __init__(self, name):
 9 |         self._name = name
10 |         self._module_dict = dict()
11 | 
12 |     def __repr__(self):
13 |         format_str = self.__class__.__name__ + '(name={}, items={})'.format(
14 |             self._name, list(self._module_dict.keys()))
15 |         return format_str
16 | 
17 |     @property
18 |     def name(self):
19 |         return self._name
20 | 
21 |     @property
22 |     def module_dict(self):
23 |         return self._module_dict
24 | 
25 |     def get(self, key):
26 |         return self._module_dict.get(key, None)
27 | 
28 |     def _register_module(self, module_class):
29 |         """Register a module.
30 | 
31 |         Args:
32 |             module (:obj:`nn.Module`): Module to be registered.
33 |         """
34 |         if not inspect.isclass(module_class):
35 |             raise TypeError('module must be a class, but got {}'.format(
36 |                 type(module_class)))
37 |         module_name = module_class.__name__
38 |         if module_name in self._module_dict:
39 |             raise KeyError('{} is already registered in {}'.format(
40 |                 module_name, self.name))
41 |         self._module_dict[module_name] = module_class
42 | 
43 |     def register_module(self, cls):
44 |         self._register_module(cls)
45 |         return cls
46 | 
47 | 
def build_from_cfg(cfg, registry, default_args=None):
    """Instantiate the class named by ``cfg['type']``.

    Args:
        cfg (dict): Config dict; must contain the key "type". The remaining
            items become keyword arguments. ``cfg`` itself is not modified.
        registry (:obj:`Registry`): Registry queried when "type" is a string.
        default_args (dict, optional): Extra keyword arguments, used only
            for names that ``cfg`` does not already provide.

    Returns:
        obj: The constructed object.

    Raises:
        KeyError: if a string "type" is not found in ``registry``.
        TypeError: if "type" is neither a string nor a class.
    """
    assert isinstance(cfg, dict) and 'type' in cfg
    assert isinstance(default_args, dict) or default_args is None

    # Work on a copy so the caller's config keeps its "type" entry.
    kwargs = cfg.copy()
    type_spec = kwargs.pop('type')

    if not mmcv.is_str(type_spec):
        if not inspect.isclass(type_spec):
            raise TypeError(
                'type must be a str or valid type, but got {}'.format(
                    type(type_spec)))
        target_cls = type_spec
    else:
        target_cls = registry.get(type_spec)
        if target_cls is None:
            raise KeyError('{} is not in the {} registry'.format(
                type_spec, registry.name))

    if default_args is not None:
        for key, fallback in default_args.items():
            kwargs.setdefault(key, fallback)
    return target_cls(**kwargs)
77 | 


--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | ./tools/dist_test.sh ./configs/faster_rcnn_r101_hnl_c5.py work_dirs/faster_rcnn_r101_hnl_vid/hnmb_c5_rcnn_not_agn_512_aug/epoch_6.pth 4 --out ./work_dirs/faster_rcnn_r101_hnl_vid/hnmb_c5_rcnn_not_agn_512_aug/results_epoch_6.pkl --eval bbox > 2020_01_14_13_14_hnmb_test.log 2>& 1 &
2 | 


--------------------------------------------------------------------------------
/tools/coco_eval.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | from mmdet.core import coco_eval
 4 | 
 5 | 
 6 | def main():
 7 |     parser = ArgumentParser(description='COCO Evaluation')
 8 |     parser.add_argument('result', help='result file path')
 9 |     parser.add_argument('--ann', help='annotation file path')
10 |     parser.add_argument(
11 |         '--types',
12 |         type=str,
13 |         nargs='+',
14 |         choices=['proposal_fast', 'proposal', 'bbox', 'segm', 'keypoint'],
15 |         default=['bbox'],
16 |         help='result types')
17 |     parser.add_argument(
18 |         '--max-dets',
19 |         type=int,
20 |         nargs='+',
21 |         default=[100, 300, 1000],
22 |         help='proposal numbers, only used for recall evaluation')
23 |     parser.add_argument(
24 |         '--classwise', action='store_true', help='whether eval class wise ap')
25 |     args = parser.parse_args()
26 |     coco_eval(args.result, args.types, args.ann, args.max_dets, args.classwise)
27 | 
28 | 
29 | if __name__ == '__main__':
30 |     main()
31 | 


--------------------------------------------------------------------------------
/tools/collect_env.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | import subprocess
 3 | import sys
 4 | from collections import defaultdict
 5 | 
 6 | import cv2
 7 | import mmcv
 8 | import torch
 9 | import torchvision
10 | 
11 | import mmdet
12 | from mmdet.ops import get_compiler_version, get_compiling_cuda_version
13 | 
14 | 
def collect_env():
    """Collect version information about the environment and print it.

    The report covers the platform and Python version, CUDA availability
    (plus CUDA_HOME, the nvcc version line and the GPU names when CUDA is
    available), GCC, PyTorch, TorchVision, OpenCV, MMCV and MMDetection.
    Each entry is printed as ``name: value`` in the order it was gathered.
    """
    report = {}
    report['sys.platform'] = sys.platform
    report['Python'] = sys.version.replace('\n', '')

    has_cuda = torch.cuda.is_available()
    report['CUDA available'] = has_cuda

    if has_cuda:
        from torch.utils.cpp_extension import CUDA_HOME
        report['CUDA_HOME'] = CUDA_HOME

        if CUDA_HOME is not None and osp.isdir(CUDA_HOME):
            try:
                nvcc_path = osp.join(CUDA_HOME, 'bin/nvcc')
                # Only the last line of `nvcc -V` is kept.
                raw = subprocess.check_output(
                    '"{}" -V | tail -n1'.format(nvcc_path), shell=True)
                nvcc_version = raw.decode('utf-8').strip()
            except subprocess.SubprocessError:
                nvcc_version = 'Not Available'
            report['NVCC'] = nvcc_version

        # Group device indices by device name so identical GPUs share a row.
        ids_by_gpu = defaultdict(list)
        for index in range(torch.cuda.device_count()):
            ids_by_gpu[torch.cuda.get_device_name(index)].append(str(index))
        for gpu_name, ids in ids_by_gpu.items():
            report['GPU ' + ','.join(ids)] = gpu_name

    gcc_banner = subprocess.check_output(
        'gcc --version | head -n1', shell=True)
    report['GCC'] = gcc_banner.decode('utf-8').strip()

    report['PyTorch'] = torch.__version__
    report['PyTorch compiling details'] = torch.__config__.show()
    report['TorchVision'] = torchvision.__version__
    report['OpenCV'] = cv2.__version__
    report['MMCV'] = mmcv.__version__
    report['MMDetection'] = mmdet.__version__
    report['MMDetection Compiler'] = get_compiler_version()
    report['MMDetection CUDA Compiler'] = get_compiling_cuda_version()

    for label, value in report.items():
        print('{}: {}'.format(label, value))


if __name__ == "__main__":
    collect_env()
65 | 


--------------------------------------------------------------------------------
/tools/detectron2pytorch.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | from collections import OrderedDict
 3 | 
 4 | import mmcv
 5 | import torch
 6 | 
# Supported ResNet depths mapped to the number of residual blocks in each of
# the four stages; `convert` iterates over these counts.
arch_settings = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)}
 8 | 
 9 | 
def convert_bn(blobs, state_dict, caffe_name, torch_name, converted_names):
    """Copy one affine-channel layer into BatchNorm-style state-dict entries.

    Detectron replaces BN with an affine channel layer, so only a scale
    (``<caffe_name>_s``) and a shift (``<caffe_name>_b``) blob exist. They
    become ``.weight`` and ``.bias``; ``.running_mean`` and ``.running_var``
    are filled with zeros and ones of the same size.

    Args:
        blobs (dict): Source blobs keyed by name; values are numpy arrays.
        state_dict (dict): Destination mapping, updated in place.
        caffe_name (str): Blob name prefix in the source model.
        torch_name (str): Parameter name prefix in the destination.
        converted_names (set): Receives the names of the consumed blobs.
    """
    shift_key = caffe_name + '_b'
    scale_key = caffe_name + '_s'

    state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[shift_key])
    state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[scale_key])

    stat_size = state_dict[torch_name + '.weight'].size()
    state_dict[torch_name + '.running_mean'] = torch.zeros(stat_size)
    state_dict[torch_name + '.running_var'] = torch.ones(stat_size)

    for consumed in (shift_key, scale_key):
        converted_names.add(consumed)
21 | 
22 | 
def convert_conv_fc(blobs, state_dict, caffe_name, torch_name,
                    converted_names):
    """Copy a conv/fc layer's weight, and its bias when one exists.

    Args:
        blobs (dict): Source blobs keyed by name; values are numpy arrays.
        state_dict (dict): Destination mapping, updated in place.
        caffe_name (str): Blob name prefix; ``_w`` / ``_b`` are appended.
        torch_name (str): Parameter name prefix in the destination.
        converted_names (set): Receives the names of the consumed blobs.
    """
    weight_key = caffe_name + '_w'
    bias_key = caffe_name + '_b'

    state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[weight_key])
    converted_names.add(weight_key)

    # The bias blob is optional; nothing more to do without it.
    if bias_key not in blobs:
        return
    state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[bias_key])
    converted_names.add(bias_key)
32 | 
33 | 
def convert(src, dst, depth):
    """Convert keys in detectron pretrained ResNet models to pytorch style.

    Args:
        src (str): Path of the detectron model file to load.
        dst (str): Path the converted checkpoint is saved to.
        depth (int): ResNet depth; must be a key of ``arch_settings``.

    Raises:
        ValueError: If ``depth`` is not supported.
    """
    if depth not in arch_settings:
        raise ValueError('Only support ResNet-50 and ResNet-101 currently')
    block_nums = arch_settings[depth]

    # The blobs are either nested under a 'blobs' key or are the top level.
    caffe_model = mmcv.load(src, encoding='latin1')
    if 'blobs' in caffe_model:
        blobs = caffe_model['blobs']
    else:
        blobs = caffe_model

    state_dict = OrderedDict()
    converted_names = set()

    # Stem: first conv and its affine layer.
    convert_conv_fc(blobs, state_dict, 'conv1', 'conv1', converted_names)
    convert_bn(blobs, state_dict, 'res_conv1_bn', 'bn1', converted_names)

    # Source stages are numbered from 2 (res2..), destination layers from 1.
    for layer_idx, num_blocks in enumerate(block_nums, start=1):
        stage_idx = layer_idx + 1
        for block_idx in range(num_blocks):
            if block_idx == 0:
                # Only the first block of a stage has a shortcut branch.
                convert_conv_fc(
                    blobs, state_dict,
                    'res{}_{}_branch1'.format(stage_idx, block_idx),
                    'layer{}.{}.downsample.0'.format(layer_idx, block_idx),
                    converted_names)
                convert_bn(
                    blobs, state_dict,
                    'res{}_{}_branch1_bn'.format(stage_idx, block_idx),
                    'layer{}.{}.downsample.1'.format(layer_idx, block_idx),
                    converted_names)
            for conv_idx, letter in enumerate(['a', 'b', 'c'], start=1):
                convert_conv_fc(
                    blobs, state_dict,
                    'res{}_{}_branch2{}'.format(stage_idx, block_idx, letter),
                    'layer{}.{}.conv{}'.format(layer_idx, block_idx, conv_idx),
                    converted_names)
                convert_bn(
                    blobs, state_dict,
                    'res{}_{}_branch2{}_bn'.format(stage_idx, block_idx,
                                                   letter),
                    'layer{}.{}.bn{}'.format(layer_idx, block_idx, conv_idx),
                    converted_names)

    # Report every blob that was not consumed above.
    for blob_name in blobs:
        if blob_name not in converted_names:
            print('Not Convert: {}'.format(blob_name))

    torch.save({'state_dict': state_dict}, dst)
76 | 
77 | 
def main():
    """Parse ``src``, ``dst`` and ``depth`` from the CLI and run ``convert``."""
    cli = argparse.ArgumentParser(description='Convert model keys')
    cli.add_argument('src', help='src detectron model path')
    cli.add_argument('dst', help='save path')
    cli.add_argument('depth', type=int, help='ResNet model depth')
    opts = cli.parse_args()
    convert(opts.src, opts.dst, opts.depth)


if __name__ == '__main__':
    main()
89 | 


--------------------------------------------------------------------------------
/tools/dist_hnl_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | PYTHON=${PYTHON:-"python"}
 4 | 
 5 | CONFIG=$1
 6 | CHECKPOINT=$2
 7 | GPUS=$3
 8 | 
 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
10 |     $(dirname "$0")/hnl_test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 | 


--------------------------------------------------------------------------------
/tools/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | PYTHON=${PYTHON:-"python"}
 4 | 
 5 | CONFIG=$1
 6 | CHECKPOINT=$2
 7 | GPUS=$3
 8 | 
 9 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
10 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 | 


--------------------------------------------------------------------------------
/tools/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | PYTHON=${PYTHON:-"python"}
 4 | 
 5 | CONFIG=$1
 6 | GPUS=$2
 7 | 
 8 | $PYTHON -m torch.distributed.launch --nproc_per_node=$GPUS \
 9 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
10 | 


--------------------------------------------------------------------------------
/tools/get_flops.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | from mmcv import Config
 4 | 
 5 | from mmdet.models import build_detector
 6 | from mmdet.utils import get_model_complexity_info
 7 | 
 8 | 
 9 | def parse_args():
10 |     parser = argparse.ArgumentParser(description='Train a detector')
11 |     parser.add_argument('config', help='train config file path')
12 |     parser.add_argument(
13 |         '--shape',
14 |         type=int,
15 |         nargs='+',
16 |         default=[1280, 800],
17 |         help='input image size')
18 |     args = parser.parse_args()
19 |     return args
20 | 
21 | 
def main():
    """Build the detector from a config and print its FLOPs and params.

    ``--shape`` with one value gives a square input; with two values they
    are used in the order given. A leading channel dimension of 3 is added.

    Raises:
        ValueError: If ``--shape`` has more than two values.
        NotImplementedError: If the model has no ``forward_dummy`` method.
    """
    args = parse_args()

    if len(args.shape) == 1:
        input_shape = (3, args.shape[0], args.shape[0])
    elif len(args.shape) == 2:
        input_shape = (3, ) + tuple(args.shape)
    else:
        raise ValueError('invalid input shape')

    cfg = Config.fromfile(args.config)
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg).cuda()
    model.eval()

    # The counter calls model.forward, so route it to the dummy forward pass.
    if hasattr(model, 'forward_dummy'):
        model.forward = model.forward_dummy
    else:
        # Message fixed: it used to read "currently not currently supported".
        raise NotImplementedError(
            'FLOPs counter is currently not supported with {}'.format(
                model.__class__.__name__))

    flops, params = get_model_complexity_info(model, input_shape)
    split_line = '=' * 30
    print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
        split_line, input_shape, flops, params))


if __name__ == '__main__':
    main()
53 | 


--------------------------------------------------------------------------------
/tools/gpu_device_test.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import sys
 3 | 
 4 | print('__Python VERSION:', sys.version)
 5 | print('__pyTorch VERSION:', torch.__version__)
 6 | print('__CUDA VERSION')
 7 | 
 8 | from subprocess import call
 9 | 
10 | print('__CUDNN VERSION:', torch.backends.cudnn.version())
11 | print('__Number CUDA Devices:', torch.cuda.device_count())
12 | print('__Devices')
13 | call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
14 | print('Active CUDA Device: GPU', torch.cuda.current_device())
15 | 
16 | print ('Available devices ', torch.cuda.device_count())
17 | print ('Current cuda device ', torch.cuda.current_device())


--------------------------------------------------------------------------------
/tools/plot_PR_curve.py:
--------------------------------------------------------------------------------
  1 | import mmcv
  2 | import numpy as np
  3 | 
  4 | import seaborn as sns
  5 | import matplotlib.pyplot as plt
  6 | 
  7 | from mmdet.core import average_precision
  8 | from mmdet.core import imagenet_vid_classes
  9 | from torch.utils.tensorboard import SummaryWriter
 10 | 
 11 | class_names = imagenet_vid_classes()
 12 | 
 13 | # meta_analysis_res_1 = mmcv.load('/home/mfhan/mmdetection/hnl_epoch4_2018_meta_analysis.pkl')
 14 | # meta_analysis_res_2 = mmcv.load('/home/mfhan/mmdetection/selsa_epoch_12_meta_analysis.pkl')
 15 | meta_analysis_res_1 = mmcv.load('/home/mfhan/mmdetection/hnmb_branch_meta_analysis.pkl')
 16 | meta_analysis_res_2 = mmcv.load('/home/mfhan/mmdetection/hnmb_mining_meta_analysis.pkl')
 17 | 
 18 | eval_results = []
 19 | name = ['hnmb_branch','hnmb_mining_meta']
 20 | 
 21 | writers = []
 22 | for n in name:
 23 |     writer = SummaryWriter(log_dir='/home/mfhan/mmdetection/work_dirs/comparison/{}'.format(n))
 24 |     writers.append(writer)
 25 | 
 26 | for cls_id in range(len(class_names)):
 27 |     # i=14
 28 |     meta=meta_analysis_res_1[cls_id]
 29 |     tp = meta['tp']
 30 |     fp = meta['fp']
 31 |     num_gts = meta['num_gts']
 32 |     det_scores = meta['det_scores']
 33 | 
 34 |     meta2 = meta_analysis_res_2[cls_id]
 35 |     tp2 = meta2['tp']
 36 |     fp2 = meta2['fp']
 37 |     num_gts2 = meta2['num_gts']
 38 |     det_scores2 = meta2['det_scores']
 39 | 
 40 |     x = np.arange(len(det_scores))
 41 |     # h = open("D:/Projects/mmdetection/horse.csv", 'w')
 42 |     for ind, [tp, fp, num_gts, det_scores] in enumerate([[tp, fp, num_gts, det_scores], [tp2, fp2, num_gts2, det_scores2]]):
 43 |         # # calculate recall and precision with tp and fp
 44 |         # tp = np.cumsum(tp, axis=1)
 45 |         # fp = np.cumsum(fp, axis=1)
 46 |         # eps = np.finfo(np.float32).eps
 47 |         # recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)
 48 |         # precisions = tp / np.maximum((tp + fp), eps)
 49 |         # # calculate AP
 50 |         # recalls = recalls[0, :]
 51 |         # precisions = precisions[0, :]
 52 |         # num_gts = num_gts.item()
 53 |         # mode = 'area'
 54 |         # ap = average_precision(recalls, precisions, mode)
 55 | 
 56 |         # no_scale = False
 57 |         # if recalls.ndim == 1:
 58 |         #     no_scale = True
 59 |         #     recalls = recalls[np.newaxis, :]
 60 |         #     precisions = precisions[np.newaxis, :]
 61 |         # assert recalls.shape == precisions.shape and recalls.ndim == 2
 62 |         # num_scales = recalls.shape[0]
 63 |         # ap = np.zeros(num_scales, dtype=np.float32)
 64 |         # if mode == 'area':
 65 |         #     zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)
 66 |         #     ones = np.ones((num_scales, 1), dtype=recalls.dtype)
 67 |         #     mrec = np.hstack((zeros, recalls, ones))
 68 |         #     mpre = np.hstack((zeros, precisions, zeros))
 69 |         #     for i in range(mpre.shape[1] - 1, 0, -1):
 70 |         #         mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])
 71 |         #     for i in range(num_scales):
 72 |         #         ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]
 73 |         #         ap[i] = np.sum(
 74 |         #             (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])
 75 | 
 76 |         # eval_results.append({
 77 |         #     'num_gts': num_gts,
 78 |         #     'recall': recalls,
 79 |         #     'precision': precisions,
 80 |         #     'ap': ap
 81 |         # })
 82 | 
 83 |         # sns.set_color_codes()
 84 |         weight_by_tf = tp[0]*1 + fp[0]*(-1)
 85 |         y = weight_by_tf*det_scores
 86 |         # sns.barplot(x, y, palette="Blues", ax=axes[ind])
 87 |         # # plt.bar(x,y)
 88 |         #
 89 |         # plt.show()
 90 |         # print("")
 91 |         # line = ','.join(list(map(str, y)))
 92 |         # h.writelines(line + '\n')
 93 | 
 94 |         writer = writers[ind]
 95 |         for i in range(15000):
 96 |             writer.add_scalar('{}/15k'.format(class_names[cls_id]), y[i], i)
 97 |         for i in range(len(y)-1):
 98 |             writer.add_scalar('{}/all'.format(class_names[cls_id]), y[i], i)
 99 |         # plt.savefig('./horse.pdf', format='pdf')
100 |     # h.close()
101 | 
102 | for writer in writers:
103 |     writer.close()


--------------------------------------------------------------------------------
/tools/publish_model.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import subprocess
 3 | 
 4 | import torch
 5 | 
 6 | 
 7 | def parse_args():
 8 |     parser = argparse.ArgumentParser(
 9 |         description='Process a checkpoint to be published')
10 |     parser.add_argument('in_file', help='input checkpoint filename')
11 |     parser.add_argument('out_file', help='output checkpoint filename')
12 |     args = parser.parse_args()
13 |     return args
14 | 
15 | 
def process_checkpoint(in_file, out_file):
    """Strip the optimizer state from a checkpoint and tag it with its hash.

    The checkpoint is loaded on CPU, its ``optimizer`` entry (if any) is
    dropped, and the result is saved to ``out_file``. The saved file is then
    renamed to ``<stem>-<first 8 hex chars of its SHA-256>.pth``, where
    ``<stem>`` is ``out_file`` without a trailing ``.pth``.

    Args:
        in_file (str): Path of the checkpoint to read.
        out_file (str): Path used for the processed checkpoint before the
            hash is inserted into its name.
    """
    import hashlib
    import os

    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    torch.save(checkpoint, out_file)

    # Hash the saved file in chunks; same digest `sha256sum` would print,
    # without depending on an external binary.
    digest = hashlib.sha256()
    with open(out_file, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    sha = digest.hexdigest()

    # str.rstrip('.pth') removes any trailing '.', 'p', 't' or 'h' characters
    # rather than the suffix (e.g. 'model_depth.pth' -> 'model_de'), so the
    # extension is removed explicitly instead.
    suffix = '.pth'
    stem = out_file[:-len(suffix)] if out_file.endswith(suffix) else out_file
    final_file = stem + '-{}.pth'.format(sha[:8])

    # Rename synchronously so a failure surfaces here instead of in an
    # unchecked background `mv` process.
    os.replace(out_file, final_file)
27 | 
28 | 
def main():
    """Entry point: process the checkpoint named on the command line."""
    cli_args = parse_args()
    source, target = cli_args.in_file, cli_args.out_file
    process_checkpoint(source, target)


if __name__ == '__main__':
    main()
36 | 


--------------------------------------------------------------------------------
/tools/slurm_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | CHECKPOINT=$4
 9 | GPUS=${GPUS:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | PY_ARGS=${@:5}
13 | SRUN_ARGS=${SRUN_ARGS:-""}
14 | 
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher="slurm" ${PY_ARGS}
24 | 


--------------------------------------------------------------------------------
/tools/slurm_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -x
 4 | 
 5 | PARTITION=$1
 6 | JOB_NAME=$2
 7 | CONFIG=$3
 8 | WORK_DIR=$4
 9 | GPUS=${5:-8}
10 | GPUS_PER_NODE=${GPUS_PER_NODE:-8}
11 | CPUS_PER_TASK=${CPUS_PER_TASK:-5}
12 | SRUN_ARGS=${SRUN_ARGS:-""}
13 | PY_ARGS=${PY_ARGS:-"--validate"}
14 | 
15 | srun -p ${PARTITION} \
16 |     --job-name=${JOB_NAME} \
17 |     --gres=gpu:${GPUS_PER_NODE} \
18 |     --ntasks=${GPUS} \
19 |     --ntasks-per-node=${GPUS_PER_NODE} \
20 |     --cpus-per-task=${CPUS_PER_TASK} \
21 |     --kill-on-bad-exit=1 \
22 |     ${SRUN_ARGS} \
23 |     python -u tools/train.py ${CONFIG} --work_dir=${WORK_DIR} --launcher="slurm" ${PY_ARGS}
24 | 


--------------------------------------------------------------------------------
/tools/train.py:
--------------------------------------------------------------------------------
  1 | from __future__ import division
  2 | import argparse
  3 | import os
  4 | 
  5 | import torch
  6 | from mmcv import Config
  7 | 
  8 | from mmdet import __version__
  9 | from mmdet.apis import (get_root_logger, init_dist, set_random_seed,
 10 |                         train_detector)
 11 | from mmdet.datasets import build_dataset
 12 | from mmdet.models import build_detector
 13 | 
 14 | 
 15 | def parse_args():
 16 |     parser = argparse.ArgumentParser(description='Train a detector')
 17 |     parser.add_argument('config', help='train config file path')
 18 |     parser.add_argument('--work_dir', help='the dir to save logs and models')
 19 |     parser.add_argument(
 20 |         '--resume_from', help='the checkpoint file to resume from')
 21 |     parser.add_argument(
 22 |         '--validate',
 23 |         action='store_true',
 24 |         help='whether to evaluate the checkpoint during training')
 25 |     parser.add_argument(
 26 |         '--gpus',
 27 |         type=int,
 28 |         default=1,
 29 |         help='number of gpus to use '
 30 |         '(only applicable to non-distributed training)')
 31 |     parser.add_argument('--seed', type=int, default=None, help='random seed')
 32 |     parser.add_argument(
 33 |         '--launcher',
 34 |         choices=['none', 'pytorch', 'slurm', 'mpi'],
 35 |         default='none',
 36 |         help='job launcher')
 37 |     parser.add_argument('--local_rank', type=int, default=0)
 38 |     parser.add_argument(
 39 |         '--autoscale-lr',
 40 |         action='store_true',
 41 |         help='automatically scale lr with the number of gpus')
 42 |     args = parser.parse_args()
 43 |     if 'LOCAL_RANK' not in os.environ:
 44 |         os.environ['LOCAL_RANK'] = str(args.local_rank)
 45 | 
 46 |     return args
 47 | 
 48 | 
def main():
    """Train a detector as described by the config file and CLI options.

    Steps, in order: load the config and apply CLI overrides, optionally
    rescale the learning rate, initialise the distributed environment,
    create the logger, seed the RNGs, build the model and dataset(s), and
    hand everything to ``train_detector``.
    """
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    cfg.gpus = args.gpus

    if args.autoscale_lr:
        # apply the linear scaling rule (https://arxiv.org/abs/1706.02677)
        # presumably the configured lr is tuned for 8 GPUs -- the divisor is
        # hard-coded; verify against the config in use
        cfg.optimizer['lr'] = cfg.optimizer['lr'] * cfg.gpus / 8

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    # init logger before other steps
    logger = get_root_logger(cfg.log_level)
    logger.info('Distributed training: {}'.format(distributed))
    logger.info('MMDetection Version: {}'.format(__version__))
    logger.info('Config: {}'.format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)

    # The training set is always built; the validation set is added only
    # when the workflow has two entries.
    datasets = [build_dataset(cfg.data.train)]
    if len(cfg.workflow) == 2:
        datasets.append(build_dataset(cfg.data.val))
    if cfg.checkpoint_config is not None:
        # save mmdet version, config file content and class names in
        # checkpoints as meta data
        cfg.checkpoint_config.meta = dict(
            mmdet_version=__version__,
            config=cfg.text,
            CLASSES=datasets[0].CLASSES)
    # add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES
    # NOTE(review): autograd anomaly detection is enabled for the whole
    # training run. PyTorch documents it as a debugging aid that slows
    # execution -- confirm it is meant to stay on outside debugging.
    with torch.autograd.set_detect_anomaly(True):
        train_detector(
            model,
            datasets,
            cfg,
            distributed=distributed,
            validate=args.validate,
            logger=logger)


if __name__ == '__main__':
    main()
112 | 


--------------------------------------------------------------------------------
/tools/upgrade_model_version.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import re
 3 | from collections import OrderedDict
 4 | 
 5 | import torch
 6 | 
 7 | 
 8 | def convert(in_file, out_file):
 9 |     """Convert keys in checkpoints.
10 | 
11 |     There can be some breaking changes during the development of mmdetection,
12 |     and this tool is used for upgrading checkpoints trained with old versions
13 |     to the latest one.
14 |     """
15 |     checkpoint = torch.load(in_file)
16 |     in_state_dict = checkpoint.pop('state_dict')
17 |     out_state_dict = OrderedDict()
18 |     for key, val in in_state_dict.items():
19 |         # Use ConvModule instead of nn.Conv2d in RetinaNet
20 |         # cls_convs.0.weight -> cls_convs.0.conv.weight
21 |         m = re.search(r'(cls_convs|reg_convs).\d.(weight|bias)', key)
22 |         if m is not None:
23 |             param = m.groups()[1]
24 |             new_key = key.replace(param, 'conv.{}'.format(param))
25 |             out_state_dict[new_key] = val
26 |             continue
27 | 
28 |         out_state_dict[key] = val
29 |     checkpoint['state_dict'] = out_state_dict
30 |     torch.save(checkpoint, out_file)
31 | 
32 | 
def main():
    """Read the input/output checkpoint paths and run the key upgrade."""
    cli = argparse.ArgumentParser(description='Upgrade model version')
    cli.add_argument('in_file', help='input checkpoint file')
    cli.add_argument('out_file', help='output checkpoint file')
    opts = cli.parse_args()
    convert(opts.in_file, opts.out_file)


if __name__ == '__main__':
    main()
43 | 


--------------------------------------------------------------------------------
/tools/voc_eval.py:
--------------------------------------------------------------------------------
 1 | from argparse import ArgumentParser
 2 | 
 3 | import mmcv
 4 | import numpy as np
 5 | 
 6 | from mmdet import datasets
 7 | from mmdet.core import eval_map
 8 | 
 9 | 
def voc_eval(result_file, dataset, iou_thr=0.5):
    """Evaluate saved detection results against a dataset's annotations.

    Ground-truth boxes and labels are gathered from ``dataset`` one sample
    at a time. When an annotation carries ``bboxes_ignore``, those boxes and
    their ``labels_ignore`` are appended to the ground truth and marked in a
    boolean ignore mask. Everything is then passed to ``eval_map``, which
    prints the summary.

    Args:
        result_file (str): Path of the detection results, read by
            ``mmcv.load``.
        dataset: Dataset object providing ``__len__`` and
            ``get_ann_info(i)``.
        iou_thr (float): IoU threshold forwarded to ``eval_map``.
    """
    det_results = mmcv.load(result_file)
    gt_bboxes = []
    gt_labels = []
    gt_ignore = []
    for i in range(len(dataset)):
        ann = dataset.get_ann_info(i)
        bboxes = ann['bboxes']
        labels = ann['labels']
        if 'bboxes_ignore' in ann:
            # The builtin `bool` replaces the `np.bool` alias, which was
            # removed from NumPy and raises AttributeError on new releases.
            ignore = np.concatenate([
                np.zeros(bboxes.shape[0], dtype=bool),
                np.ones(ann['bboxes_ignore'].shape[0], dtype=bool)
            ])
            # NOTE(review): a mask is appended only for annotations that
            # have 'bboxes_ignore'; if only some samples have it, gt_ignore
            # ends up shorter than gt_bboxes -- confirm this cannot happen.
            gt_ignore.append(ignore)
            bboxes = np.vstack([bboxes, ann['bboxes_ignore']])
            labels = np.concatenate([labels, ann['labels_ignore']])
        gt_bboxes.append(bboxes)
        gt_labels.append(labels)
    if not gt_ignore:
        gt_ignore = None
    # Pick the dataset name handed to eval_map: VOC2007 when the dataset
    # says so, otherwise 'vid'.
    if hasattr(dataset, 'year') and dataset.year == 2007:
        dataset_name = 'voc07'
    else:
        dataset_name = 'vid'
    eval_map(
        det_results,
        gt_bboxes,
        gt_labels,
        gt_ignore=gt_ignore,
        scale_ranges=None,
        iou_thr=iou_thr,
        dataset=dataset_name,
        print_summary=True)
54 | 
55 | 
def main():
    """Build the test dataset from a config file and evaluate a result file.

    Positional arguments are the result file and the config file;
    ``--iou-thr`` sets the IoU threshold (default 0.5).
    """
    cli = ArgumentParser(description='VOC Evaluation')
    cli.add_argument('result', help='result file path')
    cli.add_argument('config', help='config file path')
    cli.add_argument(
        '--iou-thr',
        type=float,
        default=0.5,
        help='IoU threshold for evaluation')
    opts = cli.parse_args()

    cfg = mmcv.Config.fromfile(opts.config)
    # Extra constructor arguments applied on top of cfg.data.test.
    dataset_defaults = dict(test_mode=True, world_size=1)
    test_dataset = mmcv.runner.obj_from_dict(cfg.data.test, datasets,
                                             dataset_defaults)
    voc_eval(opts.result, test_dataset, opts.iou_thr)


if __name__ == '__main__':
    main()
73 | 


--------------------------------------------------------------------------------
/train.sh:
--------------------------------------------------------------------------------
1 | ./tools/dist_train.sh ./configs/faster_rcnn_r101_hmp_c5.py 4 >> 2020_01_14_19_40_hmp.log 2>& 1 &
2 | 


--------------------------------------------------------------------------------