├── radet
    ├── ops
    │   ├── cluster
    │   │   ├── __init__.py
    │   │   ├── cluster_wrapper.py
    │   │   └── cluster_ext.cpp
    │   ├── vote
    │   │   ├── __init__.py
    │   │   └── vote_wrapper.py
    │   ├── bbox2distance
    │   │   └── __init__.py
    │   └── __init__.py
    ├── README.md
    ├── utils
    │   ├── __init__.py
    │   ├── collect_env.py
    │   ├── logger.py
    │   ├── profiling.py
    │   ├── util_mixins.py
    │   └── contextmanagers.py
    ├── models
    │   ├── necks
    │   │   ├── __init__.py
    │   │   └── channel_mapper.py
    │   ├── detectors
    │   │   ├── __init__.py
    │   │   └── radet.py
    │   ├── dense_heads
    │   │   ├── __init__.py
    │   │   ├── base_dense_head.py
    │   │   └── dense_test_mixins.py
    │   ├── utils
    │   │   ├── builder.py
    │   │   ├── __init__.py
    │   │   └── res_layer.py
    │   ├── __init__.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   └── detectors_resnext.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── accuracy.py
    │   │   ├── utils.py
    │   │   └── smooth_l1_loss.py
    │   └── builder.py
    ├── core
    │   ├── visualization
    │   │   └── __init__.py
    │   ├── bbox
    │   │   ├── iou_calculators
    │   │   │   ├── __init__.py
    │   │   │   └── builder.py
    │   │   ├── match_costs
    │   │   │   ├── __init__.py
    │   │   │   └── builder.py
    │   │   ├── assigners
    │   │   │   ├── base_assigner.py
    │   │   │   └── __init__.py
    │   │   ├── coder
    │   │   │   ├── base_bbox_coder.py
    │   │   │   ├── __init__.py
    │   │   │   ├── pseudo_bbox_coder.py
    │   │   │   └── yolo_bbox_coder.py
    │   │   ├── builder.py
    │   │   ├── samplers
    │   │   │   ├── __init__.py
    │   │   │   ├── combined_sampler.py
    │   │   │   ├── pseudo_sampler.py
    │   │   │   ├── instance_balanced_pos_sampler.py
    │   │   │   ├── random_sampler.py
    │   │   │   ├── base_sampler.py
    │   │   │   └── ohem_sampler.py
    │   │   ├── __init__.py
    │   │   └── demodata.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── misc.py
    │   │   └── dist_utils.py
    │   ├── anchor
    │   │   ├── builder.py
    │   │   ├── __init__.py
    │   │   ├── point_generator.py
    │   │   └── utils.py
    │   ├── export
    │   │   ├── __init__.py
    │   │   └── pytorch2onnx.py
    │   ├── mask
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── mask_target.py
    │   ├── __init__.py
    │   ├── post_processing
    │   │   ├── __init__.py
    │   │   └── merge_augs.py
    │   ├── fp16
    │   │   ├── __init__.py
    │   │   └── deprecated_fp16_utils.py
    │   └── evaluation
    │       ├── __init__.py
    │       ├── bbox_overlaps.py
    │       └── class_names.py
    ├── datasets
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── distributed_sampler.py
    │   │   └── group_sampler.py
    │   ├── deepfashion.py
    │   ├── ycbv.py
    │   ├── kitti.py
    │   ├── __init__.py
    │   ├── pipelines
    │   │   ├── compose.py
    │   │   ├── __init__.py
    │   │   ├── instaboost.py
    │   │   └── test_time_aug.py
    │   ├── wider_face.py
    │   ├── utils.py
    │   └── voc.py
    ├── apis
    │   └── __init__.py
    ├── version.py
    └── __init__.py
├── resources
    ├── .DS_Store
    ├── radet.png
    └── rigidity.png
├── requirements.txt
├── setup.cfg
├── configs
    ├── base
    │   ├── default_runtime.py
    │   └── datasets
    │       ├── bop_detection.py
    │       └── bop_detection_mix.py
    └── bop
        ├── r50_icbin_pbr.py
        ├── r50_itodd_pbr.py
        ├── r50_tudl_pbr.py
        ├── r50_tless_pbr.py
        ├── r50_hb_pbr.py
        ├── r50_lmo_pbr.py
        ├── r50_ycbv_pbr.py
        ├── r50_tudl_mixpbr.py
        ├── r50_tless_mixpbr.py
        └── r50_ycbv_mixpbr.py
├── tools
    ├── collect_image_list.py
    ├── collect_bop_imagelist.py
    ├── coco_to_bop.py
    ├── browse_dataset.py
    ├── eval_metric.py
    └── show_bop_detbbox.py
└── README.md

/radet/ops/cluster/__init__.py:
--------------------------------------------------------------------------------
from .cluster_wrapper import cluster_nms
--------------------------------------------------------------------------------
/radet/ops/vote/__init__.py:
--------------------------------------------------------------------------------
from .vote_wrapper import vote_nms, global_vote_nms
--------------------------------------------------------------------------------
/radet/README.md:
--------------------------------------------------------------------------------
Adapted from [mmdetection](https://github.com/open-mmlab/mmdetection)
--------------------------------------------------------------------------------
/resources/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangHai-1218/RADet/HEAD/resources/.DS_Store
--------------------------------------------------------------------------------
/resources/radet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangHai-1218/RADet/HEAD/resources/radet.png
--------------------------------------------------------------------------------
/resources/rigidity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangHai-1218/RADet/HEAD/resources/rigidity.png
--------------------------------------------------------------------------------
/radet/ops/bbox2distance/__init__.py:
--------------------------------------------------------------------------------
from .bbox2distance_wrapper import MBD_box2distance, GDT_box2distance
--------------------------------------------------------------------------------
/radet/utils/__init__.py:
--------------------------------------------------------------------------------
from .collect_env import collect_env
from .logger import get_root_logger

__all__ = ['get_root_logger', 'collect_env']
--------------------------------------------------------------------------------
/radet/models/necks/__init__.py:
--------------------------------------------------------------------------------
from .channel_mapper import ChannelMapper
from .fpn import FPN

__all__ = [
    'FPN', 'ChannelMapper',
]
--------------------------------------------------------------------------------
/radet/core/visualization/__init__.py:
--------------------------------------------------------------------------------
from .image import color_val_matplotlib, imshow_det_bboxes

__all__ = ['imshow_det_bboxes', 'color_val_matplotlib']
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
cython
numpy
matplotlib
mmpycocotools
six
terminaltables
mmcv==1.3.18
torch==1.10.0
torchvision==0.11.0
--------------------------------------------------------------------------------
/radet/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
from .builder import build_iou_calculator
from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps

__all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']
--------------------------------------------------------------------------------
/radet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
from .base import BaseDetector
from .single_stage import SingleStageDetector
from .radet import RADet

__all__ = [
    'BaseDetector', 'SingleStageDetector',
    'RADet',
]
--------------------------------------------------------------------------------
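
The detector classes exported above are instantiated through the registry-based builders defined in radet/models/builder.py (shown further down). A minimal sketch of building a RADet instance from a config dict; the backbone/neck/head settings below are illustrative placeholders, not values copied from the shipped configs under configs/bop/:

from radet.models import build_detector

cfg = dict(
    type='RADet',
    backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3)),
    neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
    bbox_head=dict(type='RADetHead', num_classes=21, in_channels=256))
model = build_detector(cfg, train_cfg=None, test_cfg=None)
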
/radet/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
from .distributed_sampler import DistributedSampler
from .group_sampler import DistributedGroupSampler, GroupSampler

__all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler']
--------------------------------------------------------------------------------
/radet/core/utils/__init__.py:
--------------------------------------------------------------------------------
from .dist_utils import DistOptimizerHook, allreduce_grads, reduce_mean
from .misc import multi_apply, unmap

__all__ = [
    'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply',
    'unmap'
]
--------------------------------------------------------------------------------
/radet/core/anchor/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

ANCHOR_GENERATORS = Registry('Anchor generator')


def build_anchor_generator(cfg, default_args=None):
    return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args)
--------------------------------------------------------------------------------
/radet/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
from .builder import build_match_cost
from .match_cost import BBoxL1Cost, ClassificationCost, FocalLossCost, IoUCost

__all__ = [
    'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost',
    'FocalLossCost'
]
--------------------------------------------------------------------------------
/radet/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
from .anchor_free_head import AnchorFreeHead
from .anchor_head import AnchorHead
from .atss_head import ATSSHead
from .radet_head import RADetHead

__all__ = [
    'AnchorFreeHead', 'AnchorHead', 'RADetHead', 'ATSSHead'
]
--------------------------------------------------------------------------------
/radet/core/bbox/match_costs/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

MATCH_COST = Registry('Match Cost')


def build_match_cost(cfg, default_args=None):
    """Builder of match cost."""
    return build_from_cfg(cfg, MATCH_COST, default_args)
--------------------------------------------------------------------------------
/radet/core/bbox/iou_calculators/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

IOU_CALCULATORS = Registry('IoU calculator')


def build_iou_calculator(cfg, default_args=None):
    """Builder of IoU calculator."""
    return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
--------------------------------------------------------------------------------
/radet/core/export/__init__.py:
--------------------------------------------------------------------------------
from .pytorch2onnx import (build_model_from_cfg,
                           generate_inputs_and_wrap_model,
                           preprocess_example_input)

__all__ = [
    'build_model_from_cfg', 'generate_inputs_and_wrap_model',
    'preprocess_example_input'
]
--------------------------------------------------------------------------------
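
All of these builder modules follow the same mmcv pattern: classes register themselves into a Registry, and build_from_cfg instantiates whichever registered class a config dict names in its type key. A self-contained sketch (the DistanceCalculator class is hypothetical, not part of the repo):

from mmcv.utils import Registry, build_from_cfg

CALCULATORS = Registry('Distance calculator')


@CALCULATORS.register_module()
class DistanceCalculator:
    """Hypothetical example class used to illustrate the registry."""

    def __init__(self, scale=1.0):
        self.scale = scale


calc = build_from_cfg(dict(type='DistanceCalculator', scale=2.0), CALCULATORS)
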
/radet/ops/__init__.py:
--------------------------------------------------------------------------------
from .bbox2distance import MBD_box2distance, GDT_box2distance
from .vote import vote_nms, global_vote_nms
from .cluster import cluster_nms


__all__ = [
    'vote_nms',
    'global_vote_nms',
    'MBD_box2distance',
    'GDT_box2distance',
    'cluster_nms'
]
--------------------------------------------------------------------------------
/radet/core/mask/__init__.py:
--------------------------------------------------------------------------------
from .mask_target import mask_target
from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks
from .utils import encode_mask_results, split_combined_polys

__all__ = [
    'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks',
    'PolygonMasks', 'encode_mask_results'
]
--------------------------------------------------------------------------------
/radet/core/__init__.py:
--------------------------------------------------------------------------------
from .anchor import *  # noqa: F401, F403
from .bbox import *  # noqa: F401, F403
from .evaluation import *  # noqa: F401, F403
from .export import *  # noqa: F401, F403
from .fp16 import *  # noqa: F401, F403
from .mask import *  # noqa: F401, F403
from .post_processing import *  # noqa: F401, F403
from .utils import *  # noqa: F401, F403
--------------------------------------------------------------------------------
/radet/datasets/deepfashion.py:
--------------------------------------------------------------------------------
from .builder import DATASETS
from .coco import CocoDataset


@DATASETS.register_module()
class DeepFashionDataset(CocoDataset):

    CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag',
               'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair',
               'skin', 'face')
--------------------------------------------------------------------------------
/radet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
from .bbox_nms import fast_nms, multiclass_nms, multiclass_vote
from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
                         merge_aug_proposals, merge_aug_scores)

__all__ = [
    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
    'merge_aug_scores', 'merge_aug_masks', 'fast_nms', 'multiclass_vote'
]
--------------------------------------------------------------------------------
/radet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):
    """Base assigner that assigns boxes to ground truth boxes."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign each box to either a ground truth box or the background."""
        pass
--------------------------------------------------------------------------------
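
A new assignment strategy subclasses BaseAssigner above and registers itself with BBOX_ASSIGNERS from radet/core/bbox/builder.py. A skeleton, with the matching logic itself elided (MyAssigner is hypothetical):

from radet.core.bbox.builder import BBOX_ASSIGNERS
from radet.core.bbox.assigners.base_assigner import BaseAssigner


@BBOX_ASSIGNERS.register_module()
class MyAssigner(BaseAssigner):
    """Hypothetical example, not part of the repo."""

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        # Produce a per-box index tensor (0 = negative, i > 0 = matched to
        # gt_bboxes[i - 1]) and wrap it in an AssignResult.
        raise NotImplementedError
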
/radet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
from .deprecated_fp16_utils import \
    DeprecatedFp16OptimizerHook as Fp16OptimizerHook
from .deprecated_fp16_utils import deprecated_auto_fp16 as auto_fp16
from .deprecated_fp16_utils import deprecated_force_fp32 as force_fp32
from .deprecated_fp16_utils import \
    deprecated_wrap_fp16_model as wrap_fp16_model

__all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
--------------------------------------------------------------------------------
/radet/apis/__init__.py:
--------------------------------------------------------------------------------
from .inference import (async_inference_detector, inference_detector,
                        init_detector, show_result_pyplot)
from .test import multi_gpu_test, single_gpu_test
from .train import get_root_logger, set_random_seed, train_detector

__all__ = [
    'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
    'async_inference_detector', 'inference_detector', 'show_result_pyplot',
    'multi_gpu_test', 'single_gpu_test'
]
--------------------------------------------------------------------------------
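
The functions re-exported here follow the standard mmdetection inference flow. A sketch using one of the shipped configs (the checkpoint path is a hypothetical example):

from radet.apis import init_detector, inference_detector

config = 'configs/bop/r50_ycbv_pbr.py'
checkpoint = 'work_dirs/r50_ycbv_pbr/latest.pth'  # hypothetical path
model = init_detector(config, checkpoint, device='cuda:0')
result = inference_detector(model, 'demo.png')  # per-class arrays of (x1, y1, x2, y2, score)
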
/radet/utils/collect_env.py:
--------------------------------------------------------------------------------
from mmcv.utils import collect_env as collect_base_env
from mmcv.utils import get_git_hash

import radet


def collect_env():
    """Collect the information of the running environments."""
    env_info = collect_base_env()
    env_info['MMDetection'] = radet.__version__ + '+' + get_git_hash()[:7]
    return env_info


if __name__ == '__main__':
    for name, val in collect_env().items():
        print(f'{name}: {val}')
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet
known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,robustness_eval,seaborn,six,terminaltables,torch
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY

[yapf]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
--------------------------------------------------------------------------------
/radet/models/utils/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

TRANSFORMER = Registry('Transformer')
POSITIONAL_ENCODING = Registry('Position encoding')


def build_transformer(cfg, default_args=None):
    """Builder for Transformer."""
    return build_from_cfg(cfg, TRANSFORMER, default_args)


def build_positional_encoding(cfg, default_args=None):
    """Builder for Position Encoding."""
    return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)
--------------------------------------------------------------------------------
/radet/datasets/ycbv.py:
--------------------------------------------------------------------------------
from .builder import DATASETS
from .coco import CocoDataset


@DATASETS.register_module()
class YcbvDataset(CocoDataset):
    CLASSES = ('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can',
               'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box',
               'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser',
               'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker',
               'large_clamp', 'extra_large_clamp', 'foam_brick')
--------------------------------------------------------------------------------
/radet/utils/logger.py:
--------------------------------------------------------------------------------
import logging

from mmcv.utils import get_logger


def get_root_logger(log_file=None, log_level=logging.INFO):
    """Get root logger.

    Args:
        log_file (str, optional): File path of log. Defaults to None.
        log_level (int, optional): The level of logger.
            Defaults to logging.INFO.

    Returns:
        :obj:`logging.Logger`: The obtained logger
    """
    logger = get_logger(name='radet', log_file=log_file, log_level=log_level)

    return logger
--------------------------------------------------------------------------------
/radet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator,
                               YOLOAnchorGenerator)
from .builder import ANCHOR_GENERATORS, build_anchor_generator
from .point_generator import PointGenerator
from .utils import anchor_inside_flags, calc_region, images_to_levels

__all__ = [
    'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags',
    'PointGenerator', 'images_to_levels', 'calc_region',
    'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator'
]
--------------------------------------------------------------------------------
/radet/core/bbox/coder/base_bbox_coder.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod


class BaseBBoxCoder(metaclass=ABCMeta):
    """Base bounding box coder."""

    def __init__(self, **kwargs):
        pass

    @abstractmethod
    def encode(self, bboxes, gt_bboxes):
        """Encode deltas between bboxes and ground truth boxes."""
        pass

    @abstractmethod
    def decode(self, bboxes, bboxes_pred):
        """Decode the predicted bboxes according to prediction and base
        boxes."""
        pass
--------------------------------------------------------------------------------
/radet/core/bbox/coder/__init__.py:
--------------------------------------------------------------------------------
from .base_bbox_coder import BaseBBoxCoder
from .bucketing_bbox_coder import BucketingBBoxCoder
from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder
from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder
from .pseudo_bbox_coder import PseudoBBoxCoder
from .tblr_bbox_coder import TBLRBBoxCoder
from .yolo_bbox_coder import YOLOBBoxCoder

__all__ = [
    'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',
    'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder',
    'BucketingBBoxCoder'
]
--------------------------------------------------------------------------------
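
A concrete coder implements the two abstract methods of BaseBBoxCoder and registers itself with BBOX_CODERS from radet/core/bbox/builder.py; PseudoBBoxCoder further down is the degenerate pass-through case. A hypothetical sketch:

from radet.core.bbox.builder import BBOX_CODERS
from radet.core.bbox.coder.base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class IdentityOffsetCoder(BaseBBoxCoder):
    """Hypothetical example: plain additive offsets as regression targets."""

    def encode(self, bboxes, gt_bboxes):
        return gt_bboxes - bboxes

    def decode(self, bboxes, bboxes_pred):
        return bboxes + bboxes_pred
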
/radet/version.py:
--------------------------------------------------------------------------------
# Copyright (c) Open-MMLab. All rights reserved.

__version__ = '2.8.0'
short_version = __version__


def parse_version_info(version_str):
    version_info = []
    for x in version_str.split('.'):
        if x.isdigit():
            version_info.append(int(x))
        elif x.find('rc') != -1:
            patch_version = x.split('rc')
            version_info.append(int(patch_version[0]))
            version_info.append(f'rc{patch_version[1]}')
    return tuple(version_info)


version_info = parse_version_info(__version__)
--------------------------------------------------------------------------------
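
Worked examples for the parser above, easy to verify by hand:

parse_version_info('2.8.0')     # -> (2, 8, 0)
parse_version_info('2.8.0rc1')  # -> (2, 8, 0, 'rc1')
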
/radet/models/__init__.py:
--------------------------------------------------------------------------------
from .backbones import *  # noqa: F401,F403
from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
                      build_backbone, build_detector,
                      build_head, build_loss, build_neck)
from .dense_heads import *  # noqa: F401,F403
from .detectors import *  # noqa: F401,F403
from .losses import *  # noqa: F401,F403
from .necks import *  # noqa: F401,F403

__all__ = [
    'BACKBONES', 'NECKS', 'HEADS', 'LOSSES',
    'DETECTORS', 'build_backbone', 'build_neck',
    'build_head', 'build_loss', 'build_detector'
]
--------------------------------------------------------------------------------
/radet/core/bbox/coder/pseudo_bbox_coder.py:
--------------------------------------------------------------------------------
from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class PseudoBBoxCoder(BaseBBoxCoder):
    """Pseudo bounding box coder."""

    def __init__(self, **kwargs):
        super(PseudoBBoxCoder, self).__init__(**kwargs)

    def encode(self, bboxes, gt_bboxes):
        """torch.Tensor: return the given ``gt_bboxes``"""
        return gt_bboxes

    def decode(self, bboxes, pred_bboxes):
        """torch.Tensor: return the given ``pred_bboxes``"""
        return pred_bboxes
--------------------------------------------------------------------------------
/radet/core/bbox/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

BBOX_ASSIGNERS = Registry('bbox_assigner')
BBOX_SAMPLERS = Registry('bbox_sampler')
BBOX_CODERS = Registry('bbox_coder')


def build_assigner(cfg, **default_args):
    """Builder of box assigner."""
    return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)


def build_sampler(cfg, **default_args):
    """Builder of box sampler."""
    return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)


def build_bbox_coder(cfg, **default_args):
    """Builder of box coder."""
    return build_from_cfg(cfg, BBOX_CODERS, default_args)
--------------------------------------------------------------------------------
/radet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
from .darknet import Darknet
from .detectors_resnet import DetectoRS_ResNet
from .detectors_resnext import DetectoRS_ResNeXt
from .hourglass import HourglassNet
from .hrnet import HRNet
from .regnet import RegNet
from .res2net import Res2Net
from .resnest import ResNeSt
from .resnet import ResNet, ResNetV1d
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
from .trident_resnet import TridentResNet

__all__ = [
    'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net',
    'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet',
    'ResNeSt', 'TridentResNet'
]
--------------------------------------------------------------------------------
/radet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
from .base_sampler import BaseSampler
from .combined_sampler import CombinedSampler
from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
from .iou_balanced_neg_sampler import IoUBalancedNegSampler
from .ohem_sampler import OHEMSampler
from .pseudo_sampler import PseudoSampler
from .random_sampler import RandomSampler
from .sampling_result import SamplingResult
from .score_hlr_sampler import ScoreHLRSampler

__all__ = [
    'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler'
]
--------------------------------------------------------------------------------
/radet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
from .approx_max_iou_assigner import ApproxMaxIoUAssigner
from .assign_result import AssignResult
from .atss_assigner import ATSSAssigner
from .base_assigner import BaseAssigner
from .center_region_assigner import CenterRegionAssigner
from .grid_assigner import GridAssigner
from .hungarian_assigner import HungarianAssigner
from .max_iou_assigner import MaxIoUAssigner
from .point_assigner import PointAssigner
from .region_assigner import RegionAssigner

__all__ = [
    'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
    'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner',
    'HungarianAssigner', 'RegionAssigner'
]
--------------------------------------------------------------------------------
/radet/core/bbox/samplers/combined_sampler.py:
--------------------------------------------------------------------------------
from ..builder import BBOX_SAMPLERS, build_sampler
from .base_sampler import BaseSampler


@BBOX_SAMPLERS.register_module()
class CombinedSampler(BaseSampler):
    """A sampler that combines positive sampler and negative sampler."""

    def __init__(self, pos_sampler, neg_sampler, **kwargs):
        super(CombinedSampler, self).__init__(**kwargs)
        self.pos_sampler = build_sampler(pos_sampler, **kwargs)
        self.neg_sampler = build_sampler(neg_sampler, **kwargs)

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError
--------------------------------------------------------------------------------
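
CombinedSampler delegates the actual selection to its nested sub-samplers, so it is configured with sampler dicts inside a sampler dict. A sketch; the num and pos_fraction arguments are assumed from the BaseSampler signature in the mmdetection lineage, and the values are placeholders:

from radet.core.bbox import build_sampler

sampler = build_sampler(
    dict(
        type='CombinedSampler',
        num=512,
        pos_fraction=0.25,
        pos_sampler=dict(type='InstanceBalancedPosSampler'),
        neg_sampler=dict(type='IoUBalancedNegSampler')))
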
/radet/ops/cluster/cluster_wrapper.py:
--------------------------------------------------------------------------------
import torch
from . import cluster_ext
import numpy as np


def cluster_nms(bboxes, scores, categories, iou_threshold=0.65):
    if isinstance(bboxes, np.ndarray):
        bboxes = torch.from_numpy(bboxes)
    else:
        assert isinstance(bboxes, torch.Tensor)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)
    else:
        assert isinstance(scores, torch.Tensor)

    if isinstance(categories, np.ndarray):
        categories = torch.from_numpy(categories)
    else:
        assert isinstance(categories, torch.Tensor)

    instance_ids, clusters_num = cluster_ext.cluster_nms(bboxes, scores, categories, iou_threshold)
    return instance_ids, clusters_num
--------------------------------------------------------------------------------
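
cluster_nms above groups overlapping same-category detections into clusters via the compiled cluster_ext extension. A usage sketch with made-up boxes; the exact meaning of the returned instance ids is defined by the C++ side:

import torch
from radet.ops import cluster_nms

bboxes = torch.tensor([[0., 0., 10., 10.],
                       [1., 1., 11., 11.],
                       [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
categories = torch.tensor([0, 0, 0])
instance_ids, clusters_num = cluster_nms(bboxes, scores, categories, iou_threshold=0.65)
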
/configs/base/default_runtime.py:
--------------------------------------------------------------------------------
optimizer = dict(
    type='AdamW',
    lr=0.0004,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    eps=1e-08,
    amsgrad=False,
)
lr_config = dict(
    policy='OneCycle',
    max_lr=0.0004,
    total_steps=100100,
    pct_start=0.05,
    anneal_strategy='linear')

runner = dict(type='IterBasedRunner', max_iters=100000)
checkpoint_config = dict(interval=10000)
evaluation = dict(interval=10000, metric='bbox')
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
workflow = [('train', 1)]
--------------------------------------------------------------------------------
/radet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
from .class_names import (cityscapes_classes, coco_classes, dataset_aliases,
                          get_classes, imagenet_det_classes,
                          imagenet_vid_classes, voc_classes)
from .eval_hooks import DistEvalHook, EvalHook
from .mean_ap import average_precision, eval_map, print_map_summary
from .recall import (eval_recalls, plot_iou_recall, plot_num_recall,
                     print_recall_summary)

__all__ = [
    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
    'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes',
    'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map',
    'print_map_summary', 'eval_recalls', 'print_recall_summary',
    'plot_num_recall', 'plot_iou_recall'
]
--------------------------------------------------------------------------------
/tools/collect_image_list.py:
--------------------------------------------------------------------------------
import argparse
from glob import glob
from os import path as osp

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--source-dir', default='data/ycbv/train_real', type=str)
    parser.add_argument('--save-path', default='data/ycbv/train_real/train_real.txt', type=str)
    parser.add_argument('--pattern', default='*/rgb/*.png', type=str)
    args = parser.parse_args()
    return args




if __name__ == '__main__':
    args = parse_args()
    image_list = glob(osp.join(args.source_dir, args.pattern))
    image_list = sorted(image_list)
    image_list = [i.replace(args.source_dir+'/', '')+'\n' for i in image_list]
    print(f"Total {len(image_list)} images found")
    with open(args.save_path, 'w') as f:
        f.writelines(image_list)
--------------------------------------------------------------------------------
/radet/models/utils/__init__.py:
--------------------------------------------------------------------------------
from .builder import build_positional_encoding, build_transformer
from .gaussian_target import gaussian_radius, gen_gaussian_target
from .positional_encoding import (LearnedPositionalEncoding,
                                  SinePositionalEncoding)
from .res_layer import ResLayer
from .transformer import (FFN, MultiheadAttention, Transformer,
                          TransformerDecoder, TransformerDecoderLayer,
                          TransformerEncoder, TransformerEncoderLayer)

__all__ = [
    'ResLayer', 'gaussian_radius', 'gen_gaussian_target', 'MultiheadAttention',
    'FFN', 'TransformerEncoderLayer', 'TransformerEncoder',
    'TransformerDecoderLayer', 'TransformerDecoder', 'Transformer',
    'build_transformer', 'build_positional_encoding', 'SinePositionalEncoding',
    'LearnedPositionalEncoding'
]
--------------------------------------------------------------------------------
/radet/models/losses/__init__.py:
--------------------------------------------------------------------------------
from .accuracy import Accuracy, accuracy
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
                                 cross_entropy, mask_cross_entropy)
from .focal_loss import FocalLoss, sigmoid_focal_loss
from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss,
                       bounded_iou_loss, iou_loss)
from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss
from .utils import reduce_loss, weight_reduce_loss, weighted_loss

__all__ = [
    'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
    'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
    'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss',
    'iou_loss', 'bounded_iou_loss', 'l1_loss',
    'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss',
    'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss',
]
--------------------------------------------------------------------------------
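
The functional losses exported here keep the mmdetection conventions; a minimal sketch of the smooth L1 variant (the beta and reduction keywords are assumed from that lineage):

import torch
from radet.models.losses import smooth_l1_loss

pred = torch.rand(4, 4)
target = torch.rand(4, 4)
loss = smooth_l1_loss(pred, target, beta=1.0, reduction='mean')
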
/radet/__init__.py:
--------------------------------------------------------------------------------
import mmcv

from .version import __version__, short_version



def digit_version(version_str):
    digit_version = []
    for x in version_str.split('.'):
        if x.isdigit():
            digit_version.append(int(x))
        elif x.find('rc') != -1:
            patch_version = x.split('rc')
            digit_version.append(int(patch_version[0]) - 1)
            digit_version.append(int(patch_version[1]))
    return digit_version


mmcv_minimum_version = '1.2.4'
mmcv_maximum_version = '1.3.20'
mmcv_version = digit_version(mmcv.__version__)


assert (mmcv_version >= digit_version(mmcv_minimum_version)
        and mmcv_version <= digit_version(mmcv_maximum_version)), \
    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'

__all__ = ['__version__', 'short_version']
--------------------------------------------------------------------------------
/tools/collect_bop_imagelist.py:
--------------------------------------------------------------------------------
import json
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('bop_test_json', type=str)
    parser.add_argument('save_path', type=str)
    parser.add_argument('--ext', default='png', type=str)
    args = parser.parse_args()
    return args



if __name__ == '__main__':
    args = parse_args()
    bop_test_json, save_path, ext = args.bop_test_json, args.save_path, args.ext
    with open(bop_test_json, 'r') as f:
        bop_test = json.load(f)
    image_paths = []
    for obj in bop_test:
        im_id, scene_id = obj['im_id'], obj['scene_id']
        image_path = f"{int(scene_id):06d}/rgb/{int(im_id):06d}.{ext}"
        if image_path in image_paths:
            continue
        else:
            image_paths.append(image_path)
    print(f"total {len(image_paths)} found")
    with open(save_path, 'w') as f:
        f.writelines([p+'\n' for p in image_paths])
--------------------------------------------------------------------------------
/radet/datasets/kitti.py:
--------------------------------------------------------------------------------
from .builder import DATASETS
from .coco import CocoDataset

@DATASETS.register_module()
class KittiDataset(CocoDataset):
    CLASSES = ('Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting',
               'Cyclist', 'Tram', 'Misc')

    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 classwise=False,
                 proposal_nums=(100, 300, 1000),
                 iou_thrs=None,
                 metric_items=None):
        return super(KittiDataset, self).evaluate(results=results,
                                                  metric=metric,
                                                  logger=logger,
                                                  jsonfile_prefix=jsonfile_prefix,
                                                  classwise=True,
                                                  proposal_nums=proposal_nums,
                                                  iou_thrs=iou_thrs,
                                                  metric_items=metric_items)
--------------------------------------------------------------------------------
/radet/datasets/samplers/distributed_sampler.py:
--------------------------------------------------------------------------------
import math

import torch
from torch.utils.data import DistributedSampler as _DistributedSampler


class DistributedSampler(_DistributedSampler):

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        super().__init__(dataset, num_replicas=num_replicas, rank=rank)
        self.shuffle = shuffle

    def __iter__(self):
        # deterministically shuffle based on epoch
        if self.shuffle:
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = torch.arange(len(self.dataset)).tolist()

        # add extra samples to make it evenly divisible
        # in case that indices is shorter than half of total_size
        indices = (indices *
                   math.ceil(self.total_size / len(indices)))[:self.total_size]
        assert len(indices) == self.total_size

        # subsample
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)
--------------------------------------------------------------------------------
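
The subclass above only makes shuffling optional and repeats indices so every rank gets the same count; otherwise it behaves like torch's sampler, including the inherited set_epoch hook. Typical usage (dataset, world_size, rank and num_epochs are placeholders):

from torch.utils.data import DataLoader
from radet.datasets.samplers import DistributedSampler

sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=True)
loader = DataLoader(dataset, batch_size=2, sampler=sampler)
for epoch in range(num_epochs):
    sampler.set_epoch(epoch)  # re-seeds the deterministic shuffle
    for batch in loader:
        ...
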
/radet/datasets/__init__.py:
--------------------------------------------------------------------------------
from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .cityscapes import CityscapesDataset
from .coco import CocoDataset
from .custom import CustomDataset
from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset, MixDataset,
                               RepeatDataset)
from .deepfashion import DeepFashionDataset
from .lvis import LVISDataset, LVISV1Dataset, LVISV05Dataset
from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
from .utils import replace_ImageToTensor
from .voc import VOCDataset
from .wider_face import WIDERFaceDataset
from .xml_style import XMLDataset
from .ycbv import YcbvDataset
from .kitti import KittiDataset
from .bop import BOPDataset

__all__ = [
    'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset',
    'VOCDataset', 'CityscapesDataset', 'LVISDataset', 'LVISV05Dataset',
    'LVISV1Dataset', 'GroupSampler', 'DistributedGroupSampler',
    'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'ClassBalancedDataset', 'WIDERFaceDataset', 'DATASETS', 'PIPELINES',
    'build_dataset', 'replace_ImageToTensor', 'YcbvDataset', 'KittiDataset'
]
--------------------------------------------------------------------------------
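
Custom datasets plug in the same way as the bundled YcbvDataset or KittiDataset: subclass an existing dataset, set CLASSES, and register the class. A hypothetical example:

from radet.datasets import DATASETS, CocoDataset


@DATASETS.register_module()
class MyBopDataset(CocoDataset):
    """Hypothetical example, not part of the repo."""

    CLASSES = ('obj_000001', 'obj_000002')
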
/radet/models/detectors/radet.py:
--------------------------------------------------------------------------------
from ..builder import DETECTORS
from .single_stage import SingleStageDetector


@DETECTORS.register_module()
class RADet(SingleStageDetector):
    """Implementation of RADet, adapted from `ATSS
    <https://arxiv.org/abs/1912.02424>`_."""

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(RADet, self).__init__(backbone, neck, bbox_head, train_cfg,
                                    test_cfg, pretrained)

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      points_to_gt_index,
                      points_weight,
                      gt_bboxes_ignore=None):
        super(SingleStageDetector, self).forward_train(img, img_metas)
        x = self.extract_feat(img)
        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
                                              gt_labels, points_to_gt_index, points_weight,
                                              gt_bboxes_ignore)
        return losses
--------------------------------------------------------------------------------
/radet/core/utils/misc.py:
--------------------------------------------------------------------------------
from functools import partial

import torch
from six.moves import map, zip


def multi_apply(func, *args, **kwargs):
    """Apply function to a list of arguments.

    Note:
        This function applies ``func`` to multiple inputs and
        maps the multiple outputs of ``func`` into different
        lists. Each list contains the same type of outputs
        corresponding to different inputs.

    Args:
        func (Function): A function that will be applied to a list of
            arguments

    Returns:
        tuple(list): A tuple containing multiple lists; each list contains \
            one kind of result returned by the function
    """
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))


def unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items (of
    size count)."""
    if data.dim() == 1:
        ret = data.new_full((count, ), fill)
        ret[inds.type(torch.bool)] = data
    else:
        new_size = (count, ) + data.size()[1:]
        ret = data.new_full(new_size, fill)
        ret[inds.type(torch.bool), :] = data
    return ret
--------------------------------------------------------------------------------
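
A quick worked example of multi_apply from above:

from radet.core.utils import multi_apply

def square_and_cube(x):
    return x * x, x ** 3

squares, cubes = multi_apply(square_and_cube, [1, 2, 3])
# squares == [1, 4, 9], cubes == [1, 8, 27]
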
/radet/utils/profiling.py:
--------------------------------------------------------------------------------
import contextlib
import sys
import time

import torch

if sys.version_info >= (3, 7):

    @contextlib.contextmanager
    def profile_time(trace_name,
                     name,
                     enabled=True,
                     stream=None,
                     end_stream=None):
        """Print time spent by CPU and GPU.

        Useful as a temporary context manager to find sweet spots of code
        suitable for async implementation.
        """
        if (not enabled) or not torch.cuda.is_available():
            yield
            return
        stream = stream if stream else torch.cuda.current_stream()
        end_stream = end_stream if end_stream else stream
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        stream.record_event(start)
        try:
            cpu_start = time.monotonic()
            yield
        finally:
            cpu_end = time.monotonic()
            end_stream.record_event(end)
            end.synchronize()
            cpu_time = (cpu_end - cpu_start) * 1000
            gpu_time = start.elapsed_time(end)
            msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms '
            msg += f'gpu_time {gpu_time:.2f} ms stream {stream}'
            print(msg, end_stream)
--------------------------------------------------------------------------------
/radet/core/anchor/point_generator.py:
--------------------------------------------------------------------------------
import torch

from .builder import ANCHOR_GENERATORS


@ANCHOR_GENERATORS.register_module()
class PointGenerator(object):

    def _meshgrid(self, x, y, row_major=True):
        xx = x.repeat(len(y))
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
        if row_major:
            return xx, yy
        else:
            return yy, xx

    def grid_points(self, featmap_size, stride=16, device='cuda'):
        feat_h, feat_w = featmap_size
        shift_x = torch.arange(0., feat_w, device=device) * stride
        shift_y = torch.arange(0., feat_h, device=device) * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        stride = shift_x.new_full((shift_xx.shape[0], ), stride)
        shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
        all_points = shifts.to(device)
        return all_points

    def valid_flags(self, featmap_size, valid_size, device='cuda'):
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        return valid
--------------------------------------------------------------------------------
/tools/coco_to_bop.py:
--------------------------------------------------------------------------------
import json
import argparse
import os
from os import path as osp
import mmcv


def parse_args():
    parser = argparse.ArgumentParser(description='Convert coco format to bop format')
    parser.add_argument('json_path', type=str)
    parser.add_argument('save_dir', type=str)
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()
    json_path, save_dir = args.json_path, args.save_dir
    with open(json_path, 'r') as f:
        json_results = json.load(f)
    convert_results = dict()
    for result in json_results:
        scene_id, image_id = result['scene_id'], result['image_id']
        category_id = result['category_id']
        bbox = result['bbox']
        score = result['score']
        if scene_id not in convert_results:
            convert_results[scene_id] = dict()
        if str(image_id) not in convert_results[scene_id]:
            convert_results[scene_id][str(image_id)] = []
        convert_results[scene_id][str(image_id)].append(
            dict(
                bbox_obj=bbox,
                obj_id=category_id,
                score=score,
            )
        )

    for scene_id in convert_results:
        save_path = osp.join(save_dir, f"{scene_id:06d}", "scene_gt_info.json")
        os.makedirs(osp.dirname(save_path), exist_ok=True)
        mmcv.dump(convert_results[scene_id], save_path)
--------------------------------------------------------------------------------
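
Usage sketch for the converter above (both paths are illustrative): `python tools/coco_to_bop.py work_dirs/results.json data/ycbv/test/` writes one scene_gt_info.json per scene id found in the input.
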
/radet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
import torch

from ..builder import BBOX_SAMPLERS
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult


@BBOX_SAMPLERS.register_module()
class PseudoSampler(BaseSampler):
    """A pseudo sampler that does not do sampling actually."""

    def __init__(self, **kwargs):
        pass

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Directly returns the positive and negative indices of samples.

        Args:
            assign_result (:obj:`AssignResult`): Assigned results
            bboxes (torch.Tensor): Bounding boxes
            gt_bboxes (torch.Tensor): Ground truth boxes

        Returns:
            :obj:`SamplingResult`: sampler results
        """
        pos_inds = torch.nonzero(
            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
        neg_inds = torch.nonzero(
            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                                         assign_result, gt_flags)
        return sampling_result
--------------------------------------------------------------------------------
/radet/models/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg
from torch import nn

BACKBONES = Registry('backbone')
NECKS = Registry('neck')
HEADS = Registry('head')
LOSSES = Registry('loss')
DETECTORS = Registry('detector')


def build(cfg, registry, default_args=None):
    """Build a module.

    Args:
        cfg (dict, list[dict]): The config of modules; it is either a dict
            or a list of configs.
        registry (:obj:`Registry`): A registry the module belongs to.
        default_args (dict, optional): Default arguments to build the module.
            Defaults to None.

    Returns:
        nn.Module: A built nn module.
    """
    if isinstance(cfg, list):
        modules = [
            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
        ]
        return nn.Sequential(*modules)
    else:
        return build_from_cfg(cfg, registry, default_args)


def build_backbone(cfg):
    """Build backbone."""
    return build(cfg, BACKBONES)


def build_neck(cfg):
    """Build neck."""
    return build(cfg, NECKS)




def build_head(cfg):
    """Build head."""
    return build(cfg, HEADS)


def build_loss(cfg):
    """Build loss."""
    return build(cfg, LOSSES)


def build_detector(cfg, train_cfg=None, test_cfg=None):
    """Build detector."""
    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
--------------------------------------------------------------------------------
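
When build receives a list of configs it chains the resulting modules in an nn.Sequential, which is how, for example, two necks could be stacked (argument values are illustrative placeholders):

from radet.models import build_neck

neck = build_neck([
    dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
    dict(type='ChannelMapper', in_channels=[256] * 5, out_channels=256),
])  # returns an nn.Sequential of the two necks
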
/radet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner,
                        MaxIoUAssigner, RegionAssigner)
from .builder import build_assigner, build_bbox_coder, build_sampler
from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder,
                    TBLRBBoxCoder)
from .iou_calculators import BboxOverlaps2D, bbox_overlaps
from .samplers import (BaseSampler, CombinedSampler,
                       InstanceBalancedPosSampler, IoUBalancedNegSampler,
                       OHEMSampler, PseudoSampler, RandomSampler,
                       SamplingResult, ScoreHLRSampler)
from .transforms import (bbox2distance, bbox2result, bbox2roi,
                         bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping,
                         bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh,
                         distance2bbox, roi2bbox)

__all__ = [
    'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner',
    'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner',
    'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back',
    'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance',
    'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder',
    'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner',
    'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh',
    'RegionAssigner'
]
--------------------------------------------------------------------------------
/radet/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
import collections

from mmcv.utils import build_from_cfg

from ..builder import PIPELINES


@PIPELINES.register_module()
class Compose(object):
    """Compose multiple transforms sequentially.

    Args:
        transforms (Sequence[dict | callable]): Sequence of transform object or
            config dict to be composed.
    """

    def __init__(self, transforms):
        assert isinstance(transforms, collections.abc.Sequence)
        self.transforms = []
        for transform in transforms:
            if isinstance(transform, dict):
                transform = build_from_cfg(transform, PIPELINES)
                self.transforms.append(transform)
            elif callable(transform):
                self.transforms.append(transform)
            else:
                raise TypeError('transform must be callable or a dict')

    def __call__(self, data):
        """Call function to apply transforms sequentially.

        Args:
            data (dict): A result dict contains the data to transform.

        Returns:
            dict: Transformed data.
        """

        for t in self.transforms:
            data = t(data)
            if data is None:
                return None
        return data

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += f'    {t}'
        format_string += '\n)'
        return format_string
--------------------------------------------------------------------------------
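
A sketch of running a tiny pipeline built from config dicts; the img_prefix/img_info keys follow the mmdetection loading conventions, and the file path is illustrative:

from radet.datasets.pipelines import Compose

pipeline = Compose([dict(type='LoadImageFromFile')])
data = dict(img_prefix='data/ycbv/test', img_info=dict(filename='000048/rgb/000001.png'))
results = pipeline(data)  # adds 'img', 'img_shape', 'ori_shape', ...
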
/radet/datasets/wider_face.py:
--------------------------------------------------------------------------------
import os.path as osp
import xml.etree.ElementTree as ET

import mmcv

from .builder import DATASETS
from .xml_style import XMLDataset


@DATASETS.register_module()
class WIDERFaceDataset(XMLDataset):
    """Reader for the WIDER Face dataset in PASCAL VOC format.

    Conversion scripts can be found in
    https://github.com/sovrasov/wider-face-pascal-voc-annotations
    """
    CLASSES = ('face', )

    def __init__(self, **kwargs):
        super(WIDERFaceDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Load annotation from WIDERFace XML style annotation file.

        Args:
            ann_file (str): Path of XML file.

        Returns:
            list[dict]: Annotation info from XML file.
        """

        data_infos = []
        img_ids = mmcv.list_from_file(ann_file)
        for img_id in img_ids:
            filename = f'{img_id}.jpg'
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                f'{img_id}.xml')
            tree = ET.parse(xml_path)
            root = tree.getroot()
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            folder = root.find('folder').text
            data_infos.append(
                dict(
                    id=img_id,
                    filename=osp.join(folder, filename),
                    width=width,
                    height=height))

        return data_infos
--------------------------------------------------------------------------------
/radet/core/fp16/deprecated_fp16_utils.py:
--------------------------------------------------------------------------------
import warnings

from mmcv.runner import (Fp16OptimizerHook, auto_fp16, force_fp32,
                         wrap_fp16_model)


class DeprecatedFp16OptimizerHook(Fp16OptimizerHook):
    """A wrapper class for the FP16 optimizer hook. This class wraps
    :class:`Fp16OptimizerHook` in `mmcv.runner` and shows a warning that the
    :class:`Fp16OptimizerHook` from `mmdet.core` will be deprecated.

    Refer to :class:`Fp16OptimizerHook` in `mmcv.runner` for more details.

    Args:
        loss_scale (float): Scale factor multiplied with loss.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'Importing Fp16OptimizerHook from "mmdet.core" will be '
            'deprecated in the future. Please import them from "mmcv.runner" '
            'instead')


def deprecated_auto_fp16(*args, **kwargs):
    warnings.warn(
        'Importing auto_fp16 from "mmdet.core" will be '
        'deprecated in the future. Please import them from "mmcv.runner" '
        'instead')
    return auto_fp16(*args, **kwargs)


def deprecated_force_fp32(*args, **kwargs):
    warnings.warn(
        'Importing force_fp32 from "mmdet.core" will be '
        'deprecated in the future. Please import them from "mmcv.runner" '
        'instead')
    return force_fp32(*args, **kwargs)


def deprecated_wrap_fp16_model(*args, **kwargs):
    warnings.warn(
        'Importing wrap_fp16_model from "mmdet.core" will be '
        'deprecated in the future. Please import them from "mmcv.runner" '
        'instead')
    wrap_fp16_model(*args, **kwargs)
--------------------------------------------------------------------------------
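
These shims keep old mmdet.core-style imports working while pointing users at mmcv.runner. Decorator usage is unchanged; a sketch (ToyHead is hypothetical):

import torch.nn as nn
from radet.core.fp16 import force_fp32  # emits a DeprecationWarning


class ToyHead(nn.Module):
    """Hypothetical example module."""

    @force_fp32(apply_to=('cls_score', ))
    def loss(self, cls_score):
        return dict(loss_cls=cls_score.sum())
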
/radet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
import numpy as np


def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """

    assert mode in ['iou', 'iof']

    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    ious = np.zeros((rows, cols), dtype=np.float32)
    if rows * cols == 0:
        return ious
    exchange = False
    if bboxes1.shape[0] > bboxes2.shape[0]:
        bboxes1, bboxes2 = bboxes2, bboxes1
        ious = np.zeros((cols, rows), dtype=np.float32)
        exchange = True
    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
        overlap = np.maximum(x_end - x_start, 0) * np.maximum(
            y_end - y_start, 0)
        if mode == 'iou':
            union = area1[i] + area2 - overlap
        else:
            union = area1[i] if not exchange else area2
        union = np.maximum(union, eps)
        ious[i, :] = overlap / union
    if exchange:
        ious = ious.T
    return ious
--------------------------------------------------------------------------------
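
A quick check of the IoU arithmetic above; note this implementation does not add the legacy +1 to widths and heights:

import numpy as np

b1 = np.array([[0., 0., 10., 10.]])
b2 = np.array([[5., 5., 15., 15.]])
bbox_overlaps(b1, b2, mode='iou')  # array([[0.14285714]]): 25 / (100 + 100 - 25)
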
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """Simple version of the ``kwarray.ensure_rng`` 7 | 8 | Args: 9 | rng (int | numpy.random.RandomState | None): 10 | if None, then defaults to the global rng. Otherwise this can be an 11 | integer or a RandomState class 12 | Returns: 13 | (numpy.random.RandomState) : rng - 14 | a numpy random number generator 15 | 16 | References: 17 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 18 | """ 19 | 20 | if rng is None: 21 | rng = np.random.mtrand._rand 22 | elif isinstance(rng, int): 23 | rng = np.random.RandomState(rng) 24 | else: 25 | rng = rng 26 | return rng 27 | 28 | 29 | def random_boxes(num=1, scale=1, rng=None): 30 | """Simple version of ``kwimage.Boxes.random`` 31 | 32 | Returns: 33 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 34 | 35 | References: 36 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 37 | 38 | Example: 39 | >>> num = 3 40 | >>> scale = 512 41 | >>> rng = 0 42 | >>> boxes = random_boxes(num, scale, rng) 43 | >>> print(boxes) 44 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 45 | [216.9113, 330.6978, 224.0446, 456.5878], 46 | [405.3632, 196.3221, 493.3953, 270.7942]]) 47 | """ 48 | rng = ensure_rng(rng) 49 | 50 | tlbr = rng.rand(num, 4).astype(np.float32) 51 | 52 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 53 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 54 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 55 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 56 | 57 | tlbr[:, 0] = tl_x * scale 58 | tlbr[:, 1] = tl_y * scale 59 | tlbr[:, 2] = br_x * scale 60 | tlbr[:, 3] = br_y * scale 61 | 62 | boxes = torch.from_numpy(tlbr) 63 | return boxes 64 | -------------------------------------------------------------------------------- /radet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 7 | """Base class for DenseHeads.""" 8 | 9 | def __init__(self): 10 | super(BaseDenseHead, self).__init__() 11 | 12 | @abstractmethod 13 | def loss(self, **kwargs): 14 | """Compute losses of the head.""" 15 | pass 16 | 17 | @abstractmethod 18 | def get_bboxes(self, **kwargs): 19 | """Transform network output for a batch into bbox predictions.""" 20 | pass 21 | 22 | def forward_train(self, 23 | x, 24 | img_metas, 25 | gt_bboxes, 26 | gt_labels=None, 27 | gt_bboxes_ignore=None, 28 | proposal_cfg=None, 29 | **kwargs): 30 | """ 31 | Args: 32 | x (list[Tensor]): Features from FPN. 33 | img_metas (list[dict]): Meta information of each image, e.g., 34 | image size, scaling factor, etc. 35 | gt_bboxes (Tensor): Ground truth bboxes of the image, 36 | shape (num_gts, 4). 37 | gt_labels (Tensor): Ground truth labels of each box, 38 | shape (num_gts,). 39 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 40 | ignored, shape (num_ignored_gts, 4). 41 | proposal_cfg (mmcv.Config): Test / postprocessing configuration, 42 | if None, test_cfg would be used 43 | 44 | Returns: 45 | tuple: 46 | losses: (dict[str, Tensor]): A dictionary of loss components. 47 | proposal_list (list[Tensor]): Proposals of each image. 
48 | """ 49 | outs = self(x) 50 | if gt_labels is None: 51 | loss_inputs = outs + (gt_bboxes, img_metas) 52 | else: 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 54 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | if proposal_cfg is None: 56 | return losses 57 | else: 58 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 59 | return losses, proposal_list 60 | -------------------------------------------------------------------------------- /radet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .random_sampler import RandomSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class InstanceBalancedPosSampler(RandomSampler): 10 | """Instance balanced sampler that samples equal number of positive samples 11 | for each instance.""" 12 | 13 | def _sample_pos(self, assign_result, num_expected, **kwargs): 14 | """Sample positive boxes. 15 | 16 | Args: 17 | assign_result (:obj:`AssignResult`): The assigned results of boxes. 18 | num_expected (int): The number of expected positive samples 19 | 20 | Returns: 21 | Tensor or ndarray: sampled indices. 22 | """ 23 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 24 | if pos_inds.numel() != 0: 25 | pos_inds = pos_inds.squeeze(1) 26 | if pos_inds.numel() <= num_expected: 27 | return pos_inds 28 | else: 29 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 30 | num_gts = len(unique_gt_inds) 31 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 32 | sampled_inds = [] 33 | for i in unique_gt_inds: 34 | inds = torch.nonzero( 35 | assign_result.gt_inds == i.item(), as_tuple=False) 36 | if inds.numel() != 0: 37 | inds = inds.squeeze(1) 38 | else: 39 | continue 40 | if len(inds) > num_per_gt: 41 | inds = self.random_choice(inds, num_per_gt) 42 | sampled_inds.append(inds) 43 | sampled_inds = torch.cat(sampled_inds) 44 | if len(sampled_inds) < num_expected: 45 | num_extra = num_expected - len(sampled_inds) 46 | extra_inds = np.array( 47 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 48 | if len(extra_inds) > num_extra: 49 | extra_inds = self.random_choice(extra_inds, num_extra) 50 | extra_inds = torch.from_numpy(extra_inds).to( 51 | assign_result.gt_inds.device).long() 52 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 53 | elif len(sampled_inds) > num_expected: 54 | sampled_inds = self.random_choice(sampled_inds, num_expected) 55 | return sampled_inds 56 | -------------------------------------------------------------------------------- /radet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | 6 | def split_combined_polys(polys, poly_lens, polys_per_mask): 7 | """Split the combined 1-D polys into masks. 8 | 9 | A mask is represented as a list of polys, and a poly is represented as 10 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 11 | tensor. Here we need to split the tensor into original representations. 
12 | 
13 | Args:
14 | polys (list): a list (length = image num) of 1-D tensors
15 | poly_lens (list): a list (length = image num) of poly length
16 | polys_per_mask (list): a list (length = image num) of poly number
17 | of each mask
18 | 
19 | Returns:
20 | list: a list (length = image num) of list (length = mask num) of \
21 | list (length = poly num) of numpy array.
22 | """
23 | mask_polys_list = []
24 | for img_id in range(len(polys)):
25 | polys_single = polys[img_id]
26 | polys_lens_single = poly_lens[img_id].tolist()
27 | polys_per_mask_single = polys_per_mask[img_id].tolist()
28 | 
29 | split_polys = mmcv.slice_list(polys_single, polys_lens_single)
30 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
31 | mask_polys_list.append(mask_polys)
32 | return mask_polys_list
33 | 
34 | 
35 | # TODO: move this function to a more proper place
36 | def encode_mask_results(mask_results):
37 | """Encode bitmap mask to RLE code.
38 | 
39 | Args:
40 | mask_results (list | tuple[list]): bitmap mask results.
41 | In mask scoring rcnn, mask_results is a tuple of (segm_results,
42 | segm_cls_score).
43 | 
44 | Returns:
45 | list | tuple: RLE encoded mask.
46 | """
47 | if isinstance(mask_results, tuple):  # mask scoring
48 | cls_segms, cls_mask_scores = mask_results
49 | else:
50 | cls_segms = mask_results
51 | num_classes = len(cls_segms)
52 | encoded_mask_results = [[] for _ in range(num_classes)]
53 | for i in range(len(cls_segms)):
54 | for cls_segm in cls_segms[i]:
55 | encoded_mask_results[i].append(
56 | mask_util.encode(
57 | np.array(
58 | cls_segm[:, :, np.newaxis], order='F',
59 | dtype='uint8'))[0])  # encoded with RLE
60 | if isinstance(mask_results, tuple):
61 | return encoded_mask_results, cls_mask_scores
62 | else:
63 | return encoded_mask_results
64 | 
-------------------------------------------------------------------------------- /radet/core/mask/mask_target.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | 
5 | 
6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
7 | cfg):
8 | """Compute mask target for positive proposals in multiple images.
9 | 
10 | Args:
11 | pos_proposals_list (list[Tensor]): Positive proposals in multiple
12 | images.
13 | pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each
14 | positive proposal.
15 | gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of
16 | each image.
17 | cfg (dict): Config dict that specifies the mask size.
18 | 
19 | Returns:
20 | list[Tensor]: Mask target of each image.
21 | """
22 | cfg_list = [cfg for _ in range(len(pos_proposals_list))]
23 | mask_targets = map(mask_target_single, pos_proposals_list,
24 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
25 | mask_targets = list(mask_targets)
26 | if len(mask_targets) > 0:
27 | mask_targets = torch.cat(mask_targets)
28 | return mask_targets
29 | 
30 | 
31 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
32 | """Compute mask target for each positive proposal in the image.
33 | 
34 | Args:
35 | pos_proposals (Tensor): Positive proposals.
36 | pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals.
37 | gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap
38 | or Polygon.
39 | cfg (dict): Config dict that indicates the mask size.
40 | 
41 | Returns:
42 | Tensor: Mask target of each positive proposal in the image.
43 | """ 44 | device = pos_proposals.device 45 | mask_size = _pair(cfg.mask_size) 46 | num_pos = pos_proposals.size(0) 47 | if num_pos > 0: 48 | proposals_np = pos_proposals.cpu().numpy() 49 | maxh, maxw = gt_masks.height, gt_masks.width 50 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 51 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 52 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 53 | 54 | mask_targets = gt_masks.crop_and_resize( 55 | proposals_np, mask_size, device=device, 56 | inds=pos_assigned_gt_inds).to_ndarray() 57 | 58 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 59 | else: 60 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 61 | 62 | return mask_targets 63 | -------------------------------------------------------------------------------- /radet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from mmcv.runner import OptimizerHook 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 7 | _unflatten_dense_tensors) 8 | 9 | 10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients. 34 | 35 | Args: 36 | params (list[torch.Parameters]): List of parameters of a model 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Defaults to True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Defaults to -1. 
41 | """ 42 | grads = [ 43 | param.grad.data for param in params 44 | if param.requires_grad and param.grad is not None 45 | ] 46 | world_size = dist.get_world_size() 47 | if coalesce: 48 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 49 | else: 50 | for tensor in grads: 51 | dist.all_reduce(tensor.div_(world_size)) 52 | 53 | 54 | class DistOptimizerHook(OptimizerHook): 55 | """Deprecated optimizer hook for distributed training.""" 56 | 57 | def __init__(self, *args, **kwargs): 58 | warnings.warn('"DistOptimizerHook" is deprecated, please switch to' 59 | '"mmcv.runner.OptimizerHook".') 60 | super().__init__(*args, **kwargs) 61 | 62 | 63 | def reduce_mean(tensor): 64 | """"Obtain the mean of tensor on different GPUs.""" 65 | if not (dist.is_available() and dist.is_initialized()): 66 | return tensor 67 | tensor = tensor.clone() 68 | dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) 69 | return tensor 70 | -------------------------------------------------------------------------------- /radet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import warnings 3 | 4 | 5 | def replace_ImageToTensor(pipelines): 6 | """Replace the ImageToTensor transform in a data pipeline to 7 | DefaultFormatBundle, which is normally useful in batch inference. 8 | 9 | Args: 10 | pipelines (list[dict]): Data pipeline configs. 11 | 12 | Returns: 13 | list: The new pipeline list with all ImageToTensor replaced by 14 | DefaultFormatBundle. 15 | 16 | Examples: 17 | >>> pipelines = [ 18 | ... dict(type='LoadImageFromFile'), 19 | ... dict( 20 | ... type='MultiScaleFlipAug', 21 | ... img_scale=(1333, 800), 22 | ... flip=False, 23 | ... transforms=[ 24 | ... dict(type='Resize', keep_ratio=True), 25 | ... dict(type='RandomFlip'), 26 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), 27 | ... dict(type='Pad', size_divisor=32), 28 | ... dict(type='ImageToTensor', keys=['img']), 29 | ... dict(type='Collect', keys=['img']), 30 | ... ]) 31 | ... ] 32 | >>> expected_pipelines = [ 33 | ... dict(type='LoadImageFromFile'), 34 | ... dict( 35 | ... type='MultiScaleFlipAug', 36 | ... img_scale=(1333, 800), 37 | ... flip=False, 38 | ... transforms=[ 39 | ... dict(type='Resize', keep_ratio=True), 40 | ... dict(type='RandomFlip'), 41 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), 42 | ... dict(type='Pad', size_divisor=32), 43 | ... dict(type='DefaultFormatBundle'), 44 | ... dict(type='Collect', keys=['img']), 45 | ... ]) 46 | ... ] 47 | >>> assert expected_pipelines == replace_ImageToTensor(pipelines) 48 | """ 49 | pipelines = copy.deepcopy(pipelines) 50 | for i, pipeline in enumerate(pipelines): 51 | if pipeline['type'] == 'MultiScaleFlipAug': 52 | assert 'transforms' in pipeline 53 | pipeline['transforms'] = replace_ImageToTensor( 54 | pipeline['transforms']) 55 | elif pipeline['type'] == 'ImageToTensor': 56 | warnings.warn( 57 | '"ImageToTensor" pipeline is replaced by ' 58 | '"DefaultFormatBundle" for batch inference. 
It is ' 59 | 'recommended to manually replace it in the test ' 60 | 'data pipeline in your config file.', UserWarning) 61 | pipelines[i] = {'type': 'DefaultFormatBundle'} 62 | return pipelines 63 | -------------------------------------------------------------------------------- /radet/core/anchor/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def images_to_levels(target, num_levels): 5 | """Convert targets by image to targets by feature level. 6 | 7 | [target_img0, target_img1] -> [target_level0, target_level1, ...] 8 | """ 9 | target = torch.stack(target, 0) 10 | level_targets = [] 11 | start = 0 12 | for n in num_levels: 13 | end = start + n 14 | # level_targets.append(target[:, start:end].squeeze(0)) 15 | level_targets.append(target[:, start:end]) 16 | start = end 17 | return level_targets 18 | 19 | 20 | def anchor_inside_flags(flat_anchors, 21 | valid_flags, 22 | img_shape, 23 | allowed_border=0): 24 | """Check whether the anchors are inside the border. 25 | 26 | Args: 27 | flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4). 28 | valid_flags (torch.Tensor): An existing valid flags of anchors. 29 | img_shape (tuple(int)): Shape of current image. 30 | allowed_border (int, optional): The border to allow the valid anchor. 31 | Defaults to 0. 32 | 33 | Returns: 34 | torch.Tensor: Flags indicating whether the anchors are inside a \ 35 | valid range. 36 | """ 37 | img_h, img_w = img_shape[:2] 38 | if allowed_border >= 0: 39 | inside_flags = valid_flags & \ 40 | (flat_anchors[:, 0] >= -allowed_border) & \ 41 | (flat_anchors[:, 1] >= -allowed_border) & \ 42 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 43 | (flat_anchors[:, 3] < img_h + allowed_border) 44 | else: 45 | inside_flags = valid_flags 46 | return inside_flags 47 | 48 | 49 | def calc_region(bbox, ratio, featmap_size=None): 50 | """Calculate a proportional bbox region. 51 | 52 | The bbox center are fixed and the new h' and w' is h * ratio and w * ratio. 53 | 54 | Args: 55 | bbox (Tensor): Bboxes to calculate regions, shape (n, 4). 56 | ratio (float): Ratio of the output region. 57 | featmap_size (tuple): Feature map size used for clipping the boundary. 
58 | 59 | Returns: 60 | tuple: x1, y1, x2, y2 61 | """ 62 | x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long() 63 | y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long() 64 | x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long() 65 | y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long() 66 | if featmap_size is not None: 67 | x1 = x1.clamp(min=0, max=featmap_size[1]) 68 | y1 = y1.clamp(min=0, max=featmap_size[0]) 69 | x2 = x2.clamp(min=0, max=featmap_size[1]) 70 | y2 = y2.clamp(min=0, max=featmap_size[0]) 71 | return (x1, y1, x2, y2) 72 | -------------------------------------------------------------------------------- /tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os, random 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config 7 | 8 | from radet.core.visualization import imshow_det_bboxes 9 | from radet.datasets.builder import build_dataset 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description='Browse a dataset') 14 | parser.add_argument('--config', default='configs/mask_bop/r50_lmo_cpuassign.py', help='train config file path') 15 | parser.add_argument( 16 | '--skip-type', 17 | type=str, 18 | nargs='+', 19 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 20 | help='skip some useless pipeline') 21 | parser.add_argument( 22 | '--output-dir', 23 | default=None, 24 | type=str, 25 | help='If there is no display interface, you can save it') 26 | parser.add_argument('--not-show', default=False, action='store_true') 27 | parser.add_argument( 28 | '--show-interval', 29 | type=float, 30 | default=1, 31 | help='the interval of show (s)') 32 | parser.add_argument('--type', default='train', type=str) 33 | parser.add_argument('--random', default=True, type=bool) 34 | args = parser.parse_args() 35 | return args 36 | 37 | 38 | def retrieve_data_cfg(config_path, skip_type): 39 | cfg = Config.fromfile(config_path) 40 | train_data_cfg = cfg.data.train 41 | if hasattr(train_data_cfg, 'pipeline'): 42 | train_data_cfg['pipeline'] = [ 43 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 44 | ] 45 | else: 46 | train_data_cfg['dataset']['pipeline'] = [ 47 | x for x in train_data_cfg.dataset.pipeline if x['type'] not in skip_type 48 | ] 49 | 50 | return cfg 51 | 52 | 53 | def main(): 54 | args = parse_args() 55 | cfg = retrieve_data_cfg(args.config, args.skip_type) 56 | 57 | dataset = build_dataset(getattr(cfg.data, args.type)) 58 | 59 | random_index = list(range(len(dataset))) 60 | if args.random: 61 | random.shuffle(random_index) 62 | 63 | progress_bar = mmcv.ProgressBar(len(dataset)) 64 | for index in random_index: 65 | item = dataset[index] 66 | filename = os.path.join(args.output_dir, 67 | Path(item['filename']).name 68 | ) if args.output_dir is not None else None 69 | imshow_det_bboxes( 70 | item['img'], 71 | item['gt_bboxes'], 72 | item['gt_labels'], 73 | class_names=dataset.CLASSES, 74 | show=not args.not_show, 75 | wait_time=args.show_interval, 76 | out_file=filename, 77 | bbox_color=(255, 102, 61), 78 | text_color=(255, 102, 61)) 79 | progress_bar.update() 80 | 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /configs/base/datasets/bop_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'BOPDataset' 2 | data_root = 'data/' 3 | img_norm_cfg = dict( 
4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_bop_mask=True), 8 | dict(type='Resize', img_scale=(640, 480), keep_ratio=True), 9 | dict(type='RandomBackground', background_dir='data/coco', prob=0.3), 10 | dict(type='CosyPoseAug', p=0.8, 11 | pipelines=[ 12 | dict(type='PillowBlur', p=1., factor_interval=(1, 3)), 13 | dict(type='PillowSharpness', p=0.3, factor_interval=(0., 50.)), 14 | dict(type='PillowContrast', p=0.3, factor_interval=(0.2, 50.)), 15 | dict(type='PillowBrightness', p=0.5, factor_interval=(0.1, 6.0)), 16 | dict(type='PillowColor', p=0.3, factor_interval=(0., 20.)), 17 | ]), 18 | dict(type='RandomFlip', flip_ratio=0.5), 19 | dict(type='GenerateDistanceMap'), 20 | dict(type='LabelAssignment', 21 | anchor_generator_cfg=dict( 22 | type='AnchorGenerator', 23 | ratios=[1.0], 24 | octave_base_scale=8, 25 | scales_per_octave=1, 26 | strides=[8, 16, 32, 64, 128] 27 | ), 28 | neg_threshold=0.2, 29 | positive_num=10, 30 | adapt_positive_num=False, 31 | balance_sample=True, 32 | ), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=16), 35 | dict(type='DefaultFormatBundle'), 36 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'points_to_gt_index', 'points_weight']) 37 | ] 38 | test_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict( 41 | type='MultiScaleFlipAug', 42 | img_scale=(640, 480), 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | 54 | data = dict( 55 | samples_per_gpu=16, 56 | workers_per_gpu=4, 57 | train=dict( 58 | type=dataset_type, 59 | ann_file=data_root + 'detector_annotations/train_pbr.json', 60 | img_prefix=data_root + 'train_pbr/', 61 | seg_prefix=data_root + 'train_pbr', 62 | pipeline=train_pipeline, 63 | ), 64 | val=dict( 65 | type=dataset_type, 66 | ann_file=data_root +'detector_annotations/test_bop19.json', 67 | img_prefix=data_root + 'test/', 68 | pipeline=test_pipeline, 69 | ), 70 | test=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'detector_annotations/test_bop19.json', 73 | img_prefix=data_root + 'test/', 74 | pipeline=test_pipeline, 75 | bop_submission=True, 76 | ), 77 | ) 78 | -------------------------------------------------------------------------------- /radet/models/necks/channel_mapper.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule, xavier_init 3 | 4 | from ..builder import NECKS 5 | 6 | 7 | @NECKS.register_module() 8 | class ChannelMapper(nn.Module): 9 | r"""Channel Mapper to reduce/increase channels of backbone features. 10 | 11 | This is used to reduce/increase channels of backbone features. 12 | 13 | Args: 14 | in_channels (List[int]): Number of input channels per scale. 15 | out_channels (int): Number of output channels (used at each scale). 16 | kernel_size (int, optional): kernel_size for reducing channels (used 17 | at each scale). Default: 3. 18 | conv_cfg (dict, optional): Config dict for convolution layer. 19 | Default: None. 20 | norm_cfg (dict, optional): Config dict for normalization layer. 21 | Default: None. 22 | act_cfg (dict, optional): Config dict for activation layer in 23 | ConvModule. 
Default: dict(type='ReLU').
24 | 
25 | Example:
26 | >>> import torch
27 | >>> in_channels = [2, 3, 5, 7]
28 | >>> scales = [340, 170, 84, 43]
29 | >>> inputs = [torch.rand(1, c, s, s)
30 | ...           for c, s in zip(in_channels, scales)]
31 | >>> self = ChannelMapper(in_channels, 11, 3).eval()
32 | >>> outputs = self.forward(inputs)
33 | >>> for i in range(len(outputs)):
34 | ...     print(f'outputs[{i}].shape = {outputs[i].shape}')
35 | outputs[0].shape = torch.Size([1, 11, 340, 340])
36 | outputs[1].shape = torch.Size([1, 11, 170, 170])
37 | outputs[2].shape = torch.Size([1, 11, 84, 84])
38 | outputs[3].shape = torch.Size([1, 11, 43, 43])
39 | """
40 | 
41 | def __init__(self,
42 | in_channels,
43 | out_channels,
44 | kernel_size=3,
45 | conv_cfg=None,
46 | norm_cfg=None,
47 | act_cfg=dict(type='ReLU')):
48 | super(ChannelMapper, self).__init__()
49 | assert isinstance(in_channels, list)
50 | 
51 | self.convs = nn.ModuleList()
52 | for in_channel in in_channels:
53 | self.convs.append(
54 | ConvModule(
55 | in_channel,
56 | out_channels,
57 | kernel_size,
58 | padding=(kernel_size - 1) // 2,
59 | conv_cfg=conv_cfg,
60 | norm_cfg=norm_cfg,
61 | act_cfg=act_cfg))
62 | 
63 | # default init_weights for conv(msra) and norm in ConvModule
64 | def init_weights(self):
65 | """Initialize the weights of ChannelMapper module."""
66 | for m in self.modules():
67 | if isinstance(m, nn.Conv2d):
68 | xavier_init(m, distribution='uniform')
69 | 
70 | def forward(self, inputs):
71 | """Forward function."""
72 | assert len(inputs) == len(self.convs)
73 | outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
74 | return tuple(outs)
75 | 
-------------------------------------------------------------------------------- /radet/core/bbox/samplers/random_sampler.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_SAMPLERS
4 | from .base_sampler import BaseSampler
5 | 
6 | 
7 | @BBOX_SAMPLERS.register_module()
8 | class RandomSampler(BaseSampler):
9 | """Random sampler.
10 | 
11 | Args:
12 | num (int): Number of samples
13 | pos_fraction (float): Fraction of positive samples
14 | neg_pos_ub (int, optional): Upper bound number of negative and
15 | positive samples. Defaults to -1.
16 | add_gt_as_proposals (bool, optional): Whether to add ground truth
17 | boxes as proposals. Defaults to True.
18 | """
19 | 
20 | def __init__(self,
21 | num,
22 | pos_fraction,
23 | neg_pos_ub=-1,
24 | add_gt_as_proposals=True,
25 | **kwargs):
26 | from radet.core.bbox import demodata
27 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
28 | add_gt_as_proposals)
29 | self.rng = demodata.ensure_rng(kwargs.get('rng', None))
30 | 
31 | def random_choice(self, gallery, num):
32 | """Randomly select some elements from the gallery.
33 | 
34 | If `gallery` is a Tensor, the returned indices will be a Tensor;
35 | If `gallery` is a ndarray or list, the returned indices will be a
36 | ndarray.
37 | 
38 | Args:
39 | gallery (Tensor | ndarray | list): indices pool.
40 | num (int): expected sample num.
41 | 
42 | Returns:
43 | Tensor or ndarray: sampled indices.
44 | """ 45 | assert len(gallery) >= num 46 | 47 | is_tensor = isinstance(gallery, torch.Tensor) 48 | if not is_tensor: 49 | if torch.cuda.is_available(): 50 | device = torch.cuda.current_device() 51 | else: 52 | device = 'cpu' 53 | gallery = torch.tensor(gallery, dtype=torch.long, device=device) 54 | perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] 55 | rand_inds = gallery[perm] 56 | if not is_tensor: 57 | rand_inds = rand_inds.cpu().numpy() 58 | return rand_inds 59 | 60 | def _sample_pos(self, assign_result, num_expected, **kwargs): 61 | """Randomly sample some positive samples.""" 62 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 63 | if pos_inds.numel() != 0: 64 | pos_inds = pos_inds.squeeze(1) 65 | if pos_inds.numel() <= num_expected: 66 | return pos_inds 67 | else: 68 | return self.random_choice(pos_inds, num_expected) 69 | 70 | def _sample_neg(self, assign_result, num_expected, **kwargs): 71 | """Randomly sample some negative samples.""" 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.random_choice(neg_inds, num_expected) 79 | -------------------------------------------------------------------------------- /tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mmcv 4 | from mmcv import Config, DictAction 5 | 6 | from radet.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 11 | 'results saved in pkl format') 12 | parser.add_argument('config', help='Config of the model') 13 | parser.add_argument('pkl_results', help='Results in pickle format') 14 | parser.add_argument( 15 | '--format-only', 16 | action='store_true', 17 | help='Format the output results without perform evaluation. It is' 18 | 'useful when you want to format the result to a specific format and ' 19 | 'submit it to the test server') 20 | parser.add_argument( 21 | '--eval', 22 | type=str, 23 | nargs='+', 24 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 25 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | parser.add_argument( 37 | '--eval-options', 38 | nargs='+', 39 | action=DictAction, 40 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 41 | 'format will be kwargs for dataset.evaluate() function') 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | 49 | cfg = Config.fromfile(args.config) 50 | assert args.eval or args.format_only, ( 51 | 'Please specify at least one operation (eval/format the results) with ' 52 | 'the argument "--eval", "--format-only"') 53 | if args.eval and args.format_only: 54 | raise ValueError('--eval and --format_only cannot be both specified') 55 | 56 | if args.cfg_options is not None: 57 | cfg.merge_from_dict(args.cfg_options) 58 | cfg.data.test.test_mode = True 59 | 60 | dataset = build_dataset(cfg.data.test) 61 | outputs = mmcv.load(args.pkl_results) 62 | 63 | kwargs = {} if args.eval_options is None else args.eval_options 64 | if args.format_only: 65 | dataset.format_results(outputs, **kwargs) 66 | if args.eval: 67 | eval_kwargs = cfg.get('evaluation', {}).copy() 68 | # hard-code way to remove EvalHook args 69 | for key in [ 70 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 71 | 'rule' 72 | ]: 73 | eval_kwargs.pop(key, None) 74 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 75 | print(dataset.evaluate(outputs, **eval_kwargs)) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /radet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1, thresh=None): 5 | """Calculate accuracy according to the prediction and target. 6 | 7 | Args: 8 | pred (torch.Tensor): The model prediction, shape (N, num_class) 9 | target (torch.Tensor): The target of each prediction, shape (N, ) 10 | topk (int | tuple[int], optional): If the predictions in ``topk`` 11 | matches the target, the predictions will be regarded as 12 | correct ones. Defaults to 1. 13 | thresh (float, optional): If not None, predictions with scores under 14 | this threshold are considered incorrect. Default to None. 15 | 16 | Returns: 17 | float | tuple[float]: If the input ``topk`` is a single integer, 18 | the function will return a single float as accuracy. If 19 | ``topk`` is a tuple containing multiple integers, the 20 | function will return a tuple containing accuracies of 21 | each ``topk`` number. 22 | """ 23 | assert isinstance(topk, (int, tuple)) 24 | if isinstance(topk, int): 25 | topk = (topk, ) 26 | return_single = True 27 | else: 28 | return_single = False 29 | 30 | maxk = max(topk) 31 | if pred.size(0) == 0: 32 | accu = [pred.new_tensor(0.) 
for i in range(len(topk))] 33 | return accu[0] if return_single else accu 34 | assert pred.ndim == 2 and target.ndim == 1 35 | assert pred.size(0) == target.size(0) 36 | assert maxk <= pred.size(1), \ 37 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 38 | pred_value, pred_label = pred.topk(maxk, dim=1) 39 | pred_label = pred_label.t() # transpose to shape (maxk, N) 40 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 41 | if thresh is not None: 42 | # Only prediction values larger than thresh are counted as correct 43 | correct = correct & (pred_value > thresh).t() 44 | res = [] 45 | for k in topk: 46 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 47 | res.append(correct_k.mul_(100.0 / pred.size(0))) 48 | return res[0] if return_single else res 49 | 50 | 51 | class Accuracy(nn.Module): 52 | 53 | def __init__(self, topk=(1, ), thresh=None): 54 | """Module to calculate the accuracy. 55 | 56 | Args: 57 | topk (tuple, optional): The criterion used to calculate the 58 | accuracy. Defaults to (1,). 59 | thresh (float, optional): If not None, predictions with scores 60 | under this threshold are considered incorrect. Default to None. 61 | """ 62 | super().__init__() 63 | self.topk = topk 64 | self.thresh = thresh 65 | 66 | def forward(self, pred, target): 67 | """Forward function to calculate accuracy. 68 | 69 | Args: 70 | pred (torch.Tensor): Prediction of models. 71 | target (torch.Tensor): Target for each prediction. 72 | 73 | Returns: 74 | tuple[float]: The accuracies under different topk criterions. 75 | """ 76 | return accuracy(pred, target, self.topk, self.thresh) 77 | -------------------------------------------------------------------------------- /configs/base/datasets/bop_detection_mix.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'BOPDataset' 2 | data_root = 'data/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_bop_mask=True), 8 | dict(type='Resize', img_scale=(640, 480), keep_ratio=True), 9 | dict(type='RandomBackground', background_dir='data/coco', prob=0.3), 10 | dict(type='RandomHSV', h_ratio=0.2, s_ratio=0.5, v_ratio=0.5, prob=1.0), 11 | dict(type='RandomNoise', noise_ratio=0.1, prob=1.0), 12 | dict(type='RandomSmooth', max_kernel_size=7, prob=1.0), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='GenerateDistanceMap'), 15 | dict(type='LabelAssignment', 16 | anchor_generator_cfg=dict( 17 | type='AnchorGenerator', 18 | ratios=[1.0], 19 | octave_base_scale=8, 20 | scales_per_octave=1, 21 | strides=[8, 16, 32, 64, 128] 22 | ), 23 | neg_threshold=0.2, 24 | positive_num=10, 25 | adapt_positive_num=False, 26 | balance_sample=True, 27 | ), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=16), 30 | dict(type='DefaultFormatBundle'), 31 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'points_to_gt_index', 'points_weight']) 32 | ] 33 | test_pipeline = [ 34 | dict(type='LoadImageFromFile'), 35 | dict( 36 | type='MultiScaleFlipAug', 37 | img_scale=(640, 480), 38 | flip=False, 39 | transforms=[ 40 | dict(type='Resize', keep_ratio=True), 41 | dict(type='RandomFlip'), 42 | dict(type='Normalize', **img_norm_cfg), 43 | dict(type='Pad', size_divisor=32), 44 | dict(type='ImageToTensor', keys=['img']), 45 | dict(type='Collect', keys=['img']), 46 | ]) 47 | ] 48 | 49 | data = 
dict( 50 | samples_per_gpu=16, 51 | workers_per_gpu=4, 52 | train=dict( 53 | type='MixDataset', 54 | dataset_0=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'detector_annotations/train_pbr.json', 57 | img_prefix=data_root + 'train_pbr/', 58 | seg_prefix=data_root + 'train_pbr', 59 | pipeline=train_pipeline, 60 | ratio=1 61 | ), 62 | dataset_1=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'detector_annotations/train_pbr.json', 65 | img_prefix=data_root + 'train_pbr/', 66 | seg_prefix=data_root + 'train_pbr', 67 | pipeline=train_pipeline, 68 | ratio=1 69 | ), 70 | ), 71 | val=dict( 72 | type=dataset_type, 73 | ann_file=data_root +'detector_annotations/test_bop19.json', 74 | img_prefix=data_root + 'test/', 75 | pipeline=test_pipeline, 76 | ), 77 | test=dict( 78 | type=dataset_type, 79 | ann_file=data_root + 'detector_annotations/test_bop19.json', 80 | img_prefix=data_root + 'test/', 81 | pipeline=test_pipeline, 82 | bop_submission=True, 83 | ), 84 | ) 85 | -------------------------------------------------------------------------------- /configs/bop/r50_icbin_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 2 7 | CLASS_NAMES = ('coffee_cup', 'juice_carton') 8 | 9 | model = dict( 10 | type='RADet', 11 | pretrained='torchvision://resnet50', 12 | backbone=dict( 13 | type='ResNet', 14 | depth=50, 15 | num_stages=4, 16 | out_indices=(0, 1, 2, 3), 17 | frozen_stages=1, 18 | norm_cfg=dict(type='BN', requires_grad=True), 19 | norm_eval=True, 20 | style='pytorch'), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | start_level=1, 26 | add_extra_convs='on_output', 27 | num_outs=5), 28 | bbox_head=dict( 29 | type='RADetHead', 30 | num_classes=2, 31 | in_channels=256, 32 | stacked_convs=4, 33 | feat_channels=256, 34 | strides=[8, 16, 32, 64, 128], 35 | anchor_generator=dict( 36 | type='AnchorGenerator', 37 | ratios=[1.0], 38 | octave_base_scale=8, 39 | scales_per_octave=1, 40 | strides=[8, 16, 32, 64, 128]), 41 | bbox_coder=dict( 42 | type='TBLRBBoxCoder', 43 | normalizer=1/8), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0, 50 | ), 51 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 52 | loss_centerness=dict( 53 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 54 | ), 55 | ) 56 | 57 | train_cfg = dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.5, 61 | neg_iou_thr=0.4, 62 | min_pos_iou=0, 63 | ignore_iof_thr=-1), 64 | allowed_border=-1, 65 | pos_weight=-1, 66 | debug=False) 67 | 68 | test_cfg = dict( 69 | nms_pre=1000, 70 | min_bbox_size=0, 71 | score_thr=0.05, 72 | nms=dict(type='vote', 73 | iou_threshold=0.65, 74 | cluster_score=['cls', 'iou'], 75 | vote_score=['iou', 'cls'], 76 | iou_enable=False, 77 | sima=0.025,), 78 | max_per_img=100) 79 | 80 | 81 | data_root = 'data/icbin/' 82 | data = dict( 83 | samples_per_gpu=16, 84 | workers_per_gpu=8, 85 | train=dict( 86 | ann_file=data_root + 'detector_annotations/train_pbr.json', 87 | img_prefix=data_root + 'train_pbr/', 88 | seg_prefix=data_root + 'train_pbr/', 89 | classes=CLASS_NAMES, 90 | min_visib_frac=0.1, 91 | ), 92 | val=dict( 93 | ann_file=data_root +'detector_annotations/test_bop19.json', 94 | img_prefix=data_root + 'test/', 95 | classes=CLASS_NAMES, 96 | ), 97 | test=dict( 98 | ann_file=data_root + 
'detector_annotations/test_bop19.json', 99 | img_prefix=data_root + 'test/', 100 | classes=CLASS_NAMES, 101 | ) 102 | ) 103 | 104 | work_dir = 'work_dirs/icbin_r50_radet' -------------------------------------------------------------------------------- /configs/bop/r50_itodd_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 28 7 | CLASS_NAMES = tuple([i+1 for i in range(OBJ_NUM)]) 8 | 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=28, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sima=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/itodd/' 83 | data = dict( 84 | samples_per_gpu=16, 85 | workers_per_gpu=16, 86 | train=dict( 87 | ann_file=data_root + 'detector_annotations/train_pbr.json', 88 | img_prefix=data_root + 'train_pbr/', 89 | seg_prefix=data_root + 'train_pbr/', 90 | classes=CLASS_NAMES, 91 | min_visib_frac=0.1, 92 | ), 93 | val=dict( 94 | ann_file=data_root +'detector_annotations/val.json', 95 | img_prefix=data_root + 'val/', 96 | classes=CLASS_NAMES, 97 | ), 98 | test=dict( 99 | ann_file=data_root + 'detector_annotations/test_bop19.json', 100 | img_prefix=data_root + 'test/', 101 | classes=CLASS_NAMES, 102 | ) 103 | ) 104 | 105 | work_dir = 'work_dirs/itodd_r50_radet' -------------------------------------------------------------------------------- /configs/bop/r50_tudl_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('dragon', 'frog', 'can') 7 | 8 | 9 | model = dict( 10 | type='RADet', 11 | pretrained='torchvision://resnet50', 12 | backbone=dict( 13 | type='ResNet', 14 | depth=50, 15 | num_stages=4, 16 | out_indices=(0, 1, 2, 3), 17 | frozen_stages=1, 18 | norm_cfg=dict(type='BN', requires_grad=True), 19 | norm_eval=True, 20 | style='pytorch'), 21 | 
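# frozen_stages=1 freezes the stem and the first residual stage of the
# ImageNet-pretrained backbone; norm_eval=True additionally keeps all
# BatchNorm layers in eval mode during training, the standard mmdet
# recipe when fine-tuning with small per-GPU batch sizes.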
neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | start_level=1, 26 | add_extra_convs='on_output', 27 | num_outs=5), 28 | bbox_head=dict( 29 | type='RADetHead', 30 | num_classes=3, 31 | in_channels=256, 32 | stacked_convs=4, 33 | feat_channels=256, 34 | strides=[8, 16, 32, 64, 128], 35 | anchor_generator=dict( 36 | type='AnchorGenerator', 37 | ratios=[1.0], 38 | octave_base_scale=8, 39 | scales_per_octave=1, 40 | strides=[8, 16, 32, 64, 128]), 41 | bbox_coder=dict( 42 | type='TBLRBBoxCoder', 43 | normalizer=1/8), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0, 50 | ), 51 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 52 | loss_centerness=dict( 53 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 54 | ), 55 | ) 56 | 57 | train_cfg = dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.5, 61 | neg_iou_thr=0.4, 62 | min_pos_iou=0, 63 | ignore_iof_thr=-1), 64 | allowed_border=-1, 65 | pos_weight=-1, 66 | debug=False) 67 | 68 | test_cfg = dict( 69 | nms_pre=1000, 70 | min_bbox_size=0, 71 | score_thr=0.05, 72 | nms=dict(type='vote', 73 | iou_threshold=0.65, 74 | cluster_score=['cls', 'iou'], 75 | vote_score=['iou', 'cls'], 76 | iou_enable=False, 77 | sima=0.025,), 78 | max_per_img=100) 79 | 80 | 81 | data_root = 'data/tudl/' 82 | data = dict( 83 | samples_per_gpu=16, 84 | workers_per_gpu=16, 85 | train=dict( 86 | ann_file=data_root + 'detector_annotations/train_pbr.json', 87 | img_prefix=data_root + 'train_pbr/', 88 | seg_prefix=data_root + 'train_pbr/', 89 | classes=CLASS_NAMES, 90 | min_visib_frac=0.1, 91 | ), 92 | val=dict( 93 | ann_file=data_root +'detector_annotations/test_bop19.json', 94 | img_prefix=data_root + 'test/', 95 | classes=CLASS_NAMES, 96 | ), 97 | test=dict( 98 | ann_file=data_root + 'detector_annotations/test_bop19.json', 99 | img_prefix=data_root + 'test/', 100 | classes=CLASS_NAMES, 101 | bop_submission=True, 102 | ) 103 | ) 104 | 105 | 106 | work_dir = 'work_dirs/tudl_r50_radet_pbr' -------------------------------------------------------------------------------- /configs/bop/r50_tless_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 30 7 | CLASS_NAMES = tuple([i+1 for i in range(OBJ_NUM)]) 8 | 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=30, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | 
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sima=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/tless/' 83 | data = dict( 84 | samples_per_gpu=16, 85 | workers_per_gpu=8, 86 | train=dict( 87 | ann_file=data_root + 'detector_annotations/train_pbr.json', 88 | img_prefix=data_root + 'train_pbr/', 89 | seg_prefix=data_root + 'train_pbr/', 90 | classes=CLASS_NAMES, 91 | min_visib_frac=0.1, 92 | ), 93 | val=dict( 94 | ann_file=data_root +'detector_annotations/test_bop19.json', 95 | img_prefix=data_root + 'test_primesense/', 96 | classes=CLASS_NAMES, 97 | ), 98 | test=dict( 99 | ann_file=data_root + 'detector_annotations/test_bop19.json', 100 | img_prefix=data_root + 'test_primesense/', 101 | classes=CLASS_NAMES, 102 | ) 103 | ) 104 | 105 | 106 | work_dir = 'work_dirs/tless_r50_radet_pbr' -------------------------------------------------------------------------------- /configs/bop/r50_hb_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 33 7 | CLASS_NAMES = tuple([i+1 for i in range(OBJ_NUM)]) 8 | TARGET_CLASS_NAMES= [1, 3, 4, 8, 9, 10, 12, 15, 17, 18, 19, 22, 23, 29, 32, 33] 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=16, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sima=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/hb/' 83 | data = dict( 84 | samples_per_gpu=16, 85 | workers_per_gpu=8, 86 | train=dict( 87 | 
ann_file=data_root + 'detector_annotations/train_pbr.json',
88 | img_prefix=data_root + 'train_pbr/',
89 | seg_prefix=data_root + 'train_pbr/',
90 | classes=TARGET_CLASS_NAMES,
91 | min_visib_frac=0.1,
92 | ),
93 | val=dict(
94 | ann_file=data_root +'detector_annotations/val.json',
95 | img_prefix=data_root + 'val_primesense/',
96 | classes=TARGET_CLASS_NAMES,
97 | ),
98 | test=dict(
99 | ann_file=data_root + 'detector_annotations/test_bop19.json',
100 | img_prefix=data_root + 'test_primesense/',
101 | classes=TARGET_CLASS_NAMES,
102 | bop_submission=True,
103 | )
104 | )
105 | 
106 | 
107 | work_dir = 'work_dirs/hb_r50_radet'
-------------------------------------------------------------------------------- /radet/models/losses/utils.py: --------------------------------------------------------------------------------
1 | import functools
2 | 
3 | import torch.nn.functional as F
4 | 
5 | 
6 | def reduce_loss(loss, reduction):
7 | """Reduce loss as specified.
8 | 
9 | Args:
10 | loss (Tensor): Elementwise loss tensor.
11 | reduction (str): Options are "none", "mean" and "sum".
12 | 
13 | Return:
14 | Tensor: Reduced loss tensor.
15 | """
16 | reduction_enum = F._Reduction.get_enum(reduction)
17 | # none: 0, elementwise_mean:1, sum: 2
18 | if reduction_enum == 0:
19 | return loss
20 | elif reduction_enum == 1:
21 | return loss.mean()
22 | elif reduction_enum == 2:
23 | return loss.sum()
24 | 
25 | 
26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
27 | """Apply element-wise weight and reduce loss.
28 | 
29 | Args:
30 | loss (Tensor): Element-wise loss.
31 | weight (Tensor): Element-wise weights.
32 | reduction (str): Same as built-in losses of PyTorch.
33 | avg_factor (float): Average factor when computing the mean of losses.
34 | 
35 | Returns:
36 | Tensor: Processed loss values.
37 | """
38 | # if weight is specified, apply element-wise weight
39 | if weight is not None:
40 | loss = loss * weight
41 | 
42 | # if avg_factor is not specified, just reduce the loss
43 | if avg_factor is None:
44 | loss = reduce_loss(loss, reduction)
45 | else:
46 | # if reduction is mean, then average the loss by avg_factor
47 | if reduction == 'mean':
48 | loss = loss.sum() / avg_factor
49 | # if reduction is 'none', then do nothing, otherwise raise an error
50 | elif reduction != 'none':
51 | raise ValueError('avg_factor cannot be used with reduction="sum"')
52 | return loss
53 | 
54 | 
55 | def weighted_loss(loss_func):
56 | """Create a weighted version of a given loss function.
57 | 
58 | To use this decorator, the loss function must have the signature like
59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute
60 | element-wise loss without any reduction. This decorator will add weight
61 | and reduction arguments to the function. The decorated function will have
62 | the signature like `loss_func(pred, target, weight=None, reduction='mean',
63 | avg_factor=None, **kwargs)`.
64 | 
65 | :Example:
66 | 
67 | >>> import torch
68 | >>> @weighted_loss
69 | >>> def l1_loss(pred, target):
70 | >>>     return (pred - target).abs()
71 | 
72 | >>> pred = torch.Tensor([0, 2, 3])
73 | >>> target = torch.Tensor([1, 1, 1])
74 | >>> weight = torch.Tensor([1, 0, 1])
75 | 
76 | >>> l1_loss(pred, target)
77 | tensor(1.3333)
78 | >>> l1_loss(pred, target, weight)
79 | tensor(1.)
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /configs/bop/r50_lmo_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 15 7 | CLASS_NAMES = ('ape', 'benchvise', 'bowl', 'cam', 'can', 'cat', 'cup', 'driller', 'duck', 'eggbox', 'glue', 'holepuncher', 'iron','lamp', 'phone') 8 | TARGET_CLASS_NAMES = ['ape', 'can', 'cat', 'driller', 'duck', 'eggbox', 'glue', 'holepuncher'] 9 | 10 | 11 | model = dict( 12 | type='RADet', 13 | pretrained='torchvision://resnet50', 14 | backbone=dict( 15 | type='ResNet', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | frozen_stages=1, 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | style='pytorch'), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | start_level=1, 28 | add_extra_convs='on_output', 29 | num_outs=5), 30 | bbox_head=dict( 31 | type='RADetHead', 32 | num_classes=8, 33 | in_channels=256, 34 | stacked_convs=4, 35 | feat_channels=256, 36 | strides=[8, 16, 32, 64, 128], 37 | anchor_generator=dict( 38 | type='AnchorGenerator', 39 | ratios=[1.0], 40 | octave_base_scale=8, 41 | scales_per_octave=1, 42 | strides=[8, 16, 32, 64, 128]), 43 | bbox_coder=dict( 44 | type='TBLRBBoxCoder', 45 | normalizer=1/8), 46 | loss_cls=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0, 52 | ), 53 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 54 | loss_centerness=dict( 55 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 56 | ), 57 | ) 58 | 59 | train_cfg = dict( 60 | assigner=dict( 61 | type='MaxIoUAssigner', 62 | pos_iou_thr=0.5, 63 | neg_iou_thr=0.4, 64 | min_pos_iou=0, 65 | ignore_iof_thr=-1), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False) 69 | 70 | test_cfg = dict( 71 | nms_pre=1000, 72 | min_bbox_size=0, 73 | score_thr=0.05, 74 | nms=dict(type='vote', 75 | iou_threshold=0.65, 76 | cluster_score=['cls', 'iou'], 77 | vote_score=['iou', 'cls'], 78 | iou_enable=False, 79 | sima=0.025,), 80 | max_per_img=100) 81 | 82 | 83 | data_root = 'data/lmo/' 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | ann_file=data_root + 'detector_annotations/train_pbr.json', 89 | img_prefix=data_root + 'train_pbr/', 90 | seg_prefix=data_root + 'train_pbr/', 91 | classes=TARGET_CLASS_NAMES, 92 | min_visib_frac=0.1, 93 | ), 94 | val=dict( 95 | ann_file=data_root +'detector_annotations/test_bop19.json', 96 | img_prefix=data_root + 'test/', 97 | classes=TARGET_CLASS_NAMES, 98 | ), 99 | test=dict( 100 | ann_file=data_root + 'detector_annotations/test_bop19.json', 101 | img_prefix=data_root + 'test/', 102 | classes=TARGET_CLASS_NAMES, 103 | ) 104 | ) 105 | 106 | 107 | work_dir = 'work_dirs/lmo_r50_radet' -------------------------------------------------------------------------------- 
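The BOP configs above all share the same mmdet-style skeleton and differ only in class names, head width, and data paths. As a quick orientation, here is a minimal sketch of how such a config is typically consumed; it is illustrative only: build_dataset is the entry point tools/eval_metric.py already imports, build_detector is assumed to be exposed by radet.models in the usual mmdet layout, and train_cfg/test_cfg are read from the top level, where these configs define them.

    from mmcv import Config
    from radet.datasets import build_dataset
    from radet.models import build_detector  # assumed mmdet-style export

    cfg = Config.fromfile('configs/bop/r50_lmo_pbr.py')
    # these configs keep train_cfg / test_cfg at the top level, so pass them in
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    dataset = build_dataset(cfg.data.train)
    print(type(model).__name__, 'with', len(dataset), 'training images')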
/radet/ops/cluster/cluster_ext.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | 4 | std::vector<torch::Tensor> nms_cluster(torch::Tensor &bboxes, 5 | torch::Tensor &scores, 6 | torch::Tensor &labels, 7 | float_t nms_threshold){ 8 | auto order_indices = std::get<1>(torch::sort(scores, 0, true)); 9 | auto suppressed = torch::zeros_like(scores, torch::kBool); 10 | auto instances_id = torch::zeros_like(scores, torch::kInt64); 11 | auto clusters_num = torch::zeros_like(scores, torch::kInt64); 12 | 13 | auto x1 = bboxes.select(1, 0).contiguous(); 14 | auto y1 = bboxes.select(1, 1).contiguous(); 15 | auto x2 = bboxes.select(1, 2).contiguous(); 16 | auto y2 = bboxes.select(1, 3).contiguous(); 17 | 18 | auto order_indices_t = order_indices.data_ptr<int64_t>(); 19 | auto x1_t = x1.data_ptr<float>(); 20 | auto y1_t = y1.data_ptr<float>(); 21 | auto x2_t = x2.data_ptr<float>(); 22 | auto y2_t = y2.data_ptr<float>(); 23 | auto labels_t = labels.data_ptr<int64_t>(); 24 | auto suppressed_t = suppressed.data_ptr<bool>(); 25 | auto instances_id_t = instances_id.data_ptr<int64_t>(); 26 | auto clusters_num_t = clusters_num.data_ptr<int64_t>(); 27 | 28 | int ndets = scores.size(0); 29 | int64_t instance_id = 0; 30 | int64_t cluster_num = 0; 31 | 32 | for (int i = 0; i < ndets; i++){ 33 | // the highest-scoring unsuppressed box opens a new cluster 34 | auto index = order_indices_t[i]; 35 | if (suppressed_t[index]) continue; 36 | auto x1_i = x1_t[index]; 37 | auto y1_i = y1_t[index]; 38 | auto x2_i = x2_t[index]; 39 | auto y2_i = y2_t[index]; 40 | auto area_i = (x2_i - x1_i) * (y2_i - y1_i); 41 | cluster_num = 0; 42 | 43 | for (int j = i + 1; j < ndets; j++){ 44 | auto index_j = order_indices_t[j]; 45 | if (suppressed_t[index_j]) continue; 46 | if (labels_t[index_j] != labels_t[index]) continue; 47 | auto x1_j = x1_t[index_j]; 48 | auto y1_j = y1_t[index_j]; 49 | auto x2_j = x2_t[index_j]; 50 | auto y2_j = y2_t[index_j]; 51 | // intersection between box i and box j 52 | auto x_l = std::max(x1_i, x1_j); 53 | auto x_r = std::min(x2_i, x2_j); 54 | auto y_t = std::max(y1_i, y1_j); 55 | auto y_b = std::min(y2_i, y2_j); 56 | auto inter_w = std::max(static_cast<float>(0), x_r - x_l); 57 | auto inter_h = std::max(static_cast<float>(0), y_b - y_t); 58 | auto inter = inter_w * inter_h; 59 | auto area_j = (x2_j - x1_j) * (y2_j - y1_j); 60 | auto iou = inter / (area_j + area_i - inter); 61 | 62 | if (iou > nms_threshold){ 63 | instances_id_t[index_j] = instance_id; 64 | suppressed_t[index_j] = true; 65 | cluster_num++; 66 | } 67 | 68 | } 69 | instances_id_t[index] = instance_id; 70 | clusters_num_t[index] = cluster_num; 71 | instance_id++; 72 | 73 | } 74 | return {instances_id, clusters_num}; 75 | } 76 | 77 | 78 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 79 | m.def("cluster_nms", &nms_cluster, "nms for cluster"); 80 | } -------------------------------------------------------------------------------- /tools/show_bop_detbbox.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | from radet.core.visualization import imshow_det_bboxes 4 | import os 5 | from os import path as osp 6 | import numpy as np 7 | 8 | class_names_cfg = dict( 9 | icbin=('coffee_cup', 'juice_carton'), 10 | tudl=('dragon', 'frog', 'can'), 11 | lmo=('ape', 'benchvise', 'bowl', 'cam', 'can', 'cat', 'cup', 'driller', 'duck', 'eggbox', 'glue', 'holepuncher', 'iron', 'lamp', 'phone'), 12 | ycbv=('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can', 'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box', 13 | 'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser', 'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker', 14 | 'large_clamp', 'extra_large_clamp', 'foam_brick'), 15 | hb=tuple([str(i+1) for i in range(33)]), 16 | itodd=tuple([str(i+1) for i in range(28)]), 17 | tless=tuple([str(i+1) for i in range(30)]), 18 | ) 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('image_dir') 23 | parser.add_argument('result_json') 24 | parser.add_argument('save_dir') 25 | parser.add_argument('--show-score-thr', type=float, default=0.3) 26 | parser.add_argument('--dataset', choices=['icbin', 'itodd', 'ycbv', 'lmo', 'tless', 'hb', 'tudl']) 27 | parser.add_argument('--ext', default='jpg') 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | image_dir, result_json, save_dir,
show_score_thr, dataset, ext = args.image_dir, args.result_json, args.save_dir, args.show_score_thr, args.dataset, args.ext 35 | class_names = class_names_cfg[dataset] 36 | with open(result_json, 'r') as f: 37 | detect_result = json.load(f) 38 | 39 | formated_results = dict() 40 | for pred in detect_result: 41 | scene_id, image_id = pred['scene_id'], pred['image_id'] 42 | bbox, score = pred['bbox'], pred['score'] 43 | category_id = pred['category_id'] 44 | if scene_id not in formated_results: 45 | formated_results[scene_id] = {} 46 | if image_id not in formated_results[scene_id]: 47 | formated_results[scene_id][image_id] = {'bbox':[], 'score':[], 'label':[]} 48 | bbox[2] = bbox[0] + bbox[2] 49 | bbox[3] = bbox[1] + bbox[3] 50 | formated_results[scene_id][image_id]['bbox'].append(bbox) 51 | formated_results[scene_id][image_id]['score'].append(score) 52 | formated_results[scene_id][image_id]['label'].append(category_id) 53 | 54 | for scene_id in formated_results: 55 | for image_id in formated_results[scene_id]: 56 | image = osp.join(image_dir, f"{scene_id:06d}", "rgb", f"{image_id:06d}.{ext}") 57 | save_image = osp.join(save_dir, f"{scene_id:06d}", "rgb", f"{image_id:06d}.{ext}") 58 | os.makedirs(osp.dirname(save_image), exist_ok=True) 59 | result = formated_results[scene_id][image_id] 60 | imshow_det_bboxes( 61 | image, 62 | np.concatenate([np.array(result['bbox']).reshape(-1, 4), np.array(result['score']).reshape(-1, 1)], axis=-1), 63 | np.array(result['label']) -1, 64 | score_thr=show_score_thr, 65 | show=False, 66 | out_file=save_image, 67 | class_names=class_names, 68 | bbox_color=(72, 101, 241), 69 | text_color=(72, 101, 241), 70 | ) 71 | -------------------------------------------------------------------------------- /configs/bop/r50_ycbv_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can', 'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box', 7 | 'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser', 'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker', 8 | 'large_clamp', 'extra_large_clamp', 'foam_brick') 9 | 10 | 11 | model = dict( 12 | type='RADet', 13 | pretrained='torchvision://resnet50', 14 | backbone=dict( 15 | type='ResNet', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | frozen_stages=1, 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | style='pytorch'), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | start_level=1, 28 | add_extra_convs='on_output', 29 | num_outs=5), 30 | bbox_head=dict( 31 | type='RADetHead', 32 | num_classes=21, 33 | in_channels=256, 34 | stacked_convs=4, 35 | feat_channels=256, 36 | strides=[8, 16, 32, 64, 128], 37 | anchor_generator=dict( 38 | type='AnchorGenerator', 39 | ratios=[1.0], 40 | octave_base_scale=8, 41 | scales_per_octave=1, 42 | strides=[8, 16, 32, 64, 128]), 43 | bbox_coder=dict( 44 | type='TBLRBBoxCoder', 45 | normalizer=1/8), 46 | loss_cls=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0, 52 | ), 53 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 54 | loss_centerness=dict( 55 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 56 | ), 57 | ) 58 | 59 | train_cfg = dict( 60 | assigner=dict( 61 | 
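# MaxIoUAssigner: anchors whose best IoU with a ground-truth box reaches
# pos_iou_thr become positives, anchors below neg_iou_thr become negatives,
# and anchors in between are ignored by the loss; ignore_iof_thr=-1 disables
# the ignore-region check.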
type='MaxIoUAssigner', 62 | pos_iou_thr=0.5, 63 | neg_iou_thr=0.4, 64 | min_pos_iou=0, 65 | ignore_iof_thr=-1), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False) 69 | 70 | test_cfg = dict( 71 | nms_pre=1000, 72 | min_bbox_size=0, 73 | score_thr=0.05, 74 | nms=dict(type='vote', 75 | iou_threshold=0.65, 76 | cluster_score=['cls', 'iou'], 77 | vote_score=['iou', 'cls'], 78 | iou_enable=False, 79 | sigma=0.025,), 80 | max_per_img=100) 81 | 82 | 83 | data_root = 'data/ycbv/' 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | ann_file=data_root + 'detector_annotations/train_pbr.json', 89 | img_prefix=data_root + 'train_pbr/', 90 | seg_prefix=data_root + 'train_pbr/', 91 | classes=CLASS_NAMES, 92 | min_visib_frac=0.1, 93 | ), 94 | val=dict( 95 | ann_file=data_root + 'detector_annotations/test_bop19.json', 96 | img_prefix=data_root + 'test/', 97 | classes=CLASS_NAMES, 98 | ), 99 | test=dict( 100 | ann_file=data_root + 'detector_annotations/test_bop19.json', 101 | img_prefix=data_root + 'test/', 102 | classes=CLASS_NAMES, 103 | bop_submission=True, 104 | ) 105 | ) 106 | 107 | work_dir = 'work_dirs/ycbv_r50_radet_pbr' -------------------------------------------------------------------------------- /configs/bop/r50_tudl_mixpbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection_mix.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('dragon', 'frog', 'can') 7 | 8 | model = dict( 9 | type='RADet', 10 | pretrained='torchvision://resnet50', 11 | backbone=dict( 12 | type='ResNet', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | frozen_stages=1, 17 | norm_cfg=dict(type='BN', requires_grad=True), 18 | norm_eval=True, 19 | style='pytorch'), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | start_level=1, 25 | add_extra_convs='on_output', 26 | num_outs=5), 27 | bbox_head=dict( 28 | type='RADetHead', 29 | num_classes=3, 30 | in_channels=256, 31 | stacked_convs=4, 32 | feat_channels=256, 33 | strides=[8, 16, 32, 64, 128], 34 | anchor_generator=dict( 35 | type='AnchorGenerator', 36 | ratios=[1.0], 37 | octave_base_scale=8, 38 | scales_per_octave=1, 39 | strides=[8, 16, 32, 64, 128]), 40 | bbox_coder=dict( 41 | type='TBLRBBoxCoder', 42 | normalizer=1/8), 43 | loss_cls=dict( 44 | type='FocalLoss', 45 | use_sigmoid=True, 46 | gamma=2.0, 47 | alpha=0.25, 48 | loss_weight=1.0, 49 | ), 50 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 51 | loss_centerness=dict( 52 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 53 | ), 54 | ) 55 | 56 | train_cfg = dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.5, 60 | neg_iou_thr=0.4, 61 | min_pos_iou=0, 62 | ignore_iof_thr=-1), 63 | allowed_border=-1, 64 | pos_weight=-1, 65 | debug=False) 66 | 67 | test_cfg = dict( 68 | nms_pre=1000, 69 | min_bbox_size=0, 70 | score_thr=0.05, 71 | nms=dict(type='vote', 72 | iou_threshold=0.65, 73 | cluster_score=['cls', 'iou'], 74 | vote_score=['iou', 'cls'], 75 | iou_enable=False, 76 | sigma=0.025,), 77 | max_per_img=100) 78 | 79 | 80 | data_root = 'data/tudl/' 81 | data = dict( 82 | samples_per_gpu=16, 83 | workers_per_gpu=8, 84 | train=dict( 85 | dataset_0=dict( 86 | ann_file=data_root + 'detector_annotations/train_pbr.json', 87 | img_prefix=data_root + 'train_pbr/', 88 | seg_prefix=data_root + 'train_pbr/', 89 | min_visib_frac=0.1, 90 | ratio=1, 91 | classes=CLASS_NAMES, 92 | ),
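# The mix base config draws training samples from both sources below;
# 'ratio' appears to set the relative sampling weight of each annotation
# file (1:1 here, while the tless and ycbv mix configs use 3:1 and 2:1
# PBR-to-real).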
93 | dataset_1=dict( 94 | ann_file=data_root + 'detector_annotations/train_real.json', 95 | img_prefix=data_root + 'train_real/', 96 | seg_prefix=data_root + 'train_real/', 97 | ratio=1, 98 | classes=CLASS_NAMES, 99 | ) 100 | ), 101 | val=dict( 102 | ann_file=data_root + 'detector_annotations/test_bop19.json', 103 | img_prefix=data_root + 'test/', 104 | classes=CLASS_NAMES, 105 | ), 106 | test=dict( 107 | ann_file=data_root + 'detector_annotations/test_bop19.json', 108 | img_prefix=data_root + 'test/', 109 | classes=CLASS_NAMES, 110 | ) 111 | ) 112 | 113 | 114 | load_from = 'work_dirs/tudl_r50_radet_pbr/latest.pth' 115 | work_dir = 'work_dirs/tudl_r50_radet_mixpbr' -------------------------------------------------------------------------------- /configs/bop/r50_tless_mixpbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection_mix.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 30 7 | CLASS_NAMES = tuple([str(i+1) for i in range(OBJ_NUM)]) 8 | 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=30, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sigma=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/tless/' 83 | 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | dataset_0=dict( 89 | ann_file=data_root + 'detector_annotations/train_pbr.json', 90 | img_prefix=data_root + 'train_pbr/', 91 | seg_prefix=data_root + 'train_pbr/', 92 | min_visib_frac=0.1, 93 | ratio=3, 94 | classes=CLASS_NAMES, 95 | ), 96 | dataset_1=dict( 97 | ann_file=data_root + 'detector_annotations/train_real.json', 98 | img_prefix=data_root + 'train_primesense/', 99 | seg_prefix=data_root + 'train_primesense/', 100 | ratio=1, 101 | classes=CLASS_NAMES, 102 | ) 103 | ), 104 | val=dict( 105 | ann_file=data_root + 'detector_annotations/test_bop19.json', 106 | img_prefix=data_root + 'test_primesense/', 107 | classes=CLASS_NAMES, 108 | ), 109 | test=dict( 110 | ann_file=data_root +
'detector_annotations/test_bop19.json', 111 | img_prefix=data_root + 'test_primesense/', 112 | classes=CLASS_NAMES, 113 | ) 114 | ) 115 | 116 | 117 | load_from = 'work_dirs/tless_r50_radet_pbr/latest.pth' 118 | work_dir = 'work_dirs/tless_r50_radet_mixpbr' -------------------------------------------------------------------------------- /radet/core/bbox/coder/yolo_bbox_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_CODERS 4 | from .base_bbox_coder import BaseBBoxCoder 5 | 6 | 7 | @BBOX_CODERS.register_module() 8 | class YOLOBBoxCoder(BaseBBoxCoder): 9 | """YOLO BBox coder. 10 | 11 | Following `YOLO <https://arxiv.org/abs/1506.02640>`_, this coder divides the 12 | image into grids, and encodes bbox (x1, y1, x2, y2) into (cx, cy, dw, dh). 13 | cx, cy in [0., 1.] denote the relative center position w.r.t. the center of 14 | bboxes. dw, dh are the same as :obj:`DeltaXYWHBBoxCoder`. 15 | 16 | Args: 17 | eps (float): Min value of cx, cy when encoding. 18 | """ 19 | 20 | def __init__(self, eps=1e-6): 21 | super(BaseBBoxCoder, self).__init__() 22 | self.eps = eps 23 | 24 | def encode(self, bboxes, gt_bboxes, stride): 25 | """Get box regression transformation deltas that can be used to 26 | transform the ``bboxes`` into the ``gt_bboxes``. 27 | 28 | Args: 29 | bboxes (torch.Tensor): Source boxes, e.g., anchors. 30 | gt_bboxes (torch.Tensor): Target of the transformation, e.g., 31 | ground-truth boxes. 32 | stride (torch.Tensor | int): Stride of bboxes. 33 | 34 | Returns: 35 | torch.Tensor: Box transformation deltas 36 | """ 37 | 38 | assert bboxes.size(0) == gt_bboxes.size(0) 39 | assert bboxes.size(-1) == gt_bboxes.size(-1) == 4 40 | x_center_gt = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) * 0.5 41 | y_center_gt = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) * 0.5 42 | w_gt = gt_bboxes[..., 2] - gt_bboxes[..., 0] 43 | h_gt = gt_bboxes[..., 3] - gt_bboxes[..., 1] 44 | x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5 45 | y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5 46 | w = bboxes[..., 2] - bboxes[..., 0] 47 | h = bboxes[..., 3] - bboxes[..., 1] 48 | w_target = torch.log((w_gt / w).clamp(min=self.eps)) 49 | h_target = torch.log((h_gt / h).clamp(min=self.eps)) 50 | x_center_target = ((x_center_gt - x_center) / stride + 0.5).clamp( 51 | self.eps, 1 - self.eps) 52 | y_center_target = ((y_center_gt - y_center) / stride + 0.5).clamp( 53 | self.eps, 1 - self.eps) 54 | encoded_bboxes = torch.stack( 55 | [x_center_target, y_center_target, w_target, h_target], dim=-1) 56 | return encoded_bboxes 57 | 58 | def decode(self, bboxes, pred_bboxes, stride): 59 | """Apply transformation `pred_bboxes` to `bboxes`. 60 | 61 | Args: 62 | bboxes (torch.Tensor): Basic boxes, e.g. anchors. 63 | pred_bboxes (torch.Tensor): Encoded boxes with shape (N, 4). 64 | stride (torch.Tensor | int): Strides of bboxes. 65 | 66 | Returns: 67 | torch.Tensor: Decoded boxes.
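Example:
    >>> # identity case: centered offsets and zero log-scales give back
    >>> # the anchor box itself
    >>> coder = YOLOBBoxCoder()
    >>> anchors = torch.Tensor([[0., 0., 8., 8.]])
    >>> pred = torch.Tensor([[0.5, 0.5, 0., 0.]])
    >>> coder.decode(anchors, pred, stride=8)
    tensor([[0., 0., 8., 8.]])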
68 | """ 69 | assert pred_bboxes.size(0) == bboxes.size(0) 70 | assert pred_bboxes.size(-1) == bboxes.size(-1) == 4 71 | x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5 72 | y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5 73 | w = bboxes[..., 2] - bboxes[..., 0] 74 | h = bboxes[..., 3] - bboxes[..., 1] 75 | # Get outputs x, y 76 | x_center_pred = (pred_bboxes[..., 0] - 0.5) * stride + x_center 77 | y_center_pred = (pred_bboxes[..., 1] - 0.5) * stride + y_center 78 | w_pred = torch.exp(pred_bboxes[..., 2]) * w 79 | h_pred = torch.exp(pred_bboxes[..., 3]) * h 80 | 81 | decoded_bboxes = torch.stack( 82 | (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2, 83 | x_center_pred + w_pred / 2, y_center_pred + h_pred / 2), 84 | dim=-1) 85 | 86 | return decoded_bboxes 87 | -------------------------------------------------------------------------------- /radet/datasets/pipelines/instaboost.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..builder import PIPELINES 4 | 5 | 6 | @PIPELINES.register_module() 7 | class InstaBoost(object): 8 | r"""Data augmentation method in `InstaBoost: Boosting Instance 9 | Segmentation Via Probability Map Guided Copy-Pasting 10 | `_. 11 | 12 | Refer to https://github.com/GothicAi/Instaboost for implementation details. 13 | """ 14 | 15 | def __init__(self, 16 | action_candidate=('normal', 'horizontal', 'skip'), 17 | action_prob=(1, 0, 0), 18 | scale=(0.8, 1.2), 19 | dx=15, 20 | dy=15, 21 | theta=(-1, 1), 22 | color_prob=0.5, 23 | hflag=False, 24 | aug_ratio=0.5): 25 | try: 26 | import instaboostfast as instaboost 27 | except ImportError: 28 | raise ImportError( 29 | 'Please run "pip install instaboostfast" ' 30 | 'to install instaboostfast first for instaboost augmentation.') 31 | self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob, 32 | scale, dx, dy, theta, 33 | color_prob, hflag) 34 | self.aug_ratio = aug_ratio 35 | 36 | def _load_anns(self, results): 37 | labels = results['ann_info']['labels'] 38 | masks = results['ann_info']['masks'] 39 | bboxes = results['ann_info']['bboxes'] 40 | n = len(labels) 41 | 42 | anns = [] 43 | for i in range(n): 44 | label = labels[i] 45 | bbox = bboxes[i] 46 | mask = masks[i] 47 | x1, y1, x2, y2 = bbox 48 | # assert (x2 - x1) >= 1 and (y2 - y1) >= 1 49 | bbox = [x1, y1, x2 - x1, y2 - y1] 50 | anns.append({ 51 | 'category_id': label, 52 | 'segmentation': mask, 53 | 'bbox': bbox 54 | }) 55 | 56 | return anns 57 | 58 | def _parse_anns(self, results, anns, img): 59 | gt_bboxes = [] 60 | gt_labels = [] 61 | gt_masks_ann = [] 62 | for ann in anns: 63 | x1, y1, w, h = ann['bbox'] 64 | # TODO: more essential bug need to be fixed in instaboost 65 | if w <= 0 or h <= 0: 66 | continue 67 | bbox = [x1, y1, x1 + w, y1 + h] 68 | gt_bboxes.append(bbox) 69 | gt_labels.append(ann['category_id']) 70 | gt_masks_ann.append(ann['segmentation']) 71 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 72 | gt_labels = np.array(gt_labels, dtype=np.int64) 73 | results['ann_info']['labels'] = gt_labels 74 | results['ann_info']['bboxes'] = gt_bboxes 75 | results['ann_info']['masks'] = gt_masks_ann 76 | results['img'] = img 77 | return results 78 | 79 | def __call__(self, results): 80 | img = results['img'] 81 | orig_type = img.dtype 82 | anns = self._load_anns(results) 83 | if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]): 84 | try: 85 | import instaboostfast as instaboost 86 | except ImportError: 87 | raise ImportError('Please run "pip install 
instaboostfast" ' 88 | 'to install instaboostfast first.') 89 | anns, img = instaboost.get_new_data( 90 | anns, img.astype(np.uint8), self.cfg, background=None) 91 | 92 | results = self._parse_anns(results, anns, img.astype(orig_type)) 93 | return results 94 | 95 | def __repr__(self): 96 | repr_str = self.__class__.__name__ 97 | repr_str += f'(cfg={self.cfg}, aug_ratio={self.aug_ratio})' 98 | return repr_str 99 | -------------------------------------------------------------------------------- /radet/ops/vote/vote_wrapper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import vote_ext 3 | 4 | 5 | 6 | 7 | def vote_nms(bboxes, cls_scores, labels, nms_cfg, score_factor=None, max_num=0): 8 | nms_cfg_ = nms_cfg.copy() 9 | nms_threshold = nms_cfg_.pop('iou_threshold', 0.6) 10 | cluster_score_type = nms_cfg_.pop('cluster_score', 'cls') 11 | vote_score_type = nms_cfg_.pop('vote_score', 'iou') 12 | iou_enable = nms_cfg_.pop('iou_enable', False) 13 | sigma = nms_cfg_.pop('sigma', 0.025) 14 | if isinstance(cluster_score_type, (list, tuple)): 15 | cluster_score = cls_scores * score_factor 16 | elif cluster_score_type == 'cls': 17 | cluster_score = cls_scores 18 | elif cluster_score_type == 'iou': 19 | cluster_score = score_factor 20 | else: 21 | raise RuntimeError(f"Unexpected cluster score type:{cluster_score_type}") 22 | 23 | if isinstance(vote_score_type, (list, tuple)): 24 | vote_score = (cls_scores * score_factor).clone() 25 | elif vote_score_type == 'cls': 26 | vote_score = cls_scores 27 | elif vote_score_type == 'iou': 28 | vote_score = score_factor 29 | else: 30 | raise RuntimeError(f"Unexpected vote score type:{vote_score_type}") 31 | 32 | voted_bboxes, voted_labels, voted_scores = vote_ext.vote_nms(bboxes, 33 | cluster_score, 34 | vote_score, 35 | labels, 36 | nms_threshold, 37 | iou_enable, 38 | sigma) 39 | voted_bboxes = torch.cat([voted_bboxes, voted_scores.view(-1, 1)], dim=-1) 40 | if max_num > 0: 41 | voted_bboxes = voted_bboxes[:max_num] 42 | voted_labels = voted_labels[:max_num] 43 | return voted_bboxes, voted_labels 44 | 45 | 46 | 47 | def global_vote_nms(bboxes, cls_scores, labels, nms_cfg, score_factor=None, max_num=0): 48 | nms_cfg_ = nms_cfg.copy() 49 | nms_threshold = nms_cfg_.pop('iou_threshold', 0.6) 50 | cluster_score_type = nms_cfg_.pop('cluster_score', 'cls') 51 | vote_score_type = nms_cfg_.pop('vote_score', 'iou') 52 | iou_enable = nms_cfg_.pop('iou_enable', False) 53 | sigma = nms_cfg_.pop('sigma', 0.025) 54 | if isinstance(cluster_score_type, (list, tuple)): 55 | cluster_score = cls_scores * score_factor 56 | elif cluster_score_type == 'cls': 57 | cluster_score = cls_scores 58 | elif cluster_score_type == 'iou': 59 | cluster_score = score_factor 60 | else: 61 | raise RuntimeError(f"Unexpected cluster score type:{cluster_score_type}") 62 | 63 | if isinstance(vote_score_type, (list, tuple)): 64 | vote_score = (cls_scores * score_factor).clone() 65 | elif vote_score_type == 'cls': 66 | vote_score = cls_scores 67 | elif vote_score_type == 'iou': 68 | vote_score = score_factor 69 | else: 70 | raise RuntimeError(f"Unexpected vote score type:{vote_score_type}") 71 | 72 | voted_bboxes, voted_labels, voted_scores = vote_ext.global_vote_nms(bboxes, 73 | cluster_score, 74 | vote_score, 75 | labels, 76 | nms_threshold, 77 | iou_enable, 78 | sigma) 79 | voted_bboxes = torch.cat([voted_bboxes, voted_scores.view(-1, 1)], dim=-1) 80 | if max_num > 0: 81 | voted_bboxes = voted_bboxes[:max_num] 82 | voted_labels = 
voted_labels[:max_num] 83 | return voted_bboxes, voted_labels -------------------------------------------------------------------------------- /configs/bop/r50_ycbv_mixpbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection_mix.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can', 'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box', 7 | 'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser', 'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker', 8 | 'large_clamp', 'extra_large_clamp', 'foam_brick') 9 | 10 | 11 | model = dict( 12 | type='RADet', 13 | pretrained='torchvision://resnet50', 14 | backbone=dict( 15 | type='ResNet', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | frozen_stages=1, 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | style='pytorch'), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | start_level=1, 28 | add_extra_convs='on_output', 29 | num_outs=5), 30 | bbox_head=dict( 31 | type='RADetHead', 32 | num_classes=21, 33 | in_channels=256, 34 | stacked_convs=4, 35 | feat_channels=256, 36 | strides=[8, 16, 32, 64, 128], 37 | anchor_generator=dict( 38 | type='AnchorGenerator', 39 | ratios=[1.0], 40 | octave_base_scale=8, 41 | scales_per_octave=1, 42 | strides=[8, 16, 32, 64, 128]), 43 | bbox_coder=dict( 44 | type='TBLRBBoxCoder', 45 | normalizer=1/8), 46 | loss_cls=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0, 52 | ), 53 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 54 | loss_centerness=dict( 55 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 56 | ), 57 | ) 58 | 59 | train_cfg = dict( 60 | assigner=dict( 61 | type='MaxIoUAssigner', 62 | pos_iou_thr=0.5, 63 | neg_iou_thr=0.4, 64 | min_pos_iou=0, 65 | ignore_iof_thr=-1), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False) 69 | 70 | test_cfg = dict( 71 | nms_pre=1000, 72 | min_bbox_size=0, 73 | score_thr=0.05, 74 | nms=dict(type='vote', 75 | iou_threshold=0.65, 76 | cluster_score=['cls', 'iou'], 77 | vote_score=['iou', 'cls'], 78 | iou_enable=False, 79 | sigma=0.025,), 80 | max_per_img=100) 81 | 82 | 83 | data_root = 'data/ycbv/' 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | dataset_0=dict( 89 | ann_file=data_root + 'detector_annotations/train_pbr.json', 90 | img_prefix=data_root + 'train_pbr/', 91 | seg_prefix=data_root + 'train_pbr/', 92 | min_visib_frac=0.1, 93 | ratio=2, 94 | classes=CLASS_NAMES, 95 | ), 96 | dataset_1=dict( 97 | ann_file=data_root + 'detector_annotations/train_real.json', 98 | img_prefix=data_root + 'train_real/', 99 | seg_prefix=data_root + 'train_real/', 100 | ratio=1, 101 | classes=CLASS_NAMES, 102 | ) 103 | ), 104 | val=dict( 105 | ann_file=data_root + 'detector_annotations/test_bop19.json', 106 | img_prefix=data_root + 'test/', 107 | classes=CLASS_NAMES, 108 | ), 109 | test=dict( 110 | ann_file=data_root + 'detector_annotations/test_bop19.json', 111 | img_prefix=data_root + 'test/', 112 | classes=CLASS_NAMES, 113 | ) 114 | ) 115 | 116 | 117 | load_from = 'work_dirs/ycbv_r50_radet_pbr/latest.pth' 118 | work_dir = 'work_dirs/ycbv_r50_radet_mixpbr' -------------------------------------------------------------------------------- /radet/datasets/voc.py:
-------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from mmcv.utils import print_log 4 | 5 | from radet.core import eval_map, eval_recalls 6 | from .builder import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module() 11 | class VOCDataset(XMLDataset): 12 | 13 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 14 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 15 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 16 | 'tvmonitor') 17 | 18 | def __init__(self, **kwargs): 19 | super(VOCDataset, self).__init__(**kwargs) 20 | if 'VOC2007' in self.img_prefix: 21 | self.year = 2007 22 | elif 'VOC2012' in self.img_prefix: 23 | self.year = 2012 24 | else: 25 | raise ValueError('Cannot infer dataset year from img_prefix') 26 | 27 | def evaluate(self, 28 | results, 29 | metric='mAP', 30 | logger=None, 31 | proposal_nums=(100, 300, 1000), 32 | iou_thr=0.5, 33 | scale_ranges=None): 34 | """Evaluate in VOC protocol. 35 | 36 | Args: 37 | results (list[list | tuple]): Testing results of the dataset. 38 | metric (str | list[str]): Metrics to be evaluated. Options are 39 | 'mAP', 'recall'. 40 | logger (logging.Logger | str, optional): Logger used for printing 41 | related information during evaluation. Default: None. 42 | proposal_nums (Sequence[int]): Proposal number used for evaluating 43 | recalls, such as recall@100, recall@1000. 44 | Default: (100, 300, 1000). 45 | iou_thr (float | list[float]): IoU threshold. Default: 0.5. 46 | scale_ranges (list[tuple], optional): Scale ranges for evaluating 47 | mAP. If not specified, all bounding boxes would be included in 48 | evaluation. Default: None. 49 | 50 | Returns: 51 | dict[str, float]: AP/recall metrics. 
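Example:
    >>> # hypothetical call; 'results' as produced by the usual test script
    >>> dataset.evaluate(results, metric='mAP', iou_thr=[0.5, 0.75])  # doctest: +SKIP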
52 | """ 53 | 54 | if not isinstance(metric, str): 55 | assert len(metric) == 1 56 | metric = metric[0] 57 | allowed_metrics = ['mAP', 'recall'] 58 | if metric not in allowed_metrics: 59 | raise KeyError(f'metric {metric} is not supported') 60 | annotations = [self.get_ann_info(i) for i in range(len(self))] 61 | eval_results = OrderedDict() 62 | iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr 63 | if metric == 'mAP': 64 | assert isinstance(iou_thrs, list) 65 | if self.year == 2007: 66 | ds_name = 'voc07' 67 | else: 68 | ds_name = self.CLASSES 69 | mean_aps = [] 70 | for iou_thr in iou_thrs: 71 | print_log(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}') 72 | mean_ap, _ = eval_map( 73 | results, 74 | annotations, 75 | scale_ranges=None, 76 | iou_thr=iou_thr, 77 | dataset=ds_name, 78 | logger=logger) 79 | mean_aps.append(mean_ap) 80 | eval_results[f'AP{int(iou_thr * 100):02d}'] = round(mean_ap, 3) 81 | eval_results['mAP'] = sum(mean_aps) / len(mean_aps) 82 | elif metric == 'recall': 83 | gt_bboxes = [ann['bboxes'] for ann in annotations] 84 | recalls = eval_recalls( 85 | gt_bboxes, results, proposal_nums, iou_thr, logger=logger) 86 | for i, num in enumerate(proposal_nums): 87 | for j, iou in enumerate(iou_thr): 88 | eval_results[f'recall@{num}@{iou}'] = recalls[i, j] 89 | if recalls.shape[1] > 1: 90 | ar = recalls.mean(axis=1) 91 | for i, num in enumerate(proposal_nums): 92 | eval_results[f'AR@{num}'] = ar[i] 93 | return eval_results 94 | -------------------------------------------------------------------------------- /radet/models/utils/res_layer.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_conv_layer, build_norm_layer 2 | from torch import nn as nn 3 | 4 | 5 | class ResLayer(nn.Sequential): 6 | """ResLayer to build ResNet style backbone. 7 | 8 | Args: 9 | block (nn.Module): block used to build ResLayer. 10 | inplanes (int): inplanes of block. 11 | planes (int): planes of block. 12 | num_blocks (int): number of blocks. 13 | stride (int): stride of the first block. Default: 1 14 | avg_down (bool): Use AvgPool instead of stride conv when 15 | downsampling in the bottleneck. Default: False 16 | conv_cfg (dict): dictionary to construct and config conv layer. 17 | Default: None 18 | norm_cfg (dict): dictionary to construct and config norm layer. 19 | Default: dict(type='BN') 20 | downsample_first (bool): Downsample at the first block or last block. 21 | False for Hourglass, True for ResNet. 
Default: True 22 | """ 23 | 24 | def __init__(self, 25 | block, 26 | inplanes, 27 | planes, 28 | num_blocks, 29 | stride=1, 30 | avg_down=False, 31 | conv_cfg=None, 32 | norm_cfg=dict(type='BN'), 33 | downsample_first=True, 34 | **kwargs): 35 | self.block = block 36 | 37 | downsample = None 38 | if stride != 1 or inplanes != planes * block.expansion: 39 | downsample = [] 40 | conv_stride = stride 41 | if avg_down: 42 | conv_stride = 1 43 | downsample.append( 44 | nn.AvgPool2d( 45 | kernel_size=stride, 46 | stride=stride, 47 | ceil_mode=True, 48 | count_include_pad=False)) 49 | downsample.extend([ 50 | build_conv_layer( 51 | conv_cfg, 52 | inplanes, 53 | planes * block.expansion, 54 | kernel_size=1, 55 | stride=conv_stride, 56 | bias=False), 57 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 58 | ]) 59 | downsample = nn.Sequential(*downsample) 60 | 61 | layers = [] 62 | if downsample_first: 63 | layers.append( 64 | block( 65 | inplanes=inplanes, 66 | planes=planes, 67 | stride=stride, 68 | downsample=downsample, 69 | conv_cfg=conv_cfg, 70 | norm_cfg=norm_cfg, 71 | **kwargs)) 72 | inplanes = planes * block.expansion 73 | for _ in range(1, num_blocks): 74 | layers.append( 75 | block( 76 | inplanes=inplanes, 77 | planes=planes, 78 | stride=1, 79 | conv_cfg=conv_cfg, 80 | norm_cfg=norm_cfg, 81 | **kwargs)) 82 | 83 | else: # downsample_first=False is for HourglassModule 84 | for _ in range(num_blocks - 1): 85 | layers.append( 86 | block( 87 | inplanes=inplanes, 88 | planes=inplanes, 89 | stride=1, 90 | conv_cfg=conv_cfg, 91 | norm_cfg=norm_cfg, 92 | **kwargs)) 93 | layers.append( 94 | block( 95 | inplanes=inplanes, 96 | planes=planes, 97 | stride=stride, 98 | downsample=downsample, 99 | conv_cfg=conv_cfg, 100 | norm_cfg=norm_cfg, 101 | **kwargs)) 102 | super(ResLayer, self).__init__(*layers) 103 | -------------------------------------------------------------------------------- /radet/utils/util_mixins.py: -------------------------------------------------------------------------------- 1 | """This module defines the :class:`NiceRepr` mixin class, which defines a 2 | ``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__`` 3 | method, which you must define. This means you only have to overload one 4 | function instead of two. Furthermore, if the object defines a ``__len__`` 5 | method, then the ``__nice__`` method defaults to something sensible, otherwise 6 | it is treated as abstract and raises ``NotImplementedError``. 7 | 8 | To use simply have your object inherit from :class:`NiceRepr` 9 | (multi-inheritance should be ok). 10 | 11 | This code was copied from the ubelt library: https://github.com/Erotemic/ubelt 12 | 13 | Example: 14 | >>> # Objects that define __nice__ have a default __str__ and __repr__ 15 | >>> class Student(NiceRepr): 16 | ... def __init__(self, name): 17 | ... self.name = name 18 | ... def __nice__(self): 19 | ... return self.name 20 | >>> s1 = Student('Alice') 21 | >>> s2 = Student('Bob') 22 | >>> print(f's1 = {s1}') 23 | >>> print(f's2 = {s2}') 24 | s1 = <Student(Alice)> 25 | s2 = <Student(Bob)> 26 | 27 | Example: 28 | >>> # Objects that define __len__ have a default __nice__ 29 | >>> class Group(NiceRepr): 30 | ... def __init__(self, data): 31 | ... self.data = data 32 | ... def __len__(self): 33 | ... return len(self.data) 34 | >>> g = Group([1, 2, 3]) 35 | >>> print(f'g = {g}') 36 | g = <Group(3)> 37 | """ 38 | import warnings 39 | 40 | 41 | class NiceRepr(object): 42 | """Inherit from this class and define ``__nice__`` to "nicely" print your 43 | objects.
44 | 45 | Defines ``__str__`` and ``__repr__`` in terms of the ``__nice__`` function. 46 | Classes that inherit from :class:`NiceRepr` should redefine ``__nice__``. 47 | If the inheriting class has a ``__len__`` method, then the default 48 | ``__nice__`` method will return its length. 49 | 50 | Example: 51 | >>> class Foo(NiceRepr): 52 | ... def __nice__(self): 53 | ... return 'info' 54 | >>> foo = Foo() 55 | >>> assert str(foo) == '<Foo(info)>' 56 | >>> assert repr(foo).startswith('<Foo(info) at ') 57 | 58 | Example: 59 | >>> class Bar(NiceRepr): 60 | ... pass 61 | >>> bar = Bar() 62 | >>> import pytest 63 | >>> with pytest.warns(None) as record: 64 | >>> assert 'object at' in str(bar) 65 | >>> assert 'object at' in repr(bar) 66 | 67 | Example: 68 | >>> class Baz(NiceRepr): 69 | ... def __len__(self): 70 | ... return 5 71 | >>> baz = Baz() 72 | >>> assert str(baz) == '<Baz(5)>' 73 | """ 74 | 75 | def __nice__(self): 76 | """str: a "nice" summary string describing this module""" 77 | if hasattr(self, '__len__'): 78 | # It is a common pattern for objects to use __len__ in __nice__ 79 | # As a convenience we define a default __nice__ for these objects 80 | return str(len(self)) 81 | else: 82 | # In all other cases force the subclass to overload __nice__ 83 | raise NotImplementedError( 84 | f'Define the __nice__ method for {self.__class__!r}') 85 | 86 | def __repr__(self): 87 | """str: the string of the module""" 88 | try: 89 | nice = self.__nice__() 90 | classname = self.__class__.__name__ 91 | return f'<{classname}({nice}) at {hex(id(self))}>' 92 | except NotImplementedError as ex: 93 | warnings.warn(str(ex), category=RuntimeWarning) 94 | return object.__repr__(self) 95 | 96 | def __str__(self): 97 | """str: the string of the module""" 98 | try: 99 | classname = self.__class__.__name__ 100 | nice = self.__nice__() 101 | return f'<{classname}({nice})>' 102 | except NotImplementedError as ex: 103 | warnings.warn(str(ex), category=RuntimeWarning) 104 | return object.__repr__(self) 105 | -------------------------------------------------------------------------------- /radet/models/dense_heads/dense_test_mixins.py: -------------------------------------------------------------------------------- 1 | from inspect import signature 2 | 3 | import torch 4 | 5 | from radet.core import bbox2result, bbox_mapping_back, multiclass_nms 6 | 7 | 8 | class BBoxTestMixin(object): 9 | """Mixin class for test time augmentation of bboxes.""" 10 | 11 | def merge_aug_bboxes(self, aug_bboxes, aug_scores, img_metas): 12 | """Merge augmented detection bboxes and scores. 13 | 14 | Args: 15 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 16 | aug_scores (list[Tensor] or None): shape (n, #class) 17 | img_shapes (list[Tensor]): shape (3, ). 18 | 19 | Returns: 20 | tuple: (bboxes, scores) 21 | """ 22 | recovered_bboxes = [] 23 | for bboxes, img_info in zip(aug_bboxes, img_metas): 24 | img_shape = img_info[0]['img_shape'] 25 | scale_factor = img_info[0]['scale_factor'] 26 | flip = img_info[0]['flip'] 27 | flip_direction = img_info[0]['flip_direction'] 28 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip, 29 | flip_direction) 30 | recovered_bboxes.append(bboxes) 31 | bboxes = torch.cat(recovered_bboxes, dim=0) 32 | if aug_scores is None: 33 | return bboxes 34 | else: 35 | scores = torch.cat(aug_scores, dim=0) 36 | return bboxes, scores 37 | 38 | def aug_test_bboxes(self, feats, img_metas, rescale=False): 39 | """Test det bboxes with test time augmentation.
40 | 41 | Args: 42 | feats (list[Tensor]): the outer list indicates test-time 43 | augmentations and inner Tensor should have a shape NxCxHxW, 44 | which contains features for all images in the batch. 45 | img_metas (list[list[dict]]): the outer list indicates test-time 46 | augs (multiscale, flip, etc.) and the inner list indicates 47 | images in a batch. each dict has image information. 48 | rescale (bool, optional): Whether to rescale the results. 49 | Defaults to False. 50 | 51 | Returns: 52 | list[ndarray]: bbox results of each class 53 | """ 54 | # check with_nms argument 55 | gb_sig = signature(self.get_bboxes) 56 | gb_args = [p.name for p in gb_sig.parameters.values()] 57 | gbs_sig = signature(self._get_bboxes_single) 58 | gbs_args = [p.name for p in gbs_sig.parameters.values()] 59 | assert ('with_nms' in gb_args) and ('with_nms' in gbs_args), \ 60 | f'{self.__class__.__name__}' \ 61 | ' does not support test-time augmentation' 62 | 63 | aug_bboxes = [] 64 | aug_scores = [] 65 | aug_factors = [] # score_factors for NMS 66 | for x, img_meta in zip(feats, img_metas): 67 | # only one image in the batch 68 | outs = self.forward(x) 69 | bbox_inputs = outs + (img_meta, self.test_cfg, False, False) 70 | bbox_outputs = self.get_bboxes(*bbox_inputs)[0] 71 | aug_bboxes.append(bbox_outputs[0]) 72 | aug_scores.append(bbox_outputs[1]) 73 | # bbox_outputs of some detectors (e.g., ATSS, FCOS, YOLOv3) 74 | # contains additional element to adjust scores before NMS 75 | if len(bbox_outputs) >= 3: 76 | aug_factors.append(bbox_outputs[2]) 77 | 78 | # after merging, bboxes will be rescaled to the original image size 79 | merged_bboxes, merged_scores = self.merge_aug_bboxes( 80 | aug_bboxes, aug_scores, img_metas) 81 | merged_factors = torch.cat(aug_factors, dim=0) if aug_factors else None 82 | det_bboxes, det_labels = multiclass_nms( 83 | merged_bboxes, 84 | merged_scores, 85 | self.test_cfg.score_thr, 86 | self.test_cfg.nms, 87 | self.test_cfg.max_per_img, 88 | score_factors=merged_factors) 89 | 90 | if rescale: 91 | _det_bboxes = det_bboxes 92 | else: 93 | _det_bboxes = det_bboxes.clone() 94 | _det_bboxes[:, :4] *= det_bboxes.new_tensor( 95 | img_metas[0][0]['scale_factor']) 96 | bbox_results = bbox2result(_det_bboxes, det_labels, self.num_classes) 97 | return bbox_results 98 | -------------------------------------------------------------------------------- /radet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | """Base class of samplers.""" 10 | 11 | def __init__(self, 12 | num, 13 | pos_fraction, 14 | neg_pos_ub=-1, 15 | add_gt_as_proposals=True, 16 | **kwargs): 17 | self.num = num 18 | self.pos_fraction = pos_fraction 19 | self.neg_pos_ub = neg_pos_ub 20 | self.add_gt_as_proposals = add_gt_as_proposals 21 | self.pos_sampler = self 22 | self.neg_sampler = self 23 | 24 | @abstractmethod 25 | def _sample_pos(self, assign_result, num_expected, **kwargs): 26 | """Sample positive samples.""" 27 | pass 28 | 29 | @abstractmethod 30 | def _sample_neg(self, assign_result, num_expected, **kwargs): 31 | """Sample negative samples.""" 32 | pass 33 | 34 | def sample(self, 35 | assign_result, 36 | bboxes, 37 | gt_bboxes, 38 | gt_labels=None, 39 | **kwargs): 40 | """Sample positive and negative bboxes. 
41 | 42 | This is a simple implementation of bbox sampling given candidates, 43 | assigning results and ground truth bboxes. 44 | 45 | Args: 46 | assign_result (:obj:`AssignResult`): Bbox assigning results. 47 | bboxes (Tensor): Boxes to be sampled from. 48 | gt_bboxes (Tensor): Ground truth bboxes. 49 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 50 | 51 | Returns: 52 | :obj:`SamplingResult`: Sampling result. 53 | 54 | Example: 55 | >>> from mmdet.core.bbox import RandomSampler 56 | >>> from mmdet.core.bbox import AssignResult 57 | >>> from mmdet.core.bbox.demodata import ensure_rng, random_boxes 58 | >>> rng = ensure_rng(None) 59 | >>> assign_result = AssignResult.random(rng=rng) 60 | >>> bboxes = random_boxes(assign_result.num_preds, rng=rng) 61 | >>> gt_bboxes = random_boxes(assign_result.num_gts, rng=rng) 62 | >>> gt_labels = None 63 | >>> self = RandomSampler(num=32, pos_fraction=0.5, neg_pos_ub=-1, 64 | >>> add_gt_as_proposals=False) 65 | >>> self = self.sample(assign_result, bboxes, gt_bboxes, gt_labels) 66 | """ 67 | if len(bboxes.shape) < 2: 68 | bboxes = bboxes[None, :] 69 | 70 | bboxes = bboxes[:, :4] 71 | 72 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 73 | if self.add_gt_as_proposals and len(gt_bboxes) > 0: 74 | if gt_labels is None: 75 | raise ValueError( 76 | 'gt_labels must be given when add_gt_as_proposals is True') 77 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 78 | assign_result.add_gt_(gt_labels) 79 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 80 | gt_flags = torch.cat([gt_ones, gt_flags]) 81 | 82 | num_expected_pos = int(self.num * self.pos_fraction) 83 | pos_inds = self.pos_sampler._sample_pos( 84 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 85 | # We found that sampled indices have duplicated items occasionally. 86 | # (may be a bug of PyTorch) 87 | pos_inds = pos_inds.unique() 88 | num_sampled_pos = pos_inds.numel() 89 | num_expected_neg = self.num - num_sampled_pos 90 | if self.neg_pos_ub >= 0: 91 | _pos = max(1, num_sampled_pos) 92 | neg_upper_bound = int(self.neg_pos_ub * _pos) 93 | if num_expected_neg > neg_upper_bound: 94 | num_expected_neg = neg_upper_bound 95 | neg_inds = self.neg_sampler._sample_neg( 96 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 97 | neg_inds = neg_inds.unique() 98 | 99 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 100 | assign_result, gt_flags) 101 | return sampling_result 102 | -------------------------------------------------------------------------------- /radet/models/backbones/detectors_resnext.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from mmcv.cnn import build_conv_layer, build_norm_layer 4 | 5 | from ..builder import BACKBONES 6 | from .detectors_resnet import Bottleneck as _Bottleneck 7 | from .detectors_resnet import DetectoRS_ResNet 8 | 9 | 10 | class Bottleneck(_Bottleneck): 11 | expansion = 4 12 | 13 | def __init__(self, 14 | inplanes, 15 | planes, 16 | groups=1, 17 | base_width=4, 18 | base_channels=64, 19 | **kwargs): 20 | """Bottleneck block for ResNeXt. 21 | 22 | If style is "pytorch", the stride-two layer is the 3x3 conv layer, if 23 | it is "caffe", the stride-two layer is the first 1x1 conv layer. 
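For example, with groups=32, base_width=4, base_channels=64 and planes=64 (a ResNeXt 32x4d style block), the grouped 3x3 conv below is given floor(64 * 4 / 64) * 32 = 128 channels.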
24 | """ 25 | super(Bottleneck, self).__init__(inplanes, planes, **kwargs) 26 | 27 | if groups == 1: 28 | width = self.planes 29 | else: 30 | width = math.floor(self.planes * 31 | (base_width / base_channels)) * groups 32 | 33 | self.norm1_name, norm1 = build_norm_layer( 34 | self.norm_cfg, width, postfix=1) 35 | self.norm2_name, norm2 = build_norm_layer( 36 | self.norm_cfg, width, postfix=2) 37 | self.norm3_name, norm3 = build_norm_layer( 38 | self.norm_cfg, self.planes * self.expansion, postfix=3) 39 | 40 | self.conv1 = build_conv_layer( 41 | self.conv_cfg, 42 | self.inplanes, 43 | width, 44 | kernel_size=1, 45 | stride=self.conv1_stride, 46 | bias=False) 47 | self.add_module(self.norm1_name, norm1) 48 | fallback_on_stride = False 49 | self.with_modulated_dcn = False 50 | if self.with_dcn: 51 | fallback_on_stride = self.dcn.pop('fallback_on_stride', False) 52 | if self.with_sac: 53 | self.conv2 = build_conv_layer( 54 | self.sac, 55 | width, 56 | width, 57 | kernel_size=3, 58 | stride=self.conv2_stride, 59 | padding=self.dilation, 60 | dilation=self.dilation, 61 | groups=groups, 62 | bias=False) 63 | elif not self.with_dcn or fallback_on_stride: 64 | self.conv2 = build_conv_layer( 65 | self.conv_cfg, 66 | width, 67 | width, 68 | kernel_size=3, 69 | stride=self.conv2_stride, 70 | padding=self.dilation, 71 | dilation=self.dilation, 72 | groups=groups, 73 | bias=False) 74 | else: 75 | assert self.conv_cfg is None, 'conv_cfg must be None for DCN' 76 | self.conv2 = build_conv_layer( 77 | self.dcn, 78 | width, 79 | width, 80 | kernel_size=3, 81 | stride=self.conv2_stride, 82 | padding=self.dilation, 83 | dilation=self.dilation, 84 | groups=groups, 85 | bias=False) 86 | 87 | self.add_module(self.norm2_name, norm2) 88 | self.conv3 = build_conv_layer( 89 | self.conv_cfg, 90 | width, 91 | self.planes * self.expansion, 92 | kernel_size=1, 93 | bias=False) 94 | self.add_module(self.norm3_name, norm3) 95 | 96 | 97 | @BACKBONES.register_module() 98 | class DetectoRS_ResNeXt(DetectoRS_ResNet): 99 | """ResNeXt backbone for DetectoRS. 100 | 101 | Args: 102 | groups (int): The number of groups in ResNeXt. 103 | base_width (int): The base width of ResNeXt. 104 | """ 105 | 106 | arch_settings = { 107 | 50: (Bottleneck, (3, 4, 6, 3)), 108 | 101: (Bottleneck, (3, 4, 23, 3)), 109 | 152: (Bottleneck, (3, 8, 36, 3)) 110 | } 111 | 112 | def __init__(self, groups=1, base_width=4, **kwargs): 113 | self.groups = groups 114 | self.base_width = base_width 115 | super(DetectoRS_ResNeXt, self).__init__(**kwargs) 116 | 117 | def make_res_layer(self, **kwargs): 118 | return super().make_res_layer( 119 | groups=self.groups, 120 | base_width=self.base_width, 121 | base_channels=self.base_channels, 122 | **kwargs) 123 | -------------------------------------------------------------------------------- /radet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_SAMPLERS 4 | from ..transforms import bbox2roi 5 | from .base_sampler import BaseSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class OHEMSampler(BaseSampler): 10 | r"""Online Hard Example Mining Sampler described in `Training Region-based 11 | Object Detectors with Online Hard Example Mining 12 | `_. 
13 | """ 14 | 15 | def __init__(self, 16 | num, 17 | pos_fraction, 18 | context, 19 | neg_pos_ub=-1, 20 | add_gt_as_proposals=True, 21 | **kwargs): 22 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 23 | add_gt_as_proposals) 24 | self.context = context 25 | if not hasattr(self.context, 'num_stages'): 26 | self.bbox_head = self.context.bbox_head 27 | else: 28 | self.bbox_head = self.context.bbox_head[self.context.current_stage] 29 | 30 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 31 | with torch.no_grad(): 32 | rois = bbox2roi([bboxes]) 33 | if not hasattr(self.context, 'num_stages'): 34 | bbox_results = self.context._bbox_forward(feats, rois) 35 | else: 36 | bbox_results = self.context._bbox_forward( 37 | self.context.current_stage, feats, rois) 38 | cls_score = bbox_results['cls_score'] 39 | loss = self.bbox_head.loss( 40 | cls_score=cls_score, 41 | bbox_pred=None, 42 | rois=rois, 43 | labels=labels, 44 | label_weights=cls_score.new_ones(cls_score.size(0)), 45 | bbox_targets=None, 46 | bbox_weights=None, 47 | reduction_override='none')['loss_cls'] 48 | _, topk_loss_inds = loss.topk(num_expected) 49 | return inds[topk_loss_inds] 50 | 51 | def _sample_pos(self, 52 | assign_result, 53 | num_expected, 54 | bboxes=None, 55 | feats=None, 56 | **kwargs): 57 | """Sample positive boxes. 58 | 59 | Args: 60 | assign_result (:obj:`AssignResult`): Assigned results 61 | num_expected (int): Number of expected positive samples 62 | bboxes (torch.Tensor, optional): Boxes. Defaults to None. 63 | feats (list[torch.Tensor], optional): Multi-level features. 64 | Defaults to None. 65 | 66 | Returns: 67 | torch.Tensor: Indices of positive samples 68 | """ 69 | # Sample some hard positive samples 70 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 71 | if pos_inds.numel() != 0: 72 | pos_inds = pos_inds.squeeze(1) 73 | if pos_inds.numel() <= num_expected: 74 | return pos_inds 75 | else: 76 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 77 | assign_result.labels[pos_inds], feats) 78 | 79 | def _sample_neg(self, 80 | assign_result, 81 | num_expected, 82 | bboxes=None, 83 | feats=None, 84 | **kwargs): 85 | """Sample negative boxes. 86 | 87 | Args: 88 | assign_result (:obj:`AssignResult`): Assigned results 89 | num_expected (int): Number of expected negative samples 90 | bboxes (torch.Tensor, optional): Boxes. Defaults to None. 91 | feats (list[torch.Tensor], optional): Multi-level features. 92 | Defaults to None. 
93 | 94 | Returns: 95 | torch.Tensor: Indices of negative samples 96 | """ 97 | # Sample some hard negative samples 98 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 99 | if neg_inds.numel() != 0: 100 | neg_inds = neg_inds.squeeze(1) 101 | if len(neg_inds) <= num_expected: 102 | return neg_inds 103 | else: 104 | neg_labels = assign_result.labels.new_empty( 105 | neg_inds.size(0)).fill_(self.bbox_head.num_classes) 106 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 107 | neg_labels, feats) 108 | -------------------------------------------------------------------------------- /radet/utils/contextmanagers.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import logging 4 | import os 5 | import time 6 | from typing import List 7 | 8 | import torch 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | DEBUG_COMPLETED_TIME = bool(os.environ.get('DEBUG_COMPLETED_TIME', False)) 13 | 14 | 15 | @contextlib.asynccontextmanager 16 | async def completed(trace_name='', 17 | name='', 18 | sleep_interval=0.05, 19 | streams: List[torch.cuda.Stream] = None): 20 | """Async context manager that waits for work to complete on given CUDA 21 | streams.""" 22 | if not torch.cuda.is_available(): 23 | yield 24 | return 25 | 26 | stream_before_context_switch = torch.cuda.current_stream() 27 | if not streams: 28 | streams = [stream_before_context_switch] 29 | else: 30 | streams = [s if s else stream_before_context_switch for s in streams] 31 | 32 | end_events = [ 33 | torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams 34 | ] 35 | 36 | if DEBUG_COMPLETED_TIME: 37 | start = torch.cuda.Event(enable_timing=True) 38 | stream_before_context_switch.record_event(start) 39 | 40 | cpu_start = time.monotonic() 41 | logger.debug('%s %s starting, streams: %s', trace_name, name, streams) 42 | grad_enabled_before = torch.is_grad_enabled() 43 | try: 44 | yield 45 | finally: 46 | current_stream = torch.cuda.current_stream() 47 | assert current_stream == stream_before_context_switch 48 | 49 | if DEBUG_COMPLETED_TIME: 50 | cpu_end = time.monotonic() 51 | for i, stream in enumerate(streams): 52 | event = end_events[i] 53 | stream.record_event(event) 54 | 55 | grad_enabled_after = torch.is_grad_enabled() 56 | 57 | # observed change of torch.is_grad_enabled() during concurrent run of 58 | # async_test_bboxes code 59 | assert (grad_enabled_before == grad_enabled_after 60 | ), 'Unexpected is_grad_enabled() value change' 61 | 62 | are_done = [e.query() for e in end_events] 63 | logger.debug('%s %s completed: %s streams: %s', trace_name, name, 64 | are_done, streams) 65 | with torch.cuda.stream(stream_before_context_switch): 66 | while not all(are_done): 67 | await asyncio.sleep(sleep_interval) 68 | are_done = [e.query() for e in end_events] 69 | logger.debug( 70 | '%s %s completed: %s streams: %s', 71 | trace_name, 72 | name, 73 | are_done, 74 | streams, 75 | ) 76 | 77 | current_stream = torch.cuda.current_stream() 78 | assert current_stream == stream_before_context_switch 79 | 80 | if DEBUG_COMPLETED_TIME: 81 | cpu_time = (cpu_end - cpu_start) * 1000 82 | stream_times_ms = '' 83 | for i, stream in enumerate(streams): 84 | elapsed_time = start.elapsed_time(end_events[i]) 85 | stream_times_ms += f' {stream} {elapsed_time:.2f} ms' 86 | logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, 87 | stream_times_ms) 88 | 89 | 90 | @contextlib.asynccontextmanager 91 | async def concurrent(streamqueue: 
asyncio.Queue, 92 | trace_name='concurrent', 93 | name='stream'): 94 | """Run code concurrently in different streams. 95 | 96 | :param streamqueue: asyncio.Queue instance. 97 | 98 | Queue tasks define the pool of streams used for concurrent execution. 99 | """ 100 | if not torch.cuda.is_available(): 101 | yield 102 | return 103 | 104 | initial_stream = torch.cuda.current_stream() 105 | 106 | with torch.cuda.stream(initial_stream): 107 | stream = await streamqueue.get() 108 | assert isinstance(stream, torch.cuda.Stream) 109 | 110 | try: 111 | with torch.cuda.stream(stream): 112 | logger.debug('%s %s is starting, stream: %s', trace_name, name, 113 | stream) 114 | yield 115 | current = torch.cuda.current_stream() 116 | assert current == stream 117 | logger.debug('%s %s has finished, stream: %s', trace_name, 118 | name, stream) 119 | finally: 120 | streamqueue.task_done() 121 | streamqueue.put_nowait(stream) 122 | -------------------------------------------------------------------------------- /radet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from mmcv.ops import nms 4 | 5 | from ..bbox import bbox_mapping_back 6 | 7 | 8 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 9 | """Merge augmented proposals (multiscale, flip, etc.) 10 | 11 | Args: 12 | aug_proposals (list[Tensor]): proposals from different testing 13 | schemes, shape (n, 5). Note that they are not rescaled to the 14 | original image size. 15 | 16 | img_metas (list[dict]): list of image info dict where each dict has: 17 | 'img_shape', 'scale_factor', 'flip', and may also contain 18 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 19 | For details on the values of these keys see 20 | `mmdet/datasets/pipelines/formatting.py:Collect`. 21 | 22 | rpn_test_cfg (dict): rpn test config. 23 | 24 | Returns: 25 | Tensor: shape (n, 4), proposals corresponding to original image scale. 26 | """ 27 | recovered_proposals = [] 28 | for proposals, img_info in zip(aug_proposals, img_metas): 29 | img_shape = img_info['img_shape'] 30 | scale_factor = img_info['scale_factor'] 31 | flip = img_info['flip'] 32 | flip_direction = img_info['flip_direction'] 33 | _proposals = proposals.clone() 34 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 35 | scale_factor, flip, 36 | flip_direction) 37 | recovered_proposals.append(_proposals) 38 | aug_proposals = torch.cat(recovered_proposals, dim=0) 39 | merged_proposals, _ = nms(aug_proposals[:, :4].contiguous(), 40 | aug_proposals[:, -1].contiguous(), 41 | rpn_test_cfg.nms_thr) 42 | scores = merged_proposals[:, 4] 43 | _, order = scores.sort(0, descending=True) 44 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 45 | order = order[:num] 46 | merged_proposals = merged_proposals[order, :] 47 | return merged_proposals 48 | 49 | 50 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 51 | """Merge augmented detection bboxes and scores. 52 | 53 | Args: 54 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 55 | aug_scores (list[Tensor] or None): shape (n, #class) 56 | img_shapes (list[Tensor]): shape (3, ). 57 | rcnn_test_cfg (dict): rcnn test config. 
58 | 59 | Returns: 60 | tuple[Tensor] or Tensor: merged (bboxes, scores), or bboxes only when aug_scores is None. 61 | """ 62 | recovered_bboxes = [] 63 | for bboxes, img_info in zip(aug_bboxes, img_metas): 64 | img_shape = img_info[0]['img_shape'] 65 | scale_factor = img_info[0]['scale_factor'] 66 | flip = img_info[0]['flip'] 67 | flip_direction = img_info[0]['flip_direction'] 68 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip, 69 | flip_direction) 70 | recovered_bboxes.append(bboxes) 71 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 72 | if aug_scores is None: 73 | return bboxes 74 | else: 75 | scores = torch.stack(aug_scores).mean(dim=0) 76 | return bboxes, scores 77 | 78 | 79 | def merge_aug_scores(aug_scores): 80 | """Merge augmented bbox scores.""" 81 | if isinstance(aug_scores[0], torch.Tensor): 82 | return torch.mean(torch.stack(aug_scores), dim=0) 83 | else: 84 | return np.mean(aug_scores, axis=0) 85 | 86 | 87 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 88 | """Merge augmented mask predictions. 89 | 90 | Args: 91 | aug_masks (list[ndarray]): shape (n, #class, h, w) 92 | img_metas (list[list[dict]]): image info for each augmentation. 93 | rcnn_test_cfg (dict): rcnn test config. 94 | 95 | Returns: 96 | ndarray: merged masks of shape (n, #class, h, w). 97 | """ 98 | recovered_masks = [] 99 | for mask, img_info in zip(aug_masks, img_metas): 100 | flip = img_info[0]['flip'] 101 | flip_direction = img_info[0]['flip_direction'] 102 | if flip: 103 | if flip_direction == 'horizontal': 104 | mask = mask[:, :, :, ::-1] 105 | elif flip_direction == 'vertical': 106 | mask = mask[:, :, ::-1, :] 107 | else: 108 | raise ValueError( 109 | f"Invalid flipping direction '{flip_direction}'") 110 | recovered_masks.append(mask) 111 | 112 | if weights is None: 113 | merged_masks = np.mean(recovered_masks, axis=0) 114 | else: 115 | merged_masks = np.average( 116 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 117 | return merged_masks 118 | -------------------------------------------------------------------------------- /radet/datasets/pipelines/test_time_aug.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import mmcv 4 | 5 | from ..builder import PIPELINES 6 | from .compose import Compose 7 | 8 | 9 | @PIPELINES.register_module() 10 | class MultiScaleFlipAug(object): 11 | """Test-time augmentation with multiple scales and flipping. 12 | 13 | An example configuration is as follows: 14 | 15 | .. code-block:: 16 | 17 | img_scale=[(1333, 400), (1333, 800)], 18 | flip=True, 19 | transforms=[ 20 | dict(type='Resize', keep_ratio=True), 21 | dict(type='RandomFlip'), 22 | dict(type='Normalize', **img_norm_cfg), 23 | dict(type='Pad', size_divisor=32), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='Collect', keys=['img']), 26 | ] 27 | 28 | After MultiScaleFlipAug with the above configuration, the results are 29 | wrapped into lists of the same length as follows: 30 | 31 | .. code-block:: 32 | 33 | dict( 34 | img=[...], 35 | img_shape=[...], 36 | scale=[(1333, 400), (1333, 400), (1333, 800), (1333, 800)], 37 | flip=[False, True, False, True], 38 | ... 39 | ) 40 | 41 | Args: 42 | transforms (list[dict]): Transforms to apply in each augmentation. 43 | img_scale (tuple | list[tuple] | None): Image scales for resizing. 44 | scale_factor (float | list[float] | None): Scale factors for resizing. 45 | flip (bool): Whether to apply flip augmentation. Default: False. 46 | flip_direction (str | list[str]): Flip augmentation directions, 47 | options are "horizontal" and "vertical". 
If flip_direction is a list, 48 | multiple flip augmentations will be applied. 49 | It has no effect when flip == False. Default: "horizontal". 50 | """ 51 | 52 | def __init__(self, 53 | transforms, 54 | img_scale=None, 55 | scale_factor=None, 56 | flip=False, 57 | flip_direction='horizontal'): 58 | self.transforms = Compose(transforms) 59 | assert (img_scale is None) ^ (scale_factor is None), ( 60 | 'Exactly one of img_scale and scale_factor must be set') 61 | if img_scale is not None: 62 | self.img_scale = img_scale if isinstance(img_scale, 63 | list) else [img_scale] 64 | self.scale_key = 'scale' 65 | assert mmcv.is_list_of(self.img_scale, tuple) 66 | else: 67 | self.img_scale = scale_factor if isinstance( 68 | scale_factor, list) else [scale_factor] 69 | self.scale_key = 'scale_factor' 70 | 71 | self.flip = flip 72 | self.flip_direction = flip_direction if isinstance( 73 | flip_direction, list) else [flip_direction] 74 | assert mmcv.is_list_of(self.flip_direction, str) 75 | if not self.flip and self.flip_direction != ['horizontal']: 76 | warnings.warn( 77 | 'flip_direction has no effect when flip is set to False') 78 | if (self.flip 79 | and not any([t['type'] == 'RandomFlip' for t in transforms])): 80 | warnings.warn( 81 | 'flip has no effect when RandomFlip is not in transforms') 82 | 83 | def __call__(self, results): 84 | """Call function to apply test-time augmentation transforms on results. 85 | 86 | Args: 87 | results (dict): Result dict containing the data to transform. 88 | 89 | Returns: 90 | dict[str: list]: The augmented data, where each value is wrapped 91 | into a list. 92 | """ 93 | 94 | aug_data = [] 95 | flip_args = [(False, None)] 96 | if self.flip: 97 | flip_args += [(True, direction) 98 | for direction in self.flip_direction] 99 | for scale in self.img_scale: 100 | for flip, direction in flip_args: 101 | _results = results.copy() 102 | _results[self.scale_key] = scale 103 | _results['flip'] = flip 104 | _results['flip_direction'] = direction 105 | data = self.transforms(_results) 106 | aug_data.append(data) 107 | # list of dict to dict of list 108 | aug_data_dict = {key: [] for key in aug_data[0]} 109 | for data in aug_data: 110 | for key, val in data.items(): 111 | aug_data_dict[key].append(val) 112 | return aug_data_dict 113 | 114 | def __repr__(self): 115 | repr_str = self.__class__.__name__ 116 | repr_str += f'(transforms={self.transforms}, ' 117 | repr_str += f'img_scale={self.img_scale}, flip={self.flip}, ' 118 | repr_str += f'flip_direction={self.flip_direction})' 119 | return repr_str 120 | -------------------------------------------------------------------------------- /radet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..builder import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | """Smooth L1 loss. 11 | 12 | Args: 13 | pred (torch.Tensor): The prediction. 14 | target (torch.Tensor): The learning target of the prediction. 15 | beta (float, optional): The threshold in the piecewise function. 16 | Defaults to 1.0. 17 | 18 | Returns: 19 | torch.Tensor: Calculated loss 20 | """ 21 | assert beta > 0 22 | assert pred.size() == target.size() and target.numel() > 0 23 | diff = torch.abs(pred - target) 24 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 25 | diff - 0.5 * beta) 26 | return loss 27 | 28 | 29 | @weighted_loss 30 | def l1_loss(pred, target): 31 | """L1 loss. 
32 | 33 | Args: 34 | pred (torch.Tensor): The prediction. 35 | target (torch.Tensor): The learning target of the prediction. 36 | 37 | Returns: 38 | torch.Tensor: Calculated loss 39 | """ 40 | assert pred.size() == target.size() and target.numel() > 0 41 | loss = torch.abs(pred - target) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module() 46 | class SmoothL1Loss(nn.Module): 47 | """Smooth L1 loss. 48 | 49 | Args: 50 | beta (float, optional): The threshold in the piecewise function. 51 | Defaults to 1.0. 52 | reduction (str, optional): The method to reduce the loss. 53 | Options are "none", "mean" and "sum". Defaults to "mean". 54 | loss_weight (float, optional): The weight of loss. 55 | """ 56 | 57 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 58 | super(SmoothL1Loss, self).__init__() 59 | self.beta = beta 60 | self.reduction = reduction 61 | self.loss_weight = loss_weight 62 | 63 | def forward(self, 64 | pred, 65 | target, 66 | weight=None, 67 | avg_factor=None, 68 | reduction_override=None, 69 | **kwargs): 70 | """Forward function. 71 | 72 | Args: 73 | pred (torch.Tensor): The prediction. 74 | target (torch.Tensor): The learning target of the prediction. 75 | weight (torch.Tensor, optional): The weight of loss for each 76 | prediction. Defaults to None. 77 | avg_factor (int, optional): Average factor that is used to average 78 | the loss. Defaults to None. 79 | reduction_override (str, optional): The reduction method used to 80 | override the original reduction method of the loss. 81 | Defaults to None. 82 | """ 83 | assert reduction_override in (None, 'none', 'mean', 'sum') 84 | reduction = ( 85 | reduction_override if reduction_override else self.reduction) 86 | loss_bbox = self.loss_weight * smooth_l1_loss( 87 | pred, 88 | target, 89 | weight, 90 | beta=self.beta, 91 | reduction=reduction, 92 | avg_factor=avg_factor, 93 | **kwargs) 94 | return loss_bbox 95 | 96 | 97 | @LOSSES.register_module() 98 | class L1Loss(nn.Module): 99 | """L1 loss. 100 | 101 | Args: 102 | reduction (str, optional): The method to reduce the loss. 103 | Options are "none", "mean" and "sum". 104 | loss_weight (float, optional): The weight of loss. 105 | """ 106 | 107 | def __init__(self, reduction='mean', loss_weight=1.0): 108 | super(L1Loss, self).__init__() 109 | self.reduction = reduction 110 | self.loss_weight = loss_weight 111 | 112 | def forward(self, 113 | pred, 114 | target, 115 | weight=None, 116 | avg_factor=None, 117 | reduction_override=None): 118 | """Forward function. 119 | 120 | Args: 121 | pred (torch.Tensor): The prediction. 122 | target (torch.Tensor): The learning target of the prediction. 123 | weight (torch.Tensor, optional): The weight of loss for each 124 | prediction. Defaults to None. 125 | avg_factor (int, optional): Average factor that is used to average 126 | the loss. Defaults to None. 127 | reduction_override (str, optional): The reduction method used to 128 | override the original reduction method of the loss. 129 | Defaults to None. 130 | """ 131 | assert reduction_override in (None, 'none', 'mean', 'sum') 132 | reduction = ( 133 | reduction_override if reduction_override else self.reduction) 134 | loss_bbox = self.loss_weight * l1_loss( 135 | pred, target, weight, reduction=reduction, avg_factor=avg_factor) 136 | return loss_bbox 137 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Rigidity-Aware Detection for 6D Object Pose Estimation (CVPR 2023) 2 | 3 | Yang Hai, Rui Song, Jiaojiao Li, Mathieu Salzmann, Yinlin Hu 4 | 5 | Paper | Poster | Video 6 | 7 | 8 |
9 | 10 | 📢 **NEWS**: We have released the code for combining RADet with [WDR-Pose](https://arxiv.org/abs/2104.00337), part of the Best Single-Model Solution for the BOP Challenge 2022. Please check out [PseudoFlow](https://github.com/YangHai-1218/PseudoFlow)! 11 | 12 | 13 | # Introduction 14 | 15 | Most recent 6D object pose estimation methods first use object detection to obtain 2D bounding boxes before actually regressing the pose. However, the general-purpose object detectors they use are ill-suited to cluttered scenes and thus provide poor initializations for the subsequent pose network. 16 | 17 |
18 | [Figure: visibility-guided sampling vs. standard center-based sampling; see the caption below] 19 | 20 |
21 | (a) The standard detection strategy chooses positive samples (green cells) around the object center, thus suffering from occlusions. (b) Instead, we propose to use a visibility-guided sampling strategy to discard the occluded regions and encourage the network to be supervised by all visible parts. The sampling probability is depicted by different shades of green. (c) Our method (green boxes) yields more accurate detections than the standard strategy (red boxes). 22 | 23 |
24 | [Figure: testing accuracy of different sampling strategies on COCO and YCB-V; see the caption below] 25 | 26 |
27 | Our motivation comes from the rigidity of the targets in 6D object pose estimation. We show the testing accuracy of different sampling strategies w.r.t. different local predictions during training on a typical general object detection dataset (COCO) and on a typical 6D object pose dataset (YCB-V). We report the results of FCOSv2 (Center), ATSS (Center+), and a strategy exploiting all the candidates in the ground-truth mask (Visible). The horizontal axis represents the normalized distance of a local prediction to the box center. Although the accuracy of the different strategies is similar on COCO, the visibility-guided sampling is much more accurate on YCB-V, even when the local predictions come from non-center areas, thanks to the rigidity of the target objects. 28 | 29 | # Installation 30 | This code has been tested on an `Ubuntu 18.04` server with `CUDA 11.3`. 31 | - Install the necessary Python packages with `pip install -r requirements.txt`. 32 | - Build the other dependencies with `python setup.py develop`. 33 | 34 | # Dataset Preparation 35 | 36 | - Download the [BOP datasets](https://bop.felk.cvut.cz/) and place them under the `data` directory. 37 | 38 | - Collect image file lists for the desired data splits. 39 | 40 | For example, collect the PBR images for the YCB-V dataset: 41 | 42 | ```shell 43 | python tools/collect_image_list.py --source-dir data/ycbv/train_pbr --save-path data/ycbv/image_lists/train_pbr.txt --pattern '*/rgb/*.png' 44 | ``` 45 | - Collect BOP image file lists for the desired dataset. 46 | 47 | For example, collect the BOP test images for the YCB-V dataset: 48 | ```shell 49 | python tools/collect_bop_imagelist.py data/ycbv/test_bop19.json data/ycbv/image_lists/test_bop19.txt --ext png 50 | ``` 51 | - Convert BOP annotations into COCO format for both training (`train_pbr`/`train_real`) and testing (`test_bop19`). 52 | 53 | ```shell 54 | python tools/bop_to_coco.py --images-dir data/ycbv/train_pbr --images-list data/ycbv/image_lists/train_pbr.txt --save-path data/ycbv/detector_annotations/train_pbr.json --dataset ycbv 55 | ``` 56 | 57 | # Testing 58 | Use `tools/test.py`: 59 | ```shell 60 | python tools/test.py --config configs/bop/r50_ycbv_pbr.py --checkpoint checkpoints/radet_ycbv_pbr.pth --format-only --eval-options jsonfile_prefix=work_dirs/results/radet_ycbv_pbr 61 | ``` 62 | 63 | # Training 64 | 65 | Use `tools/train.py`: 66 | 67 | ```shell 68 | python tools/train.py --config configs/bop/r50_ycbv_pbr.py 69 | ``` 70 | 71 | 72 | # Pretrained models and detection results 73 | 74 | We put the pretrained models and the corresponding detection results for the 7 BOP core datasets [here](https://drive.google.com/drive/folders/18_P693QoT9yTup1I8rmn7Jcs4DmQ2wOQ?usp=share_link). (In the paper, we use a score threshold of 0.1 to filter out false positives before the second-stage pose estimation; a filtering sketch is given at the end of this README.) 75 | 76 | # Citation 77 | 78 | If you find this project helpful, please cite: 79 | 80 | ``` 81 | @inproceedings{yang2023radet, 82 | title={Rigidity-Aware Detection for 6D Object Pose Estimation}, 83 | author={Yang Hai and Rui Song and Jiaojiao Li and Mathieu Salzmann and Yinlin Hu}, 84 | booktitle={CVPR}, 85 | year={2023} 86 | } 87 | ``` 88 | 89 | # Acknowledgement 90 | 91 | We build our framework on [mmdetection](https://github.com/open-mmlab/mmdetection) and [MBS](https://github.com/YinlinHu/MBS). We thank the authors for their great code repositories. 
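The score-threshold filtering mentioned above is simple to reproduce offline. Below is a minimal sketch, assuming the `--format-only` test command wrote COCO-style results (a JSON list of detections with `image_id`, `category_id`, `bbox`, and `score` fields) to `work_dirs/results/radet_ycbv_pbr.bbox.json`, following the usual mmdetection `jsonfile_prefix` convention; the function name and file paths are illustrative, not a tool shipped with this repository.

```python
# Hypothetical post-processing sketch: drop detections below the score
# threshold used in the paper (0.1) before the second-stage pose estimation.
# All paths are placeholders.
import json


def filter_detections(result_file, save_file, score_thr=0.1):
    with open(result_file, 'r') as f:
        detections = json.load(f)  # COCO-style list of detection dicts
    kept = [det for det in detections if det['score'] >= score_thr]
    with open(save_file, 'w') as f:
        json.dump(kept, f)
    print(f'kept {len(kept)}/{len(detections)} detections')


filter_detections('work_dirs/results/radet_ycbv_pbr.bbox.json',
                  'work_dirs/results/radet_ycbv_pbr.filtered.json')
```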
92 | -------------------------------------------------------------------------------- /radet/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.runner import get_dist_info 7 | from torch.utils.data import Sampler 8 | 9 | 10 | class GroupSampler(Sampler): 11 | 12 | def __init__(self, dataset, samples_per_gpu=1): 13 | assert hasattr(dataset, 'flag') 14 | self.dataset = dataset 15 | self.samples_per_gpu = samples_per_gpu 16 | self.flag = dataset.flag.astype(np.int64) 17 | self.group_sizes = np.bincount(self.flag) 18 | self.num_samples = 0 19 | for i, size in enumerate(self.group_sizes): 20 | self.num_samples += int(np.ceil( 21 | size / self.samples_per_gpu)) * self.samples_per_gpu 22 | 23 | def __iter__(self): 24 | indices = [] 25 | for i, size in enumerate(self.group_sizes): 26 | if size == 0: 27 | continue 28 | indice = np.where(self.flag == i)[0] 29 | assert len(indice) == size 30 | np.random.shuffle(indice) 31 | num_extra = int(np.ceil(size / self.samples_per_gpu) 32 | ) * self.samples_per_gpu - len(indice) 33 | indice = np.concatenate( 34 | [indice, np.random.choice(indice, num_extra)]) 35 | indices.append(indice) 36 | indices = np.concatenate(indices) 37 | indices = [ 38 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu] 39 | for i in np.random.permutation( 40 | range(len(indices) // self.samples_per_gpu)) 41 | ] 42 | indices = np.concatenate(indices) 43 | indices = indices.astype(np.int64).tolist() 44 | assert len(indices) == self.num_samples 45 | return iter(indices) 46 | 47 | def __len__(self): 48 | return self.num_samples 49 | 50 | 51 | class DistributedGroupSampler(Sampler): 52 | """Sampler that restricts data loading to a subset of the dataset. 53 | 54 | It is especially useful in conjunction with 55 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 56 | process can pass a DistributedSampler instance as a DataLoader sampler, 57 | and load a subset of the original dataset that is exclusive to it. 58 | 59 | .. note:: 60 | Dataset is assumed to be of constant size. 61 | 62 | Arguments: 63 | dataset: Dataset used for sampling. 64 | num_replicas (optional): Number of processes participating in 65 | distributed training. 66 | rank (optional): Rank of the current process within num_replicas. 
67 | """ 68 | 69 | def __init__(self, 70 | dataset, 71 | samples_per_gpu=1, 72 | num_replicas=None, 73 | rank=None): 74 | _rank, _num_replicas = get_dist_info() 75 | if num_replicas is None: 76 | num_replicas = _num_replicas 77 | if rank is None: 78 | rank = _rank 79 | self.dataset = dataset 80 | self.samples_per_gpu = samples_per_gpu 81 | self.num_replicas = num_replicas 82 | self.rank = rank 83 | self.epoch = 0 84 | 85 | assert hasattr(self.dataset, 'flag') 86 | self.flag = self.dataset.flag 87 | self.group_sizes = np.bincount(self.flag) 88 | 89 | self.num_samples = 0 90 | for i, j in enumerate(self.group_sizes): 91 | self.num_samples += int( 92 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 93 | self.num_replicas)) * self.samples_per_gpu 94 | self.total_size = self.num_samples * self.num_replicas 95 | 96 | def __iter__(self): 97 | # deterministically shuffle based on epoch 98 | g = torch.Generator() 99 | g.manual_seed(self.epoch) 100 | 101 | indices = [] 102 | for i, size in enumerate(self.group_sizes): 103 | if size > 0: 104 | indice = np.where(self.flag == i)[0] 105 | assert len(indice) == size 106 | # add .numpy() to avoid bug when selecting indice in parrots. 107 | # TODO: check whether torch.randperm() can be replaced by 108 | # numpy.random.permutation(). 109 | indice = indice[list( 110 | torch.randperm(int(size), generator=g).numpy())].tolist() 111 | extra = int( 112 | math.ceil( 113 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 114 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 115 | # pad indice 116 | tmp = indice.copy() 117 | for _ in range(extra // size): 118 | indice.extend(tmp) 119 | indice.extend(tmp[:extra % size]) 120 | indices.extend(indice) 121 | 122 | assert len(indices) == self.total_size 123 | 124 | indices = [ 125 | indices[j] for i in list( 126 | torch.randperm( 127 | len(indices) // self.samples_per_gpu, generator=g)) 128 | for j in range(i * self.samples_per_gpu, (i + 1) * 129 | self.samples_per_gpu) 130 | ] 131 | 132 | # subsample 133 | offset = self.num_samples * self.rank 134 | indices = indices[offset:offset + self.num_samples] 135 | assert len(indices) == self.num_samples 136 | 137 | return iter(indices) 138 | 139 | def __len__(self): 140 | return self.num_samples 141 | 142 | def set_epoch(self, epoch): 143 | self.epoch = epoch 144 | -------------------------------------------------------------------------------- /radet/core/evaluation/class_names.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def wider_face_classes(): 5 | return ['face'] 6 | 7 | 8 | def voc_classes(): 9 | return [ 10 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 11 | 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 12 | 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' 13 | ] 14 | 15 | 16 | def imagenet_det_classes(): 17 | return [ 18 | 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo', 19 | 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam', 20 | 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap', 21 | 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder', 22 | 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito', 23 | 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle', 24 | 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker', 25 | 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew', 26 | 'cream', 
'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper', 27 | 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly', 28 | 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig', 29 | 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog', 30 | 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart', 31 | 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger', 32 | 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim', 33 | 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse', 34 | 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle', 35 | 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard', 36 | 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can', 37 | 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace', 38 | 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume', 39 | 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza', 40 | 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine', 41 | 'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse', 42 | 'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator', 43 | 'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler', 44 | 'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver', 45 | 'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile', 46 | 'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula', 47 | 'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer', 48 | 'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine', 49 | 'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie', 50 | 'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet', 51 | 'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin', 52 | 'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft', 53 | 'whale', 'wine_bottle', 'zebra' 54 | ] 55 | 56 | 57 | def imagenet_vid_classes(): 58 | return [ 59 | 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 60 | 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 61 | 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 62 | 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle', 63 | 'watercraft', 'whale', 'zebra' 64 | ] 65 | 66 | 67 | def coco_classes(): 68 | return [ 69 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 70 | 'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign', 71 | 'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 72 | 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 73 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 74 | 'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard', 75 | 'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork', 76 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 77 | 'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 78 | 'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 79 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 80 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 81 | 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush' 82 | ] 83 | 84 | 85 | def cityscapes_classes(): 86 | return [ 87 | 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 88 | 'bicycle' 89 | ] 90 | 91 | 92 | dataset_aliases = { 93 | 'voc': ['voc', 'pascal_voc', 
'voc07', 'voc12'], 94 | 'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'], 95 | 'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'], 96 | 'coco': ['coco', 'mscoco', 'ms_coco'], 97 | 'wider_face': ['WIDERFaceDataset', 'wider_face', 'WIDERFace'], 98 | 'cityscapes': ['cityscapes'] 99 | } 100 | 101 | 102 | def get_classes(dataset): 103 | """Get class names of a dataset.""" 104 | alias2name = {} 105 | for name, aliases in dataset_aliases.items(): 106 | for alias in aliases: 107 | alias2name[alias] = name 108 | 109 | if mmcv.is_str(dataset): 110 | if dataset in alias2name: 111 | labels = eval(alias2name[dataset] + '_classes()') 112 | else: 113 | raise ValueError(f'Unrecognized dataset: {dataset}') 114 | else: 115 | raise TypeError(f'dataset must be a str, but got {type(dataset)}') 116 | return labels 117 | -------------------------------------------------------------------------------- /radet/core/export/pytorch2onnx.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | import torch 6 | from mmcv.runner import load_checkpoint 7 | 8 | 9 | def generate_inputs_and_wrap_model(config_path, checkpoint_path, input_config): 10 | """Prepare sample input and wrap model for ONNX export. 11 | 12 | The ONNX export API accepts only positional args, and all inputs should 13 | be torch.Tensor or corresponding types (such as a tuple of tensors). 14 | So we should call this function before exporting. This function will: 15 | 16 | 1. Generate corresponding inputs which are used to execute the model. 17 | 2. Wrap the model's forward function. 18 | 19 | For example, the MMDet models' forward function has a parameter 20 | ``return_loss:bool``. We want to set it to False, but the export API 21 | supports neither bool inputs nor kwargs, so we have to replace the 22 | forward like: ``model.forward = partial(model.forward, return_loss=False)`` 23 | 24 | Args: 25 | config_path (str): the OpenMMLab config for the model we want to 26 | export to ONNX 27 | checkpoint_path (str): Path to the corresponding checkpoint 28 | input_config (dict): the exact data in this dict depends on the 29 | framework. For MMSeg, we can just declare the input shape, 30 | and generate the dummy data accordingly. However, for MMDet, 31 | we may pass the real img path, or the NMS will return None 32 | as there is no legal bbox. 33 | 34 | Returns: 35 | tuple: (model, tensor_data) wrapped model which can be called by \ 36 | model(*tensor_data) and a list of inputs which are used to execute \ 37 | the model while exporting. 38 | """ 39 | 40 | model = build_model_from_cfg(config_path, checkpoint_path) 41 | one_img, one_meta = preprocess_example_input(input_config) 42 | tensor_data = [one_img] 43 | model.forward = partial( 44 | model.forward, img_metas=[[one_meta]], return_loss=False) 45 | 46 | # PyTorch 1.3 has some bugs in ONNX export, which we work around 47 | # by replacing the affected ops with mmcv's extra symbolics 48 | opset_version = 11 49 | # put the import within the function thus it will not cause import error 50 | # when not using this function 51 | try: 52 | from mmcv.onnx.symbolic import register_extra_symbolics 53 | except ModuleNotFoundError: 54 | raise NotImplementedError('please update mmcv to version>=v1.0.4') 55 | register_extra_symbolics(opset_version) 56 | 57 | return model, tensor_data 58 | 59 | 60 | def build_model_from_cfg(config_path, checkpoint_path): 61 | """Build a model from config and load the given checkpoint. 
62 | 63 | Args: 64 | config_path (str): the OpenMMLab config for the model we want to 65 | export to ONNX 66 | checkpoint_path (str): Path to the corresponding checkpoint 67 | 68 | Returns: 69 | torch.nn.Module: the built model 70 | """ 71 | from radet.models import build_detector 72 | 73 | cfg = mmcv.Config.fromfile(config_path) 74 | # import modules from string list. 75 | if cfg.get('custom_imports', None): 76 | from mmcv.utils import import_modules_from_strings 77 | import_modules_from_strings(**cfg['custom_imports']) 78 | cfg.model.pretrained = None 79 | cfg.data.test.test_mode = True 80 | 81 | # build the model 82 | model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) 83 | load_checkpoint(model, checkpoint_path, map_location='cpu') 84 | model.cpu().eval() 85 | return model 86 | 87 | 88 | def preprocess_example_input(input_config): 89 | """Prepare an example input image for ``generate_inputs_and_wrap_model``. 90 | 91 | Args: 92 | input_config (dict): customized config describing the example input. 93 | 94 | Returns: 95 | tuple: (one_img, one_meta), tensor of the example input image and \ 96 | meta information for the example input image. 97 | 98 | Examples: 99 | >>> from radet.core.export import preprocess_example_input 100 | >>> input_config = { 101 | >>> 'input_shape': (1,3,224,224), 102 | >>> 'input_path': 'demo/demo.jpg', 103 | >>> 'normalize_cfg': { 104 | >>> 'mean': (123.675, 116.28, 103.53), 105 | >>> 'std': (58.395, 57.12, 57.375) 106 | >>> } 107 | >>> } 108 | >>> one_img, one_meta = preprocess_example_input(input_config) 109 | >>> print(one_img.shape) 110 | torch.Size([1, 3, 224, 224]) 111 | >>> print(one_meta) 112 | {'img_shape': (224, 224, 3), 113 | 'ori_shape': (224, 224, 3), 114 | 'pad_shape': (224, 224, 3), 115 | 'filename': '.png', 116 | 'scale_factor': 1.0, 117 | 'flip': False} 118 | """ 119 | input_path = input_config['input_path'] 120 | input_shape = input_config['input_shape'] 121 | one_img = mmcv.imread(input_path) 122 | one_img = mmcv.imresize(one_img, input_shape[2:][::-1]) 123 | show_img = one_img.copy() 124 | if 'normalize_cfg' in input_config.keys(): 125 | normalize_cfg = input_config['normalize_cfg'] 126 | mean = np.array(normalize_cfg['mean'], dtype=np.float32) 127 | std = np.array(normalize_cfg['std'], dtype=np.float32) 128 | one_img = mmcv.imnormalize(one_img, mean, std) 129 | one_img = one_img.transpose(2, 0, 1) 130 | one_img = torch.from_numpy(one_img).unsqueeze(0).float().requires_grad_( 131 | True) 132 | (_, C, H, W) = input_shape 133 | one_meta = { 134 | 'img_shape': (H, W, C), 135 | 'ori_shape': (H, W, C), 136 | 'pad_shape': (H, W, C), 137 | 'filename': '.png', 138 | 'scale_factor': 1.0, 139 | 'flip': False, 140 | 'show_img': show_img, 141 | } 142 | 143 | return one_img, one_meta 144 | --------------------------------------------------------------------------------
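As a closing note, here is a minimal sketch of how the export helpers in `radet/core/export/pytorch2onnx.py` above might be driven end to end. The config, checkpoint, image, and output paths are placeholders, and wiring the wrapped model into `torch.onnx.export` this way is our assumption, not a script shipped with the repository.

```python
# Hypothetical export driver built on the helpers above; all paths are
# placeholders. generate_inputs_and_wrap_model loads the checkpoint,
# registers the extra ONNX symbolics, and wraps forward(return_loss=False).
import torch

from radet.core.export.pytorch2onnx import generate_inputs_and_wrap_model

input_config = {
    'input_shape': (1, 3, 480, 640),
    'input_path': 'demo/sample.png',  # a real image, so NMS keeps legal boxes
    'normalize_cfg': {
        'mean': (123.675, 116.28, 103.53),
        'std': (58.395, 57.12, 57.375),
    },
}

model, tensor_data = generate_inputs_and_wrap_model(
    'configs/bop/r50_ycbv_pbr.py',
    'checkpoints/radet_ycbv_pbr.pth',
    input_config)

# The wrapped forward takes only tensors, as the ONNX export API requires.
torch.onnx.export(model, tuple(tensor_data), 'radet_ycbv_pbr.onnx',
                  opset_version=11)
```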