├── radet
    ├── ops
    │   ├── cluster
    │   │   ├── __init__.py
    │   │   ├── cluster_wrapper.py
    │   │   └── cluster_ext.cpp
    │   ├── vote
    │   │   ├── __init__.py
    │   │   └── vote_wrapper.py
    │   ├── bbox2distance
    │   │   └── __init__.py
    │   └── __init__.py
    ├── README.md
    ├── utils
    │   ├── __init__.py
    │   ├── collect_env.py
    │   ├── logger.py
    │   ├── profiling.py
    │   ├── util_mixins.py
    │   └── contextmanagers.py
    ├── models
    │   ├── necks
    │   │   ├── __init__.py
    │   │   └── channel_mapper.py
    │   ├── detectors
    │   │   ├── __init__.py
    │   │   └── radet.py
    │   ├── dense_heads
    │   │   ├── __init__.py
    │   │   ├── base_dense_head.py
    │   │   └── dense_test_mixins.py
    │   ├── utils
    │   │   ├── builder.py
    │   │   ├── __init__.py
    │   │   └── res_layer.py
    │   ├── __init__.py
    │   ├── backbones
    │   │   ├── __init__.py
    │   │   └── detectors_resnext.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── accuracy.py
    │   │   ├── utils.py
    │   │   └── smooth_l1_loss.py
    │   └── builder.py
    ├── core
    │   ├── visualization
    │   │   └── __init__.py
    │   ├── bbox
    │   │   ├── iou_calculators
    │   │   │   ├── __init__.py
    │   │   │   └── builder.py
    │   │   ├── match_costs
    │   │   │   ├── __init__.py
    │   │   │   └── builder.py
    │   │   ├── assigners
    │   │   │   ├── base_assigner.py
    │   │   │   └── __init__.py
    │   │   ├── coder
    │   │   │   ├── base_bbox_coder.py
    │   │   │   ├── __init__.py
    │   │   │   ├── pseudo_bbox_coder.py
    │   │   │   └── yolo_bbox_coder.py
    │   │   ├── builder.py
    │   │   ├── samplers
    │   │   │   ├── __init__.py
    │   │   │   ├── combined_sampler.py
    │   │   │   ├── pseudo_sampler.py
    │   │   │   ├── instance_balanced_pos_sampler.py
    │   │   │   ├── random_sampler.py
    │   │   │   ├── base_sampler.py
    │   │   │   └── ohem_sampler.py
    │   │   ├── __init__.py
    │   │   └── demodata.py
    │   ├── utils
    │   │   ├── __init__.py
    │   │   ├── misc.py
    │   │   └── dist_utils.py
    │   ├── anchor
    │   │   ├── builder.py
    │   │   ├── __init__.py
    │   │   ├── point_generator.py
    │   │   └── utils.py
    │   ├── export
    │   │   ├── __init__.py
    │   │   └── pytorch2onnx.py
    │   ├── mask
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   └── mask_target.py
    │   ├── __init__.py
    │   ├── post_processing
    │   │   ├── __init__.py
    │   │   └── merge_augs.py
    │   ├── fp16
    │   │   ├── __init__.py
    │   │   └── deprecated_fp16_utils.py
    │   └── evaluation
    │       ├── __init__.py
    │       ├── bbox_overlaps.py
    │       └── class_names.py
    ├── datasets
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── distributed_sampler.py
    │   │   └── group_sampler.py
    │   ├── deepfashion.py
    │   ├── ycbv.py
    │   ├── kitti.py
    │   ├── __init__.py
    │   ├── pipelines
    │   │   ├── compose.py
    │   │   ├── __init__.py
    │   │   ├── instaboost.py
    │   │   └── test_time_aug.py
    │   ├── wider_face.py
    │   ├── utils.py
    │   └── voc.py
    ├── apis
    │   └── __init__.py
    ├── version.py
    └── __init__.py
├── resources
    ├── .DS_Store
    ├── radet.png
    └── rigidity.png
├── requirements.txt
├── setup.cfg
├── configs
    ├── base
    │   ├── default_runtime.py
    │   └── datasets
    │       ├── bop_detection.py
    │       └── bop_detection_mix.py
    └── bop
        ├── r50_icbin_pbr.py
        ├── r50_itodd_pbr.py
        ├── r50_tudl_pbr.py
        ├── r50_tless_pbr.py
        ├── r50_hb_pbr.py
        ├── r50_lmo_pbr.py
        ├── r50_ycbv_pbr.py
        ├── r50_tudl_mixpbr.py
        ├── r50_tless_mixpbr.py
        └── r50_ycbv_mixpbr.py
├── tools
    ├── collect_image_list.py
    ├── collect_bop_imagelist.py
    ├── coco_to_bop.py
    ├── browse_dataset.py
    ├── eval_metric.py
    └── show_bop_detbbox.py
└── README.md

/radet/ops/cluster/__init__.py:
--------------------------------------------------------------------------------
from .cluster_wrapper import cluster_nms
--------------------------------------------------------------------------------
/radet/ops/vote/__init__.py:
--------------------------------------------------------------------------------
from .vote_wrapper import vote_nms, global_vote_nms
--------------------------------------------------------------------------------
/radet/README.md:
--------------------------------------------------------------------------------
Adapted from [mmdetection](https://github.com/open-mmlab/mmdetection)
--------------------------------------------------------------------------------
/resources/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangHai-1218/RADet/HEAD/resources/.DS_Store
--------------------------------------------------------------------------------
/resources/radet.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangHai-1218/RADet/HEAD/resources/radet.png
--------------------------------------------------------------------------------
/resources/rigidity.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangHai-1218/RADet/HEAD/resources/rigidity.png
--------------------------------------------------------------------------------
/radet/ops/bbox2distance/__init__.py:
--------------------------------------------------------------------------------
from .bbox2distance_wrapper import MBD_box2distance, GDT_box2distance
--------------------------------------------------------------------------------
/radet/utils/__init__.py:
--------------------------------------------------------------------------------
from .collect_env import collect_env
from .logger import get_root_logger

__all__ = ['get_root_logger', 'collect_env']
--------------------------------------------------------------------------------
/radet/models/necks/__init__.py:
--------------------------------------------------------------------------------
from .channel_mapper import ChannelMapper
from .fpn import FPN

__all__ = [
    'FPN', 'ChannelMapper',
]
--------------------------------------------------------------------------------
/radet/core/visualization/__init__.py:
--------------------------------------------------------------------------------
from .image import color_val_matplotlib, imshow_det_bboxes

__all__ = ['imshow_det_bboxes', 'color_val_matplotlib']
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
cython
numpy
matplotlib
mmpycocotools
six
terminaltables
mmcv==1.3.18
torch==1.10.0
torchvision==0.11.0
--------------------------------------------------------------------------------
/radet/core/bbox/iou_calculators/__init__.py:
--------------------------------------------------------------------------------
from .builder import build_iou_calculator
from .iou2d_calculator import BboxOverlaps2D, bbox_overlaps

__all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']
--------------------------------------------------------------------------------
/radet/models/detectors/__init__.py:
--------------------------------------------------------------------------------
from .base import BaseDetector
from .single_stage import SingleStageDetector
from .radet import RADet

__all__ = [
    'BaseDetector', 'SingleStageDetector',
    'RADet',
]
--------------------------------------------------------------------------------
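
The detector classes exported above are instantiated through the registry-based builders defined in radet/models/builder.py (shown further down). A minimal sketch of building a RADet instance from a config dict; the backbone/neck/head settings below are illustrative placeholders, not values copied from the shipped configs under configs/bop/:

from radet.models import build_detector

cfg = dict(
    type='RADet',
    backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3)),
    neck=dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
    bbox_head=dict(type='RADetHead', num_classes=21, in_channels=256))
model = build_detector(cfg, train_cfg=None, test_cfg=None)
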
/radet/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
from .distributed_sampler import DistributedSampler
from .group_sampler import DistributedGroupSampler, GroupSampler

__all__ = ['DistributedSampler', 'DistributedGroupSampler', 'GroupSampler']
--------------------------------------------------------------------------------
/radet/core/utils/__init__.py:
--------------------------------------------------------------------------------
from .dist_utils import DistOptimizerHook, allreduce_grads, reduce_mean
from .misc import multi_apply, unmap

__all__ = [
    'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply',
    'unmap'
]
--------------------------------------------------------------------------------
/radet/core/anchor/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

ANCHOR_GENERATORS = Registry('Anchor generator')


def build_anchor_generator(cfg, default_args=None):
    return build_from_cfg(cfg, ANCHOR_GENERATORS, default_args)
--------------------------------------------------------------------------------
/radet/core/bbox/match_costs/__init__.py:
--------------------------------------------------------------------------------
from .builder import build_match_cost
from .match_cost import BBoxL1Cost, ClassificationCost, FocalLossCost, IoUCost

__all__ = [
    'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost',
    'FocalLossCost'
]
--------------------------------------------------------------------------------
/radet/models/dense_heads/__init__.py:
--------------------------------------------------------------------------------
from .anchor_free_head import AnchorFreeHead
from .anchor_head import AnchorHead
from .atss_head import ATSSHead
from .radet_head import RADetHead

__all__ = [
    'AnchorFreeHead', 'AnchorHead', 'RADetHead', 'ATSSHead'
]
--------------------------------------------------------------------------------
/radet/core/bbox/match_costs/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

MATCH_COST = Registry('Match Cost')


def build_match_cost(cfg, default_args=None):
    """Builder of match cost."""
    return build_from_cfg(cfg, MATCH_COST, default_args)
--------------------------------------------------------------------------------
/radet/core/bbox/iou_calculators/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

IOU_CALCULATORS = Registry('IoU calculator')


def build_iou_calculator(cfg, default_args=None):
    """Builder of IoU calculator."""
    return build_from_cfg(cfg, IOU_CALCULATORS, default_args)
--------------------------------------------------------------------------------
/radet/core/export/__init__.py:
--------------------------------------------------------------------------------
from .pytorch2onnx import (build_model_from_cfg,
                           generate_inputs_and_wrap_model,
                           preprocess_example_input)

__all__ = [
    'build_model_from_cfg', 'generate_inputs_and_wrap_model',
    'preprocess_example_input'
]
--------------------------------------------------------------------------------
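
All of these builder modules follow the same mmcv pattern: classes register themselves into a Registry, and build_from_cfg instantiates whichever registered class a config dict names in its type key. A self-contained sketch (the DistanceCalculator class is hypothetical, not part of the repo):

from mmcv.utils import Registry, build_from_cfg

CALCULATORS = Registry('Distance calculator')


@CALCULATORS.register_module()
class DistanceCalculator:
    """Hypothetical example class used to illustrate the registry."""

    def __init__(self, scale=1.0):
        self.scale = scale


calc = build_from_cfg(dict(type='DistanceCalculator', scale=2.0), CALCULATORS)
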
/radet/ops/__init__.py:
--------------------------------------------------------------------------------
from .bbox2distance import MBD_box2distance, GDT_box2distance
from .vote import vote_nms, global_vote_nms
from .cluster import cluster_nms


__all__ = [
    'vote_nms',
    'global_vote_nms',
    'MBD_box2distance',
    'GDT_box2distance',
    'cluster_nms'
]
--------------------------------------------------------------------------------
/radet/core/mask/__init__.py:
--------------------------------------------------------------------------------
from .mask_target import mask_target
from .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks
from .utils import encode_mask_results, split_combined_polys

__all__ = [
    'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks',
    'PolygonMasks', 'encode_mask_results'
]
--------------------------------------------------------------------------------
/radet/core/__init__.py:
--------------------------------------------------------------------------------
from .anchor import *  # noqa: F401, F403
from .bbox import *  # noqa: F401, F403
from .evaluation import *  # noqa: F401, F403
from .export import *  # noqa: F401, F403
from .fp16 import *  # noqa: F401, F403
from .mask import *  # noqa: F401, F403
from .post_processing import *  # noqa: F401, F403
from .utils import *  # noqa: F401, F403
--------------------------------------------------------------------------------
/radet/datasets/deepfashion.py:
--------------------------------------------------------------------------------
from .builder import DATASETS
from .coco import CocoDataset


@DATASETS.register_module()
class DeepFashionDataset(CocoDataset):

    CLASSES = ('top', 'skirt', 'leggings', 'dress', 'outer', 'pants', 'bag',
               'neckwear', 'headwear', 'eyeglass', 'belt', 'footwear', 'hair',
               'skin', 'face')
--------------------------------------------------------------------------------
/radet/core/post_processing/__init__.py:
--------------------------------------------------------------------------------
from .bbox_nms import fast_nms, multiclass_nms, multiclass_vote
from .merge_augs import (merge_aug_bboxes, merge_aug_masks,
                         merge_aug_proposals, merge_aug_scores)

__all__ = [
    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
    'merge_aug_scores', 'merge_aug_masks', 'fast_nms', 'multiclass_vote'
]
--------------------------------------------------------------------------------
/radet/core/bbox/assigners/base_assigner.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod


class BaseAssigner(metaclass=ABCMeta):
    """Base assigner that assigns boxes to ground truth boxes."""

    @abstractmethod
    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        """Assign each box to either a ground truth box or the background."""
        pass
--------------------------------------------------------------------------------
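
A new assignment strategy subclasses BaseAssigner above and registers itself with BBOX_ASSIGNERS from radet/core/bbox/builder.py. A skeleton, with the matching logic itself elided (MyAssigner is hypothetical):

from radet.core.bbox.builder import BBOX_ASSIGNERS
from radet.core.bbox.assigners.base_assigner import BaseAssigner


@BBOX_ASSIGNERS.register_module()
class MyAssigner(BaseAssigner):
    """Hypothetical example, not part of the repo."""

    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):
        # Produce a per-box index tensor (0 = negative, i > 0 = matched to
        # gt_bboxes[i - 1]) and wrap it in an AssignResult.
        raise NotImplementedError
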
/radet/core/fp16/__init__.py:
--------------------------------------------------------------------------------
from .deprecated_fp16_utils import \
    DeprecatedFp16OptimizerHook as Fp16OptimizerHook
from .deprecated_fp16_utils import deprecated_auto_fp16 as auto_fp16
from .deprecated_fp16_utils import deprecated_force_fp32 as force_fp32
from .deprecated_fp16_utils import \
    deprecated_wrap_fp16_model as wrap_fp16_model

__all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model']
--------------------------------------------------------------------------------
/radet/apis/__init__.py:
--------------------------------------------------------------------------------
from .inference import (async_inference_detector, inference_detector,
                        init_detector, show_result_pyplot)
from .test import multi_gpu_test, single_gpu_test
from .train import get_root_logger, set_random_seed, train_detector

__all__ = [
    'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',
    'async_inference_detector', 'inference_detector', 'show_result_pyplot',
    'multi_gpu_test', 'single_gpu_test'
]
--------------------------------------------------------------------------------
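
The functions re-exported here follow the standard mmdetection inference flow. A sketch using one of the shipped configs (the checkpoint path is a hypothetical example):

from radet.apis import init_detector, inference_detector

config = 'configs/bop/r50_ycbv_pbr.py'
checkpoint = 'work_dirs/r50_ycbv_pbr/latest.pth'  # hypothetical path
model = init_detector(config, checkpoint, device='cuda:0')
result = inference_detector(model, 'demo.png')  # per-class arrays of (x1, y1, x2, y2, score)
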
/radet/utils/collect_env.py:
--------------------------------------------------------------------------------
from mmcv.utils import collect_env as collect_base_env
from mmcv.utils import get_git_hash

import radet


def collect_env():
    """Collect the information of the running environments."""
    env_info = collect_base_env()
    env_info['MMDetection'] = radet.__version__ + '+' + get_git_hash()[:7]
    return env_info


if __name__ == '__main__':
    for name, val in collect_env().items():
        print(f'{name}: {val}')
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
[isort]
line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet
known_third_party = PIL,asynctest,cityscapesscripts,cv2,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,robustness_eval,seaborn,six,terminaltables,torch
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY

[yapf]
BASED_ON_STYLE = pep8
BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true
--------------------------------------------------------------------------------
/radet/models/utils/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

TRANSFORMER = Registry('Transformer')
POSITIONAL_ENCODING = Registry('Position encoding')


def build_transformer(cfg, default_args=None):
    """Builder for Transformer."""
    return build_from_cfg(cfg, TRANSFORMER, default_args)


def build_positional_encoding(cfg, default_args=None):
    """Builder for Position Encoding."""
    return build_from_cfg(cfg, POSITIONAL_ENCODING, default_args)
--------------------------------------------------------------------------------
/radet/datasets/ycbv.py:
--------------------------------------------------------------------------------
from .builder import DATASETS
from .coco import CocoDataset


@DATASETS.register_module()
class YcbvDataset(CocoDataset):
    CLASSES = ('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can',
               'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box',
               'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser',
               'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker',
               'large_clamp', 'extra_large_clamp', 'foam_brick')
--------------------------------------------------------------------------------
/radet/utils/logger.py:
--------------------------------------------------------------------------------
import logging

from mmcv.utils import get_logger


def get_root_logger(log_file=None, log_level=logging.INFO):
    """Get root logger.

    Args:
        log_file (str, optional): File path of log. Defaults to None.
        log_level (int, optional): The level of logger.
            Defaults to logging.INFO.

    Returns:
        :obj:`logging.Logger`: The obtained logger
    """
    logger = get_logger(name='radet', log_file=log_file, log_level=log_level)

    return logger
--------------------------------------------------------------------------------
/radet/core/anchor/__init__.py:
--------------------------------------------------------------------------------
from .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator,
                               YOLOAnchorGenerator)
from .builder import ANCHOR_GENERATORS, build_anchor_generator
from .point_generator import PointGenerator
from .utils import anchor_inside_flags, calc_region, images_to_levels

__all__ = [
    'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags',
    'PointGenerator', 'images_to_levels', 'calc_region',
    'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator'
]
--------------------------------------------------------------------------------
/radet/core/bbox/coder/base_bbox_coder.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod


class BaseBBoxCoder(metaclass=ABCMeta):
    """Base bounding box coder."""

    def __init__(self, **kwargs):
        pass

    @abstractmethod
    def encode(self, bboxes, gt_bboxes):
        """Encode deltas between bboxes and ground truth boxes."""
        pass

    @abstractmethod
    def decode(self, bboxes, bboxes_pred):
        """Decode the predicted bboxes according to prediction and base
        boxes."""
        pass
--------------------------------------------------------------------------------
/radet/core/bbox/coder/__init__.py:
--------------------------------------------------------------------------------
from .base_bbox_coder import BaseBBoxCoder
from .bucketing_bbox_coder import BucketingBBoxCoder
from .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder
from .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder
from .pseudo_bbox_coder import PseudoBBoxCoder
from .tblr_bbox_coder import TBLRBBoxCoder
from .yolo_bbox_coder import YOLOBBoxCoder

__all__ = [
    'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',
    'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder',
    'BucketingBBoxCoder'
]
--------------------------------------------------------------------------------
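
A concrete coder implements the two abstract methods of BaseBBoxCoder and registers itself with BBOX_CODERS from radet/core/bbox/builder.py; PseudoBBoxCoder further down is the degenerate pass-through case. A hypothetical sketch:

from radet.core.bbox.builder import BBOX_CODERS
from radet.core.bbox.coder.base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class IdentityOffsetCoder(BaseBBoxCoder):
    """Hypothetical example: plain additive offsets as regression targets."""

    def encode(self, bboxes, gt_bboxes):
        return gt_bboxes - bboxes

    def decode(self, bboxes, bboxes_pred):
        return bboxes + bboxes_pred
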
/radet/version.py:
--------------------------------------------------------------------------------
# Copyright (c) Open-MMLab. All rights reserved.

__version__ = '2.8.0'
short_version = __version__


def parse_version_info(version_str):
    version_info = []
    for x in version_str.split('.'):
        if x.isdigit():
            version_info.append(int(x))
        elif x.find('rc') != -1:
            patch_version = x.split('rc')
            version_info.append(int(patch_version[0]))
            version_info.append(f'rc{patch_version[1]}')
    return tuple(version_info)


version_info = parse_version_info(__version__)
--------------------------------------------------------------------------------
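
Worked examples for the parser above, easy to verify by hand:

parse_version_info('2.8.0')     # -> (2, 8, 0)
parse_version_info('2.8.0rc1')  # -> (2, 8, 0, 'rc1')
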
/radet/models/__init__.py:
--------------------------------------------------------------------------------
from .backbones import *  # noqa: F401,F403
from .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
                      build_backbone, build_detector,
                      build_head, build_loss, build_neck)
from .dense_heads import *  # noqa: F401,F403
from .detectors import *  # noqa: F401,F403
from .losses import *  # noqa: F401,F403
from .necks import *  # noqa: F401,F403

__all__ = [
    'BACKBONES', 'NECKS', 'HEADS', 'LOSSES',
    'DETECTORS', 'build_backbone', 'build_neck',
    'build_head', 'build_loss', 'build_detector'
]
--------------------------------------------------------------------------------
/radet/core/bbox/coder/pseudo_bbox_coder.py:
--------------------------------------------------------------------------------
from ..builder import BBOX_CODERS
from .base_bbox_coder import BaseBBoxCoder


@BBOX_CODERS.register_module()
class PseudoBBoxCoder(BaseBBoxCoder):
    """Pseudo bounding box coder."""

    def __init__(self, **kwargs):
        super(PseudoBBoxCoder, self).__init__(**kwargs)

    def encode(self, bboxes, gt_bboxes):
        """torch.Tensor: return the given ``gt_bboxes``"""
        return gt_bboxes

    def decode(self, bboxes, pred_bboxes):
        """torch.Tensor: return the given ``pred_bboxes``"""
        return pred_bboxes
--------------------------------------------------------------------------------
/radet/core/bbox/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg

BBOX_ASSIGNERS = Registry('bbox_assigner')
BBOX_SAMPLERS = Registry('bbox_sampler')
BBOX_CODERS = Registry('bbox_coder')


def build_assigner(cfg, **default_args):
    """Builder of box assigner."""
    return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)


def build_sampler(cfg, **default_args):
    """Builder of box sampler."""
    return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)


def build_bbox_coder(cfg, **default_args):
    """Builder of box coder."""
    return build_from_cfg(cfg, BBOX_CODERS, default_args)
--------------------------------------------------------------------------------
/radet/models/backbones/__init__.py:
--------------------------------------------------------------------------------
from .darknet import Darknet
from .detectors_resnet import DetectoRS_ResNet
from .detectors_resnext import DetectoRS_ResNeXt
from .hourglass import HourglassNet
from .hrnet import HRNet
from .regnet import RegNet
from .res2net import Res2Net
from .resnest import ResNeSt
from .resnet import ResNet, ResNetV1d
from .resnext import ResNeXt
from .ssd_vgg import SSDVGG
from .trident_resnet import TridentResNet

__all__ = [
    'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'Res2Net',
    'HourglassNet', 'DetectoRS_ResNet', 'DetectoRS_ResNeXt', 'Darknet',
    'ResNeSt', 'TridentResNet'
]
--------------------------------------------------------------------------------
/radet/core/bbox/samplers/__init__.py:
--------------------------------------------------------------------------------
from .base_sampler import BaseSampler
from .combined_sampler import CombinedSampler
from .instance_balanced_pos_sampler import InstanceBalancedPosSampler
from .iou_balanced_neg_sampler import IoUBalancedNegSampler
from .ohem_sampler import OHEMSampler
from .pseudo_sampler import PseudoSampler
from .random_sampler import RandomSampler
from .sampling_result import SamplingResult
from .score_hlr_sampler import ScoreHLRSampler

__all__ = [
    'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler'
]
--------------------------------------------------------------------------------
/radet/core/bbox/assigners/__init__.py:
--------------------------------------------------------------------------------
from .approx_max_iou_assigner import ApproxMaxIoUAssigner
from .assign_result import AssignResult
from .atss_assigner import ATSSAssigner
from .base_assigner import BaseAssigner
from .center_region_assigner import CenterRegionAssigner
from .grid_assigner import GridAssigner
from .hungarian_assigner import HungarianAssigner
from .max_iou_assigner import MaxIoUAssigner
from .point_assigner import PointAssigner
from .region_assigner import RegionAssigner

__all__ = [
    'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',
    'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner',
    'HungarianAssigner', 'RegionAssigner'
]
--------------------------------------------------------------------------------
/radet/core/bbox/samplers/combined_sampler.py:
--------------------------------------------------------------------------------
from ..builder import BBOX_SAMPLERS, build_sampler
from .base_sampler import BaseSampler


@BBOX_SAMPLERS.register_module()
class CombinedSampler(BaseSampler):
    """A sampler that combines positive sampler and negative sampler."""

    def __init__(self, pos_sampler, neg_sampler, **kwargs):
        super(CombinedSampler, self).__init__(**kwargs)
        self.pos_sampler = build_sampler(pos_sampler, **kwargs)
        self.neg_sampler = build_sampler(neg_sampler, **kwargs)

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError
--------------------------------------------------------------------------------
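
CombinedSampler delegates the actual selection to its nested sub-samplers, so it is configured with sampler dicts inside a sampler dict. A sketch; the num and pos_fraction arguments are assumed from the BaseSampler signature in the mmdetection lineage, and the values are placeholders:

from radet.core.bbox import build_sampler

sampler = build_sampler(
    dict(
        type='CombinedSampler',
        num=512,
        pos_fraction=0.25,
        pos_sampler=dict(type='InstanceBalancedPosSampler'),
        neg_sampler=dict(type='IoUBalancedNegSampler')))
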
/radet/ops/cluster/cluster_wrapper.py:
--------------------------------------------------------------------------------
import torch
from . import cluster_ext
import numpy as np


def cluster_nms(bboxes, scores, categories, iou_threshold=0.65):
    if isinstance(bboxes, np.ndarray):
        bboxes = torch.from_numpy(bboxes)
    else:
        assert isinstance(bboxes, torch.Tensor)
    if isinstance(scores, np.ndarray):
        scores = torch.from_numpy(scores)
    else:
        assert isinstance(scores, torch.Tensor)

    if isinstance(categories, np.ndarray):
        categories = torch.from_numpy(categories)
    else:
        assert isinstance(categories, torch.Tensor)

    instance_ids, clusters_num = cluster_ext.cluster_nms(bboxes, scores, categories, iou_threshold)
    return instance_ids, clusters_num
--------------------------------------------------------------------------------
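
cluster_nms above groups overlapping same-category detections into clusters via the compiled cluster_ext extension. A usage sketch with made-up boxes; the exact meaning of the returned instance ids is defined by the C++ side:

import torch
from radet.ops import cluster_nms

bboxes = torch.tensor([[0., 0., 10., 10.],
                       [1., 1., 11., 11.],
                       [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
categories = torch.tensor([0, 0, 0])
instance_ids, clusters_num = cluster_nms(bboxes, scores, categories, iou_threshold=0.65)
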
/configs/base/default_runtime.py:
--------------------------------------------------------------------------------
optimizer = dict(
    type='AdamW',
    lr=0.0004,
    betas=(0.9, 0.999),
    weight_decay=0.05,
    eps=1e-08,
    amsgrad=False,
)
lr_config = dict(
    policy='OneCycle',
    max_lr=0.0004,
    total_steps=100100,
    pct_start=0.05,
    anneal_strategy='linear')

runner = dict(type='IterBasedRunner', max_iters=100000)
checkpoint_config = dict(interval=10000)
evaluation = dict(interval=10000, metric='bbox')
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
workflow = [('train', 1)]
--------------------------------------------------------------------------------
/radet/core/evaluation/__init__.py:
--------------------------------------------------------------------------------
from .class_names import (cityscapes_classes, coco_classes, dataset_aliases,
                          get_classes, imagenet_det_classes,
                          imagenet_vid_classes, voc_classes)
from .eval_hooks import DistEvalHook, EvalHook
from .mean_ap import average_precision, eval_map, print_map_summary
from .recall import (eval_recalls, plot_iou_recall, plot_num_recall,
                     print_recall_summary)

__all__ = [
    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
    'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes',
    'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map',
    'print_map_summary', 'eval_recalls', 'print_recall_summary',
    'plot_num_recall', 'plot_iou_recall'
]
--------------------------------------------------------------------------------
/tools/collect_image_list.py:
--------------------------------------------------------------------------------
import argparse
from glob import glob
from os import path as osp

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--source-dir', default='data/ycbv/train_real', type=str)
    parser.add_argument('--save-path', default='data/ycbv/train_real/train_real.txt', type=str)
    parser.add_argument('--pattern', default='*/rgb/*.png', type=str)
    args = parser.parse_args()
    return args




if __name__ == '__main__':
    args = parse_args()
    image_list = glob(osp.join(args.source_dir, args.pattern))
    image_list = sorted(image_list)
    image_list = [i.replace(args.source_dir+'/', '')+'\n' for i in image_list]
    print(f"Total {len(image_list)} images found")
    with open(args.save_path, 'w') as f:
        f.writelines(image_list)
--------------------------------------------------------------------------------
/radet/models/utils/__init__.py:
--------------------------------------------------------------------------------
from .builder import build_positional_encoding, build_transformer
from .gaussian_target import gaussian_radius, gen_gaussian_target
from .positional_encoding import (LearnedPositionalEncoding,
                                  SinePositionalEncoding)
from .res_layer import ResLayer
from .transformer import (FFN, MultiheadAttention, Transformer,
                          TransformerDecoder, TransformerDecoderLayer,
                          TransformerEncoder, TransformerEncoderLayer)

__all__ = [
    'ResLayer', 'gaussian_radius', 'gen_gaussian_target', 'MultiheadAttention',
    'FFN', 'TransformerEncoderLayer', 'TransformerEncoder',
    'TransformerDecoderLayer', 'TransformerDecoder', 'Transformer',
    'build_transformer', 'build_positional_encoding', 'SinePositionalEncoding',
    'LearnedPositionalEncoding'
]
--------------------------------------------------------------------------------
/radet/models/losses/__init__.py:
--------------------------------------------------------------------------------
from .accuracy import Accuracy, accuracy
from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,
                                 cross_entropy, mask_cross_entropy)
from .focal_loss import FocalLoss, sigmoid_focal_loss
from .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss,
                       bounded_iou_loss, iou_loss)
from .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss
from .utils import reduce_loss, weight_reduce_loss, weighted_loss

__all__ = [
    'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',
    'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',
    'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss',
    'iou_loss', 'bounded_iou_loss', 'l1_loss',
    'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss',
    'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss',
]
--------------------------------------------------------------------------------
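
The functional losses exported here keep the mmdetection conventions; a minimal sketch of the smooth L1 variant (the beta and reduction keywords are assumed from that lineage):

import torch
from radet.models.losses import smooth_l1_loss

pred = torch.rand(4, 4)
target = torch.rand(4, 4)
loss = smooth_l1_loss(pred, target, beta=1.0, reduction='mean')
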
/radet/__init__.py:
--------------------------------------------------------------------------------
import mmcv

from .version import __version__, short_version



def digit_version(version_str):
    digit_version = []
    for x in version_str.split('.'):
        if x.isdigit():
            digit_version.append(int(x))
        elif x.find('rc') != -1:
            patch_version = x.split('rc')
            digit_version.append(int(patch_version[0]) - 1)
            digit_version.append(int(patch_version[1]))
    return digit_version


mmcv_minimum_version = '1.2.4'
mmcv_maximum_version = '1.3.20'
mmcv_version = digit_version(mmcv.__version__)


assert (mmcv_version >= digit_version(mmcv_minimum_version)
        and mmcv_version <= digit_version(mmcv_maximum_version)), \
    f'MMCV=={mmcv.__version__} is used but incompatible. ' \
    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'

__all__ = ['__version__', 'short_version']
--------------------------------------------------------------------------------
/tools/collect_bop_imagelist.py:
--------------------------------------------------------------------------------
import json
import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('bop_test_json', type=str)
    parser.add_argument('save_path', type=str)
    parser.add_argument('--ext', default='png', type=str)
    args = parser.parse_args()
    return args



if __name__ == '__main__':
    args = parse_args()
    bop_test_json, save_path, ext = args.bop_test_json, args.save_path, args.ext
    with open(bop_test_json, 'r') as f:
        bop_test = json.load(f)
    image_paths = []
    for obj in bop_test:
        im_id, scene_id = obj['im_id'], obj['scene_id']
        image_path = f"{int(scene_id):06d}/rgb/{int(im_id):06d}.{ext}"
        if image_path in image_paths:
            continue
        else:
            image_paths.append(image_path)
    print(f"total {len(image_paths)} found")
    with open(save_path, 'w') as f:
        f.writelines([p+'\n' for p in image_paths])
--------------------------------------------------------------------------------
/radet/datasets/kitti.py:
--------------------------------------------------------------------------------
from .builder import DATASETS
from .coco import CocoDataset

@DATASETS.register_module()
class KittiDataset(CocoDataset):
    CLASSES = ('Car', 'Van', 'Truck', 'Pedestrian', 'Person_sitting',
               'Cyclist', 'Tram', 'Misc')

    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 classwise=False,
                 proposal_nums=(100, 300, 1000),
                 iou_thrs=None,
                 metric_items=None):
        return super(KittiDataset, self).evaluate(results=results,
                                                  metric=metric,
                                                  logger=logger,
                                                  jsonfile_prefix=jsonfile_prefix,
                                                  classwise=True,
                                                  proposal_nums=proposal_nums,
                                                  iou_thrs=iou_thrs,
                                                  metric_items=metric_items)
--------------------------------------------------------------------------------
/radet/datasets/samplers/distributed_sampler.py:
--------------------------------------------------------------------------------
import math

import torch
from torch.utils.data import DistributedSampler as _DistributedSampler


class DistributedSampler(_DistributedSampler):

    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
        super().__init__(dataset, num_replicas=num_replicas, rank=rank)
        self.shuffle = shuffle

    def __iter__(self):
        # deterministically shuffle based on epoch
        if self.shuffle:
            g = torch.Generator()
            g.manual_seed(self.epoch)
            indices = torch.randperm(len(self.dataset), generator=g).tolist()
        else:
            indices = torch.arange(len(self.dataset)).tolist()

        # add extra samples to make it evenly divisible
        # in case that indices is shorter than half of total_size
        indices = (indices *
                   math.ceil(self.total_size / len(indices)))[:self.total_size]
        assert len(indices) == self.total_size

        # subsample
        indices = indices[self.rank:self.total_size:self.num_replicas]
        assert len(indices) == self.num_samples

        return iter(indices)
--------------------------------------------------------------------------------
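
The subclass above only makes shuffling optional and repeats indices so every rank gets the same count; otherwise it behaves like torch's sampler, including the inherited set_epoch hook. Typical usage (dataset, world_size, rank and num_epochs are placeholders):

from torch.utils.data import DataLoader
from radet.datasets.samplers import DistributedSampler

sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank, shuffle=True)
loader = DataLoader(dataset, batch_size=2, sampler=sampler)
for epoch in range(num_epochs):
    sampler.set_epoch(epoch)  # re-seeds the deterministic shuffle
    for batch in loader:
        ...
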
/radet/datasets/__init__.py:
--------------------------------------------------------------------------------
from .builder import DATASETS, PIPELINES, build_dataloader, build_dataset
from .cityscapes import CityscapesDataset
from .coco import CocoDataset
from .custom import CustomDataset
from .dataset_wrappers import (ClassBalancedDataset, ConcatDataset, MixDataset,
                               RepeatDataset)
from .deepfashion import DeepFashionDataset
from .lvis import LVISDataset, LVISV1Dataset, LVISV05Dataset
from .samplers import DistributedGroupSampler, DistributedSampler, GroupSampler
from .utils import replace_ImageToTensor
from .voc import VOCDataset
from .wider_face import WIDERFaceDataset
from .xml_style import XMLDataset
from .ycbv import YcbvDataset
from .kitti import KittiDataset
from .bop import BOPDataset

__all__ = [
    'CustomDataset', 'XMLDataset', 'CocoDataset', 'DeepFashionDataset',
    'VOCDataset', 'CityscapesDataset', 'LVISDataset', 'LVISV05Dataset',
    'LVISV1Dataset', 'GroupSampler', 'DistributedGroupSampler',
    'DistributedSampler', 'build_dataloader', 'ConcatDataset', 'RepeatDataset',
    'ClassBalancedDataset', 'WIDERFaceDataset', 'DATASETS', 'PIPELINES',
    'build_dataset', 'replace_ImageToTensor', 'YcbvDataset', 'KittiDataset'
]
--------------------------------------------------------------------------------
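
Custom datasets plug in the same way as the bundled YcbvDataset or KittiDataset: subclass an existing dataset, set CLASSES, and register the class. A hypothetical example:

from radet.datasets import DATASETS, CocoDataset


@DATASETS.register_module()
class MyBopDataset(CocoDataset):
    """Hypothetical example, not part of the repo."""

    CLASSES = ('obj_000001', 'obj_000002')
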
/radet/models/detectors/radet.py:
--------------------------------------------------------------------------------
from ..builder import DETECTORS
from .single_stage import SingleStageDetector


@DETECTORS.register_module()
class RADet(SingleStageDetector):
    """Implementation of RADet, adapted from `ATSS
    <https://arxiv.org/abs/1912.02424>`_."""

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(RADet, self).__init__(backbone, neck, bbox_head, train_cfg,
                                    test_cfg, pretrained)

    def forward_train(self,
                      img,
                      img_metas,
                      gt_bboxes,
                      gt_labels,
                      points_to_gt_index,
                      points_weight,
                      gt_bboxes_ignore=None):
        super(SingleStageDetector, self).forward_train(img, img_metas)
        x = self.extract_feat(img)
        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,
                                              gt_labels, points_to_gt_index, points_weight,
                                              gt_bboxes_ignore)
        return losses
--------------------------------------------------------------------------------
/radet/core/utils/misc.py:
--------------------------------------------------------------------------------
from functools import partial

import torch
from six.moves import map, zip


def multi_apply(func, *args, **kwargs):
    """Apply function to a list of arguments.

    Note:
        This function applies ``func`` to multiple inputs and
        maps the multiple outputs of ``func`` into different
        lists. Each list contains the same type of outputs
        corresponding to different inputs.

    Args:
        func (Function): A function that will be applied to a list of
            arguments

    Returns:
        tuple(list): A tuple containing multiple lists; each list contains \
            one kind of result returned by the function
    """
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))


def unmap(data, count, inds, fill=0):
    """Unmap a subset of items (data) back to the original set of items (of
    size count)."""
    if data.dim() == 1:
        ret = data.new_full((count, ), fill)
        ret[inds.type(torch.bool)] = data
    else:
        new_size = (count, ) + data.size()[1:]
        ret = data.new_full(new_size, fill)
        ret[inds.type(torch.bool), :] = data
    return ret
--------------------------------------------------------------------------------
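
A quick worked example of multi_apply from above:

from radet.core.utils import multi_apply

def square_and_cube(x):
    return x * x, x ** 3

squares, cubes = multi_apply(square_and_cube, [1, 2, 3])
# squares == [1, 4, 9], cubes == [1, 8, 27]
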
/radet/utils/profiling.py:
--------------------------------------------------------------------------------
import contextlib
import sys
import time

import torch

if sys.version_info >= (3, 7):

    @contextlib.contextmanager
    def profile_time(trace_name,
                     name,
                     enabled=True,
                     stream=None,
                     end_stream=None):
        """Print time spent by CPU and GPU.

        Useful as a temporary context manager to find sweet spots of code
        suitable for async implementation.
        """
        if (not enabled) or not torch.cuda.is_available():
            yield
            return
        stream = stream if stream else torch.cuda.current_stream()
        end_stream = end_stream if end_stream else stream
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        stream.record_event(start)
        try:
            cpu_start = time.monotonic()
            yield
        finally:
            cpu_end = time.monotonic()
            end_stream.record_event(end)
            end.synchronize()
            cpu_time = (cpu_end - cpu_start) * 1000
            gpu_time = start.elapsed_time(end)
            msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms '
            msg += f'gpu_time {gpu_time:.2f} ms stream {stream}'
            print(msg, end_stream)
--------------------------------------------------------------------------------
/radet/core/anchor/point_generator.py:
--------------------------------------------------------------------------------
import torch

from .builder import ANCHOR_GENERATORS


@ANCHOR_GENERATORS.register_module()
class PointGenerator(object):

    def _meshgrid(self, x, y, row_major=True):
        xx = x.repeat(len(y))
        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)
        if row_major:
            return xx, yy
        else:
            return yy, xx

    def grid_points(self, featmap_size, stride=16, device='cuda'):
        feat_h, feat_w = featmap_size
        shift_x = torch.arange(0., feat_w, device=device) * stride
        shift_y = torch.arange(0., feat_h, device=device) * stride
        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
        stride = shift_x.new_full((shift_xx.shape[0], ), stride)
        shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)
        all_points = shifts.to(device)
        return all_points

    def valid_flags(self, featmap_size, valid_size, device='cuda'):
        feat_h, feat_w = featmap_size
        valid_h, valid_w = valid_size
        assert valid_h <= feat_h and valid_w <= feat_w
        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)
        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)
        valid_x[:valid_w] = 1
        valid_y[:valid_h] = 1
        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)
        valid = valid_xx & valid_yy
        return valid
--------------------------------------------------------------------------------
/tools/coco_to_bop.py:
--------------------------------------------------------------------------------
import json
import argparse
import os
from os import path as osp
import mmcv


def parse_args():
    parser = argparse.ArgumentParser(description='Convert coco format to bop format')
    parser.add_argument('json_path', type=str)
    parser.add_argument('save_dir', type=str)
    args = parser.parse_args()
    return args

if __name__ == '__main__':
    args = parse_args()
    json_path, save_dir = args.json_path, args.save_dir
    with open(json_path, 'r') as f:
        json_results = json.load(f)
    convert_results = dict()
    for result in json_results:
        scene_id, image_id = result['scene_id'], result['image_id']
        category_id = result['category_id']
        bbox = result['bbox']
        score = result['score']
        if scene_id not in convert_results:
            convert_results[scene_id] = dict()
        if str(image_id) not in convert_results[scene_id]:
            convert_results[scene_id][str(image_id)] = []
        convert_results[scene_id][str(image_id)].append(
            dict(
                bbox_obj=bbox,
                obj_id=category_id,
                score=score,
            )
        )

    for scene_id in convert_results:
        save_path = osp.join(save_dir, f"{scene_id:06d}", "scene_gt_info.json")
        os.makedirs(osp.dirname(save_path), exist_ok=True)
        mmcv.dump(convert_results[scene_id], save_path)
--------------------------------------------------------------------------------
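
Usage sketch for the converter above (both paths are illustrative): `python tools/coco_to_bop.py work_dirs/results.json data/ycbv/test/` writes one scene_gt_info.json per scene id found in the input.
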
/radet/core/bbox/samplers/pseudo_sampler.py:
--------------------------------------------------------------------------------
import torch

from ..builder import BBOX_SAMPLERS
from .base_sampler import BaseSampler
from .sampling_result import SamplingResult


@BBOX_SAMPLERS.register_module()
class PseudoSampler(BaseSampler):
    """A pseudo sampler that does not do sampling actually."""

    def __init__(self, **kwargs):
        pass

    def _sample_pos(self, **kwargs):
        """Sample positive samples."""
        raise NotImplementedError

    def _sample_neg(self, **kwargs):
        """Sample negative samples."""
        raise NotImplementedError

    def sample(self, assign_result, bboxes, gt_bboxes, **kwargs):
        """Directly returns the positive and negative indices of samples.

        Args:
            assign_result (:obj:`AssignResult`): Assigned results
            bboxes (torch.Tensor): Bounding boxes
            gt_bboxes (torch.Tensor): Ground truth boxes

        Returns:
            :obj:`SamplingResult`: sampler results
        """
        pos_inds = torch.nonzero(
            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()
        neg_inds = torch.nonzero(
            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()
        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)
        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,
                                         assign_result, gt_flags)
        return sampling_result
--------------------------------------------------------------------------------
/radet/models/builder.py:
--------------------------------------------------------------------------------
from mmcv.utils import Registry, build_from_cfg
from torch import nn

BACKBONES = Registry('backbone')
NECKS = Registry('neck')
HEADS = Registry('head')
LOSSES = Registry('loss')
DETECTORS = Registry('detector')


def build(cfg, registry, default_args=None):
    """Build a module.

    Args:
        cfg (dict, list[dict]): The config of modules; it is either a dict
            or a list of configs.
        registry (:obj:`Registry`): A registry the module belongs to.
        default_args (dict, optional): Default arguments to build the module.
            Defaults to None.

    Returns:
        nn.Module: A built nn module.
    """
    if isinstance(cfg, list):
        modules = [
            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
        ]
        return nn.Sequential(*modules)
    else:
        return build_from_cfg(cfg, registry, default_args)


def build_backbone(cfg):
    """Build backbone."""
    return build(cfg, BACKBONES)


def build_neck(cfg):
    """Build neck."""
    return build(cfg, NECKS)




def build_head(cfg):
    """Build head."""
    return build(cfg, HEADS)


def build_loss(cfg):
    """Build loss."""
    return build(cfg, LOSSES)


def build_detector(cfg, train_cfg=None, test_cfg=None):
    """Build detector."""
    return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg))
--------------------------------------------------------------------------------
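
When build receives a list of configs it chains the resulting modules in an nn.Sequential, which is how, for example, two necks could be stacked (argument values are illustrative placeholders):

from radet.models import build_neck

neck = build_neck([
    dict(type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5),
    dict(type='ChannelMapper', in_channels=[256] * 5, out_channels=256),
])  # returns an nn.Sequential of the two necks
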
/radet/core/bbox/__init__.py:
--------------------------------------------------------------------------------
from .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner,
                        MaxIoUAssigner, RegionAssigner)
from .builder import build_assigner, build_bbox_coder, build_sampler
from .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, PseudoBBoxCoder,
                    TBLRBBoxCoder)
from .iou_calculators import BboxOverlaps2D, bbox_overlaps
from .samplers import (BaseSampler, CombinedSampler,
                       InstanceBalancedPosSampler, IoUBalancedNegSampler,
                       OHEMSampler, PseudoSampler, RandomSampler,
                       SamplingResult, ScoreHLRSampler)
from .transforms import (bbox2distance, bbox2result, bbox2roi,
                         bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping,
                         bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh,
                         distance2bbox, roi2bbox)

__all__ = [
    'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner',
    'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler',
    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',
    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner',
    'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back',
    'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance',
    'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder',
    'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'CenterRegionAssigner',
    'bbox_rescale', 'bbox_cxcywh_to_xyxy', 'bbox_xyxy_to_cxcywh',
    'RegionAssigner'
]
--------------------------------------------------------------------------------
/radet/datasets/pipelines/compose.py:
--------------------------------------------------------------------------------
import collections

from mmcv.utils import build_from_cfg

from ..builder import PIPELINES


@PIPELINES.register_module()
class Compose(object):
    """Compose multiple transforms sequentially.

    Args:
        transforms (Sequence[dict | callable]): Sequence of transform object or
            config dict to be composed.
    """

    def __init__(self, transforms):
        assert isinstance(transforms, collections.abc.Sequence)
        self.transforms = []
        for transform in transforms:
            if isinstance(transform, dict):
                transform = build_from_cfg(transform, PIPELINES)
                self.transforms.append(transform)
            elif callable(transform):
                self.transforms.append(transform)
            else:
                raise TypeError('transform must be callable or a dict')

    def __call__(self, data):
        """Call function to apply transforms sequentially.

        Args:
            data (dict): A result dict contains the data to transform.

        Returns:
            dict: Transformed data.
        """

        for t in self.transforms:
            data = t(data)
            if data is None:
                return None
        return data

    def __repr__(self):
        format_string = self.__class__.__name__ + '('
        for t in self.transforms:
            format_string += '\n'
            format_string += f'    {t}'
        format_string += '\n)'
        return format_string
--------------------------------------------------------------------------------
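
A sketch of running a tiny pipeline built from config dicts; the img_prefix/img_info keys follow the mmdetection loading conventions, and the file path is illustrative:

from radet.datasets.pipelines import Compose

pipeline = Compose([dict(type='LoadImageFromFile')])
data = dict(img_prefix='data/ycbv/test', img_info=dict(filename='000048/rgb/000001.png'))
results = pipeline(data)  # adds 'img', 'img_shape', 'ori_shape', ...
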
/radet/datasets/wider_face.py:
--------------------------------------------------------------------------------
import os.path as osp
import xml.etree.ElementTree as ET

import mmcv

from .builder import DATASETS
from .xml_style import XMLDataset


@DATASETS.register_module()
class WIDERFaceDataset(XMLDataset):
    """Reader for the WIDER Face dataset in PASCAL VOC format.

    Conversion scripts can be found in
    https://github.com/sovrasov/wider-face-pascal-voc-annotations
    """
    CLASSES = ('face', )

    def __init__(self, **kwargs):
        super(WIDERFaceDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Load annotation from WIDERFace XML style annotation file.

        Args:
            ann_file (str): Path of XML file.

        Returns:
            list[dict]: Annotation info from XML file.
        """

        data_infos = []
        img_ids = mmcv.list_from_file(ann_file)
        for img_id in img_ids:
            filename = f'{img_id}.jpg'
            xml_path = osp.join(self.img_prefix, 'Annotations',
                                f'{img_id}.xml')
            tree = ET.parse(xml_path)
            root = tree.getroot()
            size = root.find('size')
            width = int(size.find('width').text)
            height = int(size.find('height').text)
            folder = root.find('folder').text
            data_infos.append(
                dict(
                    id=img_id,
                    filename=osp.join(folder, filename),
                    width=width,
                    height=height))

        return data_infos
--------------------------------------------------------------------------------
/radet/core/fp16/deprecated_fp16_utils.py:
--------------------------------------------------------------------------------
import warnings

from mmcv.runner import (Fp16OptimizerHook, auto_fp16, force_fp32,
                         wrap_fp16_model)


class DeprecatedFp16OptimizerHook(Fp16OptimizerHook):
    """A wrapper class for the FP16 optimizer hook. This class wraps
    :class:`Fp16OptimizerHook` in `mmcv.runner` and shows a warning that the
    :class:`Fp16OptimizerHook` from `mmdet.core` will be deprecated.

    Refer to :class:`Fp16OptimizerHook` in `mmcv.runner` for more details.

    Args:
        loss_scale (float): Scale factor multiplied with loss.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        warnings.warn(
            'Importing Fp16OptimizerHook from "mmdet.core" will be '
            'deprecated in the future. Please import them from "mmcv.runner" '
            'instead')


def deprecated_auto_fp16(*args, **kwargs):
    warnings.warn(
        'Importing auto_fp16 from "mmdet.core" will be '
        'deprecated in the future. Please import them from "mmcv.runner" '
        'instead')
    return auto_fp16(*args, **kwargs)


def deprecated_force_fp32(*args, **kwargs):
    warnings.warn(
        'Importing force_fp32 from "mmdet.core" will be '
        'deprecated in the future. Please import them from "mmcv.runner" '
        'instead')
    return force_fp32(*args, **kwargs)


def deprecated_wrap_fp16_model(*args, **kwargs):
    warnings.warn(
        'Importing wrap_fp16_model from "mmdet.core" will be '
        'deprecated in the future. Please import them from "mmcv.runner" '
        'instead')
    wrap_fp16_model(*args, **kwargs)
--------------------------------------------------------------------------------
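
These shims keep old mmdet.core-style imports working while pointing users at mmcv.runner. Decorator usage is unchanged; a sketch (ToyHead is hypothetical):

import torch.nn as nn
from radet.core.fp16 import force_fp32  # emits a DeprecationWarning


class ToyHead(nn.Module):
    """Hypothetical example module."""

    @force_fp32(apply_to=('cls_score', ))
    def loss(self, cls_score):
        return dict(loss_cls=cls_score.sum())
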
/radet/core/evaluation/bbox_overlaps.py:
--------------------------------------------------------------------------------
import numpy as np


def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6):
    """Calculate the ious between each bbox of bboxes1 and bboxes2.

    Args:
        bboxes1(ndarray): shape (n, 4)
        bboxes2(ndarray): shape (k, 4)
        mode(str): iou (intersection over union) or iof (intersection
            over foreground)

    Returns:
        ious(ndarray): shape (n, k)
    """

    assert mode in ['iou', 'iof']

    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    ious = np.zeros((rows, cols), dtype=np.float32)
    if rows * cols == 0:
        return ious
    exchange = False
    if bboxes1.shape[0] > bboxes2.shape[0]:
        bboxes1, bboxes2 = bboxes2, bboxes1
        ious = np.zeros((cols, rows), dtype=np.float32)
        exchange = True
    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
        overlap = np.maximum(x_end - x_start, 0) * np.maximum(
            y_end - y_start, 0)
        if mode == 'iou':
            union = area1[i] + area2 - overlap
        else:
            union = area1[i] if not exchange else area2
        union = np.maximum(union, eps)
        ious[i, :] = overlap / union
    if exchange:
        ious = ious.T
    return ious
--------------------------------------------------------------------------------
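
A quick check of the IoU arithmetic above; note this implementation does not add the legacy +1 to widths and heights:

import numpy as np

b1 = np.array([[0., 0., 10., 10.]])
b2 = np.array([[5., 5., 15., 15.]])
bbox_overlaps(b1, b2, mode='iou')  # array([[0.14285714]]): 25 / (100 + 100 - 25)
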
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """Simple version of the ``kwarray.ensure_rng`` 7 | 8 | Args: 9 | rng (int | numpy.random.RandomState | None): 10 | if None, then defaults to the global rng. Otherwise this can be an 11 | integer or a RandomState class 12 | Returns: 13 | (numpy.random.RandomState) : rng - 14 | a numpy random number generator 15 | 16 | References: 17 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 18 | """ 19 | 20 | if rng is None: 21 | rng = np.random.mtrand._rand 22 | elif isinstance(rng, int): 23 | rng = np.random.RandomState(rng) 24 | else: 25 | rng = rng 26 | return rng 27 | 28 | 29 | def random_boxes(num=1, scale=1, rng=None): 30 | """Simple version of ``kwimage.Boxes.random`` 31 | 32 | Returns: 33 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 34 | 35 | References: 36 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 37 | 38 | Example: 39 | >>> num = 3 40 | >>> scale = 512 41 | >>> rng = 0 42 | >>> boxes = random_boxes(num, scale, rng) 43 | >>> print(boxes) 44 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 45 | [216.9113, 330.6978, 224.0446, 456.5878], 46 | [405.3632, 196.3221, 493.3953, 270.7942]]) 47 | """ 48 | rng = ensure_rng(rng) 49 | 50 | tlbr = rng.rand(num, 4).astype(np.float32) 51 | 52 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 53 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 54 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 55 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 56 | 57 | tlbr[:, 0] = tl_x * scale 58 | tlbr[:, 1] = tl_y * scale 59 | tlbr[:, 2] = br_x * scale 60 | tlbr[:, 3] = br_y * scale 61 | 62 | boxes = torch.from_numpy(tlbr) 63 | return boxes 64 | -------------------------------------------------------------------------------- /radet/models/dense_heads/base_dense_head.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch.nn as nn 4 | 5 | 6 | class BaseDenseHead(nn.Module, metaclass=ABCMeta): 7 | """Base class for DenseHeads.""" 8 | 9 | def __init__(self): 10 | super(BaseDenseHead, self).__init__() 11 | 12 | @abstractmethod 13 | def loss(self, **kwargs): 14 | """Compute losses of the head.""" 15 | pass 16 | 17 | @abstractmethod 18 | def get_bboxes(self, **kwargs): 19 | """Transform network output for a batch into bbox predictions.""" 20 | pass 21 | 22 | def forward_train(self, 23 | x, 24 | img_metas, 25 | gt_bboxes, 26 | gt_labels=None, 27 | gt_bboxes_ignore=None, 28 | proposal_cfg=None, 29 | **kwargs): 30 | """ 31 | Args: 32 | x (list[Tensor]): Features from FPN. 33 | img_metas (list[dict]): Meta information of each image, e.g., 34 | image size, scaling factor, etc. 35 | gt_bboxes (Tensor): Ground truth bboxes of the image, 36 | shape (num_gts, 4). 37 | gt_labels (Tensor): Ground truth labels of each box, 38 | shape (num_gts,). 39 | gt_bboxes_ignore (Tensor): Ground truth bboxes to be 40 | ignored, shape (num_ignored_gts, 4). 41 | proposal_cfg (mmcv.Config): Test / postprocessing configuration, 42 | if None, test_cfg would be used 43 | 44 | Returns: 45 | tuple: 46 | losses: (dict[str, Tensor]): A dictionary of loss components. 47 | proposal_list (list[Tensor]): Proposals of each image. 
48 | """ 49 | outs = self(x) 50 | if gt_labels is None: 51 | loss_inputs = outs + (gt_bboxes, img_metas) 52 | else: 53 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas) 54 | losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 55 | if proposal_cfg is None: 56 | return losses 57 | else: 58 | proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg) 59 | return losses, proposal_list 60 | -------------------------------------------------------------------------------- /radet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from ..builder import BBOX_SAMPLERS 5 | from .random_sampler import RandomSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class InstanceBalancedPosSampler(RandomSampler): 10 | """Instance balanced sampler that samples equal number of positive samples 11 | for each instance.""" 12 | 13 | def _sample_pos(self, assign_result, num_expected, **kwargs): 14 | """Sample positive boxes. 15 | 16 | Args: 17 | assign_result (:obj:`AssignResult`): The assigned results of boxes. 18 | num_expected (int): The number of expected positive samples 19 | 20 | Returns: 21 | Tensor or ndarray: sampled indices. 22 | """ 23 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 24 | if pos_inds.numel() != 0: 25 | pos_inds = pos_inds.squeeze(1) 26 | if pos_inds.numel() <= num_expected: 27 | return pos_inds 28 | else: 29 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 30 | num_gts = len(unique_gt_inds) 31 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 32 | sampled_inds = [] 33 | for i in unique_gt_inds: 34 | inds = torch.nonzero( 35 | assign_result.gt_inds == i.item(), as_tuple=False) 36 | if inds.numel() != 0: 37 | inds = inds.squeeze(1) 38 | else: 39 | continue 40 | if len(inds) > num_per_gt: 41 | inds = self.random_choice(inds, num_per_gt) 42 | sampled_inds.append(inds) 43 | sampled_inds = torch.cat(sampled_inds) 44 | if len(sampled_inds) < num_expected: 45 | num_extra = num_expected - len(sampled_inds) 46 | extra_inds = np.array( 47 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 48 | if len(extra_inds) > num_extra: 49 | extra_inds = self.random_choice(extra_inds, num_extra) 50 | extra_inds = torch.from_numpy(extra_inds).to( 51 | assign_result.gt_inds.device).long() 52 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 53 | elif len(sampled_inds) > num_expected: 54 | sampled_inds = self.random_choice(sampled_inds, num_expected) 55 | return sampled_inds 56 | -------------------------------------------------------------------------------- /radet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | 6 | def split_combined_polys(polys, poly_lens, polys_per_mask): 7 | """Split the combined 1-D polys into masks. 8 | 9 | A mask is represented as a list of polys, and a poly is represented as 10 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 11 | tensor. Here we need to split the tensor into original representations. 
12 | 
13 | Args:
14 | polys (list): a list (length = image num) of 1-D tensors
15 | poly_lens (list): a list (length = image num) of poly length
16 | polys_per_mask (list): a list (length = image num) of poly number
17 | of each mask
18 | 
19 | Returns:
20 | list: a list (length = image num) of list (length = mask num) of \
21 | list (length = poly num) of numpy array.
22 | """
23 | mask_polys_list = []
24 | for img_id in range(len(polys)):
25 | polys_single = polys[img_id]
26 | polys_lens_single = poly_lens[img_id].tolist()
27 | polys_per_mask_single = polys_per_mask[img_id].tolist()
28 | 
29 | split_polys = mmcv.slice_list(polys_single, polys_lens_single)
30 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
31 | mask_polys_list.append(mask_polys)
32 | return mask_polys_list
33 | 
34 | 
35 | # TODO: move this function to a more proper place
36 | def encode_mask_results(mask_results):
37 | """Encode bitmap mask to RLE code.
38 | 
39 | Args:
40 | mask_results (list | tuple[list]): bitmap mask results.
41 | In mask scoring rcnn, mask_results is a tuple of (segm_results,
42 | segm_cls_score).
43 | 
44 | Returns:
45 | list | tuple: RLE encoded mask.
46 | """
47 | if isinstance(mask_results, tuple):  # mask scoring
48 | cls_segms, cls_mask_scores = mask_results
49 | else:
50 | cls_segms = mask_results
51 | num_classes = len(cls_segms)
52 | encoded_mask_results = [[] for _ in range(num_classes)]
53 | for i in range(len(cls_segms)):
54 | for cls_segm in cls_segms[i]:
55 | encoded_mask_results[i].append(
56 | mask_util.encode(
57 | np.array(
58 | cls_segm[:, :, np.newaxis], order='F',
59 | dtype='uint8'))[0])  # encoded with RLE
60 | if isinstance(mask_results, tuple):
61 | return encoded_mask_results, cls_mask_scores
62 | else:
63 | return encoded_mask_results
64 | 
-------------------------------------------------------------------------------- /radet/core/mask/mask_target.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch.nn.modules.utils import _pair
4 | 
5 | 
6 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,
7 | cfg):
8 | """Compute mask target for positive proposals in multiple images.
9 | 
10 | Args:
11 | pos_proposals_list (list[Tensor]): Positive proposals in multiple
12 | images.
13 | pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each
14 | positive proposal.
15 | gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of
16 | each image.
17 | cfg (dict): Config dict that specifies the mask size.
18 | 
19 | Returns:
20 | list[Tensor]: Mask target of each image.
21 | """
22 | cfg_list = [cfg for _ in range(len(pos_proposals_list))]
23 | mask_targets = map(mask_target_single, pos_proposals_list,
24 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list)
25 | mask_targets = list(mask_targets)
26 | if len(mask_targets) > 0:
27 | mask_targets = torch.cat(mask_targets)
28 | return mask_targets
29 | 
30 | 
31 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):
32 | """Compute mask target for each positive proposal in the image.
33 | 
34 | Args:
35 | pos_proposals (Tensor): Positive proposals.
36 | pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals.
37 | gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap
38 | or Polygon.
39 | cfg (dict): Config dict that indicates the mask size.
40 | 
41 | Returns:
42 | Tensor: Mask target of each positive proposal in the image.
43 | """ 44 | device = pos_proposals.device 45 | mask_size = _pair(cfg.mask_size) 46 | num_pos = pos_proposals.size(0) 47 | if num_pos > 0: 48 | proposals_np = pos_proposals.cpu().numpy() 49 | maxh, maxw = gt_masks.height, gt_masks.width 50 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw) 51 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh) 52 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 53 | 54 | mask_targets = gt_masks.crop_and_resize( 55 | proposals_np, mask_size, device=device, 56 | inds=pos_assigned_gt_inds).to_ndarray() 57 | 58 | mask_targets = torch.from_numpy(mask_targets).float().to(device) 59 | else: 60 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 61 | 62 | return mask_targets 63 | -------------------------------------------------------------------------------- /radet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from collections import OrderedDict 3 | 4 | import torch.distributed as dist 5 | from mmcv.runner import OptimizerHook 6 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 7 | _unflatten_dense_tensors) 8 | 9 | 10 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 11 | if bucket_size_mb > 0: 12 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 13 | buckets = _take_tensors(tensors, bucket_size_bytes) 14 | else: 15 | buckets = OrderedDict() 16 | for tensor in tensors: 17 | tp = tensor.type() 18 | if tp not in buckets: 19 | buckets[tp] = [] 20 | buckets[tp].append(tensor) 21 | buckets = buckets.values() 22 | 23 | for bucket in buckets: 24 | flat_tensors = _flatten_dense_tensors(bucket) 25 | dist.all_reduce(flat_tensors) 26 | flat_tensors.div_(world_size) 27 | for tensor, synced in zip( 28 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 29 | tensor.copy_(synced) 30 | 31 | 32 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 33 | """Allreduce gradients. 34 | 35 | Args: 36 | params (list[torch.Parameters]): List of parameters of a model 37 | coalesce (bool, optional): Whether allreduce parameters as a whole. 38 | Defaults to True. 39 | bucket_size_mb (int, optional): Size of bucket, the unit is MB. 40 | Defaults to -1. 
41 | """ 42 | grads = [ 43 | param.grad.data for param in params 44 | if param.requires_grad and param.grad is not None 45 | ] 46 | world_size = dist.get_world_size() 47 | if coalesce: 48 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 49 | else: 50 | for tensor in grads: 51 | dist.all_reduce(tensor.div_(world_size)) 52 | 53 | 54 | class DistOptimizerHook(OptimizerHook): 55 | """Deprecated optimizer hook for distributed training.""" 56 | 57 | def __init__(self, *args, **kwargs): 58 | warnings.warn('"DistOptimizerHook" is deprecated, please switch to' 59 | '"mmcv.runner.OptimizerHook".') 60 | super().__init__(*args, **kwargs) 61 | 62 | 63 | def reduce_mean(tensor): 64 | """"Obtain the mean of tensor on different GPUs.""" 65 | if not (dist.is_available() and dist.is_initialized()): 66 | return tensor 67 | tensor = tensor.clone() 68 | dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM) 69 | return tensor 70 | -------------------------------------------------------------------------------- /radet/datasets/utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import warnings 3 | 4 | 5 | def replace_ImageToTensor(pipelines): 6 | """Replace the ImageToTensor transform in a data pipeline to 7 | DefaultFormatBundle, which is normally useful in batch inference. 8 | 9 | Args: 10 | pipelines (list[dict]): Data pipeline configs. 11 | 12 | Returns: 13 | list: The new pipeline list with all ImageToTensor replaced by 14 | DefaultFormatBundle. 15 | 16 | Examples: 17 | >>> pipelines = [ 18 | ... dict(type='LoadImageFromFile'), 19 | ... dict( 20 | ... type='MultiScaleFlipAug', 21 | ... img_scale=(1333, 800), 22 | ... flip=False, 23 | ... transforms=[ 24 | ... dict(type='Resize', keep_ratio=True), 25 | ... dict(type='RandomFlip'), 26 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), 27 | ... dict(type='Pad', size_divisor=32), 28 | ... dict(type='ImageToTensor', keys=['img']), 29 | ... dict(type='Collect', keys=['img']), 30 | ... ]) 31 | ... ] 32 | >>> expected_pipelines = [ 33 | ... dict(type='LoadImageFromFile'), 34 | ... dict( 35 | ... type='MultiScaleFlipAug', 36 | ... img_scale=(1333, 800), 37 | ... flip=False, 38 | ... transforms=[ 39 | ... dict(type='Resize', keep_ratio=True), 40 | ... dict(type='RandomFlip'), 41 | ... dict(type='Normalize', mean=[0, 0, 0], std=[1, 1, 1]), 42 | ... dict(type='Pad', size_divisor=32), 43 | ... dict(type='DefaultFormatBundle'), 44 | ... dict(type='Collect', keys=['img']), 45 | ... ]) 46 | ... ] 47 | >>> assert expected_pipelines == replace_ImageToTensor(pipelines) 48 | """ 49 | pipelines = copy.deepcopy(pipelines) 50 | for i, pipeline in enumerate(pipelines): 51 | if pipeline['type'] == 'MultiScaleFlipAug': 52 | assert 'transforms' in pipeline 53 | pipeline['transforms'] = replace_ImageToTensor( 54 | pipeline['transforms']) 55 | elif pipeline['type'] == 'ImageToTensor': 56 | warnings.warn( 57 | '"ImageToTensor" pipeline is replaced by ' 58 | '"DefaultFormatBundle" for batch inference. 
It is ' 59 | 'recommended to manually replace it in the test ' 60 | 'data pipeline in your config file.', UserWarning) 61 | pipelines[i] = {'type': 'DefaultFormatBundle'} 62 | return pipelines 63 | -------------------------------------------------------------------------------- /radet/core/anchor/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def images_to_levels(target, num_levels): 5 | """Convert targets by image to targets by feature level. 6 | 7 | [target_img0, target_img1] -> [target_level0, target_level1, ...] 8 | """ 9 | target = torch.stack(target, 0) 10 | level_targets = [] 11 | start = 0 12 | for n in num_levels: 13 | end = start + n 14 | # level_targets.append(target[:, start:end].squeeze(0)) 15 | level_targets.append(target[:, start:end]) 16 | start = end 17 | return level_targets 18 | 19 | 20 | def anchor_inside_flags(flat_anchors, 21 | valid_flags, 22 | img_shape, 23 | allowed_border=0): 24 | """Check whether the anchors are inside the border. 25 | 26 | Args: 27 | flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4). 28 | valid_flags (torch.Tensor): An existing valid flags of anchors. 29 | img_shape (tuple(int)): Shape of current image. 30 | allowed_border (int, optional): The border to allow the valid anchor. 31 | Defaults to 0. 32 | 33 | Returns: 34 | torch.Tensor: Flags indicating whether the anchors are inside a \ 35 | valid range. 36 | """ 37 | img_h, img_w = img_shape[:2] 38 | if allowed_border >= 0: 39 | inside_flags = valid_flags & \ 40 | (flat_anchors[:, 0] >= -allowed_border) & \ 41 | (flat_anchors[:, 1] >= -allowed_border) & \ 42 | (flat_anchors[:, 2] < img_w + allowed_border) & \ 43 | (flat_anchors[:, 3] < img_h + allowed_border) 44 | else: 45 | inside_flags = valid_flags 46 | return inside_flags 47 | 48 | 49 | def calc_region(bbox, ratio, featmap_size=None): 50 | """Calculate a proportional bbox region. 51 | 52 | The bbox center are fixed and the new h' and w' is h * ratio and w * ratio. 53 | 54 | Args: 55 | bbox (Tensor): Bboxes to calculate regions, shape (n, 4). 56 | ratio (float): Ratio of the output region. 57 | featmap_size (tuple): Feature map size used for clipping the boundary. 
58 | 59 | Returns: 60 | tuple: x1, y1, x2, y2 61 | """ 62 | x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long() 63 | y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long() 64 | x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long() 65 | y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long() 66 | if featmap_size is not None: 67 | x1 = x1.clamp(min=0, max=featmap_size[1]) 68 | y1 = y1.clamp(min=0, max=featmap_size[0]) 69 | x2 = x2.clamp(min=0, max=featmap_size[1]) 70 | y2 = y2.clamp(min=0, max=featmap_size[0]) 71 | return (x1, y1, x2, y2) 72 | -------------------------------------------------------------------------------- /tools/browse_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os, random 3 | from pathlib import Path 4 | 5 | import mmcv 6 | from mmcv import Config 7 | 8 | from radet.core.visualization import imshow_det_bboxes 9 | from radet.datasets.builder import build_dataset 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser(description='Browse a dataset') 14 | parser.add_argument('--config', default='configs/mask_bop/r50_lmo_cpuassign.py', help='train config file path') 15 | parser.add_argument( 16 | '--skip-type', 17 | type=str, 18 | nargs='+', 19 | default=['DefaultFormatBundle', 'Normalize', 'Collect'], 20 | help='skip some useless pipeline') 21 | parser.add_argument( 22 | '--output-dir', 23 | default=None, 24 | type=str, 25 | help='If there is no display interface, you can save it') 26 | parser.add_argument('--not-show', default=False, action='store_true') 27 | parser.add_argument( 28 | '--show-interval', 29 | type=float, 30 | default=1, 31 | help='the interval of show (s)') 32 | parser.add_argument('--type', default='train', type=str) 33 | parser.add_argument('--random', default=True, type=bool) 34 | args = parser.parse_args() 35 | return args 36 | 37 | 38 | def retrieve_data_cfg(config_path, skip_type): 39 | cfg = Config.fromfile(config_path) 40 | train_data_cfg = cfg.data.train 41 | if hasattr(train_data_cfg, 'pipeline'): 42 | train_data_cfg['pipeline'] = [ 43 | x for x in train_data_cfg.pipeline if x['type'] not in skip_type 44 | ] 45 | else: 46 | train_data_cfg['dataset']['pipeline'] = [ 47 | x for x in train_data_cfg.dataset.pipeline if x['type'] not in skip_type 48 | ] 49 | 50 | return cfg 51 | 52 | 53 | def main(): 54 | args = parse_args() 55 | cfg = retrieve_data_cfg(args.config, args.skip_type) 56 | 57 | dataset = build_dataset(getattr(cfg.data, args.type)) 58 | 59 | random_index = list(range(len(dataset))) 60 | if args.random: 61 | random.shuffle(random_index) 62 | 63 | progress_bar = mmcv.ProgressBar(len(dataset)) 64 | for index in random_index: 65 | item = dataset[index] 66 | filename = os.path.join(args.output_dir, 67 | Path(item['filename']).name 68 | ) if args.output_dir is not None else None 69 | imshow_det_bboxes( 70 | item['img'], 71 | item['gt_bboxes'], 72 | item['gt_labels'], 73 | class_names=dataset.CLASSES, 74 | show=not args.not_show, 75 | wait_time=args.show_interval, 76 | out_file=filename, 77 | bbox_color=(255, 102, 61), 78 | text_color=(255, 102, 61)) 79 | progress_bar.update() 80 | 81 | 82 | if __name__ == '__main__': 83 | main() 84 | -------------------------------------------------------------------------------- /configs/base/datasets/bop_detection.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'BOPDataset' 2 | data_root = 'data/' 3 | img_norm_cfg = dict( 
4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_bop_mask=True), 8 | dict(type='Resize', img_scale=(640, 480), keep_ratio=True), 9 | dict(type='RandomBackground', background_dir='data/coco', prob=0.3), 10 | dict(type='CosyPoseAug', p=0.8, 11 | pipelines=[ 12 | dict(type='PillowBlur', p=1., factor_interval=(1, 3)), 13 | dict(type='PillowSharpness', p=0.3, factor_interval=(0., 50.)), 14 | dict(type='PillowContrast', p=0.3, factor_interval=(0.2, 50.)), 15 | dict(type='PillowBrightness', p=0.5, factor_interval=(0.1, 6.0)), 16 | dict(type='PillowColor', p=0.3, factor_interval=(0., 20.)), 17 | ]), 18 | dict(type='RandomFlip', flip_ratio=0.5), 19 | dict(type='GenerateDistanceMap'), 20 | dict(type='LabelAssignment', 21 | anchor_generator_cfg=dict( 22 | type='AnchorGenerator', 23 | ratios=[1.0], 24 | octave_base_scale=8, 25 | scales_per_octave=1, 26 | strides=[8, 16, 32, 64, 128] 27 | ), 28 | neg_threshold=0.2, 29 | positive_num=10, 30 | adapt_positive_num=False, 31 | balance_sample=True, 32 | ), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=16), 35 | dict(type='DefaultFormatBundle'), 36 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'points_to_gt_index', 'points_weight']) 37 | ] 38 | test_pipeline = [ 39 | dict(type='LoadImageFromFile'), 40 | dict( 41 | type='MultiScaleFlipAug', 42 | img_scale=(640, 480), 43 | flip=False, 44 | transforms=[ 45 | dict(type='Resize', keep_ratio=True), 46 | dict(type='RandomFlip'), 47 | dict(type='Normalize', **img_norm_cfg), 48 | dict(type='Pad', size_divisor=32), 49 | dict(type='ImageToTensor', keys=['img']), 50 | dict(type='Collect', keys=['img']), 51 | ]) 52 | ] 53 | 54 | data = dict( 55 | samples_per_gpu=16, 56 | workers_per_gpu=4, 57 | train=dict( 58 | type=dataset_type, 59 | ann_file=data_root + 'detector_annotations/train_pbr.json', 60 | img_prefix=data_root + 'train_pbr/', 61 | seg_prefix=data_root + 'train_pbr', 62 | pipeline=train_pipeline, 63 | ), 64 | val=dict( 65 | type=dataset_type, 66 | ann_file=data_root +'detector_annotations/test_bop19.json', 67 | img_prefix=data_root + 'test/', 68 | pipeline=test_pipeline, 69 | ), 70 | test=dict( 71 | type=dataset_type, 72 | ann_file=data_root + 'detector_annotations/test_bop19.json', 73 | img_prefix=data_root + 'test/', 74 | pipeline=test_pipeline, 75 | bop_submission=True, 76 | ), 77 | ) 78 | -------------------------------------------------------------------------------- /radet/models/necks/channel_mapper.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import ConvModule, xavier_init 3 | 4 | from ..builder import NECKS 5 | 6 | 7 | @NECKS.register_module() 8 | class ChannelMapper(nn.Module): 9 | r"""Channel Mapper to reduce/increase channels of backbone features. 10 | 11 | This is used to reduce/increase channels of backbone features. 12 | 13 | Args: 14 | in_channels (List[int]): Number of input channels per scale. 15 | out_channels (int): Number of output channels (used at each scale). 16 | kernel_size (int, optional): kernel_size for reducing channels (used 17 | at each scale). Default: 3. 18 | conv_cfg (dict, optional): Config dict for convolution layer. 19 | Default: None. 20 | norm_cfg (dict, optional): Config dict for normalization layer. 21 | Default: None. 22 | act_cfg (dict, optional): Config dict for activation layer in 23 | ConvModule. 
Default: dict(type='ReLU').
24 | 
25 | Example:
26 | >>> import torch
27 | >>> in_channels = [2, 3, 5, 7]
28 | >>> scales = [340, 170, 84, 43]
29 | >>> inputs = [torch.rand(1, c, s, s)
30 | ...           for c, s in zip(in_channels, scales)]
31 | >>> self = ChannelMapper(in_channels, 11, 3).eval()
32 | >>> outputs = self.forward(inputs)
33 | >>> for i in range(len(outputs)):
34 | ...     print(f'outputs[{i}].shape = {outputs[i].shape}')
35 | outputs[0].shape = torch.Size([1, 11, 340, 340])
36 | outputs[1].shape = torch.Size([1, 11, 170, 170])
37 | outputs[2].shape = torch.Size([1, 11, 84, 84])
38 | outputs[3].shape = torch.Size([1, 11, 43, 43])
39 | """
40 | 
41 | def __init__(self,
42 | in_channels,
43 | out_channels,
44 | kernel_size=3,
45 | conv_cfg=None,
46 | norm_cfg=None,
47 | act_cfg=dict(type='ReLU')):
48 | super(ChannelMapper, self).__init__()
49 | assert isinstance(in_channels, list)
50 | 
51 | self.convs = nn.ModuleList()
52 | for in_channel in in_channels:
53 | self.convs.append(
54 | ConvModule(
55 | in_channel,
56 | out_channels,
57 | kernel_size,
58 | padding=(kernel_size - 1) // 2,
59 | conv_cfg=conv_cfg,
60 | norm_cfg=norm_cfg,
61 | act_cfg=act_cfg))
62 | 
63 | # default init_weights for conv(msra) and norm in ConvModule
64 | def init_weights(self):
65 | """Initialize the weights of ChannelMapper module."""
66 | for m in self.modules():
67 | if isinstance(m, nn.Conv2d):
68 | xavier_init(m, distribution='uniform')
69 | 
70 | def forward(self, inputs):
71 | """Forward function."""
72 | assert len(inputs) == len(self.convs)
73 | outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]
74 | return tuple(outs)
75 | 
-------------------------------------------------------------------------------- /radet/core/bbox/samplers/random_sampler.py: --------------------------------------------------------------------------------
1 | import torch
2 | 
3 | from ..builder import BBOX_SAMPLERS
4 | from .base_sampler import BaseSampler
5 | 
6 | 
7 | @BBOX_SAMPLERS.register_module()
8 | class RandomSampler(BaseSampler):
9 | """Random sampler.
10 | 
11 | Args:
12 | num (int): Number of samples
13 | pos_fraction (float): Fraction of positive samples
14 | neg_pos_ub (int, optional): Upper bound number of negative and
15 | positive samples. Defaults to -1.
16 | add_gt_as_proposals (bool, optional): Whether to add ground truth
17 | boxes as proposals. Defaults to True.
18 | """
19 | 
20 | def __init__(self,
21 | num,
22 | pos_fraction,
23 | neg_pos_ub=-1,
24 | add_gt_as_proposals=True,
25 | **kwargs):
26 | from radet.core.bbox import demodata
27 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,
28 | add_gt_as_proposals)
29 | self.rng = demodata.ensure_rng(kwargs.get('rng', None))
30 | 
31 | def random_choice(self, gallery, num):
32 | """Randomly select some elements from the gallery.
33 | 
34 | If `gallery` is a Tensor, the returned indices will be a Tensor;
35 | If `gallery` is a ndarray or list, the returned indices will be a
36 | ndarray.
37 | 
38 | Args:
39 | gallery (Tensor | ndarray | list): indices pool.
40 | num (int): expected sample num.
41 | 
42 | Returns:
43 | Tensor or ndarray: sampled indices.
44 | """ 45 | assert len(gallery) >= num 46 | 47 | is_tensor = isinstance(gallery, torch.Tensor) 48 | if not is_tensor: 49 | if torch.cuda.is_available(): 50 | device = torch.cuda.current_device() 51 | else: 52 | device = 'cpu' 53 | gallery = torch.tensor(gallery, dtype=torch.long, device=device) 54 | perm = torch.randperm(gallery.numel(), device=gallery.device)[:num] 55 | rand_inds = gallery[perm] 56 | if not is_tensor: 57 | rand_inds = rand_inds.cpu().numpy() 58 | return rand_inds 59 | 60 | def _sample_pos(self, assign_result, num_expected, **kwargs): 61 | """Randomly sample some positive samples.""" 62 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 63 | if pos_inds.numel() != 0: 64 | pos_inds = pos_inds.squeeze(1) 65 | if pos_inds.numel() <= num_expected: 66 | return pos_inds 67 | else: 68 | return self.random_choice(pos_inds, num_expected) 69 | 70 | def _sample_neg(self, assign_result, num_expected, **kwargs): 71 | """Randomly sample some negative samples.""" 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.random_choice(neg_inds, num_expected) 79 | -------------------------------------------------------------------------------- /tools/eval_metric.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import mmcv 4 | from mmcv import Config, DictAction 5 | 6 | from radet.datasets import build_dataset 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser(description='Evaluate metric of the ' 11 | 'results saved in pkl format') 12 | parser.add_argument('config', help='Config of the model') 13 | parser.add_argument('pkl_results', help='Results in pickle format') 14 | parser.add_argument( 15 | '--format-only', 16 | action='store_true', 17 | help='Format the output results without perform evaluation. It is' 18 | 'useful when you want to format the result to a specific format and ' 19 | 'submit it to the test server') 20 | parser.add_argument( 21 | '--eval', 22 | type=str, 23 | nargs='+', 24 | help='Evaluation metrics, which depends on the dataset, e.g., "bbox",' 25 | ' "segm", "proposal" for COCO, and "mAP", "recall" for PASCAL VOC') 26 | parser.add_argument( 27 | '--cfg-options', 28 | nargs='+', 29 | action=DictAction, 30 | help='override some settings in the used config, the key-value pair ' 31 | 'in xxx=yyy format will be merged into config file. If the value to ' 32 | 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' 33 | 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' 34 | 'Note that the quotation marks are necessary and that no white space ' 35 | 'is allowed.') 36 | parser.add_argument( 37 | '--eval-options', 38 | nargs='+', 39 | action=DictAction, 40 | help='custom options for evaluation, the key-value pair in xxx=yyy ' 41 | 'format will be kwargs for dataset.evaluate() function') 42 | args = parser.parse_args() 43 | return args 44 | 45 | 46 | def main(): 47 | args = parse_args() 48 | 49 | cfg = Config.fromfile(args.config) 50 | assert args.eval or args.format_only, ( 51 | 'Please specify at least one operation (eval/format the results) with ' 52 | 'the argument "--eval", "--format-only"') 53 | if args.eval and args.format_only: 54 | raise ValueError('--eval and --format_only cannot be both specified') 55 | 56 | if args.cfg_options is not None: 57 | cfg.merge_from_dict(args.cfg_options) 58 | cfg.data.test.test_mode = True 59 | 60 | dataset = build_dataset(cfg.data.test) 61 | outputs = mmcv.load(args.pkl_results) 62 | 63 | kwargs = {} if args.eval_options is None else args.eval_options 64 | if args.format_only: 65 | dataset.format_results(outputs, **kwargs) 66 | if args.eval: 67 | eval_kwargs = cfg.get('evaluation', {}).copy() 68 | # hard-code way to remove EvalHook args 69 | for key in [ 70 | 'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best', 71 | 'rule' 72 | ]: 73 | eval_kwargs.pop(key, None) 74 | eval_kwargs.update(dict(metric=args.eval, **kwargs)) 75 | print(dataset.evaluate(outputs, **eval_kwargs)) 76 | 77 | 78 | if __name__ == '__main__': 79 | main() 80 | -------------------------------------------------------------------------------- /radet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1, thresh=None): 5 | """Calculate accuracy according to the prediction and target. 6 | 7 | Args: 8 | pred (torch.Tensor): The model prediction, shape (N, num_class) 9 | target (torch.Tensor): The target of each prediction, shape (N, ) 10 | topk (int | tuple[int], optional): If the predictions in ``topk`` 11 | matches the target, the predictions will be regarded as 12 | correct ones. Defaults to 1. 13 | thresh (float, optional): If not None, predictions with scores under 14 | this threshold are considered incorrect. Default to None. 15 | 16 | Returns: 17 | float | tuple[float]: If the input ``topk`` is a single integer, 18 | the function will return a single float as accuracy. If 19 | ``topk`` is a tuple containing multiple integers, the 20 | function will return a tuple containing accuracies of 21 | each ``topk`` number. 22 | """ 23 | assert isinstance(topk, (int, tuple)) 24 | if isinstance(topk, int): 25 | topk = (topk, ) 26 | return_single = True 27 | else: 28 | return_single = False 29 | 30 | maxk = max(topk) 31 | if pred.size(0) == 0: 32 | accu = [pred.new_tensor(0.) 
for i in range(len(topk))] 33 | return accu[0] if return_single else accu 34 | assert pred.ndim == 2 and target.ndim == 1 35 | assert pred.size(0) == target.size(0) 36 | assert maxk <= pred.size(1), \ 37 | f'maxk {maxk} exceeds pred dimension {pred.size(1)}' 38 | pred_value, pred_label = pred.topk(maxk, dim=1) 39 | pred_label = pred_label.t() # transpose to shape (maxk, N) 40 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 41 | if thresh is not None: 42 | # Only prediction values larger than thresh are counted as correct 43 | correct = correct & (pred_value > thresh).t() 44 | res = [] 45 | for k in topk: 46 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 47 | res.append(correct_k.mul_(100.0 / pred.size(0))) 48 | return res[0] if return_single else res 49 | 50 | 51 | class Accuracy(nn.Module): 52 | 53 | def __init__(self, topk=(1, ), thresh=None): 54 | """Module to calculate the accuracy. 55 | 56 | Args: 57 | topk (tuple, optional): The criterion used to calculate the 58 | accuracy. Defaults to (1,). 59 | thresh (float, optional): If not None, predictions with scores 60 | under this threshold are considered incorrect. Default to None. 61 | """ 62 | super().__init__() 63 | self.topk = topk 64 | self.thresh = thresh 65 | 66 | def forward(self, pred, target): 67 | """Forward function to calculate accuracy. 68 | 69 | Args: 70 | pred (torch.Tensor): Prediction of models. 71 | target (torch.Tensor): Target for each prediction. 72 | 73 | Returns: 74 | tuple[float]: The accuracies under different topk criterions. 75 | """ 76 | return accuracy(pred, target, self.topk, self.thresh) 77 | -------------------------------------------------------------------------------- /configs/base/datasets/bop_detection_mix.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'BOPDataset' 2 | data_root = 'data/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_bop_mask=True), 8 | dict(type='Resize', img_scale=(640, 480), keep_ratio=True), 9 | dict(type='RandomBackground', background_dir='data/coco', prob=0.3), 10 | dict(type='RandomHSV', h_ratio=0.2, s_ratio=0.5, v_ratio=0.5, prob=1.0), 11 | dict(type='RandomNoise', noise_ratio=0.1, prob=1.0), 12 | dict(type='RandomSmooth', max_kernel_size=7, prob=1.0), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='GenerateDistanceMap'), 15 | dict(type='LabelAssignment', 16 | anchor_generator_cfg=dict( 17 | type='AnchorGenerator', 18 | ratios=[1.0], 19 | octave_base_scale=8, 20 | scales_per_octave=1, 21 | strides=[8, 16, 32, 64, 128] 22 | ), 23 | neg_threshold=0.2, 24 | positive_num=10, 25 | adapt_positive_num=False, 26 | balance_sample=True, 27 | ), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=16), 30 | dict(type='DefaultFormatBundle'), 31 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'points_to_gt_index', 'points_weight']) 32 | ] 33 | test_pipeline = [ 34 | dict(type='LoadImageFromFile'), 35 | dict( 36 | type='MultiScaleFlipAug', 37 | img_scale=(640, 480), 38 | flip=False, 39 | transforms=[ 40 | dict(type='Resize', keep_ratio=True), 41 | dict(type='RandomFlip'), 42 | dict(type='Normalize', **img_norm_cfg), 43 | dict(type='Pad', size_divisor=32), 44 | dict(type='ImageToTensor', keys=['img']), 45 | dict(type='Collect', keys=['img']), 46 | ]) 47 | ] 48 | 49 | data = 
dict( 50 | samples_per_gpu=16, 51 | workers_per_gpu=4, 52 | train=dict( 53 | type='MixDataset', 54 | dataset_0=dict( 55 | type=dataset_type, 56 | ann_file=data_root + 'detector_annotations/train_pbr.json', 57 | img_prefix=data_root + 'train_pbr/', 58 | seg_prefix=data_root + 'train_pbr', 59 | pipeline=train_pipeline, 60 | ratio=1 61 | ), 62 | dataset_1=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 'detector_annotations/train_pbr.json', 65 | img_prefix=data_root + 'train_pbr/', 66 | seg_prefix=data_root + 'train_pbr', 67 | pipeline=train_pipeline, 68 | ratio=1 69 | ), 70 | ), 71 | val=dict( 72 | type=dataset_type, 73 | ann_file=data_root +'detector_annotations/test_bop19.json', 74 | img_prefix=data_root + 'test/', 75 | pipeline=test_pipeline, 76 | ), 77 | test=dict( 78 | type=dataset_type, 79 | ann_file=data_root + 'detector_annotations/test_bop19.json', 80 | img_prefix=data_root + 'test/', 81 | pipeline=test_pipeline, 82 | bop_submission=True, 83 | ), 84 | ) 85 | -------------------------------------------------------------------------------- /configs/bop/r50_icbin_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 2 7 | CLASS_NAMES = ('coffee_cup', 'juice_carton') 8 | 9 | model = dict( 10 | type='RADet', 11 | pretrained='torchvision://resnet50', 12 | backbone=dict( 13 | type='ResNet', 14 | depth=50, 15 | num_stages=4, 16 | out_indices=(0, 1, 2, 3), 17 | frozen_stages=1, 18 | norm_cfg=dict(type='BN', requires_grad=True), 19 | norm_eval=True, 20 | style='pytorch'), 21 | neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | start_level=1, 26 | add_extra_convs='on_output', 27 | num_outs=5), 28 | bbox_head=dict( 29 | type='RADetHead', 30 | num_classes=2, 31 | in_channels=256, 32 | stacked_convs=4, 33 | feat_channels=256, 34 | strides=[8, 16, 32, 64, 128], 35 | anchor_generator=dict( 36 | type='AnchorGenerator', 37 | ratios=[1.0], 38 | octave_base_scale=8, 39 | scales_per_octave=1, 40 | strides=[8, 16, 32, 64, 128]), 41 | bbox_coder=dict( 42 | type='TBLRBBoxCoder', 43 | normalizer=1/8), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0, 50 | ), 51 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 52 | loss_centerness=dict( 53 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 54 | ), 55 | ) 56 | 57 | train_cfg = dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.5, 61 | neg_iou_thr=0.4, 62 | min_pos_iou=0, 63 | ignore_iof_thr=-1), 64 | allowed_border=-1, 65 | pos_weight=-1, 66 | debug=False) 67 | 68 | test_cfg = dict( 69 | nms_pre=1000, 70 | min_bbox_size=0, 71 | score_thr=0.05, 72 | nms=dict(type='vote', 73 | iou_threshold=0.65, 74 | cluster_score=['cls', 'iou'], 75 | vote_score=['iou', 'cls'], 76 | iou_enable=False, 77 | sima=0.025,), 78 | max_per_img=100) 79 | 80 | 81 | data_root = 'data/icbin/' 82 | data = dict( 83 | samples_per_gpu=16, 84 | workers_per_gpu=8, 85 | train=dict( 86 | ann_file=data_root + 'detector_annotations/train_pbr.json', 87 | img_prefix=data_root + 'train_pbr/', 88 | seg_prefix=data_root + 'train_pbr/', 89 | classes=CLASS_NAMES, 90 | min_visib_frac=0.1, 91 | ), 92 | val=dict( 93 | ann_file=data_root +'detector_annotations/test_bop19.json', 94 | img_prefix=data_root + 'test/', 95 | classes=CLASS_NAMES, 96 | ), 97 | test=dict( 98 | ann_file=data_root + 
'detector_annotations/test_bop19.json', 99 | img_prefix=data_root + 'test/', 100 | classes=CLASS_NAMES, 101 | ) 102 | ) 103 | 104 | work_dir = 'work_dirs/icbin_r50_radet' -------------------------------------------------------------------------------- /configs/bop/r50_itodd_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 28 7 | CLASS_NAMES = tuple([i+1 for i in range(OBJ_NUM)]) 8 | 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=28, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sima=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/itodd/' 83 | data = dict( 84 | samples_per_gpu=16, 85 | workers_per_gpu=16, 86 | train=dict( 87 | ann_file=data_root + 'detector_annotations/train_pbr.json', 88 | img_prefix=data_root + 'train_pbr/', 89 | seg_prefix=data_root + 'train_pbr/', 90 | classes=CLASS_NAMES, 91 | min_visib_frac=0.1, 92 | ), 93 | val=dict( 94 | ann_file=data_root +'detector_annotations/val.json', 95 | img_prefix=data_root + 'val/', 96 | classes=CLASS_NAMES, 97 | ), 98 | test=dict( 99 | ann_file=data_root + 'detector_annotations/test_bop19.json', 100 | img_prefix=data_root + 'test/', 101 | classes=CLASS_NAMES, 102 | ) 103 | ) 104 | 105 | work_dir = 'work_dirs/itodd_r50_radet' -------------------------------------------------------------------------------- /configs/bop/r50_tudl_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('dragon', 'frog', 'can') 7 | 8 | 9 | model = dict( 10 | type='RADet', 11 | pretrained='torchvision://resnet50', 12 | backbone=dict( 13 | type='ResNet', 14 | depth=50, 15 | num_stages=4, 16 | out_indices=(0, 1, 2, 3), 17 | frozen_stages=1, 18 | norm_cfg=dict(type='BN', requires_grad=True), 19 | norm_eval=True, 20 | style='pytorch'), 21 | 
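# frozen_stages=1 freezes the stem and the first residual stage of the
# ImageNet-pretrained backbone; norm_eval=True additionally keeps all
# BatchNorm layers in eval mode during training, the standard mmdet
# recipe when fine-tuning with small per-GPU batch sizes.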
neck=dict( 22 | type='FPN', 23 | in_channels=[256, 512, 1024, 2048], 24 | out_channels=256, 25 | start_level=1, 26 | add_extra_convs='on_output', 27 | num_outs=5), 28 | bbox_head=dict( 29 | type='RADetHead', 30 | num_classes=3, 31 | in_channels=256, 32 | stacked_convs=4, 33 | feat_channels=256, 34 | strides=[8, 16, 32, 64, 128], 35 | anchor_generator=dict( 36 | type='AnchorGenerator', 37 | ratios=[1.0], 38 | octave_base_scale=8, 39 | scales_per_octave=1, 40 | strides=[8, 16, 32, 64, 128]), 41 | bbox_coder=dict( 42 | type='TBLRBBoxCoder', 43 | normalizer=1/8), 44 | loss_cls=dict( 45 | type='FocalLoss', 46 | use_sigmoid=True, 47 | gamma=2.0, 48 | alpha=0.25, 49 | loss_weight=1.0, 50 | ), 51 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 52 | loss_centerness=dict( 53 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 54 | ), 55 | ) 56 | 57 | train_cfg = dict( 58 | assigner=dict( 59 | type='MaxIoUAssigner', 60 | pos_iou_thr=0.5, 61 | neg_iou_thr=0.4, 62 | min_pos_iou=0, 63 | ignore_iof_thr=-1), 64 | allowed_border=-1, 65 | pos_weight=-1, 66 | debug=False) 67 | 68 | test_cfg = dict( 69 | nms_pre=1000, 70 | min_bbox_size=0, 71 | score_thr=0.05, 72 | nms=dict(type='vote', 73 | iou_threshold=0.65, 74 | cluster_score=['cls', 'iou'], 75 | vote_score=['iou', 'cls'], 76 | iou_enable=False, 77 | sima=0.025,), 78 | max_per_img=100) 79 | 80 | 81 | data_root = 'data/tudl/' 82 | data = dict( 83 | samples_per_gpu=16, 84 | workers_per_gpu=16, 85 | train=dict( 86 | ann_file=data_root + 'detector_annotations/train_pbr.json', 87 | img_prefix=data_root + 'train_pbr/', 88 | seg_prefix=data_root + 'train_pbr/', 89 | classes=CLASS_NAMES, 90 | min_visib_frac=0.1, 91 | ), 92 | val=dict( 93 | ann_file=data_root +'detector_annotations/test_bop19.json', 94 | img_prefix=data_root + 'test/', 95 | classes=CLASS_NAMES, 96 | ), 97 | test=dict( 98 | ann_file=data_root + 'detector_annotations/test_bop19.json', 99 | img_prefix=data_root + 'test/', 100 | classes=CLASS_NAMES, 101 | bop_submission=True, 102 | ) 103 | ) 104 | 105 | 106 | work_dir = 'work_dirs/tudl_r50_radet_pbr' -------------------------------------------------------------------------------- /configs/bop/r50_tless_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 30 7 | CLASS_NAMES = tuple([i+1 for i in range(OBJ_NUM)]) 8 | 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=30, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | 
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sima=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/tless/' 83 | data = dict( 84 | samples_per_gpu=16, 85 | workers_per_gpu=8, 86 | train=dict( 87 | ann_file=data_root + 'detector_annotations/train_pbr.json', 88 | img_prefix=data_root + 'train_pbr/', 89 | seg_prefix=data_root + 'train_pbr/', 90 | classes=CLASS_NAMES, 91 | min_visib_frac=0.1, 92 | ), 93 | val=dict( 94 | ann_file=data_root +'detector_annotations/test_bop19.json', 95 | img_prefix=data_root + 'test_primesense/', 96 | classes=CLASS_NAMES, 97 | ), 98 | test=dict( 99 | ann_file=data_root + 'detector_annotations/test_bop19.json', 100 | img_prefix=data_root + 'test_primesense/', 101 | classes=CLASS_NAMES, 102 | ) 103 | ) 104 | 105 | 106 | work_dir = 'work_dirs/tless_r50_radet_pbr' -------------------------------------------------------------------------------- /configs/bop/r50_hb_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 33 7 | CLASS_NAMES = tuple([i+1 for i in range(OBJ_NUM)]) 8 | TARGET_CLASS_NAMES= [1, 3, 4, 8, 9, 10, 12, 15, 17, 18, 19, 22, 23, 29, 32, 33] 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=16, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sima=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/hb/' 83 | data = dict( 84 | samples_per_gpu=16, 85 | workers_per_gpu=8, 86 | train=dict( 87 | 
ann_file=data_root + 'detector_annotations/train_pbr.json',
88 | img_prefix=data_root + 'train_pbr/',
89 | seg_prefix=data_root + 'train_pbr/',
90 | classes=TARGET_CLASS_NAMES,
91 | min_visib_frac=0.1,
92 | ),
93 | val=dict(
94 | ann_file=data_root +'detector_annotations/val.json',
95 | img_prefix=data_root + 'val_primesense/',
96 | classes=TARGET_CLASS_NAMES,
97 | ),
98 | test=dict(
99 | ann_file=data_root + 'detector_annotations/test_bop19.json',
100 | img_prefix=data_root + 'test_primesense/',
101 | classes=TARGET_CLASS_NAMES,
102 | bop_submission=True,
103 | )
104 | )
105 | 
106 | 
107 | work_dir = 'work_dirs/hb_r50_radet'
-------------------------------------------------------------------------------- /radet/models/losses/utils.py: --------------------------------------------------------------------------------
1 | import functools
2 | 
3 | import torch.nn.functional as F
4 | 
5 | 
6 | def reduce_loss(loss, reduction):
7 | """Reduce loss as specified.
8 | 
9 | Args:
10 | loss (Tensor): Elementwise loss tensor.
11 | reduction (str): Options are "none", "mean" and "sum".
12 | 
13 | Return:
14 | Tensor: Reduced loss tensor.
15 | """
16 | reduction_enum = F._Reduction.get_enum(reduction)
17 | # none: 0, elementwise_mean:1, sum: 2
18 | if reduction_enum == 0:
19 | return loss
20 | elif reduction_enum == 1:
21 | return loss.mean()
22 | elif reduction_enum == 2:
23 | return loss.sum()
24 | 
25 | 
26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):
27 | """Apply element-wise weight and reduce loss.
28 | 
29 | Args:
30 | loss (Tensor): Element-wise loss.
31 | weight (Tensor): Element-wise weights.
32 | reduction (str): Same as built-in losses of PyTorch.
33 | avg_factor (float): Average factor when computing the mean of losses.
34 | 
35 | Returns:
36 | Tensor: Processed loss values.
37 | """
38 | # if weight is specified, apply element-wise weight
39 | if weight is not None:
40 | loss = loss * weight
41 | 
42 | # if avg_factor is not specified, just reduce the loss
43 | if avg_factor is None:
44 | loss = reduce_loss(loss, reduction)
45 | else:
46 | # if reduction is mean, then average the loss by avg_factor
47 | if reduction == 'mean':
48 | loss = loss.sum() / avg_factor
49 | # if reduction is 'none', then do nothing, otherwise raise an error
50 | elif reduction != 'none':
51 | raise ValueError('avg_factor cannot be used with reduction="sum"')
52 | return loss
53 | 
54 | 
55 | def weighted_loss(loss_func):
56 | """Create a weighted version of a given loss function.
57 | 
58 | To use this decorator, the loss function must have the signature like
59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute
60 | element-wise loss without any reduction. This decorator will add weight
61 | and reduction arguments to the function. The decorated function will have
62 | the signature like `loss_func(pred, target, weight=None, reduction='mean',
63 | avg_factor=None, **kwargs)`.
64 | 
65 | :Example:
66 | 
67 | >>> import torch
68 | >>> @weighted_loss
69 | >>> def l1_loss(pred, target):
70 | >>>     return (pred - target).abs()
71 | 
72 | >>> pred = torch.Tensor([0, 2, 3])
73 | >>> target = torch.Tensor([1, 1, 1])
74 | >>> weight = torch.Tensor([1, 0, 1])
75 | 
76 | >>> l1_loss(pred, target)
77 | tensor(1.3333)
78 | >>> l1_loss(pred, target, weight)
79 | tensor(1.)
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /configs/bop/r50_lmo_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 15 7 | CLASS_NAMES = ('ape', 'benchvise', 'bowl', 'cam', 'can', 'cat', 'cup', 'driller', 'duck', 'eggbox', 'glue', 'holepuncher', 'iron','lamp', 'phone') 8 | TARGET_CLASS_NAMES = ['ape', 'can', 'cat', 'driller', 'duck', 'eggbox', 'glue', 'holepuncher'] 9 | 10 | 11 | model = dict( 12 | type='RADet', 13 | pretrained='torchvision://resnet50', 14 | backbone=dict( 15 | type='ResNet', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | frozen_stages=1, 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | style='pytorch'), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | start_level=1, 28 | add_extra_convs='on_output', 29 | num_outs=5), 30 | bbox_head=dict( 31 | type='RADetHead', 32 | num_classes=8, 33 | in_channels=256, 34 | stacked_convs=4, 35 | feat_channels=256, 36 | strides=[8, 16, 32, 64, 128], 37 | anchor_generator=dict( 38 | type='AnchorGenerator', 39 | ratios=[1.0], 40 | octave_base_scale=8, 41 | scales_per_octave=1, 42 | strides=[8, 16, 32, 64, 128]), 43 | bbox_coder=dict( 44 | type='TBLRBBoxCoder', 45 | normalizer=1/8), 46 | loss_cls=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0, 52 | ), 53 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 54 | loss_centerness=dict( 55 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 56 | ), 57 | ) 58 | 59 | train_cfg = dict( 60 | assigner=dict( 61 | type='MaxIoUAssigner', 62 | pos_iou_thr=0.5, 63 | neg_iou_thr=0.4, 64 | min_pos_iou=0, 65 | ignore_iof_thr=-1), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False) 69 | 70 | test_cfg = dict( 71 | nms_pre=1000, 72 | min_bbox_size=0, 73 | score_thr=0.05, 74 | nms=dict(type='vote', 75 | iou_threshold=0.65, 76 | cluster_score=['cls', 'iou'], 77 | vote_score=['iou', 'cls'], 78 | iou_enable=False, 79 | sima=0.025,), 80 | max_per_img=100) 81 | 82 | 83 | data_root = 'data/lmo/' 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | ann_file=data_root + 'detector_annotations/train_pbr.json', 89 | img_prefix=data_root + 'train_pbr/', 90 | seg_prefix=data_root + 'train_pbr/', 91 | classes=TARGET_CLASS_NAMES, 92 | min_visib_frac=0.1, 93 | ), 94 | val=dict( 95 | ann_file=data_root +'detector_annotations/test_bop19.json', 96 | img_prefix=data_root + 'test/', 97 | classes=TARGET_CLASS_NAMES, 98 | ), 99 | test=dict( 100 | ann_file=data_root + 'detector_annotations/test_bop19.json', 101 | img_prefix=data_root + 'test/', 102 | classes=TARGET_CLASS_NAMES, 103 | ) 104 | ) 105 | 106 | 107 | work_dir = 'work_dirs/lmo_r50_radet' -------------------------------------------------------------------------------- 
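The BOP configs above all share the same mmdet-style skeleton and differ only in class names, head width, and data paths. As a quick orientation, here is a minimal sketch of how such a config is typically consumed; it is illustrative only: build_dataset is the entry point tools/eval_metric.py already imports, build_detector is assumed to be exposed by radet.models in the usual mmdet layout, and train_cfg/test_cfg are read from the top level, where these configs define them.

    from mmcv import Config
    from radet.datasets import build_dataset
    from radet.models import build_detector  # assumed mmdet-style export

    cfg = Config.fromfile('configs/bop/r50_lmo_pbr.py')
    # these configs keep train_cfg / test_cfg at the top level, so pass them in
    model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
    dataset = build_dataset(cfg.data.train)
    print(type(model).__name__, 'with', len(dataset), 'training images')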
/radet/ops/cluster/cluster_ext.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | 4 | std::vector<torch::Tensor> nms_cluster(torch::Tensor &bboxes, 5 | torch::Tensor &scores, 6 | torch::Tensor &labels, 7 | float_t nms_threshold){ 8 | auto order_indices = std::get<1>(torch::sort(scores, 0, true)); 9 | auto suppressed = torch::zeros_like(scores, torch::kBool); 10 | auto instances_id = torch::zeros_like(scores, torch::kInt64); 11 | auto clusters_num = torch::zeros_like(scores, torch::kInt64); 12 | 13 | auto x1 = bboxes.select(1, 0).contiguous(); 14 | auto y1 = bboxes.select(1, 1).contiguous(); 15 | auto x2 = bboxes.select(1, 2).contiguous(); 16 | auto y2 = bboxes.select(1, 3).contiguous(); 17 | 18 | auto order_indices_t = order_indices.data_ptr<int64_t>(); 19 | auto x1_t = x1.data_ptr<float>(); 20 | auto y1_t = y1.data_ptr<float>(); 21 | auto x2_t = x2.data_ptr<float>(); 22 | auto y2_t = y2.data_ptr<float>(); 23 | auto labels_t = labels.data_ptr<int64_t>(); 24 | auto suppressed_t = suppressed.data_ptr<bool>(); 25 | auto instances_id_t = instances_id.data_ptr<int64_t>(); 26 | auto clusters_num_t = clusters_num.data_ptr<int64_t>(); 27 | 28 | int ndets = scores.size(0); 29 | int64_t instance_id = 0; 30 | int64_t cluster_num = 0; 31 | 32 | for (int i = 0; i < ndets; i++){ 33 | // the highest-scoring unsuppressed box opens a new cluster 34 | auto index = order_indices_t[i]; 35 | if (suppressed_t[index]) continue; 36 | auto x1_i = x1_t[index]; 37 | auto y1_i = y1_t[index]; 38 | auto x2_i = x2_t[index]; 39 | auto y2_i = y2_t[index]; 40 | auto area_i = (x2_i - x1_i) * (y2_i - y1_i); 41 | cluster_num = 0; 42 | 43 | for (int j = i + 1; j < ndets; j++){ 44 | auto index_j = order_indices_t[j]; 45 | if (suppressed_t[index_j]) continue; 46 | if (labels_t[index_j] != labels_t[index]) continue; 47 | auto x1_j = x1_t[index_j]; 48 | auto y1_j = y1_t[index_j]; 49 | auto x2_j = x2_t[index_j]; 50 | auto y2_j = y2_t[index_j]; 51 | // intersection between box i and box j 52 | auto x_l = std::max(x1_i, x1_j); 53 | auto x_r = std::min(x2_i, x2_j); 54 | auto y_t = std::max(y1_i, y1_j); 55 | auto y_b = std::min(y2_i, y2_j); 56 | auto inter_w = std::max(static_cast<float>(0), x_r - x_l); 57 | auto inter_h = std::max(static_cast<float>(0), y_b - y_t); 58 | auto inter = inter_w * inter_h; 59 | auto area_j = (x2_j - x1_j) * (y2_j - y1_j); 60 | auto iou = inter / (area_j + area_i - inter); 61 | 62 | if (iou > nms_threshold){ 63 | instances_id_t[index_j] = instance_id; 64 | suppressed_t[index_j] = true; 65 | cluster_num++; 66 | } 67 | 68 | } 69 | instances_id_t[index] = instance_id; 70 | clusters_num_t[index] = cluster_num; 71 | instance_id++; 72 | 73 | } 74 | return {instances_id, clusters_num}; 75 | } 76 | 77 | 78 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m){ 79 | m.def("cluster_nms", &nms_cluster, "nms for cluster"); 80 | } -------------------------------------------------------------------------------- /tools/show_bop_detbbox.py: -------------------------------------------------------------------------------- 1 | import json 2 | import argparse 3 | from radet.core.visualization import imshow_det_bboxes 4 | import os 5 | from os import path as osp 6 | import numpy as np 7 | 8 | class_names_cfg = dict( 9 | icbin=('coffee_cup', 'juice_carton'), 10 | tudl=('dragon', 'frog', 'can'), 11 | lmo=('ape', 'benchvise', 'bowl', 'cam', 'can', 'cat', 'cup', 'driller', 'duck', 'eggbox', 'glue', 'holepuncher', 'iron', 'lamp', 'phone'), 12 | ycbv=('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can', 'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box', 13 | 'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser', 'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker', 14 | 'large_clamp', 'extra_large_clamp', 'foam_brick'), 15 | hb=tuple([str(i+1) for i in range(33)]), 16 | itodd=tuple([str(i+1) for i in range(28)]), 17 | tless=tuple([str(i+1) for i in range(30)]), 18 | ) 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('image_dir') 23 | parser.add_argument('result_json') 24 | parser.add_argument('save_dir') 25 | parser.add_argument('--show-score-thr', type=float, default=0.3) 26 | parser.add_argument('--dataset', choices=['icbin', 'itodd', 'ycbv', 'lmo', 'tless', 'hb', 'tudl']) 27 | parser.add_argument('--ext', default='jpg') 28 | args = parser.parse_args() 29 | return args 30 | 31 | 32 | if __name__ == '__main__': 33 | args = parse_args() 34 | image_dir, result_json, save_dir,
show_score_thr, dataset, ext = args.image_dir, args.result_json, args.save_dir, args.show_score_thr, args.dataset, args.ext 35 | class_names = class_names_cfg[dataset] 36 | with open(result_json, 'r') as f: 37 | detect_result = json.load(f) 38 | 39 | formated_results = dict() 40 | for pred in detect_result: 41 | scene_id, image_id = pred['scene_id'], pred['image_id'] 42 | bbox, score = pred['bbox'], pred['score'] 43 | category_id = pred['category_id'] 44 | if scene_id not in formated_results: 45 | formated_results[scene_id] = {} 46 | if image_id not in formated_results[scene_id]: 47 | formated_results[scene_id][image_id] = {'bbox':[], 'score':[], 'label':[]} 48 | bbox[2] = bbox[0] + bbox[2] 49 | bbox[3] = bbox[1] + bbox[3] 50 | formated_results[scene_id][image_id]['bbox'].append(bbox) 51 | formated_results[scene_id][image_id]['score'].append(score) 52 | formated_results[scene_id][image_id]['label'].append(category_id) 53 | 54 | for scene_id in formated_results: 55 | for image_id in formated_results[scene_id]: 56 | image = osp.join(image_dir, f"{scene_id:06d}", "rgb", f"{image_id:06d}.{ext}") 57 | save_image = osp.join(save_dir, f"{scene_id:06d}", "rgb", f"{image_id:06d}.{ext}") 58 | os.makedirs(osp.dirname(save_image), exist_ok=True) 59 | result = formated_results[scene_id][image_id] 60 | imshow_det_bboxes( 61 | image, 62 | np.concatenate([np.array(result['bbox']).reshape(-1, 4), np.array(result['score']).reshape(-1, 1)], axis=-1), 63 | np.array(result['label']) -1, 64 | score_thr=show_score_thr, 65 | show=False, 66 | out_file=save_image, 67 | class_names=class_names, 68 | bbox_color=(72, 101, 241), 69 | text_color=(72, 101, 241), 70 | ) 71 | -------------------------------------------------------------------------------- /configs/bop/r50_ycbv_pbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can', 'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box', 7 | 'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser', 'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker', 8 | 'large_clamp', 'extra_large_clamp', 'foam_brick') 9 | 10 | 11 | model = dict( 12 | type='RADet', 13 | pretrained='torchvision://resnet50', 14 | backbone=dict( 15 | type='ResNet', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | frozen_stages=1, 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | style='pytorch'), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | start_level=1, 28 | add_extra_convs='on_output', 29 | num_outs=5), 30 | bbox_head=dict( 31 | type='RADetHead', 32 | num_classes=21, 33 | in_channels=256, 34 | stacked_convs=4, 35 | feat_channels=256, 36 | strides=[8, 16, 32, 64, 128], 37 | anchor_generator=dict( 38 | type='AnchorGenerator', 39 | ratios=[1.0], 40 | octave_base_scale=8, 41 | scales_per_octave=1, 42 | strides=[8, 16, 32, 64, 128]), 43 | bbox_coder=dict( 44 | type='TBLRBBoxCoder', 45 | normalizer=1/8), 46 | loss_cls=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0, 52 | ), 53 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 54 | loss_centerness=dict( 55 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 56 | ), 57 | ) 58 | 59 | train_cfg = dict( 60 | assigner=dict( 61 | 
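# MaxIoUAssigner: anchors whose best IoU with a ground-truth box reaches
# pos_iou_thr become positives, anchors below neg_iou_thr become negatives,
# and anchors in between are ignored by the loss; ignore_iof_thr=-1 disables
# the ignore-region check.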
type='MaxIoUAssigner', 62 | pos_iou_thr=0.5, 63 | neg_iou_thr=0.4, 64 | min_pos_iou=0, 65 | ignore_iof_thr=-1), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False) 69 | 70 | test_cfg = dict( 71 | nms_pre=1000, 72 | min_bbox_size=0, 73 | score_thr=0.05, 74 | nms=dict(type='vote', 75 | iou_threshold=0.65, 76 | cluster_score=['cls', 'iou'], 77 | vote_score=['iou', 'cls'], 78 | iou_enable=False, 79 | sigma=0.025,), 80 | max_per_img=100) 81 | 82 | 83 | data_root = 'data/ycbv/' 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | ann_file=data_root + 'detector_annotations/train_pbr.json', 89 | img_prefix=data_root + 'train_pbr/', 90 | seg_prefix=data_root + 'train_pbr/', 91 | classes=CLASS_NAMES, 92 | min_visib_frac=0.1, 93 | ), 94 | val=dict( 95 | ann_file=data_root + 'detector_annotations/test_bop19.json', 96 | img_prefix=data_root + 'test/', 97 | classes=CLASS_NAMES, 98 | ), 99 | test=dict( 100 | ann_file=data_root + 'detector_annotations/test_bop19.json', 101 | img_prefix=data_root + 'test/', 102 | classes=CLASS_NAMES, 103 | bop_submission=True, 104 | ) 105 | ) 106 | 107 | work_dir = 'work_dirs/ycbv_r50_radet_pbr' -------------------------------------------------------------------------------- /configs/bop/r50_tudl_mixpbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection_mix.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('dragon', 'frog', 'can') 7 | 8 | model = dict( 9 | type='RADet', 10 | pretrained='torchvision://resnet50', 11 | backbone=dict( 12 | type='ResNet', 13 | depth=50, 14 | num_stages=4, 15 | out_indices=(0, 1, 2, 3), 16 | frozen_stages=1, 17 | norm_cfg=dict(type='BN', requires_grad=True), 18 | norm_eval=True, 19 | style='pytorch'), 20 | neck=dict( 21 | type='FPN', 22 | in_channels=[256, 512, 1024, 2048], 23 | out_channels=256, 24 | start_level=1, 25 | add_extra_convs='on_output', 26 | num_outs=5), 27 | bbox_head=dict( 28 | type='RADetHead', 29 | num_classes=3, 30 | in_channels=256, 31 | stacked_convs=4, 32 | feat_channels=256, 33 | strides=[8, 16, 32, 64, 128], 34 | anchor_generator=dict( 35 | type='AnchorGenerator', 36 | ratios=[1.0], 37 | octave_base_scale=8, 38 | scales_per_octave=1, 39 | strides=[8, 16, 32, 64, 128]), 40 | bbox_coder=dict( 41 | type='TBLRBBoxCoder', 42 | normalizer=1/8), 43 | loss_cls=dict( 44 | type='FocalLoss', 45 | use_sigmoid=True, 46 | gamma=2.0, 47 | alpha=0.25, 48 | loss_weight=1.0, 49 | ), 50 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 51 | loss_centerness=dict( 52 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 53 | ), 54 | ) 55 | 56 | train_cfg = dict( 57 | assigner=dict( 58 | type='MaxIoUAssigner', 59 | pos_iou_thr=0.5, 60 | neg_iou_thr=0.4, 61 | min_pos_iou=0, 62 | ignore_iof_thr=-1), 63 | allowed_border=-1, 64 | pos_weight=-1, 65 | debug=False) 66 | 67 | test_cfg = dict( 68 | nms_pre=1000, 69 | min_bbox_size=0, 70 | score_thr=0.05, 71 | nms=dict(type='vote', 72 | iou_threshold=0.65, 73 | cluster_score=['cls', 'iou'], 74 | vote_score=['iou', 'cls'], 75 | iou_enable=False, 76 | sigma=0.025,), 77 | max_per_img=100) 78 | 79 | 80 | data_root = 'data/tudl/' 81 | data = dict( 82 | samples_per_gpu=16, 83 | workers_per_gpu=8, 84 | train=dict( 85 | dataset_0=dict( 86 | ann_file=data_root + 'detector_annotations/train_pbr.json', 87 | img_prefix=data_root + 'train_pbr/', 88 | seg_prefix=data_root + 'train_pbr/', 89 | min_visib_frac=0.1, 90 | ratio=1, 91 | classes=CLASS_NAMES, 92 | ),
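# The mix base config draws training samples from both sources below;
# 'ratio' appears to set the relative sampling weight of each annotation
# file (1:1 here, while the tless and ycbv mix configs use 3:1 and 2:1
# PBR-to-real).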
93 | dataset_1=dict( 94 | ann_file=data_root + 'detector_annotations/train_real.json', 95 | img_prefix=data_root + 'train_real/', 96 | seg_prefix=data_root + 'train_real/', 97 | ratio=1, 98 | classes=CLASS_NAMES, 99 | ) 100 | ), 101 | val=dict( 102 | ann_file=data_root + 'detector_annotations/test_bop19.json', 103 | img_prefix=data_root + 'test/', 104 | classes=CLASS_NAMES, 105 | ), 106 | test=dict( 107 | ann_file=data_root + 'detector_annotations/test_bop19.json', 108 | img_prefix=data_root + 'test/', 109 | classes=CLASS_NAMES, 110 | ) 111 | ) 112 | 113 | 114 | load_from = 'work_dirs/tudl_r50_radet_pbr/latest.pth' 115 | work_dir = 'work_dirs/tudl_r50_radet_mixpbr' -------------------------------------------------------------------------------- /configs/bop/r50_tless_mixpbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection_mix.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | OBJ_NUM = 30 7 | CLASS_NAMES = tuple([str(i+1) for i in range(OBJ_NUM)]) 8 | 9 | 10 | model = dict( 11 | type='RADet', 12 | pretrained='torchvision://resnet50', 13 | backbone=dict( 14 | type='ResNet', 15 | depth=50, 16 | num_stages=4, 17 | out_indices=(0, 1, 2, 3), 18 | frozen_stages=1, 19 | norm_cfg=dict(type='BN', requires_grad=True), 20 | norm_eval=True, 21 | style='pytorch'), 22 | neck=dict( 23 | type='FPN', 24 | in_channels=[256, 512, 1024, 2048], 25 | out_channels=256, 26 | start_level=1, 27 | add_extra_convs='on_output', 28 | num_outs=5), 29 | bbox_head=dict( 30 | type='RADetHead', 31 | num_classes=30, 32 | in_channels=256, 33 | stacked_convs=4, 34 | feat_channels=256, 35 | strides=[8, 16, 32, 64, 128], 36 | anchor_generator=dict( 37 | type='AnchorGenerator', 38 | ratios=[1.0], 39 | octave_base_scale=8, 40 | scales_per_octave=1, 41 | strides=[8, 16, 32, 64, 128]), 42 | bbox_coder=dict( 43 | type='TBLRBBoxCoder', 44 | normalizer=1/8), 45 | loss_cls=dict( 46 | type='FocalLoss', 47 | use_sigmoid=True, 48 | gamma=2.0, 49 | alpha=0.25, 50 | loss_weight=1.0, 51 | ), 52 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 53 | loss_centerness=dict( 54 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 55 | ), 56 | ) 57 | 58 | train_cfg = dict( 59 | assigner=dict( 60 | type='MaxIoUAssigner', 61 | pos_iou_thr=0.5, 62 | neg_iou_thr=0.4, 63 | min_pos_iou=0, 64 | ignore_iof_thr=-1), 65 | allowed_border=-1, 66 | pos_weight=-1, 67 | debug=False) 68 | 69 | test_cfg = dict( 70 | nms_pre=1000, 71 | min_bbox_size=0, 72 | score_thr=0.05, 73 | nms=dict(type='vote', 74 | iou_threshold=0.65, 75 | cluster_score=['cls', 'iou'], 76 | vote_score=['iou', 'cls'], 77 | iou_enable=False, 78 | sigma=0.025,), 79 | max_per_img=100) 80 | 81 | 82 | data_root = 'data/tless/' 83 | 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | dataset_0=dict( 89 | ann_file=data_root + 'detector_annotations/train_pbr.json', 90 | img_prefix=data_root + 'train_pbr/', 91 | seg_prefix=data_root + 'train_pbr/', 92 | min_visib_frac=0.1, 93 | ratio=3, 94 | classes=CLASS_NAMES, 95 | ), 96 | dataset_1=dict( 97 | ann_file=data_root + 'detector_annotations/train_real.json', 98 | img_prefix=data_root + 'train_primesense/', 99 | seg_prefix=data_root + 'train_primesense/', 100 | ratio=1, 101 | classes=CLASS_NAMES, 102 | ) 103 | ), 104 | val=dict( 105 | ann_file=data_root + 'detector_annotations/test_bop19.json', 106 | img_prefix=data_root + 'test_primesense/', 107 | classes=CLASS_NAMES, 108 | ), 109 | test=dict( 110 | ann_file=data_root +
'detector_annotations/test_bop19.json', 111 | img_prefix=data_root + 'test_primesense/', 112 | classes=CLASS_NAMES, 113 | ) 114 | ) 115 | 116 | 117 | load_from = 'work_dirs/tless_r50_radet_pbr/latest.pth' 118 | work_dir = 'work_dirs/tless_r50_radet_mixpbr' -------------------------------------------------------------------------------- /radet/core/bbox/coder/yolo_bbox_coder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_CODERS 4 | from .base_bbox_coder import BaseBBoxCoder 5 | 6 | 7 | @BBOX_CODERS.register_module() 8 | class YOLOBBoxCoder(BaseBBoxCoder): 9 | """YOLO BBox coder. 10 | 11 | Following `YOLO <https://arxiv.org/abs/1506.02640>`_, this coder divides the 12 | image into grids, and encodes bbox (x1, y1, x2, y2) into (cx, cy, dw, dh). 13 | cx, cy in [0., 1.] denote the relative center position w.r.t. the center of 14 | bboxes. dw, dh are the same as :obj:`DeltaXYWHBBoxCoder`. 15 | 16 | Args: 17 | eps (float): Min value of cx, cy when encoding. 18 | """ 19 | 20 | def __init__(self, eps=1e-6): 21 | super(BaseBBoxCoder, self).__init__() 22 | self.eps = eps 23 | 24 | def encode(self, bboxes, gt_bboxes, stride): 25 | """Get box regression transformation deltas that can be used to 26 | transform the ``bboxes`` into the ``gt_bboxes``. 27 | 28 | Args: 29 | bboxes (torch.Tensor): Source boxes, e.g., anchors. 30 | gt_bboxes (torch.Tensor): Target of the transformation, e.g., 31 | ground-truth boxes. 32 | stride (torch.Tensor | int): Stride of bboxes. 33 | 34 | Returns: 35 | torch.Tensor: Box transformation deltas 36 | """ 37 | 38 | assert bboxes.size(0) == gt_bboxes.size(0) 39 | assert bboxes.size(-1) == gt_bboxes.size(-1) == 4 40 | x_center_gt = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) * 0.5 41 | y_center_gt = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) * 0.5 42 | w_gt = gt_bboxes[..., 2] - gt_bboxes[..., 0] 43 | h_gt = gt_bboxes[..., 3] - gt_bboxes[..., 1] 44 | x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5 45 | y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5 46 | w = bboxes[..., 2] - bboxes[..., 0] 47 | h = bboxes[..., 3] - bboxes[..., 1] 48 | w_target = torch.log((w_gt / w).clamp(min=self.eps)) 49 | h_target = torch.log((h_gt / h).clamp(min=self.eps)) 50 | x_center_target = ((x_center_gt - x_center) / stride + 0.5).clamp( 51 | self.eps, 1 - self.eps) 52 | y_center_target = ((y_center_gt - y_center) / stride + 0.5).clamp( 53 | self.eps, 1 - self.eps) 54 | encoded_bboxes = torch.stack( 55 | [x_center_target, y_center_target, w_target, h_target], dim=-1) 56 | return encoded_bboxes 57 | 58 | def decode(self, bboxes, pred_bboxes, stride): 59 | """Apply transformation `pred_bboxes` to `bboxes`. 60 | 61 | Args: 62 | bboxes (torch.Tensor): Basic boxes, e.g. anchors. 63 | pred_bboxes (torch.Tensor): Encoded boxes with shape (N, 4). 64 | stride (torch.Tensor | int): Strides of bboxes. 65 | 66 | Returns: 67 | torch.Tensor: Decoded boxes.
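Example:
    >>> # identity case: centered offsets and zero log-scales give back
    >>> # the anchor box itself
    >>> coder = YOLOBBoxCoder()
    >>> anchors = torch.Tensor([[0., 0., 8., 8.]])
    >>> pred = torch.Tensor([[0.5, 0.5, 0., 0.]])
    >>> coder.decode(anchors, pred, stride=8)
    tensor([[0., 0., 8., 8.]])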
68 | """ 69 | assert pred_bboxes.size(0) == bboxes.size(0) 70 | assert pred_bboxes.size(-1) == bboxes.size(-1) == 4 71 | x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5 72 | y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5 73 | w = bboxes[..., 2] - bboxes[..., 0] 74 | h = bboxes[..., 3] - bboxes[..., 1] 75 | # Get outputs x, y 76 | x_center_pred = (pred_bboxes[..., 0] - 0.5) * stride + x_center 77 | y_center_pred = (pred_bboxes[..., 1] - 0.5) * stride + y_center 78 | w_pred = torch.exp(pred_bboxes[..., 2]) * w 79 | h_pred = torch.exp(pred_bboxes[..., 3]) * h 80 | 81 | decoded_bboxes = torch.stack( 82 | (x_center_pred - w_pred / 2, y_center_pred - h_pred / 2, 83 | x_center_pred + w_pred / 2, y_center_pred + h_pred / 2), 84 | dim=-1) 85 | 86 | return decoded_bboxes 87 | -------------------------------------------------------------------------------- /radet/datasets/pipelines/instaboost.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from ..builder import PIPELINES 4 | 5 | 6 | @PIPELINES.register_module() 7 | class InstaBoost(object): 8 | r"""Data augmentation method in `InstaBoost: Boosting Instance 9 | Segmentation Via Probability Map Guided Copy-Pasting 10 | `_. 11 | 12 | Refer to https://github.com/GothicAi/Instaboost for implementation details. 13 | """ 14 | 15 | def __init__(self, 16 | action_candidate=('normal', 'horizontal', 'skip'), 17 | action_prob=(1, 0, 0), 18 | scale=(0.8, 1.2), 19 | dx=15, 20 | dy=15, 21 | theta=(-1, 1), 22 | color_prob=0.5, 23 | hflag=False, 24 | aug_ratio=0.5): 25 | try: 26 | import instaboostfast as instaboost 27 | except ImportError: 28 | raise ImportError( 29 | 'Please run "pip install instaboostfast" ' 30 | 'to install instaboostfast first for instaboost augmentation.') 31 | self.cfg = instaboost.InstaBoostConfig(action_candidate, action_prob, 32 | scale, dx, dy, theta, 33 | color_prob, hflag) 34 | self.aug_ratio = aug_ratio 35 | 36 | def _load_anns(self, results): 37 | labels = results['ann_info']['labels'] 38 | masks = results['ann_info']['masks'] 39 | bboxes = results['ann_info']['bboxes'] 40 | n = len(labels) 41 | 42 | anns = [] 43 | for i in range(n): 44 | label = labels[i] 45 | bbox = bboxes[i] 46 | mask = masks[i] 47 | x1, y1, x2, y2 = bbox 48 | # assert (x2 - x1) >= 1 and (y2 - y1) >= 1 49 | bbox = [x1, y1, x2 - x1, y2 - y1] 50 | anns.append({ 51 | 'category_id': label, 52 | 'segmentation': mask, 53 | 'bbox': bbox 54 | }) 55 | 56 | return anns 57 | 58 | def _parse_anns(self, results, anns, img): 59 | gt_bboxes = [] 60 | gt_labels = [] 61 | gt_masks_ann = [] 62 | for ann in anns: 63 | x1, y1, w, h = ann['bbox'] 64 | # TODO: more essential bug need to be fixed in instaboost 65 | if w <= 0 or h <= 0: 66 | continue 67 | bbox = [x1, y1, x1 + w, y1 + h] 68 | gt_bboxes.append(bbox) 69 | gt_labels.append(ann['category_id']) 70 | gt_masks_ann.append(ann['segmentation']) 71 | gt_bboxes = np.array(gt_bboxes, dtype=np.float32) 72 | gt_labels = np.array(gt_labels, dtype=np.int64) 73 | results['ann_info']['labels'] = gt_labels 74 | results['ann_info']['bboxes'] = gt_bboxes 75 | results['ann_info']['masks'] = gt_masks_ann 76 | results['img'] = img 77 | return results 78 | 79 | def __call__(self, results): 80 | img = results['img'] 81 | orig_type = img.dtype 82 | anns = self._load_anns(results) 83 | if np.random.choice([0, 1], p=[1 - self.aug_ratio, self.aug_ratio]): 84 | try: 85 | import instaboostfast as instaboost 86 | except ImportError: 87 | raise ImportError('Please run "pip install 
instaboostfast" ' 88 | 'to install instaboostfast first.') 89 | anns, img = instaboost.get_new_data( 90 | anns, img.astype(np.uint8), self.cfg, background=None) 91 | 92 | results = self._parse_anns(results, anns, img.astype(orig_type)) 93 | return results 94 | 95 | def __repr__(self): 96 | repr_str = self.__class__.__name__ 97 | repr_str += f'(cfg={self.cfg}, aug_ratio={self.aug_ratio})' 98 | return repr_str 99 | -------------------------------------------------------------------------------- /radet/ops/vote/vote_wrapper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from . import vote_ext 3 | 4 | 5 | 6 | 7 | def vote_nms(bboxes, cls_scores, labels, nms_cfg, score_factor=None, max_num=0): 8 | nms_cfg_ = nms_cfg.copy() 9 | nms_threshold = nms_cfg_.pop('iou_threshold', 0.6) 10 | cluster_score_type = nms_cfg_.pop('cluster_score', 'cls') 11 | vote_score_type = nms_cfg_.pop('vote_score', 'iou') 12 | iou_enable = nms_cfg_.pop('iou_enable', False) 13 | sigma = nms_cfg_.pop('sigma', 0.025) 14 | if isinstance(cluster_score_type, (list, tuple)): 15 | cluster_score = cls_scores * score_factor 16 | elif cluster_score_type == 'cls': 17 | cluster_score = cls_scores 18 | elif cluster_score_type == 'iou': 19 | cluster_score = score_factor 20 | else: 21 | raise RuntimeError(f"Unexpected cluster score type:{cluster_score_type}") 22 | 23 | if isinstance(vote_score_type, (list, tuple)): 24 | vote_score = (cls_scores * score_factor).clone() 25 | elif vote_score_type == 'cls': 26 | vote_score = cls_scores 27 | elif vote_score_type == 'iou': 28 | vote_score = score_factor 29 | else: 30 | raise RuntimeError(f"Unexpected vote score type:{vote_score_type}") 31 | 32 | voted_bboxes, voted_labels, voted_scores = vote_ext.vote_nms(bboxes, 33 | cluster_score, 34 | vote_score, 35 | labels, 36 | nms_threshold, 37 | iou_enable, 38 | sigma) 39 | voted_bboxes = torch.cat([voted_bboxes, voted_scores.view(-1, 1)], dim=-1) 40 | if max_num > 0: 41 | voted_bboxes = voted_bboxes[:max_num] 42 | voted_labels = voted_labels[:max_num] 43 | return voted_bboxes, voted_labels 44 | 45 | 46 | 47 | def global_vote_nms(bboxes, cls_scores, labels, nms_cfg, score_factor=None, max_num=0): 48 | nms_cfg_ = nms_cfg.copy() 49 | nms_threshold = nms_cfg_.pop('iou_threshold', 0.6) 50 | cluster_score_type = nms_cfg_.pop('cluster_score', 'cls') 51 | vote_score_type = nms_cfg_.pop('vote_score', 'iou') 52 | iou_enable = nms_cfg_.pop('iou_enable', False) 53 | sigma = nms_cfg_.pop('sigma', 0.025) 54 | if isinstance(cluster_score_type, (list, tuple)): 55 | cluster_score = cls_scores * score_factor 56 | elif cluster_score_type == 'cls': 57 | cluster_score = cls_scores 58 | elif cluster_score_type == 'iou': 59 | cluster_score = score_factor 60 | else: 61 | raise RuntimeError(f"Unexpected cluster score type:{cluster_score_type}") 62 | 63 | if isinstance(vote_score_type, (list, tuple)): 64 | vote_score = (cls_scores * score_factor).clone() 65 | elif vote_score_type == 'cls': 66 | vote_score = cls_scores 67 | elif vote_score_type == 'iou': 68 | vote_score = score_factor 69 | else: 70 | raise RuntimeError(f"Unexpected vote score type:{vote_score_type}") 71 | 72 | voted_bboxes, voted_labels, voted_scores = vote_ext.global_vote_nms(bboxes, 73 | cluster_score, 74 | vote_score, 75 | labels, 76 | nms_threshold, 77 | iou_enable, 78 | sigma) 79 | voted_bboxes = torch.cat([voted_bboxes, voted_scores.view(-1, 1)], dim=-1) 80 | if max_num > 0: 81 | voted_bboxes = voted_bboxes[:max_num] 82 | voted_labels = 
voted_labels[:max_num] 83 | return voted_bboxes, voted_labels -------------------------------------------------------------------------------- /configs/bop/r50_ycbv_mixpbr.py: -------------------------------------------------------------------------------- 1 | _base_ = [ 2 | '../base/datasets/bop_detection_mix.py', 3 | '../base/default_runtime.py'] 4 | 5 | 6 | CLASS_NAMES = ('master_chef_can', 'cracker_box', 'sugar_box', 'tomato_soup_can', 'mustard_bottle', 'tuna_fish_can', 'pudding_box', 'gelatin_box', 7 | 'potted_meat_can', 'banana', 'pitcher_base', 'bleach_cleanser', 'bowl', 'mug', 'power_drill', 'wood_block', 'scissors', 'large_marker', 8 | 'large_clamp', 'extra_large_clamp', 'foam_brick') 9 | 10 | 11 | model = dict( 12 | type='RADet', 13 | pretrained='torchvision://resnet50', 14 | backbone=dict( 15 | type='ResNet', 16 | depth=50, 17 | num_stages=4, 18 | out_indices=(0, 1, 2, 3), 19 | frozen_stages=1, 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | style='pytorch'), 23 | neck=dict( 24 | type='FPN', 25 | in_channels=[256, 512, 1024, 2048], 26 | out_channels=256, 27 | start_level=1, 28 | add_extra_convs='on_output', 29 | num_outs=5), 30 | bbox_head=dict( 31 | type='RADetHead', 32 | num_classes=21, 33 | in_channels=256, 34 | stacked_convs=4, 35 | feat_channels=256, 36 | strides=[8, 16, 32, 64, 128], 37 | anchor_generator=dict( 38 | type='AnchorGenerator', 39 | ratios=[1.0], 40 | octave_base_scale=8, 41 | scales_per_octave=1, 42 | strides=[8, 16, 32, 64, 128]), 43 | bbox_coder=dict( 44 | type='TBLRBBoxCoder', 45 | normalizer=1/8), 46 | loss_cls=dict( 47 | type='FocalLoss', 48 | use_sigmoid=True, 49 | gamma=2.0, 50 | alpha=0.25, 51 | loss_weight=1.0, 52 | ), 53 | loss_bbox=dict(type='GIoULoss', loss_weight=2.0), 54 | loss_centerness=dict( 55 | type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), 56 | ), 57 | ) 58 | 59 | train_cfg = dict( 60 | assigner=dict( 61 | type='MaxIoUAssigner', 62 | pos_iou_thr=0.5, 63 | neg_iou_thr=0.4, 64 | min_pos_iou=0, 65 | ignore_iof_thr=-1), 66 | allowed_border=-1, 67 | pos_weight=-1, 68 | debug=False) 69 | 70 | test_cfg = dict( 71 | nms_pre=1000, 72 | min_bbox_size=0, 73 | score_thr=0.05, 74 | nms=dict(type='vote', 75 | iou_threshold=0.65, 76 | cluster_score=['cls', 'iou'], 77 | vote_score=['iou', 'cls'], 78 | iou_enable=False, 79 | sigma=0.025,), 80 | max_per_img=100) 81 | 82 | 83 | data_root = 'data/ycbv/' 84 | data = dict( 85 | samples_per_gpu=16, 86 | workers_per_gpu=8, 87 | train=dict( 88 | dataset_0=dict( 89 | ann_file=data_root + 'detector_annotations/train_pbr.json', 90 | img_prefix=data_root + 'train_pbr/', 91 | seg_prefix=data_root + 'train_pbr/', 92 | min_visib_frac=0.1, 93 | ratio=2, 94 | classes=CLASS_NAMES, 95 | ), 96 | dataset_1=dict( 97 | ann_file=data_root + 'detector_annotations/train_real.json', 98 | img_prefix=data_root + 'train_real/', 99 | seg_prefix=data_root + 'train_real/', 100 | ratio=1, 101 | classes=CLASS_NAMES, 102 | ) 103 | ), 104 | val=dict( 105 | ann_file=data_root + 'detector_annotations/test_bop19.json', 106 | img_prefix=data_root + 'test/', 107 | classes=CLASS_NAMES, 108 | ), 109 | test=dict( 110 | ann_file=data_root + 'detector_annotations/test_bop19.json', 111 | img_prefix=data_root + 'test/', 112 | classes=CLASS_NAMES, 113 | ) 114 | ) 115 | 116 | 117 | load_from = 'work_dirs/ycbv_r50_radet_pbr/latest.pth' 118 | work_dir = 'work_dirs/ycbv_r50_radet_mixpbr' -------------------------------------------------------------------------------- /radet/datasets/voc.py:
-------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from mmcv.utils import print_log 4 | 5 | from radet.core import eval_map, eval_recalls 6 | from .builder import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module() 11 | class VOCDataset(XMLDataset): 12 | 13 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 14 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 15 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 16 | 'tvmonitor') 17 | 18 | def __init__(self, **kwargs): 19 | super(VOCDataset, self).__init__(**kwargs) 20 | if 'VOC2007' in self.img_prefix: 21 | self.year = 2007 22 | elif 'VOC2012' in self.img_prefix: 23 | self.year = 2012 24 | else: 25 | raise ValueError('Cannot infer dataset year from img_prefix') 26 | 27 | def evaluate(self, 28 | results, 29 | metric='mAP', 30 | logger=None, 31 | proposal_nums=(100, 300, 1000), 32 | iou_thr=0.5, 33 | scale_ranges=None): 34 | """Evaluate in VOC protocol. 35 | 36 | Args: 37 | results (list[list | tuple]): Testing results of the dataset. 38 | metric (str | list[str]): Metrics to be evaluated. Options are 39 | 'mAP', 'recall'. 40 | logger (logging.Logger | str, optional): Logger used for printing 41 | related information during evaluation. Default: None. 42 | proposal_nums (Sequence[int]): Proposal number used for evaluating 43 | recalls, such as recall@100, recall@1000. 44 | Default: (100, 300, 1000). 45 | iou_thr (float | list[float]): IoU threshold. Default: 0.5. 46 | scale_ranges (list[tuple], optional): Scale ranges for evaluating 47 | mAP. If not specified, all bounding boxes would be included in 48 | evaluation. Default: None. 49 | 50 | Returns: 51 | dict[str, float]: AP/recall metrics. 
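Example:
    >>> # hypothetical call; 'results' as produced by the usual test script
    >>> dataset.evaluate(results, metric='mAP', iou_thr=[0.5, 0.75])  # doctest: +SKIP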
52 | """ 53 | 54 | if not isinstance(metric, str): 55 | assert len(metric) == 1 56 | metric = metric[0] 57 | allowed_metrics = ['mAP', 'recall'] 58 | if metric not in allowed_metrics: 59 | raise KeyError(f'metric {metric} is not supported') 60 | annotations = [self.get_ann_info(i) for i in range(len(self))] 61 | eval_results = OrderedDict() 62 | iou_thrs = [iou_thr] if isinstance(iou_thr, float) else iou_thr 63 | if metric == 'mAP': 64 | assert isinstance(iou_thrs, list) 65 | if self.year == 2007: 66 | ds_name = 'voc07' 67 | else: 68 | ds_name = self.CLASSES 69 | mean_aps = [] 70 | for iou_thr in iou_thrs: 71 | print_log(f'\n{"-" * 15}iou_thr: {iou_thr}{"-" * 15}') 72 | mean_ap, _ = eval_map( 73 | results, 74 | annotations, 75 | scale_ranges=None, 76 | iou_thr=iou_thr, 77 | dataset=ds_name, 78 | logger=logger) 79 | mean_aps.append(mean_ap) 80 | eval_results[f'AP{int(iou_thr * 100):02d}'] = round(mean_ap, 3) 81 | eval_results['mAP'] = sum(mean_aps) / len(mean_aps) 82 | elif metric == 'recall': 83 | gt_bboxes = [ann['bboxes'] for ann in annotations] 84 | recalls = eval_recalls( 85 | gt_bboxes, results, proposal_nums, iou_thr, logger=logger) 86 | for i, num in enumerate(proposal_nums): 87 | for j, iou in enumerate(iou_thr): 88 | eval_results[f'recall@{num}@{iou}'] = recalls[i, j] 89 | if recalls.shape[1] > 1: 90 | ar = recalls.mean(axis=1) 91 | for i, num in enumerate(proposal_nums): 92 | eval_results[f'AR@{num}'] = ar[i] 93 | return eval_results 94 | -------------------------------------------------------------------------------- /radet/models/utils/res_layer.py: -------------------------------------------------------------------------------- 1 | from mmcv.cnn import build_conv_layer, build_norm_layer 2 | from torch import nn as nn 3 | 4 | 5 | class ResLayer(nn.Sequential): 6 | """ResLayer to build ResNet style backbone. 7 | 8 | Args: 9 | block (nn.Module): block used to build ResLayer. 10 | inplanes (int): inplanes of block. 11 | planes (int): planes of block. 12 | num_blocks (int): number of blocks. 13 | stride (int): stride of the first block. Default: 1 14 | avg_down (bool): Use AvgPool instead of stride conv when 15 | downsampling in the bottleneck. Default: False 16 | conv_cfg (dict): dictionary to construct and config conv layer. 17 | Default: None 18 | norm_cfg (dict): dictionary to construct and config norm layer. 19 | Default: dict(type='BN') 20 | downsample_first (bool): Downsample at the first block or last block. 21 | False for Hourglass, True for ResNet. 
Default: True 22 | """ 23 | 24 | def __init__(self, 25 | block, 26 | inplanes, 27 | planes, 28 | num_blocks, 29 | stride=1, 30 | avg_down=False, 31 | conv_cfg=None, 32 | norm_cfg=dict(type='BN'), 33 | downsample_first=True, 34 | **kwargs): 35 | self.block = block 36 | 37 | downsample = None 38 | if stride != 1 or inplanes != planes * block.expansion: 39 | downsample = [] 40 | conv_stride = stride 41 | if avg_down: 42 | conv_stride = 1 43 | downsample.append( 44 | nn.AvgPool2d( 45 | kernel_size=stride, 46 | stride=stride, 47 | ceil_mode=True, 48 | count_include_pad=False)) 49 | downsample.extend([ 50 | build_conv_layer( 51 | conv_cfg, 52 | inplanes, 53 | planes * block.expansion, 54 | kernel_size=1, 55 | stride=conv_stride, 56 | bias=False), 57 | build_norm_layer(norm_cfg, planes * block.expansion)[1] 58 | ]) 59 | downsample = nn.Sequential(*downsample) 60 | 61 | layers = [] 62 | if downsample_first: 63 | layers.append( 64 | block( 65 | inplanes=inplanes, 66 | planes=planes, 67 | stride=stride, 68 | downsample=downsample, 69 | conv_cfg=conv_cfg, 70 | norm_cfg=norm_cfg, 71 | **kwargs)) 72 | inplanes = planes * block.expansion 73 | for _ in range(1, num_blocks): 74 | layers.append( 75 | block( 76 | inplanes=inplanes, 77 | planes=planes, 78 | stride=1, 79 | conv_cfg=conv_cfg, 80 | norm_cfg=norm_cfg, 81 | **kwargs)) 82 | 83 | else: # downsample_first=False is for HourglassModule 84 | for _ in range(num_blocks - 1): 85 | layers.append( 86 | block( 87 | inplanes=inplanes, 88 | planes=inplanes, 89 | stride=1, 90 | conv_cfg=conv_cfg, 91 | norm_cfg=norm_cfg, 92 | **kwargs)) 93 | layers.append( 94 | block( 95 | inplanes=inplanes, 96 | planes=planes, 97 | stride=stride, 98 | downsample=downsample, 99 | conv_cfg=conv_cfg, 100 | norm_cfg=norm_cfg, 101 | **kwargs)) 102 | super(ResLayer, self).__init__(*layers) 103 | -------------------------------------------------------------------------------- /radet/utils/util_mixins.py: -------------------------------------------------------------------------------- 1 | """This module defines the :class:`NiceRepr` mixin class, which defines a 2 | ``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__`` 3 | method, which you must define. This means you only have to overload one 4 | function instead of two. Furthermore, if the object defines a ``__len__`` 5 | method, then the ``__nice__`` method defaults to something sensible, otherwise 6 | it is treated as abstract and raises ``NotImplementedError``. 7 | 8 | To use simply have your object inherit from :class:`NiceRepr` 9 | (multi-inheritance should be ok). 10 | 11 | This code was copied from the ubelt library: https://github.com/Erotemic/ubelt 12 | 13 | Example: 14 | >>> # Objects that define __nice__ have a default __str__ and __repr__ 15 | >>> class Student(NiceRepr): 16 | ... def __init__(self, name): 17 | ... self.name = name 18 | ... def __nice__(self): 19 | ... return self.name 20 | >>> s1 = Student('Alice') 21 | >>> s2 = Student('Bob') 22 | >>> print(f's1 = {s1}') 23 | >>> print(f's2 = {s2}') 24 | s1 = <Student(Alice)> 25 | s2 = <Student(Bob)> 26 | 27 | Example: 28 | >>> # Objects that define __len__ have a default __nice__ 29 | >>> class Group(NiceRepr): 30 | ... def __init__(self, data): 31 | ... self.data = data 32 | ... def __len__(self): 33 | ... return len(self.data) 34 | >>> g = Group([1, 2, 3]) 35 | >>> print(f'g = {g}') 36 | g = <Group(3)> 37 | """ 38 | import warnings 39 | 40 | 41 | class NiceRepr(object): 42 | """Inherit from this class and define ``__nice__`` to "nicely" print your 43 | objects.
44 | 45 | Defines ``__str__`` and ``__repr__`` in terms of the ``__nice__`` function. 46 | Classes that inherit from :class:`NiceRepr` should redefine ``__nice__``. 47 | If the inheriting class has a ``__len__`` method, then the default 48 | ``__nice__`` method will return its length. 49 | 50 | Example: 51 | >>> class Foo(NiceRepr): 52 | ... def __nice__(self): 53 | ... return 'info' 54 | >>> foo = Foo() 55 | >>> assert str(foo) == '<Foo(info)>' 56 | >>> assert repr(foo).startswith('<Foo(info) at ') 57 | 58 | Example: 59 | >>> class Bar(NiceRepr): 60 | ... pass 61 | >>> bar = Bar() 62 | >>> import pytest 63 | >>> with pytest.warns(None) as record: 64 | >>> assert 'object at' in str(bar) 65 | >>> assert 'object at' in repr(bar) 66 | 67 | Example: 68 | >>> class Baz(NiceRepr): 69 | ... def __len__(self): 70 | ... return 5 71 | >>> baz = Baz() 72 | >>> assert str(baz) == '<Baz(5)>' 73 | """ 74 | 75 | def __nice__(self): 76 | """str: a "nice" summary string describing this module""" 77 | if hasattr(self, '__len__'): 78 | # It is a common pattern for objects to use __len__ in __nice__ 79 | # As a convenience we define a default __nice__ for these objects 80 | return str(len(self)) 81 | else: 82 | # In all other cases force the subclass to overload __nice__ 83 | raise NotImplementedError( 84 | f'Define the __nice__ method for {self.__class__!r}') 85 | 86 | def __repr__(self): 87 | """str: the string of the module""" 88 | try: 89 | nice = self.__nice__() 90 | classname = self.__class__.__name__ 91 | return f'<{classname}({nice}) at {hex(id(self))}>' 92 | except NotImplementedError as ex: 93 | warnings.warn(str(ex), category=RuntimeWarning) 94 | return object.__repr__(self) 95 | 96 | def __str__(self): 97 | """str: the string of the module""" 98 | try: 99 | classname = self.__class__.__name__ 100 | nice = self.__nice__() 101 | return f'<{classname}({nice})>' 102 | except NotImplementedError as ex: 103 | warnings.warn(str(ex), category=RuntimeWarning) 104 | return object.__repr__(self) 105 | -------------------------------------------------------------------------------- /radet/models/dense_heads/dense_test_mixins.py: -------------------------------------------------------------------------------- 1 | from inspect import signature 2 | 3 | import torch 4 | 5 | from radet.core import bbox2result, bbox_mapping_back, multiclass_nms 6 | 7 | 8 | class BBoxTestMixin(object): 9 | """Mixin class for test time augmentation of bboxes.""" 10 | 11 | def merge_aug_bboxes(self, aug_bboxes, aug_scores, img_metas): 12 | """Merge augmented detection bboxes and scores. 13 | 14 | Args: 15 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 16 | aug_scores (list[Tensor] or None): shape (n, #class) 17 | img_shapes (list[Tensor]): shape (3, ). 18 | 19 | Returns: 20 | tuple: (bboxes, scores) 21 | """ 22 | recovered_bboxes = [] 23 | for bboxes, img_info in zip(aug_bboxes, img_metas): 24 | img_shape = img_info[0]['img_shape'] 25 | scale_factor = img_info[0]['scale_factor'] 26 | flip = img_info[0]['flip'] 27 | flip_direction = img_info[0]['flip_direction'] 28 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip, 29 | flip_direction) 30 | recovered_bboxes.append(bboxes) 31 | bboxes = torch.cat(recovered_bboxes, dim=0) 32 | if aug_scores is None: 33 | return bboxes 34 | else: 35 | scores = torch.cat(aug_scores, dim=0) 36 | return bboxes, scores 37 | 38 | def aug_test_bboxes(self, feats, img_metas, rescale=False): 39 | """Test det bboxes with test time augmentation.
40 | 41 | Args: 42 | feats (list[Tensor]): the outer list indicates test-time 43 | augmentations and inner Tensor should have a shape NxCxHxW, 44 | which contains features for all images in the batch. 45 | img_metas (list[list[dict]]): the outer list indicates test-time 46 | augs (multiscale, flip, etc.) and the inner list indicates 47 | images in a batch. each dict has image information. 48 | rescale (bool, optional): Whether to rescale the results. 49 | Defaults to False. 50 | 51 | Returns: 52 | list[ndarray]: bbox results of each class 53 | """ 54 | # check with_nms argument 55 | gb_sig = signature(self.get_bboxes) 56 | gb_args = [p.name for p in gb_sig.parameters.values()] 57 | gbs_sig = signature(self._get_bboxes_single) 58 | gbs_args = [p.name for p in gbs_sig.parameters.values()] 59 | assert ('with_nms' in gb_args) and ('with_nms' in gbs_args), \ 60 | f'{self.__class__.__name__}' \ 61 | ' does not support test-time augmentation' 62 | 63 | aug_bboxes = [] 64 | aug_scores = [] 65 | aug_factors = [] # score_factors for NMS 66 | for x, img_meta in zip(feats, img_metas): 67 | # only one image in the batch 68 | outs = self.forward(x) 69 | bbox_inputs = outs + (img_meta, self.test_cfg, False, False) 70 | bbox_outputs = self.get_bboxes(*bbox_inputs)[0] 71 | aug_bboxes.append(bbox_outputs[0]) 72 | aug_scores.append(bbox_outputs[1]) 73 | # bbox_outputs of some detectors (e.g., ATSS, FCOS, YOLOv3) 74 | # contains additional element to adjust scores before NMS 75 | if len(bbox_outputs) >= 3: 76 | aug_factors.append(bbox_outputs[2]) 77 | 78 | # after merging, bboxes will be rescaled to the original image size 79 | merged_bboxes, merged_scores = self.merge_aug_bboxes( 80 | aug_bboxes, aug_scores, img_metas) 81 | merged_factors = torch.cat(aug_factors, dim=0) if aug_factors else None 82 | det_bboxes, det_labels = multiclass_nms( 83 | merged_bboxes, 84 | merged_scores, 85 | self.test_cfg.score_thr, 86 | self.test_cfg.nms, 87 | self.test_cfg.max_per_img, 88 | score_factors=merged_factors) 89 | 90 | if rescale: 91 | _det_bboxes = det_bboxes 92 | else: 93 | _det_bboxes = det_bboxes.clone() 94 | _det_bboxes[:, :4] *= det_bboxes.new_tensor( 95 | img_metas[0][0]['scale_factor']) 96 | bbox_results = bbox2result(_det_bboxes, det_labels, self.num_classes) 97 | return bbox_results 98 | -------------------------------------------------------------------------------- /radet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | """Base class of samplers.""" 10 | 11 | def __init__(self, 12 | num, 13 | pos_fraction, 14 | neg_pos_ub=-1, 15 | add_gt_as_proposals=True, 16 | **kwargs): 17 | self.num = num 18 | self.pos_fraction = pos_fraction 19 | self.neg_pos_ub = neg_pos_ub 20 | self.add_gt_as_proposals = add_gt_as_proposals 21 | self.pos_sampler = self 22 | self.neg_sampler = self 23 | 24 | @abstractmethod 25 | def _sample_pos(self, assign_result, num_expected, **kwargs): 26 | """Sample positive samples.""" 27 | pass 28 | 29 | @abstractmethod 30 | def _sample_neg(self, assign_result, num_expected, **kwargs): 31 | """Sample negative samples.""" 32 | pass 33 | 34 | def sample(self, 35 | assign_result, 36 | bboxes, 37 | gt_bboxes, 38 | gt_labels=None, 39 | **kwargs): 40 | """Sample positive and negative bboxes. 
41 | 42 | This is a simple implementation of bbox sampling given candidates, 43 | assigning results and ground truth bboxes. 44 | 45 | Args: 46 | assign_result (:obj:`AssignResult`): Bbox assigning results. 47 | bboxes (Tensor): Boxes to be sampled from. 48 | gt_bboxes (Tensor): Ground truth bboxes. 49 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 50 | 51 | Returns: 52 | :obj:`SamplingResult`: Sampling result. 53 | 54 | Example: 55 | >>> from mmdet.core.bbox import RandomSampler 56 | >>> from mmdet.core.bbox import AssignResult 57 | >>> from mmdet.core.bbox.demodata import ensure_rng, random_boxes 58 | >>> rng = ensure_rng(None) 59 | >>> assign_result = AssignResult.random(rng=rng) 60 | >>> bboxes = random_boxes(assign_result.num_preds, rng=rng) 61 | >>> gt_bboxes = random_boxes(assign_result.num_gts, rng=rng) 62 | >>> gt_labels = None 63 | >>> self = RandomSampler(num=32, pos_fraction=0.5, neg_pos_ub=-1, 64 | >>> add_gt_as_proposals=False) 65 | >>> self = self.sample(assign_result, bboxes, gt_bboxes, gt_labels) 66 | """ 67 | if len(bboxes.shape) < 2: 68 | bboxes = bboxes[None, :] 69 | 70 | bboxes = bboxes[:, :4] 71 | 72 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 73 | if self.add_gt_as_proposals and len(gt_bboxes) > 0: 74 | if gt_labels is None: 75 | raise ValueError( 76 | 'gt_labels must be given when add_gt_as_proposals is True') 77 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 78 | assign_result.add_gt_(gt_labels) 79 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 80 | gt_flags = torch.cat([gt_ones, gt_flags]) 81 | 82 | num_expected_pos = int(self.num * self.pos_fraction) 83 | pos_inds = self.pos_sampler._sample_pos( 84 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 85 | # We found that sampled indices have duplicated items occasionally. 86 | # (may be a bug of PyTorch) 87 | pos_inds = pos_inds.unique() 88 | num_sampled_pos = pos_inds.numel() 89 | num_expected_neg = self.num - num_sampled_pos 90 | if self.neg_pos_ub >= 0: 91 | _pos = max(1, num_sampled_pos) 92 | neg_upper_bound = int(self.neg_pos_ub * _pos) 93 | if num_expected_neg > neg_upper_bound: 94 | num_expected_neg = neg_upper_bound 95 | neg_inds = self.neg_sampler._sample_neg( 96 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 97 | neg_inds = neg_inds.unique() 98 | 99 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 100 | assign_result, gt_flags) 101 | return sampling_result 102 | -------------------------------------------------------------------------------- /radet/models/backbones/detectors_resnext.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from mmcv.cnn import build_conv_layer, build_norm_layer 4 | 5 | from ..builder import BACKBONES 6 | from .detectors_resnet import Bottleneck as _Bottleneck 7 | from .detectors_resnet import DetectoRS_ResNet 8 | 9 | 10 | class Bottleneck(_Bottleneck): 11 | expansion = 4 12 | 13 | def __init__(self, 14 | inplanes, 15 | planes, 16 | groups=1, 17 | base_width=4, 18 | base_channels=64, 19 | **kwargs): 20 | """Bottleneck block for ResNeXt. 21 | 22 | If style is "pytorch", the stride-two layer is the 3x3 conv layer, if 23 | it is "caffe", the stride-two layer is the first 1x1 conv layer. 
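For example, with groups=32, base_width=4, base_channels=64 and planes=64 (a ResNeXt 32x4d style block), the grouped 3x3 conv below is given floor(64 * 4 / 64) * 32 = 128 channels.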
24 | """ 25 | super(Bottleneck, self).__init__(inplanes, planes, **kwargs) 26 | 27 | if groups == 1: 28 | width = self.planes 29 | else: 30 | width = math.floor(self.planes * 31 | (base_width / base_channels)) * groups 32 | 33 | self.norm1_name, norm1 = build_norm_layer( 34 | self.norm_cfg, width, postfix=1) 35 | self.norm2_name, norm2 = build_norm_layer( 36 | self.norm_cfg, width, postfix=2) 37 | self.norm3_name, norm3 = build_norm_layer( 38 | self.norm_cfg, self.planes * self.expansion, postfix=3) 39 | 40 | self.conv1 = build_conv_layer( 41 | self.conv_cfg, 42 | self.inplanes, 43 | width, 44 | kernel_size=1, 45 | stride=self.conv1_stride, 46 | bias=False) 47 | self.add_module(self.norm1_name, norm1) 48 | fallback_on_stride = False 49 | self.with_modulated_dcn = False 50 | if self.with_dcn: 51 | fallback_on_stride = self.dcn.pop('fallback_on_stride', False) 52 | if self.with_sac: 53 | self.conv2 = build_conv_layer( 54 | self.sac, 55 | width, 56 | width, 57 | kernel_size=3, 58 | stride=self.conv2_stride, 59 | padding=self.dilation, 60 | dilation=self.dilation, 61 | groups=groups, 62 | bias=False) 63 | elif not self.with_dcn or fallback_on_stride: 64 | self.conv2 = build_conv_layer( 65 | self.conv_cfg, 66 | width, 67 | width, 68 | kernel_size=3, 69 | stride=self.conv2_stride, 70 | padding=self.dilation, 71 | dilation=self.dilation, 72 | groups=groups, 73 | bias=False) 74 | else: 75 | assert self.conv_cfg is None, 'conv_cfg must be None for DCN' 76 | self.conv2 = build_conv_layer( 77 | self.dcn, 78 | width, 79 | width, 80 | kernel_size=3, 81 | stride=self.conv2_stride, 82 | padding=self.dilation, 83 | dilation=self.dilation, 84 | groups=groups, 85 | bias=False) 86 | 87 | self.add_module(self.norm2_name, norm2) 88 | self.conv3 = build_conv_layer( 89 | self.conv_cfg, 90 | width, 91 | self.planes * self.expansion, 92 | kernel_size=1, 93 | bias=False) 94 | self.add_module(self.norm3_name, norm3) 95 | 96 | 97 | @BACKBONES.register_module() 98 | class DetectoRS_ResNeXt(DetectoRS_ResNet): 99 | """ResNeXt backbone for DetectoRS. 100 | 101 | Args: 102 | groups (int): The number of groups in ResNeXt. 103 | base_width (int): The base width of ResNeXt. 104 | """ 105 | 106 | arch_settings = { 107 | 50: (Bottleneck, (3, 4, 6, 3)), 108 | 101: (Bottleneck, (3, 4, 23, 3)), 109 | 152: (Bottleneck, (3, 8, 36, 3)) 110 | } 111 | 112 | def __init__(self, groups=1, base_width=4, **kwargs): 113 | self.groups = groups 114 | self.base_width = base_width 115 | super(DetectoRS_ResNeXt, self).__init__(**kwargs) 116 | 117 | def make_res_layer(self, **kwargs): 118 | return super().make_res_layer( 119 | groups=self.groups, 120 | base_width=self.base_width, 121 | base_channels=self.base_channels, 122 | **kwargs) 123 | -------------------------------------------------------------------------------- /radet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..builder import BBOX_SAMPLERS 4 | from ..transforms import bbox2roi 5 | from .base_sampler import BaseSampler 6 | 7 | 8 | @BBOX_SAMPLERS.register_module() 9 | class OHEMSampler(BaseSampler): 10 | r"""Online Hard Example Mining Sampler described in `Training Region-based 11 | Object Detectors with Online Hard Example Mining 12 | `_. 
13 | """ 14 | 15 | def __init__(self, 16 | num, 17 | pos_fraction, 18 | context, 19 | neg_pos_ub=-1, 20 | add_gt_as_proposals=True, 21 | **kwargs): 22 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 23 | add_gt_as_proposals) 24 | self.context = context 25 | if not hasattr(self.context, 'num_stages'): 26 | self.bbox_head = self.context.bbox_head 27 | else: 28 | self.bbox_head = self.context.bbox_head[self.context.current_stage] 29 | 30 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 31 | with torch.no_grad(): 32 | rois = bbox2roi([bboxes]) 33 | if not hasattr(self.context, 'num_stages'): 34 | bbox_results = self.context._bbox_forward(feats, rois) 35 | else: 36 | bbox_results = self.context._bbox_forward( 37 | self.context.current_stage, feats, rois) 38 | cls_score = bbox_results['cls_score'] 39 | loss = self.bbox_head.loss( 40 | cls_score=cls_score, 41 | bbox_pred=None, 42 | rois=rois, 43 | labels=labels, 44 | label_weights=cls_score.new_ones(cls_score.size(0)), 45 | bbox_targets=None, 46 | bbox_weights=None, 47 | reduction_override='none')['loss_cls'] 48 | _, topk_loss_inds = loss.topk(num_expected) 49 | return inds[topk_loss_inds] 50 | 51 | def _sample_pos(self, 52 | assign_result, 53 | num_expected, 54 | bboxes=None, 55 | feats=None, 56 | **kwargs): 57 | """Sample positive boxes. 58 | 59 | Args: 60 | assign_result (:obj:`AssignResult`): Assigned results 61 | num_expected (int): Number of expected positive samples 62 | bboxes (torch.Tensor, optional): Boxes. Defaults to None. 63 | feats (list[torch.Tensor], optional): Multi-level features. 64 | Defaults to None. 65 | 66 | Returns: 67 | torch.Tensor: Indices of positive samples 68 | """ 69 | # Sample some hard positive samples 70 | pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) 71 | if pos_inds.numel() != 0: 72 | pos_inds = pos_inds.squeeze(1) 73 | if pos_inds.numel() <= num_expected: 74 | return pos_inds 75 | else: 76 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 77 | assign_result.labels[pos_inds], feats) 78 | 79 | def _sample_neg(self, 80 | assign_result, 81 | num_expected, 82 | bboxes=None, 83 | feats=None, 84 | **kwargs): 85 | """Sample negative boxes. 86 | 87 | Args: 88 | assign_result (:obj:`AssignResult`): Assigned results 89 | num_expected (int): Number of expected negative samples 90 | bboxes (torch.Tensor, optional): Boxes. Defaults to None. 91 | feats (list[torch.Tensor], optional): Multi-level features. 92 | Defaults to None. 
93 | 94 | Returns: 95 | torch.Tensor: Indices of negative samples 96 | """ 97 | # Sample some hard negative samples 98 | neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False) 99 | if neg_inds.numel() != 0: 100 | neg_inds = neg_inds.squeeze(1) 101 | if len(neg_inds) <= num_expected: 102 | return neg_inds 103 | else: 104 | neg_labels = assign_result.labels.new_empty( 105 | neg_inds.size(0)).fill_(self.bbox_head.num_classes) 106 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 107 | neg_labels, feats) 108 | -------------------------------------------------------------------------------- /radet/utils/contextmanagers.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextlib 3 | import logging 4 | import os 5 | import time 6 | from typing import List 7 | 8 | import torch 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | DEBUG_COMPLETED_TIME = bool(os.environ.get('DEBUG_COMPLETED_TIME', False)) 13 | 14 | 15 | @contextlib.asynccontextmanager 16 | async def completed(trace_name='', 17 | name='', 18 | sleep_interval=0.05, 19 | streams: List[torch.cuda.Stream] = None): 20 | """Async context manager that waits for work to complete on given CUDA 21 | streams.""" 22 | if not torch.cuda.is_available(): 23 | yield 24 | return 25 | 26 | stream_before_context_switch = torch.cuda.current_stream() 27 | if not streams: 28 | streams = [stream_before_context_switch] 29 | else: 30 | streams = [s if s else stream_before_context_switch for s in streams] 31 | 32 | end_events = [ 33 | torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams 34 | ] 35 | 36 | if DEBUG_COMPLETED_TIME: 37 | start = torch.cuda.Event(enable_timing=True) 38 | stream_before_context_switch.record_event(start) 39 | 40 | cpu_start = time.monotonic() 41 | logger.debug('%s %s starting, streams: %s', trace_name, name, streams) 42 | grad_enabled_before = torch.is_grad_enabled() 43 | try: 44 | yield 45 | finally: 46 | current_stream = torch.cuda.current_stream() 47 | assert current_stream == stream_before_context_switch 48 | 49 | if DEBUG_COMPLETED_TIME: 50 | cpu_end = time.monotonic() 51 | for i, stream in enumerate(streams): 52 | event = end_events[i] 53 | stream.record_event(event) 54 | 55 | grad_enabled_after = torch.is_grad_enabled() 56 | 57 | # observed change of torch.is_grad_enabled() during concurrent run of 58 | # async_test_bboxes code 59 | assert (grad_enabled_before == grad_enabled_after 60 | ), 'Unexpected is_grad_enabled() value change' 61 | 62 | are_done = [e.query() for e in end_events] 63 | logger.debug('%s %s completed: %s streams: %s', trace_name, name, 64 | are_done, streams) 65 | with torch.cuda.stream(stream_before_context_switch): 66 | while not all(are_done): 67 | await asyncio.sleep(sleep_interval) 68 | are_done = [e.query() for e in end_events] 69 | logger.debug( 70 | '%s %s completed: %s streams: %s', 71 | trace_name, 72 | name, 73 | are_done, 74 | streams, 75 | ) 76 | 77 | current_stream = torch.cuda.current_stream() 78 | assert current_stream == stream_before_context_switch 79 | 80 | if DEBUG_COMPLETED_TIME: 81 | cpu_time = (cpu_end - cpu_start) * 1000 82 | stream_times_ms = '' 83 | for i, stream in enumerate(streams): 84 | elapsed_time = start.elapsed_time(end_events[i]) 85 | stream_times_ms += f' {stream} {elapsed_time:.2f} ms' 86 | logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time, 87 | stream_times_ms) 88 | 89 | 90 | @contextlib.asynccontextmanager 91 | async def concurrent(streamqueue: 
asyncio.Queue, 92 | trace_name='concurrent', 93 | name='stream'): 94 | """Run code concurrently in different streams. 95 | 96 | :param streamqueue: asyncio.Queue instance. 97 | 98 | Queue tasks define the pool of streams used for concurrent execution. 99 | """ 100 | if not torch.cuda.is_available(): 101 | yield 102 | return 103 | 104 | initial_stream = torch.cuda.current_stream() 105 | 106 | with torch.cuda.stream(initial_stream): 107 | stream = await streamqueue.get() 108 | assert isinstance(stream, torch.cuda.Stream) 109 | 110 | try: 111 | with torch.cuda.stream(stream): 112 | logger.debug('%s %s is starting, stream: %s', trace_name, name, 113 | stream) 114 | yield 115 | current = torch.cuda.current_stream() 116 | assert current == stream 117 | logger.debug('%s %s has finished, stream: %s', trace_name, 118 | name, stream) 119 | finally: 120 | streamqueue.task_done() 121 | streamqueue.put_nowait(stream) 122 | -------------------------------------------------------------------------------- /radet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from mmcv.ops import nms 4 | 5 | from ..bbox import bbox_mapping_back 6 | 7 | 8 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 9 | """Merge augmented proposals (multiscale, flip, etc.) 10 | 11 | Args: 12 | aug_proposals (list[Tensor]): proposals from different testing 13 | schemes, shape (n, 5). Note that they are not rescaled to the 14 | original image size. 15 | 16 | img_metas (list[dict]): list of image info dict where each dict has: 17 | 'img_shape', 'scale_factor', 'flip', and may also contain 18 | 'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'. 19 | For details on the values of these keys see 20 | `mmdet/datasets/pipelines/formatting.py:Collect`. 21 | 22 | rpn_test_cfg (dict): rpn test config. 23 | 24 | Returns: 25 | Tensor: shape (n, 4), proposals corresponding to original image scale. 26 | """ 27 | recovered_proposals = [] 28 | for proposals, img_info in zip(aug_proposals, img_metas): 29 | img_shape = img_info['img_shape'] 30 | scale_factor = img_info['scale_factor'] 31 | flip = img_info['flip'] 32 | flip_direction = img_info['flip_direction'] 33 | _proposals = proposals.clone() 34 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 35 | scale_factor, flip, 36 | flip_direction) 37 | recovered_proposals.append(_proposals) 38 | aug_proposals = torch.cat(recovered_proposals, dim=0) 39 | merged_proposals, _ = nms(aug_proposals[:, :4].contiguous(), 40 | aug_proposals[:, -1].contiguous(), 41 | rpn_test_cfg.nms_thr) 42 | scores = merged_proposals[:, 4] 43 | _, order = scores.sort(0, descending=True) 44 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 45 | order = order[:num] 46 | merged_proposals = merged_proposals[order, :] 47 | return merged_proposals 48 | 49 | 50 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 51 | """Merge augmented detection bboxes and scores. 52 | 53 | Args: 54 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 55 | aug_scores (list[Tensor] or None): shape (n, #class) 56 | img_shapes (list[Tensor]): shape (3, ). 57 | rcnn_test_cfg (dict): rcnn test config. 
58 | 59 | Returns: 60 | tuple[Tensor] or Tensor: merged (bboxes, scores), or bboxes only when aug_scores is None. 61 | """ 62 | recovered_bboxes = [] 63 | for bboxes, img_info in zip(aug_bboxes, img_metas): 64 | img_shape = img_info[0]['img_shape'] 65 | scale_factor = img_info[0]['scale_factor'] 66 | flip = img_info[0]['flip'] 67 | flip_direction = img_info[0]['flip_direction'] 68 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip, 69 | flip_direction) 70 | recovered_bboxes.append(bboxes) 71 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 72 | if aug_scores is None: 73 | return bboxes 74 | else: 75 | scores = torch.stack(aug_scores).mean(dim=0) 76 | return bboxes, scores 77 | 78 | 79 | def merge_aug_scores(aug_scores): 80 | """Merge augmented bbox scores.""" 81 | if isinstance(aug_scores[0], torch.Tensor): 82 | return torch.mean(torch.stack(aug_scores), dim=0) 83 | else: 84 | return np.mean(aug_scores, axis=0) 85 | 86 | 87 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 88 | """Merge augmented mask predictions. 89 | 90 | Args: 91 | aug_masks (list[ndarray]): shape (n, #class, h, w) 92 | img_metas (list[list[dict]]): image info for each augmentation. 93 | rcnn_test_cfg (dict): rcnn test config. 94 | 95 | Returns: 96 | ndarray: merged masks of shape (n, #class, h, w). 97 | """ 98 | recovered_masks = [] 99 | for mask, img_info in zip(aug_masks, img_metas): 100 | flip = img_info[0]['flip'] 101 | flip_direction = img_info[0]['flip_direction'] 102 | if flip: 103 | if flip_direction == 'horizontal': 104 | mask = mask[:, :, :, ::-1] 105 | elif flip_direction == 'vertical': 106 | mask = mask[:, :, ::-1, :] 107 | else: 108 | raise ValueError( 109 | f"Invalid flipping direction '{flip_direction}'") 110 | recovered_masks.append(mask) 111 | 112 | if weights is None: 113 | merged_masks = np.mean(recovered_masks, axis=0) 114 | else: 115 | merged_masks = np.average( 116 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 117 | return merged_masks 118 | -------------------------------------------------------------------------------- /radet/datasets/pipelines/test_time_aug.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import mmcv 4 | 5 | from ..builder import PIPELINES 6 | from .compose import Compose 7 | 8 | 9 | @PIPELINES.register_module() 10 | class MultiScaleFlipAug(object): 11 | """Test-time augmentation with multiple scales and flipping. 12 | 13 | An example configuration is as follows: 14 | 15 | .. code-block:: 16 | 17 | img_scale=[(1333, 400), (1333, 800)], 18 | flip=True, 19 | transforms=[ 20 | dict(type='Resize', keep_ratio=True), 21 | dict(type='RandomFlip'), 22 | dict(type='Normalize', **img_norm_cfg), 23 | dict(type='Pad', size_divisor=32), 24 | dict(type='ImageToTensor', keys=['img']), 25 | dict(type='Collect', keys=['img']), 26 | ] 27 | 28 | After MultiScaleFlipAug with the above configuration, the results are 29 | wrapped into lists of the same length as follows: 30 | 31 | .. code-block:: 32 | 33 | dict( 34 | img=[...], 35 | img_shape=[...], 36 | scale=[(1333, 400), (1333, 400), (1333, 800), (1333, 800)], 37 | flip=[False, True, False, True], 38 | ... 39 | ) 40 | 41 | Args: 42 | transforms (list[dict]): Transforms to apply in each augmentation. 43 | img_scale (tuple | list[tuple] | None): Image scales for resizing. 44 | scale_factor (float | list[float] | None): Scale factors for resizing. 45 | flip (bool): Whether to apply flip augmentation. Default: False. 46 | flip_direction (str | list[str]): Flip augmentation directions, 47 | options are "horizontal" and "vertical". 
If flip_direction is a list, 48 | multiple flip augmentations will be applied. 49 | It has no effect when flip == False. Default: "horizontal". 50 | """ 51 | 52 | def __init__(self, 53 | transforms, 54 | img_scale=None, 55 | scale_factor=None, 56 | flip=False, 57 | flip_direction='horizontal'): 58 | self.transforms = Compose(transforms) 59 | assert (img_scale is None) ^ (scale_factor is None), ( 60 | 'Exactly one of img_scale and scale_factor must be set') 61 | if img_scale is not None: 62 | self.img_scale = img_scale if isinstance(img_scale, 63 | list) else [img_scale] 64 | self.scale_key = 'scale' 65 | assert mmcv.is_list_of(self.img_scale, tuple) 66 | else: 67 | self.img_scale = scale_factor if isinstance( 68 | scale_factor, list) else [scale_factor] 69 | self.scale_key = 'scale_factor' 70 | 71 | self.flip = flip 72 | self.flip_direction = flip_direction if isinstance( 73 | flip_direction, list) else [flip_direction] 74 | assert mmcv.is_list_of(self.flip_direction, str) 75 | if not self.flip and self.flip_direction != ['horizontal']: 76 | warnings.warn( 77 | 'flip_direction has no effect when flip is set to False') 78 | if (self.flip 79 | and not any([t['type'] == 'RandomFlip' for t in transforms])): 80 | warnings.warn( 81 | 'flip has no effect when RandomFlip is not in transforms') 82 | 83 | def __call__(self, results): 84 | """Call function to apply test-time augmentation transforms on results. 85 | 86 | Args: 87 | results (dict): Result dict containing the data to transform. 88 | 89 | Returns: 90 | dict[str: list]: The augmented data, where each value is wrapped 91 | into a list. 92 | """ 93 | 94 | aug_data = [] 95 | flip_args = [(False, None)] 96 | if self.flip: 97 | flip_args += [(True, direction) 98 | for direction in self.flip_direction] 99 | for scale in self.img_scale: 100 | for flip, direction in flip_args: 101 | _results = results.copy() 102 | _results[self.scale_key] = scale 103 | _results['flip'] = flip 104 | _results['flip_direction'] = direction 105 | data = self.transforms(_results) 106 | aug_data.append(data) 107 | # list of dict to dict of list 108 | aug_data_dict = {key: [] for key in aug_data[0]} 109 | for data in aug_data: 110 | for key, val in data.items(): 111 | aug_data_dict[key].append(val) 112 | return aug_data_dict 113 | 114 | def __repr__(self): 115 | repr_str = self.__class__.__name__ 116 | repr_str += f'(transforms={self.transforms}, ' 117 | repr_str += f'img_scale={self.img_scale}, flip={self.flip}, ' 118 | repr_str += f'flip_direction={self.flip_direction})' 119 | return repr_str 120 | -------------------------------------------------------------------------------- /radet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..builder import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | """Smooth L1 loss. 11 | 12 | Args: 13 | pred (torch.Tensor): The prediction. 14 | target (torch.Tensor): The learning target of the prediction. 15 | beta (float, optional): The threshold in the piecewise function. 16 | Defaults to 1.0. 17 | 18 | Returns: 19 | torch.Tensor: Calculated loss 20 | """ 21 | assert beta > 0 22 | assert pred.size() == target.size() and target.numel() > 0 23 | diff = torch.abs(pred - target) 24 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 25 | diff - 0.5 * beta) 26 | return loss 27 | 28 | 29 | @weighted_loss 30 | def l1_loss(pred, target): 31 | """L1 loss. 
32 | 33 | Args: 34 | pred (torch.Tensor): The prediction. 35 | target (torch.Tensor): The learning target of the prediction. 36 | 37 | Returns: 38 | torch.Tensor: Calculated loss 39 | """ 40 | assert pred.size() == target.size() and target.numel() > 0 41 | loss = torch.abs(pred - target) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module() 46 | class SmoothL1Loss(nn.Module): 47 | """Smooth L1 loss. 48 | 49 | Args: 50 | beta (float, optional): The threshold in the piecewise function. 51 | Defaults to 1.0. 52 | reduction (str, optional): The method to reduce the loss. 53 | Options are "none", "mean" and "sum". Defaults to "mean". 54 | loss_weight (float, optional): The weight of loss. 55 | """ 56 | 57 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 58 | super(SmoothL1Loss, self).__init__() 59 | self.beta = beta 60 | self.reduction = reduction 61 | self.loss_weight = loss_weight 62 | 63 | def forward(self, 64 | pred, 65 | target, 66 | weight=None, 67 | avg_factor=None, 68 | reduction_override=None, 69 | **kwargs): 70 | """Forward function. 71 | 72 | Args: 73 | pred (torch.Tensor): The prediction. 74 | target (torch.Tensor): The learning target of the prediction. 75 | weight (torch.Tensor, optional): The weight of loss for each 76 | prediction. Defaults to None. 77 | avg_factor (int, optional): Average factor that is used to average 78 | the loss. Defaults to None. 79 | reduction_override (str, optional): The reduction method used to 80 | override the original reduction method of the loss. 81 | Defaults to None. 82 | """ 83 | assert reduction_override in (None, 'none', 'mean', 'sum') 84 | reduction = ( 85 | reduction_override if reduction_override else self.reduction) 86 | loss_bbox = self.loss_weight * smooth_l1_loss( 87 | pred, 88 | target, 89 | weight, 90 | beta=self.beta, 91 | reduction=reduction, 92 | avg_factor=avg_factor, 93 | **kwargs) 94 | return loss_bbox 95 | 96 | 97 | @LOSSES.register_module() 98 | class L1Loss(nn.Module): 99 | """L1 loss. 100 | 101 | Args: 102 | reduction (str, optional): The method to reduce the loss. 103 | Options are "none", "mean" and "sum". 104 | loss_weight (float, optional): The weight of loss. 105 | """ 106 | 107 | def __init__(self, reduction='mean', loss_weight=1.0): 108 | super(L1Loss, self).__init__() 109 | self.reduction = reduction 110 | self.loss_weight = loss_weight 111 | 112 | def forward(self, 113 | pred, 114 | target, 115 | weight=None, 116 | avg_factor=None, 117 | reduction_override=None): 118 | """Forward function. 119 | 120 | Args: 121 | pred (torch.Tensor): The prediction. 122 | target (torch.Tensor): The learning target of the prediction. 123 | weight (torch.Tensor, optional): The weight of loss for each 124 | prediction. Defaults to None. 125 | avg_factor (int, optional): Average factor that is used to average 126 | the loss. Defaults to None. 127 | reduction_override (str, optional): The reduction method used to 128 | override the original reduction method of the loss. 129 | Defaults to None. 130 | """ 131 | assert reduction_override in (None, 'none', 'mean', 'sum') 132 | reduction = ( 133 | reduction_override if reduction_override else self.reduction) 134 | loss_bbox = self.loss_weight * l1_loss( 135 | pred, target, weight, reduction=reduction, avg_factor=avg_factor) 136 | return loss_bbox 137 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

# Rigidity-Aware Detection for 6D Object Pose Estimation (CVPR 2023) 2 | 3 | Yang Hai, Rui Song, Jiaojiao Li, Mathieu Salzmann, Yinlin Hu 4 | 5 | Paper | Poster | Video 6 | 7 | 8 |
9 | 10 | 📢 **NEWS**: We have released the code for combining RADet with [WDR-Pose](https://arxiv.org/abs/2104.00337), part of the Best Single-Model Solution for the BOP Challenge 2022. Please check out [PseudoFlow](https://github.com/YangHai-1218/PseudoFlow)! 11 | 12 | 13 | # Introduction 14 | 15 | Most recent 6D object pose estimation methods first use object detection to obtain 2D bounding boxes before actually regressing the pose. However, the general-purpose object detectors they use are ill-suited to cluttered scenes and thus provide poor initializations for the subsequent pose network. 16 | 17 |
18 | [Figure: visibility-guided sampling vs. standard center-based sampling; see the caption below] 19 | 20 |
21 | (a) The standard detection strategy chooses positive samples (green cells) around the object center, thus suffering from occlusions. (b) Instead, we propose to use a visibility-guided sampling strategy to discard the occluded regions and encourage the network to be supervised by all visible parts. The sampling probability is depicted by different shades of green. (c) Our method (green boxes) yields more accurate detections than the standard strategy (red boxes). 22 | 23 |
24 | [Figure: testing accuracy of different sampling strategies on COCO and YCB-V; see the caption below] 25 | 26 |
27 | Our motivation comes from the rigidity of the targets in 6D object pose estimation. We show the testing accuracy of different sampling strategies w.r.t. different local predictions during training on a typical general object detection dataset (COCO) and on a typical 6D object pose dataset (YCB-V). We report the results of FCOSv2 (Center), ATSS (Center+), and a strategy exploiting all the candidates in the ground-truth mask (Visible). The horizontal axis represents the normalized distance of a local prediction to the box center. Although the accuracy of the different strategies is similar on COCO, the visibility-guided sampling is much more accurate on YCB-V, even when the local predictions come from non-center areas, thanks to the rigidity of the target objects. 28 | 29 | # Installation 30 | This code has been tested on an `Ubuntu 18.04` server with `CUDA 11.3`. 31 | - Install the necessary Python packages with `pip install -r requirements.txt`. 32 | - Build the other dependencies with `python setup.py develop`. 33 | 34 | # Dataset Preparation 35 | 36 | - Download the [BOP datasets](https://bop.felk.cvut.cz/) and place them under the `data` directory. 37 | 38 | - Collect image file lists for the desired data splits. 39 | 40 | For example, collect the PBR images for the YCB-V dataset: 41 | 42 | ```shell 43 | python tools/collect_image_list.py --source-dir data/ycbv/train_pbr --save-path data/ycbv/image_lists/train_pbr.txt --pattern '*/rgb/*.png' 44 | ``` 45 | - Collect BOP image file lists for the desired dataset. 46 | 47 | For example, collect the BOP test images for the YCB-V dataset: 48 | ```shell 49 | python tools/collect_bop_imagelist.py data/ycbv/test_bop19.json data/ycbv/image_lists/test_bop19.txt --ext png 50 | ``` 51 | - Convert BOP annotations into COCO format for both training (`train_pbr`/`train_real`) and testing (`test_bop19`). 52 | 53 | ```shell 54 | python tools/bop_to_coco.py --images-dir data/ycbv/train_pbr --images-list data/ycbv/image_lists/train_pbr.txt --save-path data/ycbv/detector_annotations/train_pbr.json --dataset ycbv 55 | ``` 56 | 57 | # Testing 58 | Use `tools/test.py`: 59 | ```shell 60 | python tools/test.py --config configs/bop/r50_ycbv_pbr.py --checkpoint checkpoints/radet_ycbv_pbr.pth --format-only --eval-options jsonfile_prefix=work_dirs/results/radet_ycbv_pbr 61 | ``` 62 | 63 | # Training 64 | 65 | Use `tools/train.py`: 66 | 67 | ```shell 68 | python tools/train.py --config configs/bop/r50_ycbv_pbr.py 69 | ``` 70 | 71 | 72 | # Pretrained models and detection results 73 | 74 | We put the pretrained models and the corresponding detection results for the 7 BOP core datasets [here](https://drive.google.com/drive/folders/18_P693QoT9yTup1I8rmn7Jcs4DmQ2wOQ?usp=share_link). (In the paper, we use a score threshold of 0.1 to filter out false positives before the second-stage pose estimation; a filtering sketch is given at the end of this README.) 75 | 76 | # Citation 77 | 78 | If you find this project helpful, please cite: 79 | 80 | ``` 81 | @inproceedings{yang2023radet, 82 | title={Rigidity-Aware Detection for 6D Object Pose Estimation}, 83 | author={Yang Hai and Rui Song and Jiaojiao Li and Mathieu Salzmann and Yinlin Hu}, 84 | booktitle={CVPR}, 85 | year={2023} 86 | } 87 | ``` 88 | 89 | # Acknowledgement 90 | 91 | We build our framework on [mmdetection](https://github.com/open-mmlab/mmdetection) and [MBS](https://github.com/YinlinHu/MBS). We thank the authors for their great code repositories. 
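The score-threshold filtering mentioned above is simple to reproduce offline. Below is a minimal sketch, assuming the `--format-only` test command wrote COCO-style results (a JSON list of detections with `image_id`, `category_id`, `bbox`, and `score` fields) to `work_dirs/results/radet_ycbv_pbr.bbox.json`, following the usual mmdetection `jsonfile_prefix` convention; the function name and file paths are illustrative, not a tool shipped with this repository.

```python
# Hypothetical post-processing sketch: drop detections below the score
# threshold used in the paper (0.1) before the second-stage pose estimation.
# All paths are placeholders.
import json


def filter_detections(result_file, save_file, score_thr=0.1):
    with open(result_file, 'r') as f:
        detections = json.load(f)  # COCO-style list of detection dicts
    kept = [det for det in detections if det['score'] >= score_thr]
    with open(save_file, 'w') as f:
        json.dump(kept, f)
    print(f'kept {len(kept)}/{len(detections)} detections')


filter_detections('work_dirs/results/radet_ycbv_pbr.bbox.json',
                  'work_dirs/results/radet_ycbv_pbr.filtered.json')
```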
92 | -------------------------------------------------------------------------------- /radet/datasets/samplers/group_sampler.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import math 3 | 4 | import numpy as np 5 | import torch 6 | from mmcv.runner import get_dist_info 7 | from torch.utils.data import Sampler 8 | 9 | 10 | class GroupSampler(Sampler): 11 | 12 | def __init__(self, dataset, samples_per_gpu=1): 13 | assert hasattr(dataset, 'flag') 14 | self.dataset = dataset 15 | self.samples_per_gpu = samples_per_gpu 16 | self.flag = dataset.flag.astype(np.int64) 17 | self.group_sizes = np.bincount(self.flag) 18 | self.num_samples = 0 19 | for i, size in enumerate(self.group_sizes): 20 | self.num_samples += int(np.ceil( 21 | size / self.samples_per_gpu)) * self.samples_per_gpu 22 | 23 | def __iter__(self): 24 | indices = [] 25 | for i, size in enumerate(self.group_sizes): 26 | if size == 0: 27 | continue 28 | indice = np.where(self.flag == i)[0] 29 | assert len(indice) == size 30 | np.random.shuffle(indice) 31 | num_extra = int(np.ceil(size / self.samples_per_gpu) 32 | ) * self.samples_per_gpu - len(indice) 33 | indice = np.concatenate( 34 | [indice, np.random.choice(indice, num_extra)]) 35 | indices.append(indice) 36 | indices = np.concatenate(indices) 37 | indices = [ 38 | indices[i * self.samples_per_gpu:(i + 1) * self.samples_per_gpu] 39 | for i in np.random.permutation( 40 | range(len(indices) // self.samples_per_gpu)) 41 | ] 42 | indices = np.concatenate(indices) 43 | indices = indices.astype(np.int64).tolist() 44 | assert len(indices) == self.num_samples 45 | return iter(indices) 46 | 47 | def __len__(self): 48 | return self.num_samples 49 | 50 | 51 | class DistributedGroupSampler(Sampler): 52 | """Sampler that restricts data loading to a subset of the dataset. 53 | 54 | It is especially useful in conjunction with 55 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 56 | process can pass a DistributedSampler instance as a DataLoader sampler, 57 | and load a subset of the original dataset that is exclusive to it. 58 | 59 | .. note:: 60 | Dataset is assumed to be of constant size. 61 | 62 | Arguments: 63 | dataset: Dataset used for sampling. 64 | num_replicas (optional): Number of processes participating in 65 | distributed training. 66 | rank (optional): Rank of the current process within num_replicas. 
67 | """ 68 | 69 | def __init__(self, 70 | dataset, 71 | samples_per_gpu=1, 72 | num_replicas=None, 73 | rank=None): 74 | _rank, _num_replicas = get_dist_info() 75 | if num_replicas is None: 76 | num_replicas = _num_replicas 77 | if rank is None: 78 | rank = _rank 79 | self.dataset = dataset 80 | self.samples_per_gpu = samples_per_gpu 81 | self.num_replicas = num_replicas 82 | self.rank = rank 83 | self.epoch = 0 84 | 85 | assert hasattr(self.dataset, 'flag') 86 | self.flag = self.dataset.flag 87 | self.group_sizes = np.bincount(self.flag) 88 | 89 | self.num_samples = 0 90 | for i, j in enumerate(self.group_sizes): 91 | self.num_samples += int( 92 | math.ceil(self.group_sizes[i] * 1.0 / self.samples_per_gpu / 93 | self.num_replicas)) * self.samples_per_gpu 94 | self.total_size = self.num_samples * self.num_replicas 95 | 96 | def __iter__(self): 97 | # deterministically shuffle based on epoch 98 | g = torch.Generator() 99 | g.manual_seed(self.epoch) 100 | 101 | indices = [] 102 | for i, size in enumerate(self.group_sizes): 103 | if size > 0: 104 | indice = np.where(self.flag == i)[0] 105 | assert len(indice) == size 106 | # add .numpy() to avoid bug when selecting indice in parrots. 107 | # TODO: check whether torch.randperm() can be replaced by 108 | # numpy.random.permutation(). 109 | indice = indice[list( 110 | torch.randperm(int(size), generator=g).numpy())].tolist() 111 | extra = int( 112 | math.ceil( 113 | size * 1.0 / self.samples_per_gpu / self.num_replicas) 114 | ) * self.samples_per_gpu * self.num_replicas - len(indice) 115 | # pad indice 116 | tmp = indice.copy() 117 | for _ in range(extra // size): 118 | indice.extend(tmp) 119 | indice.extend(tmp[:extra % size]) 120 | indices.extend(indice) 121 | 122 | assert len(indices) == self.total_size 123 | 124 | indices = [ 125 | indices[j] for i in list( 126 | torch.randperm( 127 | len(indices) // self.samples_per_gpu, generator=g)) 128 | for j in range(i * self.samples_per_gpu, (i + 1) * 129 | self.samples_per_gpu) 130 | ] 131 | 132 | # subsample 133 | offset = self.num_samples * self.rank 134 | indices = indices[offset:offset + self.num_samples] 135 | assert len(indices) == self.num_samples 136 | 137 | return iter(indices) 138 | 139 | def __len__(self): 140 | return self.num_samples 141 | 142 | def set_epoch(self, epoch): 143 | self.epoch = epoch 144 | -------------------------------------------------------------------------------- /radet/core/evaluation/class_names.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def wider_face_classes(): 5 | return ['face'] 6 | 7 | 8 | def voc_classes(): 9 | return [ 10 | 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 11 | 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 12 | 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' 13 | ] 14 | 15 | 16 | def imagenet_det_classes(): 17 | return [ 18 | 'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo', 19 | 'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam', 20 | 'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap', 21 | 'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder', 22 | 'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito', 23 | 'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle', 24 | 'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker', 25 | 'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew', 26 | 'cream', 
'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper', 27 | 'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly', 28 | 'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig', 29 | 'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog', 30 | 'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart', 31 | 'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger', 32 | 'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim', 33 | 'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse', 34 | 'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle', 35 | 'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard', 36 | 'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can', 37 | 'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace', 38 | 'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume', 39 | 'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza', 40 | 'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine', 41 | 'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse', 42 | 'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator', 43 | 'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler', 44 | 'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver', 45 | 'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile', 46 | 'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula', 47 | 'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer', 48 | 'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine', 49 | 'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie', 50 | 'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet', 51 | 'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin', 52 | 'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft', 53 | 'whale', 'wine_bottle', 'zebra' 54 | ] 55 | 56 | 57 | def imagenet_vid_classes(): 58 | return [ 59 | 'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car', 60 | 'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda', 61 | 'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit', 62 | 'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle', 63 | 'watercraft', 'whale', 'zebra' 64 | ] 65 | 66 | 67 | def coco_classes(): 68 | return [ 69 | 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 70 | 'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign', 71 | 'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 72 | 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 73 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 74 | 'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard', 75 | 'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork', 76 | 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 77 | 'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 78 | 'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 79 | 'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', 80 | 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 81 | 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush' 82 | ] 83 | 84 | 85 | def cityscapes_classes(): 86 | return [ 87 | 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 88 | 'bicycle' 89 | ] 90 | 91 | 92 | dataset_aliases = { 93 | 'voc': ['voc', 'pascal_voc', 
'voc07', 'voc12'], 94 | 'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'], 95 | 'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'], 96 | 'coco': ['coco', 'mscoco', 'ms_coco'], 97 | 'wider_face': ['WIDERFaceDataset', 'wider_face', 'WIDERFace'], 98 | 'cityscapes': ['cityscapes'] 99 | } 100 | 101 | 102 | def get_classes(dataset): 103 | """Get class names of a dataset.""" 104 | alias2name = {} 105 | for name, aliases in dataset_aliases.items(): 106 | for alias in aliases: 107 | alias2name[alias] = name 108 | 109 | if mmcv.is_str(dataset): 110 | if dataset in alias2name: 111 | labels = eval(alias2name[dataset] + '_classes()') 112 | else: 113 | raise ValueError(f'Unrecognized dataset: {dataset}') 114 | else: 115 | raise TypeError(f'dataset must be a str, but got {type(dataset)}') 116 | return labels 117 | -------------------------------------------------------------------------------- /radet/core/export/pytorch2onnx.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | import torch 6 | from mmcv.runner import load_checkpoint 7 | 8 | 9 | def generate_inputs_and_wrap_model(config_path, checkpoint_path, input_config): 10 | """Prepare sample input and wrap model for ONNX export. 11 | 12 | The ONNX export API accepts only positional args, and all inputs should 13 | be torch.Tensor or corresponding types (such as a tuple of tensors). 14 | So we should call this function before exporting. This function will: 15 | 16 | 1. Generate corresponding inputs which are used to execute the model. 17 | 2. Wrap the model's forward function. 18 | 19 | For example, the MMDet models' forward function has a parameter 20 | ``return_loss:bool``. We want to set it to False, but the export API 21 | supports neither bool inputs nor kwargs, so we have to replace the 22 | forward like: ``model.forward = partial(model.forward, return_loss=False)`` 23 | 24 | Args: 25 | config_path (str): the OpenMMLab config for the model we want to 26 | export to ONNX 27 | checkpoint_path (str): Path to the corresponding checkpoint 28 | input_config (dict): the exact data in this dict depends on the 29 | framework. For MMSeg, we can just declare the input shape, 30 | and generate the dummy data accordingly. However, for MMDet, 31 | we may pass the real img path, or the NMS will return None 32 | as there is no legal bbox. 33 | 34 | Returns: 35 | tuple: (model, tensor_data) wrapped model which can be called by \ 36 | model(*tensor_data) and a list of inputs which are used to execute \ 37 | the model while exporting. 38 | """ 39 | 40 | model = build_model_from_cfg(config_path, checkpoint_path) 41 | one_img, one_meta = preprocess_example_input(input_config) 42 | tensor_data = [one_img] 43 | model.forward = partial( 44 | model.forward, img_metas=[[one_meta]], return_loss=False) 45 | 46 | # PyTorch 1.3 has some bugs in ONNX export, which we work around 47 | # by replacing the affected ops with mmcv's extra symbolics 48 | opset_version = 11 49 | # put the import within the function thus it will not cause import error 50 | # when not using this function 51 | try: 52 | from mmcv.onnx.symbolic import register_extra_symbolics 53 | except ModuleNotFoundError: 54 | raise NotImplementedError('please update mmcv to version>=v1.0.4') 55 | register_extra_symbolics(opset_version) 56 | 57 | return model, tensor_data 58 | 59 | 60 | def build_model_from_cfg(config_path, checkpoint_path): 61 | """Build a model from config and load the given checkpoint. 
62 | 63 | Args: 64 | config_path (str): the OpenMMLab config for the model we want to 65 | export to ONNX 66 | checkpoint_path (str): Path to the corresponding checkpoint 67 | 68 | Returns: 69 | torch.nn.Module: the built model 70 | """ 71 | from radet.models import build_detector 72 | 73 | cfg = mmcv.Config.fromfile(config_path) 74 | # import modules from string list. 75 | if cfg.get('custom_imports', None): 76 | from mmcv.utils import import_modules_from_strings 77 | import_modules_from_strings(**cfg['custom_imports']) 78 | cfg.model.pretrained = None 79 | cfg.data.test.test_mode = True 80 | 81 | # build the model 82 | model = build_detector(cfg.model, train_cfg=None, test_cfg=cfg.test_cfg) 83 | load_checkpoint(model, checkpoint_path, map_location='cpu') 84 | model.cpu().eval() 85 | return model 86 | 87 | 88 | def preprocess_example_input(input_config): 89 | """Prepare an example input image for ``generate_inputs_and_wrap_model``. 90 | 91 | Args: 92 | input_config (dict): customized config describing the example input. 93 | 94 | Returns: 95 | tuple: (one_img, one_meta), tensor of the example input image and \ 96 | meta information for the example input image. 97 | 98 | Examples: 99 | >>> from radet.core.export import preprocess_example_input 100 | >>> input_config = { 101 | >>> 'input_shape': (1,3,224,224), 102 | >>> 'input_path': 'demo/demo.jpg', 103 | >>> 'normalize_cfg': { 104 | >>> 'mean': (123.675, 116.28, 103.53), 105 | >>> 'std': (58.395, 57.12, 57.375) 106 | >>> } 107 | >>> } 108 | >>> one_img, one_meta = preprocess_example_input(input_config) 109 | >>> print(one_img.shape) 110 | torch.Size([1, 3, 224, 224]) 111 | >>> print(one_meta) 112 | {'img_shape': (224, 224, 3), 113 | 'ori_shape': (224, 224, 3), 114 | 'pad_shape': (224, 224, 3), 115 | 'filename': '.png', 116 | 'scale_factor': 1.0, 117 | 'flip': False} 118 | """ 119 | input_path = input_config['input_path'] 120 | input_shape = input_config['input_shape'] 121 | one_img = mmcv.imread(input_path) 122 | one_img = mmcv.imresize(one_img, input_shape[2:][::-1]) 123 | show_img = one_img.copy() 124 | if 'normalize_cfg' in input_config.keys(): 125 | normalize_cfg = input_config['normalize_cfg'] 126 | mean = np.array(normalize_cfg['mean'], dtype=np.float32) 127 | std = np.array(normalize_cfg['std'], dtype=np.float32) 128 | one_img = mmcv.imnormalize(one_img, mean, std) 129 | one_img = one_img.transpose(2, 0, 1) 130 | one_img = torch.from_numpy(one_img).unsqueeze(0).float().requires_grad_( 131 | True) 132 | (_, C, H, W) = input_shape 133 | one_meta = { 134 | 'img_shape': (H, W, C), 135 | 'ori_shape': (H, W, C), 136 | 'pad_shape': (H, W, C), 137 | 'filename': '.png', 138 | 'scale_factor': 1.0, 139 | 'flip': False, 140 | 'show_img': show_img, 141 | } 142 | 143 | return one_img, one_meta 144 | --------------------------------------------------------------------------------
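As a closing note, here is a minimal sketch of how the export helpers in `radet/core/export/pytorch2onnx.py` above might be driven end to end. The config, checkpoint, image, and output paths are placeholders, and wiring the wrapped model into `torch.onnx.export` this way is our assumption, not a script shipped with the repository.

```python
# Hypothetical export driver built on the helpers above; all paths are
# placeholders. generate_inputs_and_wrap_model loads the checkpoint,
# registers the extra ONNX symbolics, and wraps forward(return_loss=False).
import torch

from radet.core.export.pytorch2onnx import generate_inputs_and_wrap_model

input_config = {
    'input_shape': (1, 3, 480, 640),
    'input_path': 'demo/sample.png',  # a real image, so NMS keeps legal boxes
    'normalize_cfg': {
        'mean': (123.675, 116.28, 103.53),
        'std': (58.395, 57.12, 57.375),
    },
}

model, tensor_data = generate_inputs_and_wrap_model(
    'configs/bop/r50_ycbv_pbr.py',
    'checkpoints/radet_ycbv_pbr.pth',
    input_config)

# The wrapped forward takes only tensors, as the ONNX export API requires.
torch.onnx.export(model, tuple(tensor_data), 'radet_ycbv_pbr.onnx',
                  opset_version=11)
```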