├── .gitignore ├── LICENSE ├── README.md ├── dynamic_rcnn ├── basemodels │ ├── c2_model_loading.py │ └── resnet.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── collate_batch.py │ ├── concat_dataset.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── coco │ │ │ ├── __init__.py │ │ │ └── coco_eval.py │ │ └── voc │ │ │ ├── __init__.py │ │ │ └── voc_eval.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ ├── structures │ │ ├── __init__.py │ │ ├── bounding_box.py │ │ ├── boxlist_ops.py │ │ ├── image_list.py │ │ ├── keypoint.py │ │ └── segmentation_mask.py │ ├── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ ├── coco_transforms.py │ │ └── transforms.py │ └── voc.py ├── det_opr │ ├── box_coder.py │ ├── fpn │ │ └── fpn.py │ ├── loss.py │ ├── matcher.py │ ├── poolers.py │ ├── rcnn │ │ ├── cascade_rcnn │ │ │ └── proposal_opr.py │ │ ├── mask_head │ │ │ ├── inference.py │ │ │ └── mask_target_opr.py │ │ ├── post_processing.py │ │ └── proposal_target_opr.py │ ├── rpn │ │ ├── anchor_generator.py │ │ ├── anchor_target_opr.py │ │ ├── fcos │ │ │ ├── fcos_target_opr.py │ │ │ ├── post_processing.py │ │ │ └── scale.py │ │ ├── proposal_opr.py │ │ └── retinanet │ │ │ ├── anchor_target_opr.py │ │ │ └── post_processing.py │ └── sampler.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── checkpoint.py │ ├── comm.py │ └── lr_scheduler.py ├── kernels │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ ├── ops │ │ ├── dcn │ │ │ ├── __init__.py │ │ │ ├── deform_conv_func.py │ │ │ ├── deform_conv_module.py │ │ │ ├── deform_pool_func.py │ │ │ └── deform_pool_module.py │ │ ├── nms.py │ │ ├── roi_align.py │ │ └── roi_pool.py │ └── vision.cpp └── utils │ ├── logger.py │ ├── metric_logger.py │ ├── misc.py │ ├── pyt_utils.py │ ├── registry.py │ └── torch_utils.py ├── models └── zhanghongkai │ └── dynamic_rcnn │ └── coco │ ├── dynamic_rcnn_r101_dcnv2_fpn_mstrain_3x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r101_fpn_1x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r101_fpn_2x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r101_fpn_mstrain_3x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r50_fpn_1x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ └── dynamic_rcnn_r50_fpn_2x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | *.egg-info 7 | *.egg 8 | *log 9 | build/ 10 | dist/ 11 | 12 | # pytorch/python/numpy formats 13 | *.pth 14 | *.pkl 15 | *.npy 16 | 17 | # ipython/jupyter notebooks 18 | *.ipynb 19 | **/.ipynb_checkpoints/ 20 | 21 | # Editor temporaries 22 | *.swn 23 | *.swo 24 | *.swp 25 | *~ 26 | 27 | # Pycharm editor settings 28 | .idea 29 | 30 | # vscode editor settings 31 | .vscode 32 | 33 | # MacOS 
34 | .DS_Store 35 | 36 | # project dirs 37 | /data 38 | /output 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hongkai Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | 6 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset"] 7 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
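# Illustrative usage sketch (not part of the original file; the annotation and
# image paths are hypothetical). Without transforms, COCODataset returns a
# (PIL image, BoxList target, index) triple per sample:
#
#   dataset = COCODataset(
#       ann_file="data/coco/annotations/instances_train2017.json",
#       root="data/coco/train2017",
#       remove_images_without_annotations=True)
#   img, target, idx = dataset[0]
#   print(target.bbox.shape, target.get_field("labels"))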
2 | import torch 3 | import torchvision 4 | 5 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 6 | from dynamic_rcnn.datasets.structures.segmentation_mask import SegmentationMask 7 | from dynamic_rcnn.datasets.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class COCODataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, 42 | transforms=None, return_raw=False): 43 | super(COCODataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | self.return_raw = return_raw 68 | 69 | def __getitem__(self, idx): 70 | img, anno = super(COCODataset, self).__getitem__(idx) 71 | 72 | # filter crowd annotations 73 | # TODO might be better to add an extra field 74 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 75 | 76 | boxes = [obj["bbox"] for obj in anno] 77 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 78 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 79 | 80 | classes = [obj["category_id"] for obj in anno] 81 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 82 | classes = torch.tensor(classes) 83 | target.add_field("labels", classes) 84 | 85 | if anno and "segmentation" in anno[0]: 86 | masks = [obj["segmentation"] for obj in anno] 87 | masks = SegmentationMask(masks, img.size, mode='poly') 88 | target.add_field("masks", masks) 89 | 90 | if anno and "keypoints" in anno[0]: 91 | keypoints = [obj["keypoints"] for obj in anno] 92 | keypoints = PersonKeypoints(keypoints, img.size) 93 | target.add_field("keypoints", keypoints) 94 | 95 | target = target.clip_to_image(remove_empty=True) 96 | 97 | if self._transforms is not None: 98 | trans_img, trans_target = self._transforms(img, target) 99 | if self.return_raw: 100 | 
return img, target, trans_img, trans_target, idx 101 | else: 102 | return trans_img, trans_target, idx 103 | 104 | return img, target, idx 105 | 106 | def get_img_info(self, index): 107 | img_id = self.id_to_img_map[index] 108 | img_data = self.coco.imgs[img_id] 109 | return img_data 110 | 111 | class_names = [ 112 | 'background', 'person', 'bicycle', 'car', 'motorcycle', 113 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 114 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 115 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 116 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 117 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 118 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 119 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 120 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 121 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 122 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 123 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 124 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 125 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 126 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 127 | 'hair drier', 'toothbrush'] 128 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from dynamic_rcnn.datasets.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0, return_raw=False): 13 | self.size_divisible = size_divisible 14 | self.return_raw = return_raw 15 | 16 | def __call__(self, batch): 17 | transposed_batch = list(zip(*batch)) 18 | if self.return_raw: 19 | ori_images = transposed_batch[0] 20 | ori_targets = transposed_batch[1] 21 | images = to_image_list( 22 | transposed_batch[2], self.size_divisible) 23 | targets = transposed_batch[3] 24 | img_ids = transposed_batch[4] 25 | return ori_images, ori_targets, images, targets, img_ids 26 | else: 27 | images = to_image_list(transposed_batch[0], self.size_divisible) 28 | targets = transposed_batch[1] 29 | img_ids = transposed_batch[2] 30 | return images, targets, img_ids 31 | 32 | 33 | class BBoxAugCollator(object): 34 | """ 35 | From a list of samples from the dataset, 36 | returns the images and targets. 37 | Images should be converted to batched images in `im_detect_bbox_aug` 38 | """ 39 | 40 | def __call__(self, batch): 41 | return list(zip(*batch)) 42 | 43 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
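# Worked example (illustrative, not part of the original file): with two
# concatenated datasets of sizes 100 and 150, cumulative_sizes == [100, 250],
# so a global index of 120 falls in the second dataset at local index 20:
#
#   import bisect
#   cumulative_sizes = [100, 250]
#   idx = 120
#   dataset_idx = bisect.bisect_right(cumulative_sizes, idx)        # -> 1
#   sample_idx = idx - cumulative_sizes[dataset_idx - 1]            # -> 20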
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from dynamic_rcnn import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, logger, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | logger: logger. 15 | **kwargs: other args. 16 | Returns: 17 | evaluation result 18 | """ 19 | args = dict(dataset=dataset, predictions=predictions, 20 | output_folder=output_folder, logger=logger, **kwargs) 21 | if isinstance(dataset, datasets.COCODataset): 22 | return coco_evaluation(**args) 23 | elif isinstance(dataset, datasets.PascalVOCDataset): 24 | return voc_evaluation(**args) 25 | else: 26 | dataset_name = dataset.__class__.__name__ 27 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 28 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | logger, 9 | box_only, 10 | iou_types, 11 | expected_results, 12 | expected_results_sigma_tol, 13 | ): 14 | return do_coco_evaluation( 15 | dataset=dataset, 16 | predictions=predictions, 17 | box_only=box_only, 18 | output_folder=output_folder, 19 | logger=logger, 20 | iou_types=iou_types, 21 | expected_results=expected_results, 22 | expected_results_sigma_tol=expected_results_sigma_tol, 23 | ) 24 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc_eval import do_voc_evaluation 2 | 3 | 4 | def voc_evaluation(dataset, predictions, output_folder, box_only, logger, **_): 5 | if box_only: 6 | logger.warning("voc evaluation doesn't support box_only, ignored.") 7 | logger.info("performing voc evaluation, ignored iou_types.") 8 | return do_voc_evaluation( 9 | dataset=dataset, 10 | predictions=predictions, 11 | output_folder=output_folder, 12 | logger=logger, 13 | ) 14 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
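# Illustrative sketch (not part of the original file): _quantize buckets each
# aspect ratio (height / width) against the given bins with bisect, so with a
# single bin at 1.0 landscape images land in group 0 and portrait images in
# group 1, and a mini-batch never mixes the two orientations:
#
#   import bisect
#   bins = [1.0]
#   ratios = [0.75, 1.33, 1.0]
#   groups = [bisect.bisect_right(bins, r) for r in ratios]   # -> [0, 1, 1]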
2 | import itertools 3 | import bisect 4 | import copy 5 | import torch 6 | from torch.utils.data.sampler import BatchSampler 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | def _quantize(x, bins): 11 | bins = copy.copy(bins) 12 | bins = sorted(bins) 13 | quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) 14 | return quantized 15 | 16 | 17 | def _compute_aspect_ratios(dataset): 18 | aspect_ratios = [] 19 | for i in range(len(dataset)): 20 | img_info = dataset.get_img_info(i) 21 | aspect_ratio = float(img_info["height"]) / float(img_info["width"]) 22 | aspect_ratios.append(aspect_ratio) 23 | return aspect_ratios 24 | 25 | 26 | class GroupedBatchSampler(BatchSampler): 27 | """ 28 | Wraps another sampler to yield a mini-batch of indices. 29 | It enforces that elements from the same group should appear in groups of batch_size. 30 | It also tries to provide mini-batches which follows an ordering which is 31 | as close as possible to the ordering from the original sampler. 32 | 33 | Arguments: 34 | sampler (Sampler): Base sampler. 35 | batch_size (int): Size of mini-batch. 36 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 37 | size is less than ``batch_size`` 38 | 39 | """ 40 | 41 | def __init__( 42 | self, sampler, dataset, aspect_grouping, batch_size, drop_uneven=False): 43 | aspect_ratios = _compute_aspect_ratios(dataset) 44 | group_ids = _quantize(aspect_ratios, aspect_grouping) 45 | if not isinstance(sampler, Sampler): 46 | raise ValueError( 47 | "sampler should be an instance of " 48 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 49 | ) 50 | self.sampler = sampler 51 | self.group_ids = torch.as_tensor(group_ids) 52 | assert self.group_ids.dim() == 1 53 | self.batch_size = batch_size 54 | self.drop_uneven = drop_uneven 55 | 56 | self.groups = torch.unique(self.group_ids).sort(0)[0] 57 | 58 | self._can_reuse_batches = False 59 | 60 | def _prepare_batches(self): 61 | dataset_size = len(self.group_ids) 62 | # get the sampled indices from the sampler 63 | sampled_ids = torch.as_tensor(list(self.sampler)) 64 | # potentially not all elements of the dataset were sampled 65 | # by the sampler (e.g., DistributedSampler). 66 | # construct a tensor which contains -1 if the element was 67 | # not sampled, and a non-negative number indicating the 68 | # order where the element was sampled. 69 | # for example. 
if sampled_ids = [3, 1] and dataset_size = 5, 70 | # the order is [-1, 1, -1, 0, -1] 71 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 72 | order[sampled_ids] = torch.arange(len(sampled_ids)) 73 | 74 | # get a mask with the elements that were sampled 75 | mask = order >= 0 76 | 77 | # find the elements that belong to each individual cluster 78 | clusters = [(self.group_ids == i) & mask for i in self.groups] 79 | # get relative order of the elements inside each cluster 80 | # that follows the order from the sampler 81 | relative_order = [order[cluster] for cluster in clusters] 82 | # with the relative order, find the absolute order in the 83 | # sampled space 84 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 85 | # permute each cluster so that they follow the order from 86 | # the sampler 87 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 88 | 89 | # splits each cluster in batch_size, and merge as a list of tensors 90 | splits = [c.split(self.batch_size) for c in permuted_clusters] 91 | merged = tuple(itertools.chain.from_iterable(splits)) 92 | 93 | # now each batch internally has the right order, but 94 | # they are grouped by clusters. Find the permutation between 95 | # different batches that brings them as close as possible to 96 | # the order that we have in the sampler. For that, we will consider the 97 | # ordering as coming from the first element of each batch, and sort 98 | # correspondingly 99 | first_element_of_batch = [t[0].item() for t in merged] 100 | # get and inverse mapping from sampled indices and the position where 101 | # they occur (as returned by the sampler) 102 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 103 | # from the first element in each batch, get a relative ordering 104 | first_index_of_batch = torch.as_tensor( 105 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 106 | ) 107 | 108 | # permute the batches so that they approximately follow the order 109 | # from the sampler 110 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 111 | # finally, permute the batches 112 | batches = [merged[i].tolist() for i in permutation_order] 113 | 114 | if self.drop_uneven: 115 | kept = [] 116 | for batch in batches: 117 | if len(batch) == self.batch_size: 118 | kept.append(batch) 119 | batches = kept 120 | return batches 121 | 122 | def __iter__(self): 123 | if self._can_reuse_batches: 124 | batches = self._batches 125 | self._can_reuse_batches = False 126 | else: 127 | batches = self._prepare_batches() 128 | self._batches = batches 129 | return iter(batches) 130 | 131 | def __len__(self): 132 | if not hasattr(self, "_batches"): 133 | self._batches = self._prepare_batches() 134 | self._can_reuse_batches = True 135 | return len(self._batches) 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
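# Usage sketch (illustrative, not part of the original file): wrapping a plain
# BatchSampler so that iteration-based training always yields exactly
# num_iterations batches, re-cycling the underlying sampler as needed:
#
#   from torch.utils.data.sampler import RandomSampler, BatchSampler
#   base = BatchSampler(RandomSampler(range(10)), batch_size=4, drop_last=False)
#   sampler = IterationBasedBatchSampler(base, num_iterations=100)
#   assert len(sampler) == 100   # 100 batches regardless of dataset size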
2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkzhang95/DynamicRCNN/fdfca3d4567270c606a52822b88b0ddd802802da/dynamic_rcnn/datasets/structures/__init__.py -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/structures/boxlist_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .bounding_box import BoxList 5 | 6 | from dynamic_rcnn.kernels.ops.nms import nms as _box_nms 7 | 8 | 9 | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | 14 | Arguments: 15 | boxlist(BoxList) 16 | nms_thresh (float) 17 | max_proposals (int): if > 0, then only the top max_proposals are kept 18 | after non-maximum suppression 19 | score_field (str) 20 | """ 21 | if nms_thresh <= 0: 22 | return boxlist 23 | mode = boxlist.mode 24 | boxlist = boxlist.convert("xyxy") 25 | boxes = boxlist.bbox 26 | score = boxlist.get_field(score_field) 27 | keep = _box_nms(boxes, score, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist.convert(mode) 32 | 33 | 34 | def remove_small_boxes(boxlist, min_size): 35 | """ 36 | Only keep boxes with both sides >= min_size 37 | 38 | Arguments: 39 | boxlist (Boxlist) 40 | min_size (int) 41 | """ 42 | # TODO maybe add an API for querying the ws / hs 43 | xywh_boxes = boxlist.convert("xywh").bbox 44 | _, _, ws, hs = xywh_boxes.unbind(dim=1) 45 | keep = ( 46 | (ws >= min_size) & (hs >= min_size) 47 | ).nonzero().squeeze(1) 48 | return boxlist[keep] 49 | 50 | 51 | # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py 52 | # with slight modifications 53 | def boxlist_iou(boxlist1, boxlist2): 54 | """Compute the intersection over union of two set of boxes. 55 | The box order must be (xmin, ymin, xmax, ymax). 56 | 57 | Arguments: 58 | box1: (BoxList) bounding boxes, sized [N,4]. 59 | box2: (BoxList) bounding boxes, sized [M,4]. 60 | 61 | Returns: 62 | (tensor) iou, sized [N,M]. 
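        Worked example (illustrative, using the TO_REMOVE = 1 convention
        below): for box1 = [0, 0, 10, 10] and box2 = [5, 5, 15, 15], each box
        has area 11 * 11 = 121 and the intersection is 6 * 6 = 36, so
        iou = 36 / (121 + 121 - 36) ≈ 0.17.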
63 | 64 | Reference: 65 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 66 | """ 67 | if boxlist1.size != boxlist2.size: 68 | raise RuntimeError( 69 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 70 | boxlist1 = boxlist1.convert("xyxy") 71 | boxlist2 = boxlist2.convert("xyxy") 72 | N = len(boxlist1) 73 | M = len(boxlist2) 74 | 75 | area1 = boxlist1.area() 76 | area2 = boxlist2.area() 77 | 78 | box1, box2 = boxlist1.bbox, boxlist2.bbox 79 | 80 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 81 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 82 | 83 | TO_REMOVE = 1 84 | 85 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 86 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 87 | 88 | iou = inter / (area1[:, None] + area2 - inter) 89 | return iou 90 | 91 | 92 | # TODO redundant, remove 93 | def _cat(tensors, dim=0): 94 | """ 95 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 96 | """ 97 | assert isinstance(tensors, (list, tuple)) 98 | if len(tensors) == 1: 99 | return tensors[0] 100 | return torch.cat(tensors, dim) 101 | 102 | 103 | def cat_boxlist(bboxes): 104 | """ 105 | Concatenates a list of BoxList (having the same image size) into a 106 | single BoxList 107 | 108 | Arguments: 109 | bboxes (list[BoxList]) 110 | """ 111 | assert isinstance(bboxes, (list, tuple)) 112 | assert all(isinstance(bbox, BoxList) for bbox in bboxes) 113 | 114 | size = bboxes[0].size 115 | assert all(bbox.size == size for bbox in bboxes) 116 | 117 | mode = bboxes[0].mode 118 | assert all(bbox.mode == mode for bbox in bboxes) 119 | 120 | fields = set(bboxes[0].fields()) 121 | assert all(set(bbox.fields()) == fields for bbox in bboxes) 122 | 123 | cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) 124 | 125 | for field in fields: 126 | data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) 127 | cat_boxes.add_field(field, data) 128 | 129 | return cat_boxes 130 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | if tensors.dim() == 3: 45 | tensors = tensors[None] 46 | assert tensors.dim() == 4 47 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 48 | return ImageList(tensors, image_sizes) 49 | elif isinstance(tensors, (tuple, list)): 50 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 51 | 52 | # TODO Ideally, just remove this and let me model handle arbitrary 53 | # input sizs 54 | if size_divisible > 0: 55 | import math 56 | 57 | stride = size_divisible 58 | max_size = list(max_size) 59 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 60 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 61 | max_size = tuple(max_size) 62 | 63 | batch_shape = (len(tensors),) + max_size 64 | batched_imgs = tensors[0].new(*batch_shape).zero_() 65 | for img, pad_img in zip(tensors, batched_imgs): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | 68 | image_sizes = [im.shape[-2:] for im in tensors] 69 | 70 | return ImageList(batched_imgs, image_sizes) 71 | else: 72 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 73 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . 
import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_horizontal_prob = cfg.INPUT.HORIZONTAL_FLIP_PROB_TRAIN 10 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 11 | brightness = cfg.INPUT.BRIGHTNESS 12 | contrast = cfg.INPUT.CONTRAST 13 | saturation = cfg.INPUT.SATURATION 14 | hue = cfg.INPUT.HUE 15 | else: 16 | min_size = cfg.INPUT.MIN_SIZE_TEST 17 | max_size = cfg.INPUT.MAX_SIZE_TEST 18 | flip_horizontal_prob = 0.0 19 | flip_vertical_prob = 0.0 20 | brightness = 0.0 21 | contrast = 0.0 22 | saturation = 0.0 23 | hue = 0.0 24 | 25 | to_bgr255 = cfg.INPUT.TO_BGR255 26 | normalize_transform = T.Normalize( 27 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 28 | ) 29 | color_jitter = T.ColorJitter( 30 | brightness=brightness, 31 | contrast=contrast, 32 | saturation=saturation, 33 | hue=hue, 34 | ) 35 | 36 | transform = T.Compose( 37 | [ 38 | color_jitter, 39 | T.Resize(min_size, max_size), 40 | T.RandomHorizontalFlip(flip_horizontal_prob), 41 | T.RandomVerticalFlip(flip_vertical_prob), 42 | T.ToTensor(), 43 | normalize_transform, 44 | ] 45 | ) 46 | return transform 47 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/coco_transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_coco_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 10 | else: 11 | min_size = cfg.INPUT.MIN_SIZE_TEST 12 | max_size = cfg.INPUT.MAX_SIZE_TEST 13 | flip_prob = 0 14 | 15 | to_bgr255 = cfg.INPUT.TO_BGR255 16 | normalize_transform = T.Normalize( 17 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, 18 | to_bgr255=to_bgr255) 19 | 20 | transform = T.Compose([ 21 | T.Resize(min_size, max_size), 22 | T.RandomHorizontalFlip(flip_prob), 23 | T.ToTensor(), 24 | normalize_transform, ]) 25 | return transform 26 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
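# Usage sketch (illustrative, not part of the original file; the mean/std
# values are only examples): these transforms act on an (image, target) pair,
# where target is a BoxList that is resized and flipped together with the
# PIL image:
#
#   transform = Compose([
#       Resize(min_size=800, max_size=1333),
#       RandomHorizontalFlip(prob=0.5),
#       ToTensor(),
#       Normalize(mean=[102.98, 115.95, 122.77], std=[1., 1., 1.],
#                 to_bgr255=True),
#   ])
#   image, target = transform(image, target)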
2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | if not isinstance(min_size, (list, tuple)): 30 | min_size = (min_size,) 31 | self.min_size = min_size 32 | self.max_size = max_size 33 | 34 | # modified from torchvision to add support for max size 35 | def get_size(self, image_size): 36 | w, h = image_size 37 | size = random.choice(self.min_size) 38 | max_size = self.max_size 39 | if max_size is not None: 40 | min_original_size = float(min((w, h))) 41 | max_original_size = float(max((w, h))) 42 | if max_original_size / min_original_size * size > max_size: 43 | size = int(round(max_size * min_original_size / max_original_size)) 44 | 45 | if (w <= h and w == size) or (h <= w and h == size): 46 | return (h, w) 47 | 48 | if w < h: 49 | ow = size 50 | oh = int(size * h / w) 51 | else: 52 | oh = size 53 | ow = int(size * w / h) 54 | 55 | return (oh, ow) 56 | 57 | def __call__(self, image, target=None): 58 | size = self.get_size(image.size) 59 | image = F.resize(image, size) 60 | if target is None: 61 | return image 62 | target = target.resize(image.size) 63 | return image, target 64 | 65 | 66 | class RandomHorizontalFlip(object): 67 | def __init__(self, prob=0.5): 68 | self.prob = prob 69 | 70 | def __call__(self, image, target): 71 | if random.random() < self.prob: 72 | image = F.hflip(image) 73 | target = target.transpose(0) 74 | return image, target 75 | 76 | class RandomVerticalFlip(object): 77 | def __init__(self, prob=0.5): 78 | self.prob = prob 79 | 80 | def __call__(self, image, target): 81 | if random.random() < self.prob: 82 | image = F.vflip(image) 83 | target = target.transpose(1) 84 | return image, target 85 | 86 | class ColorJitter(object): 87 | def __init__(self, 88 | brightness=None, 89 | contrast=None, 90 | saturation=None, 91 | hue=None, 92 | ): 93 | self.color_jitter = torchvision.transforms.ColorJitter( 94 | brightness=brightness, 95 | contrast=contrast, 96 | saturation=saturation, 97 | hue=hue,) 98 | 99 | def __call__(self, image, target): 100 | image = self.color_jitter(image) 101 | return image, target 102 | 103 | 104 | class ToTensor(object): 105 | def __call__(self, image, target): 106 | return F.to_tensor(image), target 107 | 108 | 109 | class Normalize(object): 110 | def __init__(self, mean, std, to_bgr255=True): 111 | self.mean = mean 112 | self.std = std 113 | self.to_bgr255 = to_bgr255 114 | 115 | def __call__(self, image, target=None): 116 | if self.to_bgr255: 117 | image = image[[2, 1, 0]] * 255 118 | image = F.normalize(image, mean=self.mean, std=self.std) 119 | if target is None: 120 | return image 121 | return image, target 122 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if 
sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | "train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | self.categories = dict(zip(range(len(cls)), cls)) 61 | 62 | def __getitem__(self, index): 63 | img_id = self.ids[index] 64 | img = Image.open(self._imgpath % img_id).convert("RGB") 65 | 66 | target = self.get_groundtruth(index) 67 | target = target.clip_to_image(remove_empty=True) 68 | 69 | if self.transforms is not None: 70 | img, target = self.transforms(img, target) 71 | 72 | return img, target, index 73 | 74 | def __len__(self): 75 | return len(self.ids) 76 | 77 | def get_groundtruth(self, index): 78 | img_id = self.ids[index] 79 | anno = ET.parse(self._annopath % img_id).getroot() 80 | anno = self._preprocess_annotation(anno) 81 | 82 | height, width = anno["im_info"] 83 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 84 | target.add_field("labels", anno["labels"]) 85 | target.add_field("difficult", anno["difficult"]) 86 | return target 87 | 88 | def _preprocess_annotation(self, target): 89 | boxes = [] 90 | gt_classes = [] 91 | difficult_boxes = [] 92 | TO_REMOVE = 1 93 | 94 | for obj in target.iter("object"): 95 | difficult = int(obj.find("difficult").text) == 1 96 | if not self.keep_difficult and difficult: 97 | continue 98 | name = obj.find("name").text.lower().strip() 99 | bb = obj.find("bndbox") 100 | # Make pixel indexes 0-based 101 | # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211" 102 | box = [ 103 | bb.find("xmin").text, 104 | bb.find("ymin").text, 105 | bb.find("xmax").text, 106 | bb.find("ymax").text, 107 | ] 108 | bndbox = tuple( 109 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 110 | ) 111 | 112 | boxes.append(bndbox) 113 | gt_classes.append(self.class_to_ind[name]) 114 | difficult_boxes.append(difficult) 115 | 116 | size = target.find("size") 117 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 118 | 119 | res = { 120 | "boxes": torch.tensor(boxes, dtype=torch.float32), 121 | "labels": torch.tensor(gt_classes), 122 | "difficult": torch.tensor(difficult_boxes), 123 | "im_info": im_info, 124 | } 125 | return res 126 | 127 | def get_img_info(self, index): 128 | img_id = self.ids[index] 129 | anno = ET.parse(self._annopath % 
img_id).getroot() 130 | size = anno.find("size") 131 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 132 | return {"height": im_info[0], "width": im_info[1]} 133 | 134 | def map_class_id_to_class_name(self, class_id): 135 | return PascalVOCDataset.CLASSES[class_id] 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 
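        Note (illustrative addition, matching the code below): decoding inverts
        the standard Faster R-CNN parameterization. With reference box width
        w_a, height h_a, center (x_a, y_a) and weighted deltas
        (dx, dy, dw, dh) = rel_codes / weights:
            x = dx * w_a + x_a,    y = dy * h_a + y_a,
            w = w_a * exp(dw),     h = h_a * exp(dh),
        where dw and dh are clamped to bbox_xform_clip before the exp.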
60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/fpn/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | from collections import OrderedDict 6 | from dynamic_rcnn.utils.misc import conv_with_kaiming_uniform 7 | 8 | 9 | class FPN(nn.Module): 10 | """ 11 | Module that adds FPN on top of a list of feature maps. 12 | The feature maps are currently supposed to be in increasing depth 13 | order, and must be consecutive 14 | """ 15 | 16 | def __init__( 17 | self, in_channels_list, out_channels, conv_block, top_blocks=None 18 | ): 19 | """ 20 | Arguments: 21 | in_channels_list (list[int]): number of channels for each feature map that 22 | will be fed 23 | out_channels (int): number of channels of the FPN representation 24 | top_blocks (nn.Module or None): if provided, an extra operation will 25 | be performed on the output of the last (smallest resolution) 26 | FPN output, and the result will extend the result list 27 | """ 28 | super(FPN, self).__init__() 29 | self.inner_blocks = [] 30 | self.layer_blocks = [] 31 | for idx, in_channels in enumerate(in_channels_list, 1): 32 | inner_block = "fpn_inner{}".format(idx) 33 | layer_block = "fpn_layer{}".format(idx) 34 | 35 | if in_channels == 0: 36 | continue 37 | inner_block_module = conv_block(in_channels, out_channels, 1) 38 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 39 | self.add_module(inner_block, inner_block_module) 40 | self.add_module(layer_block, layer_block_module) 41 | self.inner_blocks.append(inner_block) 42 | self.layer_blocks.append(layer_block) 43 | self.top_blocks = top_blocks 44 | 45 | def forward(self, x): 46 | """ 47 | Arguments: 48 | x (list[Tensor]): feature maps for each feature level. 49 | Returns: 50 | results (tuple[Tensor]): feature maps after FPN layers. 51 | They are ordered from highest resolution first. 
52 | """ 53 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 54 | results = [] 55 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 56 | for feature, inner_block, layer_block in zip( 57 | x[:-1][::-1], self.inner_blocks[:-1][::-1], 58 | self.layer_blocks[:-1][::-1] 59 | ): 60 | if not inner_block: 61 | continue 62 | inner_top_down = F.interpolate(last_inner, scale_factor=2, 63 | mode="nearest") 64 | inner_lateral = getattr(self, inner_block)(feature) 65 | # TODO use size instead of scale to make it robust to different sizes 66 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 67 | # mode='bilinear', align_corners=False) 68 | last_inner = inner_lateral + inner_top_down 69 | results.insert(0, getattr(self, layer_block)(last_inner)) 70 | 71 | if isinstance(self.top_blocks, LastLevelP6P7): 72 | last_results = self.top_blocks(x[-1], results[-1]) 73 | results.extend(last_results) 74 | elif isinstance(self.top_blocks, LastLevelMaxPool): 75 | last_results = self.top_blocks(results[-1]) 76 | results.extend(last_results) 77 | 78 | return tuple(results) 79 | 80 | 81 | class LastLevelMaxPool(nn.Module): 82 | def forward(self, x): 83 | return [F.max_pool2d(x, 1, 2, 0)] 84 | 85 | 86 | class LastLevelP6P7(nn.Module): 87 | """ 88 | This module is used in RetinaNet to generate extra layers, P6 and P7. 89 | """ 90 | 91 | def __init__(self, in_channels, out_channels): 92 | super(LastLevelP6P7, self).__init__() 93 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 94 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 95 | for module in [self.p6, self.p7]: 96 | nn.init.kaiming_uniform_(module.weight, a=1) 97 | nn.init.constant_(module.bias, 0) 98 | self.use_P5 = in_channels == out_channels 99 | 100 | def forward(self, c5, p5): 101 | x = p5 if self.use_P5 else c5 102 | p6 = self.p6(x) 103 | p7 = self.p7(F.relu(p6)) 104 | return [p6, p7] 105 | 106 | 107 | def build_resnet_fpn_backbone(body, cfg): 108 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 109 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 110 | fpn = FPN( 111 | in_channels_list=[ 112 | in_channels_stage2, 113 | in_channels_stage2 * 2, 114 | in_channels_stage2 * 4, 115 | in_channels_stage2 * 8, 116 | ], 117 | out_channels=out_channels, 118 | conv_block=conv_with_kaiming_uniform(use_relu=cfg.MODEL.FPN.USE_RELU), 119 | top_blocks=LastLevelMaxPool(), 120 | ) 121 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 122 | model.out_channels = out_channels 123 | return model 124 | 125 | 126 | def build_resnet_fpn_p3p7_backbone(body, cfg): 127 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 128 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 129 | in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 130 | else out_channels 131 | fpn = FPN( 132 | in_channels_list=[ 133 | 0, 134 | in_channels_stage2 * 2, 135 | in_channels_stage2 * 4, 136 | in_channels_stage2 * 8, 137 | ], 138 | out_channels=out_channels, 139 | conv_block=conv_with_kaiming_uniform(use_relu=cfg.MODEL.FPN.USE_RELU), 140 | top_blocks=LastLevelP6P7(in_channels_p6p7, out_channels), 141 | ) 142 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 143 | model.out_channels = out_channels 144 | return model 145 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | 7 | from dynamic_rcnn import _C 8 | 9 | 10 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 11 | """ 12 | very similar to the smooth_l1_loss from pytorch, but with 13 | the extra beta parameter 14 | """ 15 | n = torch.abs(input - target) 16 | cond = n < beta 17 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 18 | if size_average: 19 | return loss.mean() 20 | return loss.sum() 21 | 22 | 23 | # TODO: Use JIT to replace CUDA implementation in the future. 24 | class _SigmoidFocalLoss(Function): 25 | @staticmethod 26 | def forward(ctx, logits, targets, gamma, alpha): 27 | ctx.save_for_backward(logits, targets) 28 | num_classes = logits.shape[1] 29 | ctx.num_classes = num_classes 30 | ctx.gamma = gamma 31 | ctx.alpha = alpha 32 | 33 | losses = _C.sigmoid_focalloss_forward( 34 | logits, targets, num_classes, gamma, alpha 35 | ) 36 | return losses 37 | 38 | @staticmethod 39 | @once_differentiable 40 | def backward(ctx, d_loss): 41 | logits, targets = ctx.saved_tensors 42 | num_classes = ctx.num_classes 43 | gamma = ctx.gamma 44 | alpha = ctx.alpha 45 | d_loss = d_loss.contiguous() 46 | d_logits = _C.sigmoid_focalloss_backward( 47 | logits, targets, d_loss, num_classes, gamma, alpha 48 | ) 49 | return d_logits, None, None, None, None 50 | 51 | 52 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 53 | 54 | 55 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 56 | num_classes = logits.shape[1] 57 | gamma = gamma[0] 58 | alpha = alpha[0] 59 | dtype = targets.dtype 60 | device = targets.device 61 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 62 | 63 | t = targets.unsqueeze(1) 64 | p = torch.sigmoid(logits) 65 | term1 = (1 - p) ** gamma * torch.log(p) 66 | term2 = p ** gamma * torch.log(1 - p) 67 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 68 | 69 | 70 | class SigmoidFocalLoss(nn.Module): 71 | def __init__(self, gamma, alpha): 72 | super(SigmoidFocalLoss, self).__init__() 73 | self.gamma = gamma 74 | self.alpha = alpha 75 | 76 | def forward(self, logits, targets): 77 | device = logits.device 78 | if logits.is_cuda: 79 | loss_func = sigmoid_focal_loss_cuda 80 | else: 81 | loss_func = sigmoid_focal_loss_cpu 82 | 83 | loss = loss_func(logits, targets, self.gamma, self.alpha) 84 | return loss.sum() 85 | 86 | def __repr__(self): 87 | tmpstr = self.__class__.__name__ + "(" 88 | tmpstr += "gamma=" + str(self.gamma) 89 | tmpstr += ", alpha=" + str(self.alpha) 90 | tmpstr += ")" 91 | return tmpstr 92 | 93 | 94 | class IOULoss(nn.Module): 95 | def forward(self, pred, target, weight=None, loc_loss_type='log_iou'): 96 | pred_left = pred[:, 0] 97 | pred_top = pred[:, 1] 98 | pred_right = pred[:, 2] 99 | pred_bottom = pred[:, 3] 100 | 101 | target_left = target[:, 0] 102 | target_top = target[:, 1] 103 | target_right = target[:, 2] 104 | target_bottom = target[:, 3] 105 | 106 | target_area = (target_left + target_right) * \ 107 | (target_top + target_bottom) 108 | pred_area = (pred_left + pred_right) * \ 109 | (pred_top + pred_bottom) 110 | 111 | w_intersect = torch.min(pred_left, target_left) + \ 112 | torch.min(pred_right, target_right) 113 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 114 | torch.min(pred_top, target_top) 115 | 116 | area_intersect = 
w_intersect * h_intersect 117 | area_union = target_area + pred_area - area_intersect 118 | 119 | ious = (area_intersect + 1.0) / (area_union + 1.0) 120 | if loc_loss_type == 'log_iou': 121 | losses = -torch.log(ious) 122 | elif loc_loss_type == 'linear_iou': 123 | losses = 1 - ious 124 | else: 125 | raise NotImplementedError 126 | 127 | if weight is not None and weight.sum() > 0: 128 | return (losses * weight).sum() / weight.sum() 129 | else: 130 | assert losses.numel() != 0 131 | return losses.mean() 132 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class Matcher(object): 6 | """ 7 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 8 | element. Each predicted element will have exactly zero or one matches; each 9 | ground-truth element may be assigned to zero or more predicted elements. 10 | 11 | Matching is based on the MxN match_quality_matrix, that characterizes how well 12 | each (ground-truth, predicted)-pair match. For example, if the elements are 13 | boxes, the matrix may contain box IoU overlap values. 14 | 15 | The matcher returns a tensor of size N containing the index of the ground-truth 16 | element m that matches to prediction n. If there is no match, a negative value 17 | is returned. 18 | """ 19 | 20 | BELOW_LOW_THRESHOLD = -1 21 | BETWEEN_THRESHOLDS = -2 22 | 23 | def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False): 24 | """ 25 | Args: 26 | high_threshold (float): quality values greater than or equal to 27 | this value are candidate matches. 28 | low_threshold (float): a lower quality threshold used to stratify 29 | matches into three levels: 30 | 1) matches >= high_threshold 31 | 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 32 | 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) 33 | allow_low_quality_matches (bool): if True, produce additional matches 34 | for predictions that have only low-quality match candidates. See 35 | set_low_quality_matches_ for more details. 36 | """ 37 | assert low_threshold <= high_threshold 38 | self.high_threshold = high_threshold 39 | self.low_threshold = low_threshold 40 | self.allow_low_quality_matches = allow_low_quality_matches 41 | 42 | def __call__(self, match_quality_matrix): 43 | """ 44 | Args: 45 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 46 | pairwise quality between M ground-truth elements and N predicted elements. 47 | 48 | Returns: 49 | matches (Tensor[int64]): an N tensor where N[i] is a matched gt in 50 | [0, M - 1] or a negative value indicating that prediction i could not 51 | be matched. 
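        Example (illustrative): with high_threshold=0.7 and low_threshold=0.3,
        a prediction whose best IoU against any ground truth is 0.8 is matched
        to that ground truth, one at 0.5 gets BETWEEN_THRESHOLDS (-2), and one
        at 0.1 gets BELOW_LOW_THRESHOLD (-1).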
52 | """ 53 | if match_quality_matrix.numel() == 0: 54 | # empty targets or proposals not supported during training 55 | if match_quality_matrix.shape[0] == 0: 56 | raise ValueError( 57 | "No ground-truth boxes available for one of the images " 58 | "during training") 59 | else: 60 | raise ValueError( 61 | "No proposal boxes available for one of the images " 62 | "during training") 63 | 64 | # match_quality_matrix is M (gt) x N (predicted) 65 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 66 | matched_vals, matches = match_quality_matrix.max(dim=0) 67 | if self.allow_low_quality_matches: 68 | all_matches = matches.clone() 69 | 70 | # Assign candidate matches with low quality to negative (unassigned) values 71 | below_low_threshold = matched_vals < self.low_threshold 72 | between_thresholds = (matched_vals >= self.low_threshold) & ( 73 | matched_vals < self.high_threshold 74 | ) 75 | matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD 76 | matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS 77 | 78 | if self.allow_low_quality_matches: 79 | self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) 80 | 81 | return matches 82 | 83 | def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): 84 | """ 85 | Produce additional matches for predictions that have only low-quality matches. 86 | Specifically, for each ground-truth find the set of predictions that have 87 | maximum overlap with it (including ties); for each prediction in that set, if 88 | it is unmatched, then match it to the ground-truth with which it has the highest 89 | quality value. 90 | """ 91 | # For each gt, find the prediction with which it has highest quality 92 | highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) 93 | # Find highest quality match available, even if it is low, including ties 94 | gt_pred_pairs_of_highest_quality = torch.nonzero( 95 | match_quality_matrix == highest_quality_foreach_gt[:, None] 96 | ) 97 | # Example gt_pred_pairs_of_highest_quality: 98 | # tensor([[ 0, 39796], 99 | # [ 1, 32055], 100 | # [ 1, 32070], 101 | # [ 2, 39190], 102 | # [ 2, 40255], 103 | # [ 3, 40390], 104 | # [ 3, 41455], 105 | # [ 4, 45470], 106 | # [ 5, 45325], 107 | # [ 5, 46390]]) 108 | # Each row is a (gt index, prediction index) 109 | # Note how gt items 1, 2, 3, and 5 each have two ties 110 | 111 | pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1] 112 | matches[pred_inds_to_update] = all_matches[pred_inds_to_update] 113 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/poolers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from dynamic_rcnn.kernels.ops.roi_align import ROIAlign 7 | from dynamic_rcnn.utils.torch_utils import cat 8 | 9 | 10 | class LevelMapper(object): 11 | """Determine which FPN level each RoI in a set of RoIs should map to based 12 | on the heuristic in the FPN paper. 
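    Concretely (see __call__ below), an RoI whose area has square root s is
    assigned to level k = floor(canonical_level + log2(s / canonical_scale + eps)),
    clamped to [k_min, k_max]; with the defaults this is floor(4 + log2(s / 224)),
    i.e. Eqn. (1) of the FPN paper.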
13 | """ 14 | 15 | def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): 16 | """ 17 | Arguments: 18 | k_min (int) 19 | k_max (int) 20 | canonical_scale (int) 21 | canonical_level (int) 22 | eps (float) 23 | """ 24 | self.k_min = k_min 25 | self.k_max = k_max 26 | self.s0 = canonical_scale 27 | self.lvl0 = canonical_level 28 | self.eps = eps 29 | 30 | def __call__(self, boxlists): 31 | """ 32 | Arguments: 33 | boxlists (list[BoxList]) 34 | """ 35 | # Compute level ids 36 | s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists])) 37 | 38 | # Eqn.(1) in FPN paper 39 | target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps)) 40 | target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) 41 | return target_lvls.to(torch.int64) - self.k_min 42 | 43 | 44 | class Pooler(nn.Module): 45 | """ 46 | Pooler for Detection with or without FPN. 47 | It currently hard-code ROIAlign in the implementation, 48 | but that can be made more generic later on. 49 | Also, the requirement of passing the scales is not strictly necessary, as they 50 | can be inferred from the size of the feature map / size of original image, 51 | which is available thanks to the BoxList. 52 | """ 53 | 54 | def __init__(self, output_size, scales, sampling_ratio): 55 | """ 56 | Arguments: 57 | output_size (list[tuple[int]] or list[int]): output size for the pooled region 58 | scales (list[float]): scales for each Pooler 59 | sampling_ratio (int): sampling ratio for ROIAlign 60 | """ 61 | super(Pooler, self).__init__() 62 | poolers = [] 63 | for scale in scales: 64 | poolers.append( 65 | ROIAlign( 66 | output_size, spatial_scale=scale, sampling_ratio=sampling_ratio 67 | ) 68 | ) 69 | self.poolers = nn.ModuleList(poolers) 70 | self.output_size = output_size 71 | # get the levels in the feature map by leveraging the fact that the network always 72 | # downsamples by a factor of 2 at each level. 73 | lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() 74 | lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() 75 | self.map_levels = LevelMapper(lvl_min, lvl_max) 76 | 77 | def convert_to_roi_format(self, boxes): 78 | concat_boxes = cat([b.bbox for b in boxes], dim=0) 79 | device, dtype = concat_boxes.device, concat_boxes.dtype 80 | ids = cat( 81 | [ 82 | torch.full((len(b), 1), i, dtype=dtype, device=device) 83 | for i, b in enumerate(boxes) 84 | ], 85 | dim=0, 86 | ) 87 | rois = torch.cat([ids, concat_boxes], dim=1) 88 | return rois 89 | 90 | def forward(self, x, boxes): 91 | """ 92 | Arguments: 93 | x (list[Tensor]): feature maps for each level 94 | boxes (list[BoxList]): boxes to be used to perform the pooling operation. 
95 | Returns: 96 | result (Tensor) 97 | """ 98 | num_levels = len(self.poolers) 99 | rois = self.convert_to_roi_format(boxes) 100 | if num_levels == 1: 101 | return self.poolers[0](x[0], rois) 102 | 103 | levels = self.map_levels(boxes) 104 | 105 | num_rois = len(rois) 106 | num_channels = x[0].shape[1] 107 | output_size = self.output_size[0] 108 | 109 | dtype, device = x[0].dtype, x[0].device 110 | result = torch.zeros( 111 | (num_rois, num_channels, output_size, output_size), 112 | dtype=dtype, 113 | device=device, 114 | ) 115 | for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)): 116 | idx_in_level = torch.nonzero(levels == level).squeeze(1) 117 | rois_per_level = rois[idx_in_level] 118 | result[idx_in_level] = pooler(per_level_feature, rois_per_level).to(dtype) 119 | 120 | return result 121 | 122 | 123 | def make_pooler(cfg, head_name): 124 | resolution = cfg.MODEL[head_name].POOLER_RESOLUTION 125 | scales = cfg.MODEL[head_name].POOLER_SCALES 126 | sampling_ratio = cfg.MODEL[head_name].POOLER_SAMPLING_RATIO 127 | pooler = Pooler( 128 | output_size=(resolution, resolution), 129 | scales=scales, 130 | sampling_ratio=sampling_ratio, 131 | ) 132 | return pooler 133 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/cascade_rcnn/proposal_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 3 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist 4 | 5 | 6 | # TODO: this should be implemented in RPN, but now a little different 7 | def add_gt_proposals(proposals, targets): 8 | """ 9 | Arguments: 10 | proposals: list[BoxList] 11 | targets: list[BoxList] 12 | """ 13 | # Get the device we're operating on 14 | device = proposals[0].bbox.device 15 | 16 | gt_boxes = [target.copy_with_fields([]) for target in targets] 17 | 18 | # later cat of bbox requires all fields to be present for all bbox 19 | # so we need to add a dummy for objectness that's missing 20 | # check whether the proposal has the "objectness" field first 21 | if "objectness" in proposals[0].fields(): 22 | for gt_box in gt_boxes: 23 | gt_box.add_field( 24 | "objectness", torch.ones(len(gt_box), device=device)) 25 | 26 | proposals = [ 27 | cat_boxlist((proposal, gt_box)) 28 | for proposal, gt_box in zip(proposals, gt_boxes) 29 | ] 30 | 31 | return proposals 32 | 33 | 34 | def add_box_regression( 35 | boxes, box_regression, box_coder, cls_agnostic_bbox_reg=False): 36 | if cls_agnostic_bbox_reg: 37 | box_regression = box_regression[:, -4:] 38 | 39 | boxes_per_image = [len(box) for box in boxes] 40 | concat_boxes = torch.cat([a.bbox for a in boxes], dim=0) 41 | proposals = box_coder.decode( 42 | box_regression.view(sum(boxes_per_image), -1), concat_boxes) 43 | proposals = proposals.split(boxes_per_image, dim=0) 44 | 45 | result = [] 46 | for img_id, proposal in enumerate(proposals): 47 | boxlist = BoxList(proposal, boxes[img_id].size, mode="xyxy") 48 | boxlist = boxlist.clip_to_image(remove_empty=False) 49 | result.append(boxlist) 50 | return result 51 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/mask_head/mask_target_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.det_opr.matcher import Matcher 3 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_iou 4 | 5 | 6 | 
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size): 7 | """ 8 | Given segmentation masks and the bounding boxes corresponding 9 | to the location of the masks in the image, this function 10 | crops and resizes the masks in the position defined by the 11 | boxes. This prepares the masks for them to be fed to the 12 | loss computation as the targets. 13 | 14 | Arguments: 15 | segmentation_masks: an instance of SegmentationMask 16 | proposals: an instance of BoxList 17 | """ 18 | masks = [] 19 | M = discretization_size 20 | device = proposals.bbox.device 21 | proposals = proposals.convert("xyxy") 22 | assert segmentation_masks.size == proposals.size, "{}, {}".format( 23 | segmentation_masks, proposals 24 | ) 25 | 26 | # FIXME: CPU computation bottleneck, this should be parallelized 27 | proposals = proposals.bbox.to(torch.device("cpu")) 28 | for segmentation_mask, proposal in zip(segmentation_masks, proposals): 29 | # crop the masks, resize them to the desired resolution and 30 | # then convert them to the tensor representation. 31 | cropped_mask = segmentation_mask.crop(proposal) 32 | scaled_mask = cropped_mask.resize((M, M)) 33 | mask = scaled_mask.get_mask_tensor() 34 | masks.append(mask) 35 | if len(masks) == 0: 36 | return torch.empty(0, dtype=torch.float32, device=device) 37 | return torch.stack(masks, dim=0).to(device, dtype=torch.float32) 38 | 39 | 40 | def mask_target_opr( 41 | proposals, targets, high_threshold, low_threshold, discretization_size): 42 | """ 43 | Generate proposal targets for computing loss. 44 | 45 | Args: 46 | proposals: (list[BoxList]) 47 | targets: (list[BoxList]) 48 | high_threshold: (float) 49 | low_threshold: (float) 50 | discretization_size: (int) 51 | """ 52 | 53 | matcher = Matcher(high_threshold, low_threshold, 54 | allow_low_quality_matches=False) 55 | 56 | # prepare targets 57 | labels = [] 58 | masks = [] 59 | for proposals_per_image, targets_per_image in zip(proposals, targets): 60 | # match targets to proposals 61 | match_quality_matrix = boxlist_iou( 62 | targets_per_image, proposals_per_image) 63 | matched_idxs = matcher(match_quality_matrix) 64 | # Mask RCNN needs "labels" and "masks "fields for creating the targets 65 | target = targets_per_image.copy_with_fields(["labels", "masks"]) 66 | # get the targets corresponding GT for each proposal 67 | # NB: need to clamp the indices because we can have a single 68 | # GT in the image, and matched_idxs can be -2, which goes 69 | # out of bounds 70 | matched_targets = target[matched_idxs.clamp(min=0)] 71 | matched_targets.add_field("matched_idxs", matched_idxs) 72 | 73 | matched_idxs = matched_targets.get_field("matched_idxs") 74 | labels_per_image = matched_targets.get_field("labels") 75 | labels_per_image = labels_per_image.to(dtype=torch.int64) 76 | 77 | # this can probably be removed, but is left here for clarity 78 | # and completeness 79 | neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 80 | labels_per_image[neg_inds] = 0 81 | 82 | # mask scores are only computed on positive samples 83 | positive_inds = torch.nonzero(labels_per_image > 0).squeeze(1) 84 | 85 | segmentation_masks = matched_targets.get_field("masks") 86 | segmentation_masks = segmentation_masks[positive_inds] 87 | 88 | positive_proposals = proposals_per_image[positive_inds] 89 | 90 | masks_per_image = project_masks_on_boxes( 91 | segmentation_masks, positive_proposals, discretization_size 92 | ) 93 | 94 | labels.append(labels_per_image) 95 | masks.append(masks_per_image) 96 | 97 | return labels, masks 
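
# Illustrative usage sketch: in a Mask R-CNN style head, the targets built
# above are typically consumed as
#
#     labels, mask_targets = mask_target_opr(
#         proposals, targets,
#         high_threshold=0.5, low_threshold=0.5, discretization_size=28)
#
# followed by a binary cross-entropy loss between the predicted M x M masks of
# the positive proposals and mask_targets. The 0.5 thresholds and the 28 x 28
# resolution are common Mask R-CNN defaults, used here only for illustration,
# not values taken from this repository's configs.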
98 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/post_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 5 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_nms, cat_boxlist 6 | 7 | 8 | def filter_results( 9 | boxlist, num_classes, score_thresh, nms_thresh, detections_per_img): 10 | # unwrap the boxlist to avoid additional overhead. 11 | # if we had multi-class NMS, we could perform this directly on the boxlist 12 | boxes = boxlist.bbox.reshape(-1, num_classes * 4) 13 | scores = boxlist.get_field("scores").reshape(-1, num_classes) 14 | 15 | device = scores.device 16 | result = [] 17 | # Apply threshold on detection probabilities and apply NMS 18 | # Skip j = 0, because it's the background class 19 | inds_all = scores > score_thresh 20 | for j in range(1, num_classes): 21 | inds = inds_all[:, j].nonzero().squeeze(1) 22 | scores_j = scores[inds, j] 23 | boxes_j = boxes[inds, j * 4: (j + 1) * 4] 24 | boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") 25 | boxlist_for_class.add_field("scores", scores_j) 26 | boxlist_for_class = boxlist_nms(boxlist_for_class, nms_thresh) 27 | num_labels = len(boxlist_for_class) 28 | boxlist_for_class.add_field( 29 | "labels", 30 | torch.full((num_labels,), j, dtype=torch.int64, device=device) 31 | ) 32 | result.append(boxlist_for_class) 33 | 34 | result = cat_boxlist(result) 35 | number_of_detections = len(result) 36 | 37 | # Limit to max_per_image detections **over all classes** 38 | if number_of_detections > detections_per_img > 0: 39 | cls_scores = result.get_field("scores") 40 | image_thresh, _ = torch.kthvalue( 41 | cls_scores.cpu(), number_of_detections - detections_per_img + 1 42 | ) 43 | keep = cls_scores >= image_thresh.item() 44 | keep = torch.nonzero(keep).squeeze(1) 45 | result = result[keep] 46 | return result 47 | 48 | 49 | # TODO: merge into test 50 | def post_processing_opr(boxes, logits, offsets, box_coder, score_thresh=0.05, 51 | nms_thresh=0.5, detections_per_img=100, 52 | cls_agnostic_bbox_reg=False, bbox_aug_enabled=False): 53 | """ 54 | Compute the post-processed boxes and obtain the final results. 
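    In outline: class probabilities are obtained with a softmax over `logits`,
    per-class boxes are decoded from `offsets` with `box_coder` (and repeated
    across classes when `cls_agnostic_bbox_reg` is set), and each image's boxes
    are clipped to the image and passed through `filter_results` (score
    threshold, per-class NMS, and a cap of `detections_per_img` over all
    classes) unless `bbox_aug_enabled` defers that filtering.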
55 | 56 | Args: 57 | boxes: (list[BoxList]) 58 | logits: (tensor) 59 | offsets: (tensor) 60 | box_coder: (BoxCoder) 61 | score_thresh: (float) 62 | nms_thresh: (float) 63 | detections_per_img: (int) 64 | cls_agnostic_bbox_reg: (bool) 65 | 66 | Returns: 67 | results: (list[BoxList]) 68 | """ 69 | 70 | class_prob = F.softmax(logits, -1) 71 | num_classes = class_prob.shape[1] 72 | 73 | image_shapes = [box.size for box in boxes] 74 | boxes_per_image = [len(box) for box in boxes] 75 | concat_boxes = torch.cat([a.bbox for a in boxes], dim=0) 76 | 77 | if cls_agnostic_bbox_reg: 78 | offsets = offsets[:, -4:] 79 | proposals = box_coder.decode( 80 | offsets.view(sum(boxes_per_image), -1), concat_boxes) 81 | if cls_agnostic_bbox_reg: 82 | proposals = proposals.repeat(1, num_classes) 83 | 84 | proposals = proposals.split(boxes_per_image, dim=0) 85 | class_prob = class_prob.split(boxes_per_image, dim=0) 86 | 87 | results = [] 88 | for prob, proposal, image_shape in zip(class_prob, proposals, image_shapes): 89 | # prepare boxlist 90 | proposal = proposal.reshape(-1, 4) 91 | prob = prob.reshape(-1) 92 | boxlist = BoxList(proposal, image_shape, mode="xyxy") 93 | boxlist.add_field("scores", prob) 94 | 95 | # clip tp image 96 | boxlist = boxlist.clip_to_image(remove_empty=False) 97 | 98 | # filter results 99 | if not bbox_aug_enabled: # If bbox aug is enabled, we will do it later 100 | boxlist = filter_results(boxlist, num_classes, score_thresh, 101 | nms_thresh, detections_per_img) 102 | results.append(boxlist) 103 | return results 104 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/proposal_target_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.det_opr.matcher import Matcher 3 | from dynamic_rcnn.det_opr.sampler import BalancedPositiveNegativeSampler 4 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_iou 5 | 6 | 7 | def proposal_target_opr( 8 | proposals, targets, box_coder, high_threshold, low_threshold, 9 | batch_size_per_image, positive_fraction, return_ious=False, 10 | return_sample_id=False, return_raw_proposals=False): 11 | """ 12 | Generate proposal targets for computing loss. 
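    In outline: each proposal is matched to a ground-truth box by
    `Matcher(high_threshold, low_threshold)`, background and ignored proposals
    receive labels 0 and -1, box regression targets are encoded with
    `box_coder`, and a fixed batch of `batch_size_per_image` proposals per
    image is drawn by `BalancedPositiveNegativeSampler` with the given
    `positive_fraction`.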
13 | 14 | Args: 15 | proposals: (list[BoxList]) 16 | targets: (list[BoxList]) 17 | box_coder: (BoxCoder) 18 | high_threshold: (float) 19 | low_threshold: (float) 20 | batch_size_per_image: (int) 21 | positive_fraction: (float) 22 | return_ious: (bool) 23 | """ 24 | 25 | matcher = Matcher(high_threshold, low_threshold, 26 | allow_low_quality_matches=False) 27 | fg_bg_sampler = BalancedPositiveNegativeSampler( 28 | batch_size_per_image, positive_fraction) 29 | 30 | # prepare targets 31 | labels = [] 32 | regression_targets = [] 33 | ious = [] 34 | for proposals_per_image, targets_per_image in zip(proposals, targets): 35 | # match targets to proposals 36 | match_quality_matrix = boxlist_iou( 37 | targets_per_image, proposals_per_image) 38 | matched_idxs = matcher(match_quality_matrix) 39 | # Fast RCNN only need "labels" field for selecting the targets 40 | target = targets_per_image.copy_with_fields("labels") 41 | # get the targets corresponding GT for each proposal 42 | # NB: need to clamp the indices because we can have a single 43 | # GT in the image, and matched_idxs can be -2, which goes 44 | # out of bounds 45 | matched_targets = target[matched_idxs.clamp(min=0)] 46 | matched_ious = match_quality_matrix.t()[ 47 | range(proposals_per_image.bbox.shape[0]), matched_idxs.clamp(min=0)] 48 | matched_targets.add_field("matched_idxs", matched_idxs) 49 | 50 | matched_idxs = matched_targets.get_field("matched_idxs") 51 | labels_per_image = matched_targets.get_field("labels") 52 | labels_per_image = labels_per_image.to(dtype=torch.int64) 53 | 54 | # Label background (below the low threshold) 55 | bg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 56 | labels_per_image[bg_inds] = 0 57 | 58 | # Label ignore proposals (between low and high thresholds) 59 | ignore_inds = matched_idxs == Matcher.BETWEEN_THRESHOLDS 60 | labels_per_image[ignore_inds] = -1 # -1 is ignored by sampler 61 | 62 | # compute regression targets 63 | regression_targets_per_image = box_coder.encode( 64 | matched_targets.bbox, proposals_per_image.bbox 65 | ) 66 | 67 | labels.append(labels_per_image) 68 | regression_targets.append(regression_targets_per_image) 69 | ious.append(matched_ious) 70 | 71 | sampled_pos_inds, sampled_neg_inds = fg_bg_sampler(labels) 72 | proposals = list(proposals) 73 | # add corresponding label and regression_targets information to the bounding boxes 74 | for labels_per_image, regression_targets_per_image, ious_per_image, \ 75 | proposals_per_image in zip(labels, regression_targets, ious, proposals): 76 | proposals_per_image.add_field("labels", labels_per_image) 77 | proposals_per_image.add_field( 78 | "regression_targets", regression_targets_per_image 79 | ) 80 | if return_ious: 81 | proposals_per_image.add_field("ious", ious_per_image) 82 | 83 | if return_sample_id: 84 | sample_id = [] 85 | if return_raw_proposals: 86 | raw_proposals = proposals.copy() 87 | # distributed sampled proposals, that were obtained on all feature maps 88 | # concatenated via the fg_bg_sampler, into individual feature map levels 89 | for img_idx, (pos_inds_img, neg_inds_img) in enumerate( 90 | zip(sampled_pos_inds, sampled_neg_inds) 91 | ): 92 | img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) 93 | proposals_per_image = proposals[img_idx][img_sampled_inds] 94 | proposals[img_idx] = proposals_per_image 95 | 96 | if return_sample_id: 97 | sample_id.append(img_sampled_inds) 98 | 99 | if return_sample_id: 100 | if return_raw_proposals: 101 | return proposals, sample_id, raw_proposals 102 | else: 103 | 
return proposals, sample_id 104 | else: 105 | if return_raw_proposals: 106 | return proposals, raw_proposals 107 | else: 108 | return proposals 109 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/anchor_target_opr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from dynamic_rcnn.det_opr.matcher import Matcher 5 | from dynamic_rcnn.det_opr.sampler import BalancedPositiveNegativeSampler 6 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_iou 7 | 8 | 9 | def anchor_target_opr( 10 | anchors, targets, box_coder, high_threshold, low_threshold, 11 | batch_size_per_image, positive_fraction): 12 | """ 13 | Generate anchor targets for computing loss. 14 | 15 | Args: 16 | anchors: (list[BoxList]) 17 | targets: (list[BoxList]) 18 | box_coder: (BoxCoder) 19 | high_threshold: (float) 20 | low_threshold: (float) 21 | batch_size_per_image: (int) 22 | positive_fraction: (float) 23 | """ 24 | matcher = Matcher( 25 | high_threshold, low_threshold, allow_low_quality_matches=True) 26 | fg_bg_sampler = BalancedPositiveNegativeSampler( 27 | batch_size_per_image, positive_fraction) 28 | 29 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 30 | # prepare targets 31 | labels = [] 32 | regression_targets = [] 33 | for anchors_per_image, targets_per_image in zip(anchors, targets): 34 | # match targets to anchors 35 | match_quality_matrix = boxlist_iou(targets_per_image, anchors_per_image) 36 | matched_idxs = matcher(match_quality_matrix) 37 | targets_per_image = targets_per_image.copy_with_fields([]) 38 | matched_targets = targets_per_image[matched_idxs.clamp(min=0)] 39 | matched_targets.add_field("matched_idxs", matched_idxs) 40 | 41 | matched_idxs = matched_targets.get_field("matched_idxs") 42 | # generate rpn labels 43 | labels_per_image = matched_idxs >= 0 44 | labels_per_image = labels_per_image.to(dtype=torch.float32) 45 | 46 | # Background (negative examples) 47 | bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 48 | labels_per_image[bg_indices] = 0 49 | 50 | # discard anchors that go out of the boundaries of the image 51 | labels_per_image[~anchors_per_image.get_field("visibility")] = -1 52 | 53 | # discard indices that are between thresholds 54 | inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS 55 | labels_per_image[inds_to_discard] = -1 56 | 57 | # compute regression targets 58 | regression_targets_per_image = box_coder.encode( 59 | matched_targets.bbox, anchors_per_image.bbox 60 | ) 61 | 62 | labels.append(labels_per_image) 63 | regression_targets.append(regression_targets_per_image) 64 | 65 | sampled_pos_inds, sampled_neg_inds = fg_bg_sampler(labels) 66 | sampled_pos_inds = torch.nonzero( 67 | torch.cat(sampled_pos_inds, dim=0)).squeeze(1) 68 | sampled_neg_inds = torch.nonzero( 69 | torch.cat(sampled_neg_inds, dim=0)).squeeze(1) 70 | 71 | sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0) 72 | 73 | labels = torch.cat(labels, dim=0) 74 | regression_targets = torch.cat(regression_targets, dim=0) 75 | 76 | return labels, regression_targets, sampled_inds, sampled_pos_inds 77 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/fcos/fcos_target_opr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, 
Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | INF = 100000000 5 | 6 | 7 | def get_sample_region( 8 | gt, strides, num_points_per_level, gt_xs, gt_ys, radius=1): 9 | gt = gt[None].expand(gt_xs.shape[0], gt.shape[0], 4) 10 | center_x = (gt[..., 0] + gt[..., 2]) / 2 11 | center_y = (gt[..., 1] + gt[..., 3]) / 2 12 | center_gt = gt.new_zeros(gt.shape) 13 | # no gt 14 | if center_x[..., 0].sum() == 0: 15 | return gt_xs.new_zeros(gt_xs.shape, dtype=torch.uint8) 16 | start = 0 17 | for level, num_points in enumerate(num_points_per_level): 18 | end = start + num_points 19 | stride = strides[level] * radius 20 | xmin = center_x[start:end] - stride 21 | ymin = center_y[start:end] - stride 22 | xmax = center_x[start:end] + stride 23 | ymax = center_y[start:end] + stride 24 | # limit sample region in gt 25 | center_gt[start:end, :, 0] = torch.where( 26 | xmin > gt[start:end, :, 0], xmin, gt[start:end, :, 0]) 27 | center_gt[start:end, :, 1] = torch.where( 28 | ymin > gt[start:end, :, 1], ymin, gt[start:end, :, 1]) 29 | center_gt[start:end, :, 2] = torch.where( 30 | xmax > gt[start:end, :, 2], gt[start:end, :, 2], xmax) 31 | center_gt[start:end, :, 3] = torch.where( 32 | ymax > gt[start:end, :, 3], gt[start:end, :, 3], ymax) 33 | start = end 34 | left = gt_xs[:, None] - center_gt[..., 0] 35 | right = center_gt[..., 2] - gt_xs[:, None] 36 | top = gt_ys[:, None] - center_gt[..., 1] 37 | bottom = center_gt[..., 3] - gt_ys[:, None] 38 | center_bbox = torch.stack((left, top, right, bottom), -1) 39 | inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0 40 | return inside_gt_bbox_mask 41 | 42 | 43 | def compute_targets_for_locations( 44 | locations, targets, object_sizes_of_interest, center_sample=None): 45 | labels = [] 46 | reg_targets = [] 47 | xs, ys = locations[:, 0], locations[:, 1] 48 | 49 | for im_i in range(len(targets)): 50 | targets_per_im = targets[im_i] 51 | assert targets_per_im.mode == "xyxy" 52 | bboxes = targets_per_im.bbox 53 | labels_per_im = targets_per_im.get_field("labels") 54 | area = targets_per_im.area() 55 | 56 | l = xs[:, None] - bboxes[:, 0][None] 57 | t = ys[:, None] - bboxes[:, 1][None] 58 | r = bboxes[:, 2][None] - xs[:, None] 59 | b = bboxes[:, 3][None] - ys[:, None] 60 | reg_targets_per_im = torch.stack([l, t, r, b], dim=2) 61 | 62 | if center_sample is not None: 63 | fpn_strides = center_sample['fpn_strides'] 64 | pos_radius = center_sample['pos_radius'] 65 | num_points_per_level = center_sample['num_points_per_level'] 66 | is_in_boxes = get_sample_region( 67 | bboxes, fpn_strides, num_points_per_level, xs, ys, 68 | radius=pos_radius) 69 | else: 70 | is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0 71 | 72 | max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0] 73 | # limit the regression range for each location 74 | is_cared_in_the_level = \ 75 | (max_reg_targets_per_im >= object_sizes_of_interest[:, [0]]) & \ 76 | (max_reg_targets_per_im <= object_sizes_of_interest[:, [1]]) 77 | 78 | locations_to_gt_area = area[None].repeat(len(locations), 1) 79 | locations_to_gt_area[is_in_boxes == 0] = INF 80 | locations_to_gt_area[is_cared_in_the_level == 0] = INF 81 | 82 | # if there are still more than one objects for a location, 83 | # we choose the one with minimal area 84 | locations_to_min_area, locations_to_gt_inds = \ 85 | locations_to_gt_area.min(dim=1) 86 | 87 | reg_targets_per_im = reg_targets_per_im[ 88 | range(len(locations)), locations_to_gt_inds] 89 | labels_per_im = labels_per_im[locations_to_gt_inds] 90 | labels_per_im[locations_to_min_area == 
INF] = 0 91 | 92 | labels.append(labels_per_im) 93 | reg_targets.append(reg_targets_per_im) 94 | 95 | return labels, reg_targets 96 | 97 | 98 | def fcos_target_opr(locations, targets, center_sample=None): 99 | """ 100 | Generate targets for computing fcos loss. 101 | 102 | Args: 103 | locations: (list[BoxList]) 104 | targets: (list[BoxList]) 105 | center_sample: (dict) 106 | """ 107 | object_sizes_of_interest = [ 108 | [-1, 64], 109 | [64, 128], 110 | [128, 256], 111 | [256, 512], 112 | [512, INF], 113 | ] 114 | expanded_object_sizes_of_interest = [] 115 | for l, points_per_level in enumerate(locations): 116 | object_sizes_of_interest_per_level = \ 117 | points_per_level.new_tensor(object_sizes_of_interest[l]) 118 | expanded_object_sizes_of_interest.append( 119 | object_sizes_of_interest_per_level[None].expand( 120 | len(points_per_level), -1) 121 | ) 122 | 123 | expanded_object_sizes_of_interest = torch.cat( 124 | expanded_object_sizes_of_interest, dim=0) 125 | num_points_per_level = [ 126 | len(points_per_level) for points_per_level in locations] 127 | points_all_level = torch.cat(locations, dim=0) 128 | if center_sample is not None: 129 | center_sample['num_points_per_level'] = num_points_per_level 130 | labels, reg_targets = compute_targets_for_locations( 131 | points_all_level, targets, expanded_object_sizes_of_interest, 132 | center_sample=center_sample) 133 | else: 134 | labels, reg_targets = compute_targets_for_locations( 135 | points_all_level, targets, expanded_object_sizes_of_interest) 136 | 137 | for i in range(len(labels)): 138 | labels[i] = torch.split(labels[i], num_points_per_level, dim=0) 139 | reg_targets[i] = torch.split( 140 | reg_targets[i], num_points_per_level, dim=0) 141 | 142 | labels_level_first = [] 143 | reg_targets_level_first = [] 144 | for level in range(len(locations)): 145 | labels_level_first.append( 146 | torch.cat([labels_per_im[level] for labels_per_im in labels], dim=0) 147 | ) 148 | reg_targets_level_first.append( 149 | torch.cat([reg_targets_per_im[level] for reg_targets_per_im in 150 | reg_targets], dim=0) 151 | ) 152 | return labels_level_first, reg_targets_level_first 153 | 154 | 155 | def compute_centerness_targets(reg_targets): 156 | left_right = reg_targets[:, [0, 2]] 157 | top_bottom = reg_targets[:, [1, 3]] 158 | centerness = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \ 159 | (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]) 160 | return torch.sqrt(centerness) 161 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/fcos/post_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 3 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_nms, \ 4 | remove_small_boxes 5 | 6 | 7 | def post_processing_opr( 8 | fcos_locations, cls_logits, bbox_preds, centernesses, image_sizes, 9 | pre_nms_top_n, pre_nms_thresh, nms_thresh, box_min_size, 10 | fpn_post_nms_top_n, num_classes): 11 | """ 12 | Compute the post-processed boxes and obtain the final results. 
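    For a location (x, y) with predicted distances (l, t, r, b), the decoded
    box is (x - l, y - t, x + r, y + b); classification scores are multiplied
    by the sigmoid centerness before thresholding, and per-class NMS plus a
    `fpn_post_nms_top_n` cap are applied per image over all levels.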
13 | 14 | Args: 15 | fcos_locations: (list[BoxList]) 16 | cls_logits: (list[tensor]) 17 | bbox_preds: (list[tensor]) 18 | centernesses: (list[tensor]) 19 | image_sizes: (list[tuple[int, int]]) 20 | pre_nms_top_n: (int) 21 | pre_nms_thresh: (float) 22 | nms_thresh: (float) 23 | box_min_size: (int) 24 | fpn_post_nms_top_n: (int) 25 | num_classes: (int) 26 | """ 27 | sampled_boxes = [] 28 | temp_pre_nms_top_n = pre_nms_top_n 29 | 30 | for locations, box_cls, box_regression, centerness in zip( 31 | fcos_locations, cls_logits, bbox_preds, centernesses): 32 | 33 | N, C, H, W = box_cls.shape 34 | 35 | # put in the same format as locations 36 | box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1) 37 | box_cls = box_cls.reshape(N, -1, C).sigmoid() 38 | box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1) 39 | box_regression = box_regression.reshape(N, -1, 4) 40 | centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1) 41 | centerness = centerness.reshape(N, -1).sigmoid() 42 | 43 | candidate_inds = box_cls > pre_nms_thresh 44 | pre_nms_top_n = candidate_inds.view(N, -1).sum(1) 45 | pre_nms_top_n = pre_nms_top_n.clamp(max=temp_pre_nms_top_n) 46 | 47 | # multiply the classification scores with centerness scores 48 | box_cls = box_cls * centerness[:, :, None] 49 | 50 | results = [] 51 | for i in range(N): 52 | per_box_cls = box_cls[i] 53 | per_candidate_inds = candidate_inds[i] 54 | per_box_cls = per_box_cls[per_candidate_inds] 55 | 56 | per_candidate_nonzeros = per_candidate_inds.nonzero() 57 | per_box_loc = per_candidate_nonzeros[:, 0] 58 | per_class = per_candidate_nonzeros[:, 1] + 1 59 | 60 | per_box_regression = box_regression[i] 61 | per_box_regression = per_box_regression[per_box_loc] 62 | per_locations = locations[per_box_loc] 63 | 64 | per_pre_nms_top_n = pre_nms_top_n[i] 65 | 66 | if per_candidate_inds.sum().item() > per_pre_nms_top_n.item(): 67 | per_box_cls, top_k_indices = \ 68 | per_box_cls.topk(per_pre_nms_top_n, sorted=False) 69 | per_class = per_class[top_k_indices] 70 | per_box_regression = per_box_regression[top_k_indices] 71 | per_locations = per_locations[top_k_indices] 72 | 73 | detections = torch.stack([ 74 | per_locations[:, 0] - per_box_regression[:, 0], 75 | per_locations[:, 1] - per_box_regression[:, 1], 76 | per_locations[:, 0] + per_box_regression[:, 2], 77 | per_locations[:, 1] + per_box_regression[:, 3], 78 | ], dim=1) 79 | 80 | h, w = image_sizes[i] 81 | boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy") 82 | boxlist.add_field("labels", per_class) 83 | boxlist.add_field("scores", per_box_cls) 84 | boxlist = boxlist.clip_to_image(remove_empty=False) 85 | boxlist = remove_small_boxes(boxlist, box_min_size) 86 | results.append(boxlist) 87 | sampled_boxes.append(results) 88 | 89 | boxlists = list(zip(*sampled_boxes)) 90 | boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] 91 | 92 | # select over all levels 93 | num_images = len(boxlists) 94 | results = [] 95 | for i in range(num_images): 96 | scores = boxlists[i].get_field("scores") 97 | labels = boxlists[i].get_field("labels") 98 | boxes = boxlists[i].bbox 99 | boxlist = boxlists[i] 100 | result = [] 101 | # skip the background 102 | for j in range(1, num_classes): 103 | inds = (labels == j).nonzero().view(-1) 104 | 105 | scores_j = scores[inds] 106 | boxes_j = boxes[inds, :].view(-1, 4) 107 | boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") 108 | boxlist_for_class.add_field("scores", scores_j) 109 | boxlist_for_class = boxlist_nms( 110 | boxlist_for_class, nms_thresh, 
111 | score_field="scores" 112 | ) 113 | num_labels = len(boxlist_for_class) 114 | boxlist_for_class.add_field( 115 | "labels", torch.full((num_labels,), j, 116 | dtype=torch.int64, 117 | device=scores.device) 118 | ) 119 | result.append(boxlist_for_class) 120 | 121 | result = cat_boxlist(result) 122 | number_of_detections = len(result) 123 | 124 | # Limit to max_per_image detections **over all classes** 125 | if number_of_detections > fpn_post_nms_top_n > 0: 126 | cls_scores = result.get_field("scores") 127 | image_thresh, _ = torch.kthvalue( 128 | cls_scores.cpu(), 129 | number_of_detections - fpn_post_nms_top_n + 1 130 | ) 131 | keep = cls_scores >= image_thresh.item() 132 | keep = torch.nonzero(keep).squeeze(1) 133 | result = result[keep] 134 | results.append(result) 135 | return results 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/fcos/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, init_value=1.0): 7 | super(Scale, self).__init__() 8 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 9 | 10 | def forward(self, input): 11 | return input * self.scale 12 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/proposal_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.utils.torch_utils import permute_and_flatten 3 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 4 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_nms, \ 5 | remove_small_boxes 6 | 7 | 8 | def proposal_opr( 9 | rpn_anchors, rpn_cls_logits, rpn_bbox_preds, box_coder, pre_nms_top_n, 10 | post_nms_top_n, nms_thresh, box_min_size, fpn_post_nms_top_n, 11 | fpn_post_nms_per_batch=True, is_train=False, targets=None, 12 | proposal_with_gt=True): 13 | """ 14 | Generate proposals for RCNN. 
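    In outline: per FPN level, the `pre_nms_top_n` highest-scoring anchors are
    decoded with `box_coder`, clipped to the image, filtered by `box_min_size`
    and NMS-ed (keeping at most `post_nms_top_n`); proposals are then re-ranked
    across levels and limited to `fpn_post_nms_top_n` (per batch during
    training when `fpn_post_nms_per_batch`, otherwise per image), and
    ground-truth boxes are optionally appended during training.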
15 | 16 | Args: 17 | rpn_anchors: (list[list[BoxList]]) 18 | rpn_cls_logits: (list[tensor]) 19 | rpn_bbox_preds: (list[tensor]) 20 | box_coder: (BoxCoder) 21 | pre_nms_top_n: (int) 22 | post_nms_top_n: (int) 23 | nms_thresh: (float) 24 | box_min_size: (int) 25 | fpn_post_nms_top_n: (int) 26 | fpn_post_nms_per_batch: (bool) 27 | is_train: (bool) 28 | targets: (list[BoxList]) 29 | proposal_with_gt: (bool) 30 | """ 31 | 32 | sampled_boxes = [] 33 | num_levels = len(rpn_cls_logits) 34 | rpn_anchors = list(zip(*rpn_anchors)) 35 | for anchors, objectness, box_regression in zip( 36 | rpn_anchors, rpn_cls_logits, rpn_bbox_preds): 37 | device = objectness.device 38 | N, A, H, W = objectness.shape 39 | 40 | # put in the same format as anchors 41 | objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1) 42 | objectness = objectness.sigmoid() 43 | 44 | box_regression = permute_and_flatten(box_regression, N, A, 4, H, W) 45 | 46 | num_anchors = A * H * W 47 | 48 | pre_nms_top_n = min(pre_nms_top_n, num_anchors) 49 | objectness, topk_idx = objectness.topk( 50 | pre_nms_top_n, dim=1, sorted=True) 51 | 52 | batch_idx = torch.arange(N, device=device)[:, None] 53 | box_regression = box_regression[batch_idx, topk_idx] 54 | 55 | image_shapes = [box.size for box in anchors] 56 | concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) 57 | concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx] 58 | 59 | proposals = box_coder.decode( 60 | box_regression.view(-1, 4), concat_anchors.view(-1, 4) 61 | ) 62 | 63 | proposals = proposals.view(N, -1, 4) 64 | 65 | result = [] 66 | for proposal, score, im_shape in zip(proposals, objectness, 67 | image_shapes): 68 | boxlist = BoxList(proposal, im_shape, mode="xyxy") 69 | boxlist.add_field("objectness", score) 70 | boxlist = boxlist.clip_to_image(remove_empty=False) 71 | boxlist = remove_small_boxes(boxlist, box_min_size) 72 | boxlist = boxlist_nms( 73 | boxlist, 74 | nms_thresh, 75 | max_proposals=post_nms_top_n, 76 | score_field="objectness", 77 | ) 78 | result.append(boxlist) 79 | sampled_boxes.append(result) 80 | 81 | boxlists = list(zip(*sampled_boxes)) 82 | boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] 83 | 84 | # select over all levels 85 | if num_levels > 1: 86 | num_images = len(boxlists) 87 | if is_train and fpn_post_nms_per_batch: 88 | objectness = torch.cat( 89 | [boxlist.get_field("objectness") for boxlist in boxlists], dim=0 90 | ) 91 | box_sizes = [len(boxlist) for boxlist in boxlists] 92 | post_nms_top_n = min(fpn_post_nms_top_n, len(objectness)) 93 | _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0, 94 | sorted=True) 95 | inds_mask = torch.zeros_like(objectness, dtype=torch.uint8) 96 | inds_mask[inds_sorted] = 1 97 | inds_mask = inds_mask.split(box_sizes) 98 | for i in range(num_images): 99 | boxlists[i] = boxlists[i][inds_mask[i]] 100 | else: 101 | for i in range(num_images): 102 | objectness = boxlists[i].get_field("objectness") 103 | post_nms_top_n = min(fpn_post_nms_top_n, len(objectness)) 104 | _, inds_sorted = torch.topk( 105 | objectness, post_nms_top_n, dim=0, sorted=True 106 | ) 107 | boxlists[i] = boxlists[i][inds_sorted] 108 | 109 | # append ground-truth bboxes to proposals 110 | if is_train and targets is not None and proposal_with_gt: 111 | # Get the device we're operating on 112 | device = boxlists[0].bbox.device 113 | 114 | gt_boxes = [target.copy_with_fields([]) for target in targets] 115 | 116 | # later cat of bbox requires all fields to be present for all bbox 117 | # so we need 
to add a dummy for objectness that's missing 118 | for gt_box in gt_boxes: 119 | gt_box.add_field("objectness", 120 | torch.ones(len(gt_box), device=device)) 121 | 122 | boxlists = [ 123 | cat_boxlist((proposal, gt_box)) 124 | for proposal, gt_box in zip(boxlists, gt_boxes) 125 | ] 126 | 127 | return boxlists 128 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/retinanet/anchor_target_opr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from dynamic_rcnn.det_opr.matcher import Matcher 5 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_iou 6 | 7 | 8 | def anchor_target_opr( 9 | anchors, targets, box_coder, high_threshold, low_threshold, 10 | allow_low_quality_matches=True): 11 | """ 12 | Generate anchor targets for computing retinanet loss. 13 | 14 | Args: 15 | anchors: (list[BoxList]) 16 | targets: (list[BoxList]) 17 | box_coder: (BoxCoder) 18 | high_threshold: (float) 19 | low_threshold: (float) 20 | """ 21 | matcher = Matcher(high_threshold, low_threshold, 22 | allow_low_quality_matches=allow_low_quality_matches) 23 | 24 | # prepare targets 25 | labels = [] 26 | regression_targets = [] 27 | for anchors_per_image, targets_per_image in zip(anchors, targets): 28 | # match targets to anchors 29 | match_quality_matrix = boxlist_iou(targets_per_image, anchors_per_image) 30 | matched_idxs = matcher(match_quality_matrix) 31 | targets_per_image = targets_per_image.copy_with_fields(['labels']) 32 | matched_targets = targets_per_image[matched_idxs.clamp(min=0)] 33 | matched_targets.add_field("matched_idxs", matched_idxs) 34 | 35 | matched_idxs = matched_targets.get_field("matched_idxs") 36 | # generate rpn labels 37 | labels_per_image = matched_targets.get_field("labels") 38 | labels_per_image = labels_per_image.to(dtype=torch.float32) 39 | 40 | # Background (negative examples) 41 | bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 42 | labels_per_image[bg_indices] = 0 43 | 44 | # discard indices that are between thresholds 45 | inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS 46 | labels_per_image[inds_to_discard] = -1 47 | 48 | # compute regression targets 49 | regression_targets_per_image = box_coder.encode( 50 | matched_targets.bbox, anchors_per_image.bbox 51 | ) 52 | 53 | labels.append(labels_per_image) 54 | regression_targets.append(regression_targets_per_image) 55 | 56 | return labels, regression_targets 57 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/retinanet/post_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.utils.torch_utils import permute_and_flatten 3 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 4 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_nms, \ 5 | remove_small_boxes 6 | 7 | 8 | def post_processing_opr( 9 | retina_anchors, cls_logits, bbox_preds, box_coder, pre_nms_top_n, 10 | pre_nms_thresh, nms_thresh, box_min_size, fpn_post_nms_top_n, 11 | num_classes): 12 | """ 13 | Compute the post-processed boxes and obtain the final results. 
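    Per level, anchor scores above `pre_nms_thresh` are kept (at most
    `pre_nms_top_n` per image), the corresponding boxes are decoded with
    `box_coder`, clipped and size-filtered; results are then merged across
    levels with per-class NMS and capped at `fpn_post_nms_top_n` detections
    per image.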
14 | 15 | Args: 16 | retina_anchors: (list[list[BoxList]]) 17 | cls_logits: (list[tensor]) 18 | bbox_preds: (list[tensor]) 19 | box_coder: (BoxCoder) 20 | pre_nms_top_n: (int) 21 | pre_nms_thresh: (float) 22 | nms_thresh: (float) 23 | box_min_size: (int) 24 | fpn_post_nms_top_n: (int) 25 | num_classes: (int) 26 | """ 27 | 28 | sampled_boxes = [] 29 | num_levels = len(cls_logits) 30 | retina_anchors = list(zip(*retina_anchors)) 31 | temp_pre_nms_top_n = pre_nms_top_n 32 | for anchors, box_cls, box_regression in zip( 33 | retina_anchors, cls_logits, bbox_preds): 34 | device = box_cls.device 35 | N, _, H, W = box_cls.shape 36 | A = box_regression.size(1) // 4 37 | C = box_cls.size(1) // A 38 | 39 | # put in the same format as anchors 40 | box_cls = permute_and_flatten(box_cls, N, A, C, H, W) 41 | box_cls = box_cls.sigmoid() 42 | 43 | box_regression = permute_and_flatten(box_regression, N, A, 4, H, W) 44 | box_regression = box_regression.reshape(N, -1, 4) 45 | 46 | num_anchors = A * H * W 47 | 48 | candidate_inds = box_cls > pre_nms_thresh 49 | 50 | pre_nms_top_n = candidate_inds.view(N, -1).sum(1) 51 | pre_nms_top_n = pre_nms_top_n.clamp(max=temp_pre_nms_top_n) 52 | 53 | results = [] 54 | for per_box_cls, per_box_regression, per_pre_nms_top_n, \ 55 | per_candidate_inds, per_anchors in zip( 56 | box_cls, 57 | box_regression, 58 | pre_nms_top_n, 59 | candidate_inds, 60 | anchors): 61 | # Sort and select TopN 62 | per_box_cls = per_box_cls[per_candidate_inds] 63 | 64 | per_box_cls, top_k_indices = \ 65 | per_box_cls.topk(per_pre_nms_top_n, sorted=False) 66 | 67 | per_candidate_nonzeros = \ 68 | per_candidate_inds.nonzero()[top_k_indices, :] 69 | 70 | per_box_loc = per_candidate_nonzeros[:, 0] 71 | per_class = per_candidate_nonzeros[:, 1] 72 | per_class += 1 73 | 74 | detections = box_coder.decode( 75 | per_box_regression[per_box_loc, :].view(-1, 4), 76 | per_anchors.bbox[per_box_loc, :].view(-1, 4) 77 | ) 78 | 79 | boxlist = BoxList(detections, per_anchors.size, mode="xyxy") 80 | boxlist.add_field("labels", per_class) 81 | boxlist.add_field("scores", per_box_cls) 82 | boxlist = boxlist.clip_to_image(remove_empty=False) 83 | boxlist = remove_small_boxes(boxlist, box_min_size) 84 | results.append(boxlist) 85 | sampled_boxes.append(results) 86 | 87 | boxlists = list(zip(*sampled_boxes)) 88 | boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] 89 | 90 | # select over all levels 91 | if num_levels > 1: 92 | num_images = len(boxlists) 93 | results = [] 94 | for i in range(num_images): 95 | scores = boxlists[i].get_field("scores") 96 | labels = boxlists[i].get_field("labels") 97 | boxes = boxlists[i].bbox 98 | boxlist = boxlists[i] 99 | result = [] 100 | # skip the background 101 | for j in range(1, num_classes): 102 | inds = (labels == j).nonzero().view(-1) 103 | 104 | scores_j = scores[inds] 105 | boxes_j = boxes[inds, :].view(-1, 4) 106 | boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") 107 | boxlist_for_class.add_field("scores", scores_j) 108 | boxlist_for_class = boxlist_nms( 109 | boxlist_for_class, nms_thresh, 110 | score_field="scores" 111 | ) 112 | num_labels = len(boxlist_for_class) 113 | boxlist_for_class.add_field( 114 | "labels", torch.full((num_labels,), j, 115 | dtype=torch.int64, 116 | device=scores.device) 117 | ) 118 | result.append(boxlist_for_class) 119 | 120 | result = cat_boxlist(result) 121 | number_of_detections = len(result) 122 | 123 | # Limit to max_per_image detections **over all classes** 124 | if number_of_detections > fpn_post_nms_top_n > 0: 125 | 
cls_scores = result.get_field("scores") 126 | image_thresh, _ = torch.kthvalue( 127 | cls_scores.cpu(), 128 | number_of_detections - fpn_post_nms_top_n + 1 129 | ) 130 | keep = cls_scores >= image_thresh.item() 131 | keep = torch.nonzero(keep).squeeze(1) 132 | result = result[keep] 133 | results.append(result) 134 | return results 135 | return boxlists 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative example. 34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/bbox_aug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as TT 3 | 4 | from dynamic_rcnn.datasets import transforms as T 5 | from dynamic_rcnn.datasets.structures.image_list import to_image_list 6 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 7 | from dynamic_rcnn.det_opr.rcnn.post_processing import filter_results 8 | 9 | 10 | def im_detect_bbox_aug(cfg, model, images, device): 11 | # Collect detections computed under different transformations 12 | boxlists_ts = [] 13 | for _ in range(len(images)): 14 | boxlists_ts.append([]) 15 | 16 | def add_preds_t(boxlists_t): 17 | for i, boxlist_t in enumerate(boxlists_t): 18 | if len(boxlists_ts[i]) == 0: 19 | # The first one is identity transform, no need to resize the boxlist 20 | boxlists_ts[i].append(boxlist_t) 21 | else: 22 | # Resize the boxlist as the first one 23 | boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) 24 | 25 | # Compute detections for the original image (identity transform) 26 | boxlists_i = im_detect_bbox(cfg, model, images, cfg.INPUT.MIN_SIZE_TEST, 27 | cfg.INPUT.MAX_SIZE_TEST, device) 28 | add_preds_t(boxlists_i) 29 | 30 | # Perform detection on the horizontally flipped image 31 | if cfg.TEST.BBOX_AUG.H_FLIP: 32 | boxlists_hf = im_detect_bbox_hflip( 33 | cfg, model, images, cfg.INPUT.MIN_SIZE_TEST, 34 | cfg.INPUT.MAX_SIZE_TEST, device) 35 | add_preds_t(boxlists_hf) 36 | 37 | # Compute detections at different scales 38 | for scale in cfg.TEST.BBOX_AUG.SCALES: 39 | max_size = cfg.TEST.BBOX_AUG.MAX_SIZE 40 | boxlists_scl = im_detect_bbox_scale( 41 | cfg, model, images, scale, max_size, device) 42 | add_preds_t(boxlists_scl) 43 | 44 | if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: 45 | boxlists_scl_hf = im_detect_bbox_scale( 46 | cfg, model, images, scale, max_size, device, hflip=True) 47 | add_preds_t(boxlists_scl_hf) 48 | 49 | # Merge boxlists detected by different bbox aug params 50 | boxlists = [] 51 | for i, boxlist_ts in enumerate(boxlists_ts): 52 | bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) 53 | scores = torch.cat( 54 | [boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) 55 | boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) 56 | boxlist.add_field('scores', scores) 57 | boxlists.append(boxlist) 58 | 59 | # Apply NMS and limit the final detections 60 | results = [] 61 | for boxlist in boxlists: 62 | results.append(filter_results( 63 | boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES, 64 | cfg.MODEL.ROI_HEADS.SCORE_THRESH, cfg.MODEL.ROI_HEADS.NMS, 65 | cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG)) 66 | 67 | return results 68 | 69 | 70 | def im_detect_bbox(cfg, model, images, target_scale, target_max_size, device): 71 | """ 72 | Performs bbox detection on the original image. 73 | """ 74 | transform = TT.Compose([ 75 | T.Resize(target_scale, target_max_size), 76 | TT.ToTensor(), 77 | T.Normalize( 78 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, 79 | to_bgr255=cfg.INPUT.TO_BGR255 80 | ) 81 | ]) 82 | images = [transform(image) for image in images] 83 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 84 | return model(images.to(device)) 85 | 86 | 87 | def im_detect_bbox_hflip( 88 | cfg, model, images, target_scale, target_max_size, device): 89 | """ 90 | Performs bbox detection on the horizontally flipped image. 
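    Detections are computed on the flipped input and then mapped back to the
    original orientation via `boxlist.transpose(0)`.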
91 | Function signature is the same as for im_detect_bbox. 92 | """ 93 | transform = TT.Compose([ 94 | T.Resize(target_scale, target_max_size), 95 | TT.RandomHorizontalFlip(1.0), 96 | TT.ToTensor(), 97 | T.Normalize( 98 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 99 | ) 100 | ]) 101 | images = [transform(image) for image in images] 102 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 103 | boxlists = model(images.to(device)) 104 | 105 | # Invert the detections computed on the flipped image 106 | boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists] 107 | return boxlists_inv 108 | 109 | 110 | def im_detect_bbox_scale( 111 | cfg, model, images, target_scale, target_max_size, device, hflip=False): 112 | """ 113 | Computes bbox detections at the given scale. 114 | Returns predictions in the scaled image space. 115 | """ 116 | if hflip: 117 | boxlists_scl = im_detect_bbox_hflip( 118 | cfg, model, images, target_scale, target_max_size, device) 119 | else: 120 | boxlists_scl = im_detect_bbox( 121 | cfg, model, images, target_scale, target_max_size, device) 122 | return boxlists_scl 123 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), 
dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Reduce the values in the dictionary from all processes so that the process 94 | with rank 0 has the reduced results. Returns a dict with the same fields as 95 | input_dict, after reduction. 96 | Args: 97 | input_dict (dict): all the values will be reduced 98 | average (bool): whether to average or sum the values 99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/lr_scheduler.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of increasing integers. " 24 | "Got {}".format(milestones) 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted, " 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ROIAlign.h: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, 
targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/SigmoidFocalLoss_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | // This file is modified from https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu 3 | // Cheng-Yang Fu 4 | // cyfu@cs.unc.edu 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | // TODO make it in a common file 15 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 16 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 17 | i += blockDim.x * gridDim.x) 18 | 19 | 20 | template 21 | __global__ void SigmoidFocalLossForward(const int nthreads, 22 | const T* logits, 23 | const int* targets, 24 | const int num_classes, 25 | const float gamma, 26 | const float alpha, 27 | const int num, 28 | T* losses) { 29 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 30 | 31 | int n = i / num_classes; 32 | int d = i % num_classes; // current class[0~79]; 33 | int t = targets[n]; // target class [1~80]; 34 | 35 | // Decide it is positive or negative case. 36 | T c1 = (t == (d+1)); 37 | T c2 = (t>=0 & t != (d+1)); 38 | 39 | T zn = (1.0 - alpha); 40 | T zp = (alpha); 41 | 42 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 43 | T p = 1. / (1. + expf(-logits[i])); 44 | 45 | // (1-p)**gamma * log(p) where 46 | T term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 47 | 48 | // p**gamma * log(1-p) 49 | T term2 = powf(p, gamma) * 50 | (-1. * logits[i] * (logits[i] >= 0) - 51 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); 52 | 53 | losses[i] = 0.0; 54 | losses[i] += -c1 * term1 * zp; 55 | losses[i] += -c2 * term2 * zn; 56 | 57 | } // CUDA_1D_KERNEL_LOOP 58 | } // SigmoidFocalLossForward 59 | 60 | 61 | template 62 | __global__ void SigmoidFocalLossBackward(const int nthreads, 63 | const T* logits, 64 | const int* targets, 65 | const T* d_losses, 66 | const int num_classes, 67 | const float gamma, 68 | const float alpha, 69 | const int num, 70 | T* d_logits) { 71 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 72 | 73 | int n = i / num_classes; 74 | int d = i % num_classes; // current class[0~79]; 75 | int t = targets[n]; // target class [1~80], 0 is background; 76 | 77 | // Decide it is positive or negative case. 78 | T c1 = (t == (d+1)); 79 | T c2 = (t>=0 & t != (d+1)); 80 | 81 | T zn = (1.0 - alpha); 82 | T zp = (alpha); 83 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 84 | T p = 1. / (1. + expf(-logits[i])); 85 | 86 | // (1-p)**g * (1 - p - g*p*log(p) 87 | T term1 = powf((1. - p), gamma) * 88 | (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); 89 | 90 | // (p**g) * (g*(1-p)*log(1-p) - p) 91 | T term2 = powf(p, gamma) * 92 | ((-1. * logits[i] * (logits[i] >= 0) - 93 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * 94 | (1. 
- p) * gamma - p); 95 | d_logits[i] = 0.0; 96 | d_logits[i] += -c1 * term1 * zp; 97 | d_logits[i] += -c2 * term2 * zn; 98 | d_logits[i] = d_logits[i] * d_losses[i]; 99 | 100 | } // CUDA_1D_KERNEL_LOOP 101 | } // SigmoidFocalLossBackward 102 | 103 | 104 | at::Tensor SigmoidFocalLoss_forward_cuda( 105 | const at::Tensor& logits, 106 | const at::Tensor& targets, 107 | const int num_classes, 108 | const float gamma, 109 | const float alpha) { 110 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 111 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 112 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 113 | 114 | const int num_samples = logits.size(0); 115 | 116 | auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); 117 | auto losses_size = num_samples * logits.size(1); 118 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 119 | 120 | dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L)); 121 | 122 | dim3 block(512); 123 | 124 | if (losses.numel() == 0) { 125 | THCudaCheck(cudaGetLastError()); 126 | return losses; 127 | } 128 | 129 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { 130 | SigmoidFocalLossForward<<>>( 131 | losses_size, 132 | logits.contiguous().data(), 133 | targets.contiguous().data(), 134 | num_classes, 135 | gamma, 136 | alpha, 137 | num_samples, 138 | losses.data()); 139 | }); 140 | THCudaCheck(cudaGetLastError()); 141 | return losses; 142 | } 143 | 144 | 145 | at::Tensor SigmoidFocalLoss_backward_cuda( 146 | const at::Tensor& logits, 147 | const at::Tensor& targets, 148 | const at::Tensor& d_losses, 149 | const int num_classes, 150 | const float gamma, 151 | const float alpha) { 152 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 153 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 154 | AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); 155 | 156 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 157 | 158 | const int num_samples = logits.size(0); 159 | AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); 160 | 161 | auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); 162 | auto d_logits_size = num_samples * logits.size(1); 163 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 164 | 165 | dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L)); 166 | dim3 block(512); 167 | 168 | if (d_logits.numel() == 0) { 169 | THCudaCheck(cudaGetLastError()); 170 | return d_logits; 171 | } 172 | 173 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { 174 | SigmoidFocalLossBackward<<>>( 175 | d_logits_size, 176 | logits.contiguous().data(), 177 | targets.contiguous().data(), 178 | d_losses.contiguous().data(), 179 | num_classes, 180 | gamma, 181 | alpha, 182 | num_samples, 183 | d_logits.data()); 184 | }); 185 | 186 | THCudaCheck(cudaGetLastError()); 187 | return d_logits; 188 | } 189 | 190 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 
9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 
2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num 
= boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | 60 | 61 | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, 62 | at::Tensor offset, at::Tensor output, 63 | at::Tensor columns, at::Tensor ones, int kW, 64 | int kH, int dW, int dH, int padW, int padH, 65 | int dilationW, int dilationH, int group, 66 | int deformable_group, int im2col_step); 67 | 68 | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, 69 | at::Tensor gradOutput, at::Tensor gradInput, 70 | at::Tensor gradOffset, at::Tensor weight, 71 | at::Tensor columns, int kW, int kH, int dW, 72 | int dH, int padW, int padH, int dilationW, 73 | int dilationH, int group, 74 | int deformable_group, int im2col_step); 75 | 76 | int deform_conv_backward_parameters_cuda( 77 | at::Tensor input, at::Tensor offset, at::Tensor gradOutput, 78 | at::Tensor gradWeight, // at::Tensor gradBias, 79 | at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, 80 | int padW, int padH, int dilationW, int dilationH, int group, 81 | int deformable_group, float scale, int im2col_step); 82 | 83 | void modulated_deform_conv_cuda_forward( 84 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 85 | at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, 86 | int kernel_h, int kernel_w, const int stride_h, const int stride_w, 87 | const int pad_h, const int pad_w, const int dilation_h, 88 | const int dilation_w, const int group, const int deformable_group, 89 | const bool with_bias); 90 | 91 | void modulated_deform_conv_cuda_backward( 92 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 93 | at::Tensor offset, at::Tensor mask, at::Tensor columns, 94 | at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, 95 | at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, 96 | int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, 97 | int pad_w, int dilation_h, int dilation_w, int group, int 
deformable_group, 98 | const bool with_bias); 99 | 100 | void deform_psroi_pooling_cuda_forward( 101 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 102 | at::Tensor top_count, const int no_trans, const float spatial_scale, 103 | const int output_dim, const int group_size, const int pooled_size, 104 | const int part_size, const int sample_per_part, const float trans_std); 105 | 106 | void deform_psroi_pooling_cuda_backward( 107 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 108 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 109 | const int no_trans, const float spatial_scale, const int output_dim, 110 | const int group_size, const int pooled_size, const int part_size, 111 | const int sample_per_part, const float trans_std); 112 | 113 | 114 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 115 | const int height, 116 | const int width); 117 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/deform_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | int deform_conv_forward( 12 | at::Tensor input, 13 | at::Tensor weight, 14 | at::Tensor offset, 15 | at::Tensor output, 16 | at::Tensor columns, 17 | at::Tensor ones, 18 | int kW, 19 | int kH, 20 | int dW, 21 | int dH, 22 | int padW, 23 | int padH, 24 | int dilationW, 25 | int dilationH, 26 | int group, 27 | int deformable_group, 28 | int im2col_step) 29 | { 30 | if (input.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_conv_forward_cuda( 33 | input, weight, offset, output, columns, ones, 34 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 35 | group, deformable_group, im2col_step 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | int deform_conv_backward_input( 46 | at::Tensor input, 47 | at::Tensor offset, 48 | at::Tensor gradOutput, 49 | at::Tensor gradInput, 50 | at::Tensor gradOffset, 51 | at::Tensor weight, 52 | at::Tensor columns, 53 | int kW, 54 | int kH, 55 | int dW, 56 | int dH, 57 | int padW, 58 | int padH, 59 | int dilationW, 60 | int dilationH, 61 | int group, 62 | int deformable_group, 63 | int im2col_step) 64 | { 65 | if (input.type().is_cuda()) { 66 | #ifdef WITH_CUDA 67 | return deform_conv_backward_input_cuda( 68 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 69 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 70 | group, deformable_group, im2col_step 71 | ); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | AT_ERROR("Not implemented on the CPU"); 77 | } 78 | 79 | 80 | int deform_conv_backward_parameters( 81 | at::Tensor input, 82 | at::Tensor offset, 83 | at::Tensor gradOutput, 84 | at::Tensor gradWeight, // at::Tensor gradBias, 85 | at::Tensor columns, 86 | at::Tensor ones, 87 | int kW, 88 | int kH, 89 | int dW, 90 | int dH, 91 | int padW, 92 | int padH, 93 | int dilationW, 94 | int dilationH, 95 | int group, 96 | int deformable_group, 97 | float scale, 98 | int im2col_step) 99 | { 100 | if (input.type().is_cuda()) { 101 | #ifdef WITH_CUDA 102 | return deform_conv_backward_parameters_cuda( 103 | input, offset, gradOutput, gradWeight, columns, ones, 104 | kW, 
kH, dW, dH, padW, padH, dilationW, dilationH, 105 | group, deformable_group, scale, im2col_step 106 | ); 107 | #else 108 | AT_ERROR("Not compiled with GPU support"); 109 | #endif 110 | } 111 | AT_ERROR("Not implemented on the CPU"); 112 | } 113 | 114 | 115 | void modulated_deform_conv_forward( 116 | at::Tensor input, 117 | at::Tensor weight, 118 | at::Tensor bias, 119 | at::Tensor ones, 120 | at::Tensor offset, 121 | at::Tensor mask, 122 | at::Tensor output, 123 | at::Tensor columns, 124 | int kernel_h, 125 | int kernel_w, 126 | const int stride_h, 127 | const int stride_w, 128 | const int pad_h, 129 | const int pad_w, 130 | const int dilation_h, 131 | const int dilation_w, 132 | const int group, 133 | const int deformable_group, 134 | const bool with_bias) 135 | { 136 | if (input.type().is_cuda()) { 137 | #ifdef WITH_CUDA 138 | return modulated_deform_conv_cuda_forward( 139 | input, weight, bias, ones, offset, mask, output, columns, 140 | kernel_h, kernel_w, stride_h, stride_w, 141 | pad_h, pad_w, dilation_h, dilation_w, 142 | group, deformable_group, with_bias 143 | ); 144 | #else 145 | AT_ERROR("Not compiled with GPU support"); 146 | #endif 147 | } 148 | AT_ERROR("Not implemented on the CPU"); 149 | } 150 | 151 | 152 | void modulated_deform_conv_backward( 153 | at::Tensor input, 154 | at::Tensor weight, 155 | at::Tensor bias, 156 | at::Tensor ones, 157 | at::Tensor offset, 158 | at::Tensor mask, 159 | at::Tensor columns, 160 | at::Tensor grad_input, 161 | at::Tensor grad_weight, 162 | at::Tensor grad_bias, 163 | at::Tensor grad_offset, 164 | at::Tensor grad_mask, 165 | at::Tensor grad_output, 166 | int kernel_h, 167 | int kernel_w, 168 | int stride_h, 169 | int stride_w, 170 | int pad_h, 171 | int pad_w, 172 | int dilation_h, 173 | int dilation_w, 174 | int group, 175 | int deformable_group, 176 | const bool with_bias) 177 | { 178 | if (input.type().is_cuda()) { 179 | #ifdef WITH_CUDA 180 | return modulated_deform_conv_cuda_backward( 181 | input, weight, bias, ones, offset, mask, columns, 182 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 183 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 184 | group, deformable_group, with_bias 185 | ); 186 | #else 187 | AT_ERROR("Not compiled with GPU support"); 188 | #endif 189 | } 190 | AT_ERROR("Not implemented on the CPU"); 191 | } -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # 4 | 5 | from .deform_conv_func import deform_conv, modulated_deform_conv 6 | from .deform_conv_module import DeformConv, ModulatedDeformConv, \ 7 | ModulatedDeformConvPack 8 | from .deform_pool_func import deform_roi_pooling 9 | from .deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, \ 10 | ModulatedDeformRoIPoolingPack 11 | 12 | __all__ = [ 13 | 'deform_conv', 14 | 'modulated_deform_conv', 15 | 'DeformConv', 16 | 'ModulatedDeformConv', 17 | 'ModulatedDeformConvPack', 18 | 'deform_roi_pooling', 19 | 'DeformRoIPooling', 20 | 'DeformRoIPoolingPack', 21 | 'ModulatedDeformRoIPoolingPack', 22 | ] 23 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/deform_conv_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .deform_conv_func import deform_conv, modulated_deform_conv 8 | 9 | 10 | class DeformConv(nn.Module): 11 | 12 | def __init__( 13 | self, 14 | in_channels, 15 | out_channels, 16 | kernel_size, 17 | stride=1, 18 | padding=0, 19 | dilation=1, 20 | groups=1, 21 | deformable_groups=1, 22 | bias=False 23 | ): 24 | assert not bias 25 | super(DeformConv, self).__init__() 26 | self.with_bias = bias 27 | 28 | assert in_channels % groups == 0, \ 29 | 'in_channels {} cannot be divisible by groups {}'.format( 30 | in_channels, groups) 31 | assert out_channels % groups == 0, \ 32 | 'out_channels {} cannot be divisible by groups {}'.format( 33 | out_channels, groups) 34 | self.in_channels = in_channels 35 | self.out_channels = out_channels 36 | self.kernel_size = _pair(kernel_size) 37 | self.stride = _pair(stride) 38 | self.padding = _pair(padding) 39 | self.dilation = _pair(dilation) 40 | self.groups = groups 41 | self.deformable_groups = deformable_groups 42 | 43 | self.weight = nn.Parameter( 44 | torch.Tensor(out_channels, in_channels // self.groups, 45 | *self.kernel_size)) 46 | 47 | self.reset_parameters() 48 | 49 | def reset_parameters(self): 50 | n = self.in_channels 51 | for k in self.kernel_size: 52 | n *= k 53 | stdv = 1. 
/ math.sqrt(n) 54 | self.weight.data.uniform_(-stdv, stdv) 55 | 56 | def forward(self, input, offset): 57 | return deform_conv(input, offset, self.weight, self.stride, 58 | self.padding, self.dilation, self.groups, 59 | self.deformable_groups) 60 | 61 | def __repr__(self): 62 | return "".join([ 63 | "{}(".format(self.__class__.__name__), 64 | "in_channels={}, ".format(self.in_channels), 65 | "out_channels={}, ".format(self.out_channels), 66 | "kernel_size={}, ".format(self.kernel_size), 67 | "stride={}, ".format(self.stride), 68 | "dilation={}, ".format(self.dilation), 69 | "padding={}, ".format(self.padding), 70 | "groups={}, ".format(self.groups), 71 | "deformable_groups={}, ".format(self.deformable_groups), 72 | "bias={})".format(self.with_bias), 73 | ]) 74 | 75 | 76 | class ModulatedDeformConv(nn.Module): 77 | 78 | def __init__( 79 | self, 80 | in_channels, 81 | out_channels, 82 | kernel_size, 83 | stride=1, 84 | padding=0, 85 | dilation=1, 86 | groups=1, 87 | deformable_groups=1, 88 | bias=True 89 | ): 90 | super(ModulatedDeformConv, self).__init__() 91 | self.in_channels = in_channels 92 | self.out_channels = out_channels 93 | self.kernel_size = _pair(kernel_size) 94 | self.stride = stride 95 | self.padding = padding 96 | self.dilation = dilation 97 | self.groups = groups 98 | self.deformable_groups = deformable_groups 99 | self.with_bias = bias 100 | 101 | self.weight = nn.Parameter(torch.Tensor( 102 | out_channels, 103 | in_channels // groups, 104 | *self.kernel_size 105 | )) 106 | if bias: 107 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 108 | else: 109 | self.register_parameter('bias', None) 110 | self.reset_parameters() 111 | 112 | def reset_parameters(self): 113 | n = self.in_channels 114 | for k in self.kernel_size: 115 | n *= k 116 | stdv = 1. 
/ math.sqrt(n) 117 | self.weight.data.uniform_(-stdv, stdv) 118 | if self.bias is not None: 119 | self.bias.data.zero_() 120 | 121 | def forward(self, input, offset, mask): 122 | return modulated_deform_conv( 123 | input, offset, mask, self.weight, self.bias, self.stride, 124 | self.padding, self.dilation, self.groups, self.deformable_groups) 125 | 126 | def __repr__(self): 127 | return "".join([ 128 | "{}(".format(self.__class__.__name__), 129 | "in_channels={}, ".format(self.in_channels), 130 | "out_channels={}, ".format(self.out_channels), 131 | "kernel_size={}, ".format(self.kernel_size), 132 | "stride={}, ".format(self.stride), 133 | "dilation={}, ".format(self.dilation), 134 | "padding={}, ".format(self.padding), 135 | "groups={}, ".format(self.groups), 136 | "deformable_groups={}, ".format(self.deformable_groups), 137 | "bias={})".format(self.with_bias), 138 | ]) 139 | 140 | class ModulatedDeformConvPack(ModulatedDeformConv): 141 | 142 | def __init__(self, 143 | in_channels, 144 | out_channels, 145 | kernel_size, 146 | stride=1, 147 | padding=0, 148 | dilation=1, 149 | groups=1, 150 | deformable_groups=1, 151 | bias=True): 152 | super(ModulatedDeformConvPack, self).__init__( 153 | in_channels, out_channels, kernel_size, stride, padding, dilation, 154 | groups, deformable_groups, bias) 155 | 156 | self.conv_offset_mask = nn.Conv2d( 157 | self.in_channels // self.groups, 158 | self.deformable_groups * 3 * self.kernel_size[0] * 159 | self.kernel_size[1], 160 | kernel_size=self.kernel_size, 161 | stride=_pair(self.stride), 162 | padding=_pair(self.padding), 163 | bias=True) 164 | self.init_offset() 165 | 166 | def init_offset(self): 167 | self.conv_offset_mask.weight.data.zero_() 168 | self.conv_offset_mask.bias.data.zero_() 169 | 170 | def forward(self, input): 171 | out = self.conv_offset_mask(input) 172 | o1, o2, mask = torch.chunk(out, 3, dim=1) 173 | offset = torch.cat((o1, o2), dim=1) 174 | mask = torch.sigmoid(mask) 175 | return modulated_deform_conv( 176 | input, offset, mask, self.weight, self.bias, self.stride, 177 | self.padding, self.dilation, self.groups, self.deformable_groups) 178 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from dynamic_rcnn import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | 
ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/deform_pool_module.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .deform_pool_func import deform_roi_pooling 4 | 5 | 6 | class DeformRoIPooling(nn.Module): 7 | 8 | def __init__(self, 9 | spatial_scale, 10 | out_size, 11 | out_channels, 12 | no_trans, 13 | group_size=1, 14 | part_size=None, 15 | sample_per_part=4, 16 | trans_std=.0): 17 | super(DeformRoIPooling, self).__init__() 18 | self.spatial_scale = spatial_scale 19 | self.out_size = out_size 20 | self.out_channels = out_channels 21 | self.no_trans = no_trans 22 | self.group_size = group_size 23 | self.part_size = out_size if part_size is None else part_size 24 | self.sample_per_part = sample_per_part 25 | self.trans_std = trans_std 26 | 27 | def forward(self, data, rois, offset): 28 | if self.no_trans: 29 | offset = data.new_empty(0) 30 | return deform_roi_pooling( 31 | data, rois, offset, self.spatial_scale, self.out_size, 32 | self.out_channels, self.no_trans, self.group_size, self.part_size, 33 | self.sample_per_part, self.trans_std) 34 | 35 | 36 | class DeformRoIPoolingPack(DeformRoIPooling): 37 | 38 | def __init__(self, 39 | spatial_scale, 40 | out_size, 41 | out_channels, 42 | no_trans, 43 | group_size=1, 44 | part_size=None, 45 | sample_per_part=4, 46 | trans_std=.0, 47 | deform_fc_channels=1024): 48 | super(DeformRoIPoolingPack, 49 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 50 | group_size, part_size, sample_per_part, trans_std) 51 | 52 | self.deform_fc_channels = deform_fc_channels 53 | 54 | if not no_trans: 55 | self.offset_fc = nn.Sequential( 56 | nn.Linear(self.out_size * self.out_size * self.out_channels, 57 | self.deform_fc_channels), 58 | nn.ReLU(inplace=True), 59 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 60 | nn.ReLU(inplace=True), 61 | nn.Linear(self.deform_fc_channels, 62 | self.out_size * self.out_size * 2)) 63 | self.offset_fc[-1].weight.data.zero_() 64 | self.offset_fc[-1].bias.data.zero_() 65 | 66 | def forward(self, data, rois): 67 | assert data.size(1) == self.out_channels 68 | if self.no_trans: 69 | offset = data.new_empty(0) 70 | return deform_roi_pooling( 71 | data, rois, offset, self.spatial_scale, self.out_size, 72 | self.out_channels, self.no_trans, 
self.group_size, 73 | self.part_size, self.sample_per_part, self.trans_std) 74 | else: 75 | n = rois.shape[0] 76 | offset = data.new_empty(0) 77 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 78 | self.out_size, self.out_channels, True, 79 | self.group_size, self.part_size, 80 | self.sample_per_part, self.trans_std) 81 | offset = self.offset_fc(x.view(n, -1)) 82 | offset = offset.view(n, 2, self.out_size, self.out_size) 83 | return deform_roi_pooling( 84 | data, rois, offset, self.spatial_scale, self.out_size, 85 | self.out_channels, self.no_trans, self.group_size, 86 | self.part_size, self.sample_per_part, self.trans_std) 87 | 88 | 89 | class ModulatedDeformRoIPoolingPack(DeformRoIPooling): 90 | 91 | def __init__(self, 92 | spatial_scale, 93 | out_size, 94 | out_channels, 95 | no_trans, 96 | group_size=1, 97 | part_size=None, 98 | sample_per_part=4, 99 | trans_std=.0, 100 | deform_fc_channels=1024): 101 | super(ModulatedDeformRoIPoolingPack, self).__init__( 102 | spatial_scale, out_size, out_channels, no_trans, group_size, 103 | part_size, sample_per_part, trans_std) 104 | 105 | self.deform_fc_channels = deform_fc_channels 106 | 107 | if not no_trans: 108 | self.offset_fc = nn.Sequential( 109 | nn.Linear(self.out_size * self.out_size * self.out_channels, 110 | self.deform_fc_channels), 111 | nn.ReLU(inplace=True), 112 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 113 | nn.ReLU(inplace=True), 114 | nn.Linear(self.deform_fc_channels, 115 | self.out_size * self.out_size * 2)) 116 | self.offset_fc[-1].weight.data.zero_() 117 | self.offset_fc[-1].bias.data.zero_() 118 | self.mask_fc = nn.Sequential( 119 | nn.Linear(self.out_size * self.out_size * self.out_channels, 120 | self.deform_fc_channels), 121 | nn.ReLU(inplace=True), 122 | nn.Linear(self.deform_fc_channels, 123 | self.out_size * self.out_size * 1), 124 | nn.Sigmoid()) 125 | self.mask_fc[2].weight.data.zero_() 126 | self.mask_fc[2].bias.data.zero_() 127 | 128 | def forward(self, data, rois): 129 | assert data.size(1) == self.out_channels 130 | if self.no_trans: 131 | offset = data.new_empty(0) 132 | return deform_roi_pooling( 133 | data, rois, offset, self.spatial_scale, self.out_size, 134 | self.out_channels, self.no_trans, self.group_size, 135 | self.part_size, self.sample_per_part, self.trans_std) 136 | else: 137 | n = rois.shape[0] 138 | offset = data.new_empty(0) 139 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 140 | self.out_size, self.out_channels, True, 141 | self.group_size, self.part_size, 142 | self.sample_per_part, self.trans_std) 143 | offset = self.offset_fc(x.view(n, -1)) 144 | offset = offset.view(n, 2, self.out_size, self.out_size) 145 | mask = self.mask_fc(x.view(n, -1)) 146 | mask = mask.view(n, 1, self.out_size, self.out_size) 147 | return deform_roi_pooling( 148 | data, rois, offset, self.spatial_scale, self.out_size, 149 | self.out_channels, self.no_trans, self.group_size, 150 | self.part_size, self.sample_per_part, self.trans_std) * mask 151 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
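A minimal usage sketch for the `nms` binding defined just below (illustrative only: it assumes the `_C` extension has been compiled through the project's `setup.py`, and that boxes are `[x1, y1, x2, y2]` rows with a matching 1-D score tensor):

import torch
from dynamic_rcnn.kernels.ops.nms import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, 0.5)  # indices of the boxes that survive suppression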
2 | # from ._utils import _C 3 | from dynamic_rcnn import _C 4 | 5 | nms = _C.nms 6 | 7 | # nms.__doc__ = """ 8 | # This function performs Non-maximum suppresion""" 9 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from dynamic_rcnn import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | def forward(self, input, rois): 57 | return roi_align( 58 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 59 | ) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
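For orientation, a rough sketch of how the `ROIAlign` module above is typically driven (the shapes and the `[batch_idx, x1, y1, x2, y2]` RoI layout are assumptions for illustration, not taken from this file):

import torch
from dynamic_rcnn.kernels.ops.roi_align import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
features = torch.randn(2, 256, 50, 50)             # NCHW feature map
rois = torch.tensor([[0., 16., 16., 160., 160.]])  # one RoI on image 0, in image coordinates
pooled = pooler(features, rois)                    # -> (num_rois, 256, 7, 7)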
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from dynamic_rcnn import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("nms", &nms, "non-maximum suppression"); 11 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 12 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 13 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 14 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 15 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 16 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 17 | // dcn-v2 18 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 19 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 20 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 21 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 22 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 23 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 24 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 25 | } -------------------------------------------------------------------------------- /dynamic_rcnn/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter( 16 | "%(asctime)s %(name)s %(levelname)s: %(message)s") 17 | ch.setFormatter(formatter) 18 | logger.addHandler(ch) 19 | 20 | if save_dir: 21 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 22 | fh.setLevel(logging.DEBUG) 23 | fh.setFormatter(formatter) 24 | logger.addHandler(fh) 25 | 26 | return logger 27 | -------------------------------------------------------------------------------- /dynamic_rcnn/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
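    For example (illustrative): with window_size=2, after update(1.0), update(2.0)
    and update(4.0) the window holds [2.0, 4.0], so avg is 3.0 and median is 2.0
    (torch.median returns the lower of the two middle values), while global_avg
    is 7.0 / 3.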
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /dynamic_rcnn/utils/pyt_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | import cv2 5 | 6 | 7 | def mkdir(path): 8 | try: 9 | os.makedirs(path) 10 | except OSError as e: 11 | if e.errno != errno.EEXIST: 12 | raise 13 | 14 | 15 | def link_file(src, target): 16 | """symbol link the source directories to target.""" 17 | if os.path.isdir(target) or os.path.isfile(target): 18 | os.remove(target) 19 | os.system('ln -s {} {}'.format(src, target)) 20 | 21 | 22 | def findContours(*args, **kwargs): 23 | """ 24 | Wraps cv2.findContours to maintain compatiblity between versions 25 | 3 and 4 26 | 27 | Returns: 28 | contours, hierarchy 29 | """ 30 | if cv2.__version__.startswith('4'): 31 | contours, hierarchy = cv2.findContours(*args, **kwargs) 32 | elif cv2.__version__.startswith('3'): 33 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 34 | else: 35 | raise AssertionError( 36 | 'cv2 must be either version 3 or 4 to call this method') 37 | 38 | return contours, hierarchy 39 | 40 | 41 | def draw_box(image, box, label, color=(0, 0, 255), score=None, linewidth=2): 42 | """Draw a bounding box with label on the image.""" 43 | if score is not None: 44 | text = "{}: {:.4f}".format(label, score) 45 | else: 46 | text = str(label) 47 | 48 | cv2.rectangle(image, (int(box[0]), int(box[1])), 49 | (int(box[2]), int(box[3])), color, linewidth) 50 | cx = box[0] + (box[2] - box[0]) / 2 - 5 51 | cy = box[1] + 12 52 | cv2.putText(image, text, (int(cx), int(cy)), 53 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) -------------------------------------------------------------------------------- /dynamic_rcnn/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for registering and managing modules. It extends a dictionary 12 | and provides a register function. 13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is simply to call the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): use it as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Modules are then accessed just like dictionary entries, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /dynamic_rcnn/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in the list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | 18 | 19 | def permute_and_flatten(layer, N, A, C, H, W): 20 | layer = layer.view(N, -1, C, H, W) 21 | layer = layer.permute(0, 3, 4, 1, 2) 22 | layer = layer.reshape(N, -1, C) 23 | return layer 24 | 25 | 26 | def concat_box_prediction_layers(box_cls, box_regression): 27 | box_cls_flattened = [] 28 | box_regression_flattened = [] 29 | # for each feature level, permute the outputs to put them in the 30 | # same format as the labels. 
Note that the labels are computed for 31 | # all feature levels concatenated, so we keep the same representation 32 | # for the objectness and the box_regression 33 | for box_cls_per_level, box_regression_per_level in zip( 34 | box_cls, box_regression 35 | ): 36 | N, AxC, H, W = box_cls_per_level.shape 37 | Ax4 = box_regression_per_level.shape[1] 38 | A = Ax4 // 4 39 | C = AxC // A 40 | box_cls_per_level = permute_and_flatten( 41 | box_cls_per_level, N, A, C, H, W 42 | ) 43 | box_cls_flattened.append(box_cls_per_level) 44 | 45 | box_regression_per_level = permute_and_flatten( 46 | box_regression_per_level, N, A, 4, H, W 47 | ) 48 | box_regression_flattened.append(box_regression_per_level) 49 | # concatenate on the first dimension (representing the feature levels), to 50 | # take into account the way the labels were generated (with all feature maps 51 | # being concatenated as well) 52 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 53 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 54 | return box_cls, box_regression 55 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_dcnv2_fpn_mstrain_3x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_fpn_1x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_fpn_2x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_fpn_mstrain_3x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r50_fpn_1x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r50_fpn_2x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # !/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = [ 15 | "torch==1.0.1.post2", "torchvision==0.2.2.post3", "cython", "matplotlib", 16 | "tqdm", "easydict", "pycocotools", "opencv-python"] 17 | 18 | 19 | def get_extensions(): 20 | this_dir = os.path.dirname(os.path.abspath(__file__)) 21 | extensions_dir = os.path.join(this_dir, "dynamic_rcnn", "kernels") 22 | 23 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 24 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 25 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 26 | 27 | sources = main_file + source_cpu 28 | extension = CppExtension 29 | 30 | extra_compile_args = {"cxx": []} 31 | define_macros = [] 32 | 33 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv( 34 | "FORCE_CUDA", "0") == "1": 35 | extension = CUDAExtension 36 | sources += source_cuda 37 | define_macros += [("WITH_CUDA", None)] 38 | extra_compile_args["nvcc"] = [ 39 | "-DCUDA_HAS_FP16=1", 40 | "-D__CUDA_NO_HALF_OPERATORS__", 41 | "-D__CUDA_NO_HALF_CONVERSIONS__", 42 | "-D__CUDA_NO_HALF2_OPERATORS__", 43 | ] 44 | 45 | sources = [os.path.join(extensions_dir, s) for s in sources] 46 | 47 | include_dirs = [extensions_dir] 48 | 49 | ext_modules = [ 50 | extension( 51 | "dynamic_rcnn._C", 52 | sources, 53 | include_dirs=include_dirs, 54 | define_macros=define_macros, 55 | extra_compile_args=extra_compile_args, 56 | ) 57 | ] 58 | 59 | return ext_modules 60 | 61 | 62 | setup( 63 | name="DynamicRCNN", 64 | version="0.1", 65 | author="fmassa, hkzhang95", 66 | url="https://github.com/hkzhang95/DynamicRCNN", 67 | description="object detection in pytorch", 68 | # packages=find_packages(exclude=("configs", "tests",)), 69 | install_requires=requirements, 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | --------------------------------------------------------------------------------
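A minimal build-and-smoke-test sketch, assuming the C++/CUDA kernels have been compiled into the `dynamic_rcnn._C` extension by the setup script above (for example via `python setup.py build_ext --inplace` or `pip install -e .`). The snippet below only checks that the compiled CPU ops load and run; the box coordinates, feature shape and spatial_scale are arbitrary assumptions, not values taken from this repository.

import torch
from dynamic_rcnn import _C  # the same extension module imported by kernels/ops/nms.py

# Non-maximum suppression on three boxes; box 1 overlaps box 0 with IoU > 0.5,
# so only boxes 0 and 2 survive.
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = _C.nms(boxes, scores, 0.5)

# ROIAlign forward pass: each rois row is (batch_index, x1, y1, x2, y2) in image
# coordinates, mapped onto the stride-4 feature map by spatial_scale=0.25.
features = torch.randn(1, 256, 32, 32)
rois = torch.tensor([[0., 0., 0., 64., 64.]])
pooled = _C.roi_align_forward(features, rois, 0.25, 7, 7, 2)

print(keep.tolist(), tuple(pooled.shape))  # expected: [0, 2] (1, 256, 7, 7)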