├── .gitignore ├── LICENSE ├── README.md ├── dynamic_rcnn ├── basemodels │ ├── c2_model_loading.py │ └── resnet.py ├── datasets │ ├── __init__.py │ ├── coco.py │ ├── collate_batch.py │ ├── concat_dataset.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── coco │ │ │ ├── __init__.py │ │ │ └── coco_eval.py │ │ └── voc │ │ │ ├── __init__.py │ │ │ └── voc_eval.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ ├── structures │ │ ├── __init__.py │ │ ├── bounding_box.py │ │ ├── boxlist_ops.py │ │ ├── image_list.py │ │ ├── keypoint.py │ │ └── segmentation_mask.py │ ├── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ ├── coco_transforms.py │ │ └── transforms.py │ └── voc.py ├── det_opr │ ├── box_coder.py │ ├── fpn │ │ └── fpn.py │ ├── loss.py │ ├── matcher.py │ ├── poolers.py │ ├── rcnn │ │ ├── cascade_rcnn │ │ │ └── proposal_opr.py │ │ ├── mask_head │ │ │ ├── inference.py │ │ │ └── mask_target_opr.py │ │ ├── post_processing.py │ │ └── proposal_target_opr.py │ ├── rpn │ │ ├── anchor_generator.py │ │ ├── anchor_target_opr.py │ │ ├── fcos │ │ │ ├── fcos_target_opr.py │ │ │ ├── post_processing.py │ │ │ └── scale.py │ │ ├── proposal_opr.py │ │ └── retinanet │ │ │ ├── anchor_target_opr.py │ │ │ └── post_processing.py │ └── sampler.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── checkpoint.py │ ├── comm.py │ └── lr_scheduler.py ├── kernels │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ ├── ops │ │ ├── dcn │ │ │ ├── __init__.py │ │ │ ├── deform_conv_func.py │ │ │ ├── deform_conv_module.py │ │ │ ├── deform_pool_func.py │ │ │ └── deform_pool_module.py │ │ ├── nms.py │ │ ├── roi_align.py │ │ └── roi_pool.py │ └── vision.cpp └── utils │ ├── logger.py │ ├── metric_logger.py │ ├── misc.py │ ├── pyt_utils.py │ ├── registry.py │ └── torch_utils.py ├── models └── zhanghongkai │ └── dynamic_rcnn │ └── coco │ ├── dynamic_rcnn_r101_dcnv2_fpn_mstrain_3x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r101_fpn_1x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r101_fpn_2x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r101_fpn_mstrain_3x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ ├── dynamic_rcnn_r50_fpn_1x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py │ └── dynamic_rcnn_r50_fpn_2x │ ├── config.py │ ├── dataset.py │ ├── network.py │ ├── test.py │ └── train.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # compilation and distribution 2 | __pycache__ 3 | _ext 4 | *.pyc 5 | *.so 6 | *.egg-info 7 | *.egg 8 | *log 9 | build/ 10 | dist/ 11 | 12 | # pytorch/python/numpy formats 13 | *.pth 14 | *.pkl 15 | *.npy 16 | 17 | # ipython/jupyter notebooks 18 | *.ipynb 19 | **/.ipynb_checkpoints/ 20 | 21 | # Editor temporaries 22 | *.swn 23 | *.swo 24 | *.swp 25 | *~ 26 | 27 | # Pycharm editor settings 28 | .idea 29 | 30 | # vscode editor settings 31 | .vscode 32 | 33 | # MacOS 
34 | .DS_Store 35 | 36 | # project dirs 37 | /data 38 | /output 39 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Hongkai Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | 6 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset"] 7 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
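# Illustrative usage sketch (not part of the original file; the annotation and
# image paths are hypothetical). Without transforms, COCODataset returns a
# (PIL image, BoxList target, index) triple per sample:
#
#   dataset = COCODataset(
#       ann_file="data/coco/annotations/instances_train2017.json",
#       root="data/coco/train2017",
#       remove_images_without_annotations=True)
#   img, target, idx = dataset[0]
#   print(target.bbox.shape, target.get_field("labels"))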
2 | import torch 3 | import torchvision 4 | 5 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 6 | from dynamic_rcnn.datasets.structures.segmentation_mask import SegmentationMask 7 | from dynamic_rcnn.datasets.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class COCODataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, 42 | transforms=None, return_raw=False): 43 | super(COCODataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | self.return_raw = return_raw 68 | 69 | def __getitem__(self, idx): 70 | img, anno = super(COCODataset, self).__getitem__(idx) 71 | 72 | # filter crowd annotations 73 | # TODO might be better to add an extra field 74 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 75 | 76 | boxes = [obj["bbox"] for obj in anno] 77 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 78 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 79 | 80 | classes = [obj["category_id"] for obj in anno] 81 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 82 | classes = torch.tensor(classes) 83 | target.add_field("labels", classes) 84 | 85 | if anno and "segmentation" in anno[0]: 86 | masks = [obj["segmentation"] for obj in anno] 87 | masks = SegmentationMask(masks, img.size, mode='poly') 88 | target.add_field("masks", masks) 89 | 90 | if anno and "keypoints" in anno[0]: 91 | keypoints = [obj["keypoints"] for obj in anno] 92 | keypoints = PersonKeypoints(keypoints, img.size) 93 | target.add_field("keypoints", keypoints) 94 | 95 | target = target.clip_to_image(remove_empty=True) 96 | 97 | if self._transforms is not None: 98 | trans_img, trans_target = self._transforms(img, target) 99 | if self.return_raw: 100 | 
return img, target, trans_img, trans_target, idx 101 | else: 102 | return trans_img, trans_target, idx 103 | 104 | return img, target, idx 105 | 106 | def get_img_info(self, index): 107 | img_id = self.id_to_img_map[index] 108 | img_data = self.coco.imgs[img_id] 109 | return img_data 110 | 111 | class_names = [ 112 | 'background', 'person', 'bicycle', 'car', 'motorcycle', 113 | 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 114 | 'fire hydrant', 'stop sign', 'parking meter', 'bench', 115 | 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 116 | 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 117 | 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 118 | 'sports ball', 'kite', 'baseball bat', 'baseball glove', 119 | 'skateboard', 'surfboard', 'tennis racket', 'bottle', 120 | 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 121 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 122 | 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 123 | 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 124 | 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 125 | 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 126 | 'book', 'clock', 'vase', 'scissors', 'teddy bear', 127 | 'hair drier', 'toothbrush'] 128 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from dynamic_rcnn.datasets.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0, return_raw=False): 13 | self.size_divisible = size_divisible 14 | self.return_raw = return_raw 15 | 16 | def __call__(self, batch): 17 | transposed_batch = list(zip(*batch)) 18 | if self.return_raw: 19 | ori_images = transposed_batch[0] 20 | ori_targets = transposed_batch[1] 21 | images = to_image_list( 22 | transposed_batch[2], self.size_divisible) 23 | targets = transposed_batch[3] 24 | img_ids = transposed_batch[4] 25 | return ori_images, ori_targets, images, targets, img_ids 26 | else: 27 | images = to_image_list(transposed_batch[0], self.size_divisible) 28 | targets = transposed_batch[1] 29 | img_ids = transposed_batch[2] 30 | return images, targets, img_ids 31 | 32 | 33 | class BBoxAugCollator(object): 34 | """ 35 | From a list of samples from the dataset, 36 | returns the images and targets. 37 | Images should be converted to batched images in `im_detect_bbox_aug` 38 | """ 39 | 40 | def __call__(self, batch): 41 | return list(zip(*batch)) 42 | 43 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
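# Worked example (illustrative, not part of the original file): with two
# concatenated datasets of sizes 100 and 150, cumulative_sizes == [100, 250],
# so a global index of 120 falls in the second dataset at local index 20:
#
#   import bisect
#   cumulative_sizes = [100, 250]
#   idx = 120
#   dataset_idx = bisect.bisect_right(cumulative_sizes, idx)        # -> 1
#   sample_idx = idx - cumulative_sizes[dataset_idx - 1]            # -> 20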
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from dynamic_rcnn import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | 6 | 7 | def evaluate(dataset, predictions, output_folder, logger, **kwargs): 8 | """evaluate dataset using different methods based on dataset type. 9 | Args: 10 | dataset: Dataset object 11 | predictions(list[BoxList]): each item in the list represents the 12 | prediction results for one image. 13 | output_folder: output folder, to save evaluation files or results. 14 | logger: logger. 15 | **kwargs: other args. 16 | Returns: 17 | evaluation result 18 | """ 19 | args = dict(dataset=dataset, predictions=predictions, 20 | output_folder=output_folder, logger=logger, **kwargs) 21 | if isinstance(dataset, datasets.COCODataset): 22 | return coco_evaluation(**args) 23 | elif isinstance(dataset, datasets.PascalVOCDataset): 24 | return voc_evaluation(**args) 25 | else: 26 | dataset_name = dataset.__class__.__name__ 27 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 28 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | logger, 9 | box_only, 10 | iou_types, 11 | expected_results, 12 | expected_results_sigma_tol, 13 | ): 14 | return do_coco_evaluation( 15 | dataset=dataset, 16 | predictions=predictions, 17 | box_only=box_only, 18 | output_folder=output_folder, 19 | logger=logger, 20 | iou_types=iou_types, 21 | expected_results=expected_results, 22 | expected_results_sigma_tol=expected_results_sigma_tol, 23 | ) 24 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | from .voc_eval import do_voc_evaluation 2 | 3 | 4 | def voc_evaluation(dataset, predictions, output_folder, box_only, logger, **_): 5 | if box_only: 6 | logger.warning("voc evaluation doesn't support box_only, ignored.") 7 | logger.info("performing voc evaluation, ignored iou_types.") 8 | return do_voc_evaluation( 9 | dataset=dataset, 10 | predictions=predictions, 11 | output_folder=output_folder, 12 | logger=logger, 13 | ) 14 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # 
Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
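# Illustrative sketch (not part of the original file): _quantize buckets each
# aspect ratio (height / width) against the given bins with bisect, so with a
# single bin at 1.0 landscape images land in group 0 and portrait images in
# group 1, and a mini-batch never mixes the two orientations:
#
#   import bisect
#   bins = [1.0]
#   ratios = [0.75, 1.33, 1.0]
#   groups = [bisect.bisect_right(bins, r) for r in ratios]   # -> [0, 1, 1]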
2 | import itertools 3 | import bisect 4 | import copy 5 | import torch 6 | from torch.utils.data.sampler import BatchSampler 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | def _quantize(x, bins): 11 | bins = copy.copy(bins) 12 | bins = sorted(bins) 13 | quantized = list(map(lambda y: bisect.bisect_right(bins, y), x)) 14 | return quantized 15 | 16 | 17 | def _compute_aspect_ratios(dataset): 18 | aspect_ratios = [] 19 | for i in range(len(dataset)): 20 | img_info = dataset.get_img_info(i) 21 | aspect_ratio = float(img_info["height"]) / float(img_info["width"]) 22 | aspect_ratios.append(aspect_ratio) 23 | return aspect_ratios 24 | 25 | 26 | class GroupedBatchSampler(BatchSampler): 27 | """ 28 | Wraps another sampler to yield a mini-batch of indices. 29 | It enforces that elements from the same group should appear in groups of batch_size. 30 | It also tries to provide mini-batches which follows an ordering which is 31 | as close as possible to the ordering from the original sampler. 32 | 33 | Arguments: 34 | sampler (Sampler): Base sampler. 35 | batch_size (int): Size of mini-batch. 36 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 37 | size is less than ``batch_size`` 38 | 39 | """ 40 | 41 | def __init__( 42 | self, sampler, dataset, aspect_grouping, batch_size, drop_uneven=False): 43 | aspect_ratios = _compute_aspect_ratios(dataset) 44 | group_ids = _quantize(aspect_ratios, aspect_grouping) 45 | if not isinstance(sampler, Sampler): 46 | raise ValueError( 47 | "sampler should be an instance of " 48 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 49 | ) 50 | self.sampler = sampler 51 | self.group_ids = torch.as_tensor(group_ids) 52 | assert self.group_ids.dim() == 1 53 | self.batch_size = batch_size 54 | self.drop_uneven = drop_uneven 55 | 56 | self.groups = torch.unique(self.group_ids).sort(0)[0] 57 | 58 | self._can_reuse_batches = False 59 | 60 | def _prepare_batches(self): 61 | dataset_size = len(self.group_ids) 62 | # get the sampled indices from the sampler 63 | sampled_ids = torch.as_tensor(list(self.sampler)) 64 | # potentially not all elements of the dataset were sampled 65 | # by the sampler (e.g., DistributedSampler). 66 | # construct a tensor which contains -1 if the element was 67 | # not sampled, and a non-negative number indicating the 68 | # order where the element was sampled. 69 | # for example. 
if sampled_ids = [3, 1] and dataset_size = 5, 70 | # the order is [-1, 1, -1, 0, -1] 71 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 72 | order[sampled_ids] = torch.arange(len(sampled_ids)) 73 | 74 | # get a mask with the elements that were sampled 75 | mask = order >= 0 76 | 77 | # find the elements that belong to each individual cluster 78 | clusters = [(self.group_ids == i) & mask for i in self.groups] 79 | # get relative order of the elements inside each cluster 80 | # that follows the order from the sampler 81 | relative_order = [order[cluster] for cluster in clusters] 82 | # with the relative order, find the absolute order in the 83 | # sampled space 84 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 85 | # permute each cluster so that they follow the order from 86 | # the sampler 87 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 88 | 89 | # splits each cluster in batch_size, and merge as a list of tensors 90 | splits = [c.split(self.batch_size) for c in permuted_clusters] 91 | merged = tuple(itertools.chain.from_iterable(splits)) 92 | 93 | # now each batch internally has the right order, but 94 | # they are grouped by clusters. Find the permutation between 95 | # different batches that brings them as close as possible to 96 | # the order that we have in the sampler. For that, we will consider the 97 | # ordering as coming from the first element of each batch, and sort 98 | # correspondingly 99 | first_element_of_batch = [t[0].item() for t in merged] 100 | # get and inverse mapping from sampled indices and the position where 101 | # they occur (as returned by the sampler) 102 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 103 | # from the first element in each batch, get a relative ordering 104 | first_index_of_batch = torch.as_tensor( 105 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 106 | ) 107 | 108 | # permute the batches so that they approximately follow the order 109 | # from the sampler 110 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 111 | # finally, permute the batches 112 | batches = [merged[i].tolist() for i in permutation_order] 113 | 114 | if self.drop_uneven: 115 | kept = [] 116 | for batch in batches: 117 | if len(batch) == self.batch_size: 118 | kept.append(batch) 119 | batches = kept 120 | return batches 121 | 122 | def __iter__(self): 123 | if self._can_reuse_batches: 124 | batches = self._batches 125 | self._can_reuse_batches = False 126 | else: 127 | batches = self._prepare_batches() 128 | self._batches = batches 129 | return iter(batches) 130 | 131 | def __len__(self): 132 | if not hasattr(self, "_batches"): 133 | self._batches = self._prepare_batches() 134 | self._can_reuse_batches = True 135 | return len(self._batches) 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
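# Usage sketch (illustrative, not part of the original file): wrapping a plain
# BatchSampler so that iteration-based training always yields exactly
# num_iterations batches, re-cycling the underlying sampler as needed:
#
#   from torch.utils.data.sampler import RandomSampler, BatchSampler
#   base = BatchSampler(RandomSampler(range(10)), batch_size=4, drop_last=False)
#   sampler = IterationBasedBatchSampler(base, num_iterations=100)
#   assert len(sampler) == 100   # 100 batches regardless of dataset size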
2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hkzhang95/DynamicRCNN/fdfca3d4567270c606a52822b88b0ddd802802da/dynamic_rcnn/datasets/structures/__init__.py -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/structures/boxlist_ops.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .bounding_box import BoxList 5 | 6 | from dynamic_rcnn.kernels.ops.nms import nms as _box_nms 7 | 8 | 9 | def boxlist_nms(boxlist, nms_thresh, max_proposals=-1, score_field="scores"): 10 | """ 11 | Performs non-maximum suppression on a boxlist, with scores specified 12 | in a boxlist field via score_field. 13 | 14 | Arguments: 15 | boxlist(BoxList) 16 | nms_thresh (float) 17 | max_proposals (int): if > 0, then only the top max_proposals are kept 18 | after non-maximum suppression 19 | score_field (str) 20 | """ 21 | if nms_thresh <= 0: 22 | return boxlist 23 | mode = boxlist.mode 24 | boxlist = boxlist.convert("xyxy") 25 | boxes = boxlist.bbox 26 | score = boxlist.get_field(score_field) 27 | keep = _box_nms(boxes, score, nms_thresh) 28 | if max_proposals > 0: 29 | keep = keep[: max_proposals] 30 | boxlist = boxlist[keep] 31 | return boxlist.convert(mode) 32 | 33 | 34 | def remove_small_boxes(boxlist, min_size): 35 | """ 36 | Only keep boxes with both sides >= min_size 37 | 38 | Arguments: 39 | boxlist (Boxlist) 40 | min_size (int) 41 | """ 42 | # TODO maybe add an API for querying the ws / hs 43 | xywh_boxes = boxlist.convert("xywh").bbox 44 | _, _, ws, hs = xywh_boxes.unbind(dim=1) 45 | keep = ( 46 | (ws >= min_size) & (hs >= min_size) 47 | ).nonzero().squeeze(1) 48 | return boxlist[keep] 49 | 50 | 51 | # implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py 52 | # with slight modifications 53 | def boxlist_iou(boxlist1, boxlist2): 54 | """Compute the intersection over union of two set of boxes. 55 | The box order must be (xmin, ymin, xmax, ymax). 56 | 57 | Arguments: 58 | box1: (BoxList) bounding boxes, sized [N,4]. 59 | box2: (BoxList) bounding boxes, sized [M,4]. 60 | 61 | Returns: 62 | (tensor) iou, sized [N,M]. 
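        Worked example (illustrative, using the TO_REMOVE = 1 convention
        below): for box1 = [0, 0, 10, 10] and box2 = [5, 5, 15, 15], each box
        has area 11 * 11 = 121 and the intersection is 6 * 6 = 36, so
        iou = 36 / (121 + 121 - 36) ≈ 0.17.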
63 | 64 | Reference: 65 | https://github.com/chainer/chainercv/blob/master/chainercv/utils/bbox/bbox_iou.py 66 | """ 67 | if boxlist1.size != boxlist2.size: 68 | raise RuntimeError( 69 | "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2)) 70 | boxlist1 = boxlist1.convert("xyxy") 71 | boxlist2 = boxlist2.convert("xyxy") 72 | N = len(boxlist1) 73 | M = len(boxlist2) 74 | 75 | area1 = boxlist1.area() 76 | area2 = boxlist2.area() 77 | 78 | box1, box2 = boxlist1.bbox, boxlist2.bbox 79 | 80 | lt = torch.max(box1[:, None, :2], box2[:, :2]) # [N,M,2] 81 | rb = torch.min(box1[:, None, 2:], box2[:, 2:]) # [N,M,2] 82 | 83 | TO_REMOVE = 1 84 | 85 | wh = (rb - lt + TO_REMOVE).clamp(min=0) # [N,M,2] 86 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 87 | 88 | iou = inter / (area1[:, None] + area2 - inter) 89 | return iou 90 | 91 | 92 | # TODO redundant, remove 93 | def _cat(tensors, dim=0): 94 | """ 95 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 96 | """ 97 | assert isinstance(tensors, (list, tuple)) 98 | if len(tensors) == 1: 99 | return tensors[0] 100 | return torch.cat(tensors, dim) 101 | 102 | 103 | def cat_boxlist(bboxes): 104 | """ 105 | Concatenates a list of BoxList (having the same image size) into a 106 | single BoxList 107 | 108 | Arguments: 109 | bboxes (list[BoxList]) 110 | """ 111 | assert isinstance(bboxes, (list, tuple)) 112 | assert all(isinstance(bbox, BoxList) for bbox in bboxes) 113 | 114 | size = bboxes[0].size 115 | assert all(bbox.size == size for bbox in bboxes) 116 | 117 | mode = bboxes[0].mode 118 | assert all(bbox.mode == mode for bbox in bboxes) 119 | 120 | fields = set(bboxes[0].fields()) 121 | assert all(set(bbox.fields()) == fields for bbox in bboxes) 122 | 123 | cat_boxes = BoxList(_cat([bbox.bbox for bbox in bboxes], dim=0), size, mode) 124 | 125 | for field in fields: 126 | data = _cat([bbox.get_field(field) for bbox in bboxes], dim=0) 127 | cat_boxes.add_field(field, data) 128 | 129 | return cat_boxes 130 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | if tensors.dim() == 3: 45 | tensors = tensors[None] 46 | assert tensors.dim() == 4 47 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 48 | return ImageList(tensors, image_sizes) 49 | elif isinstance(tensors, (tuple, list)): 50 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 51 | 52 | # TODO Ideally, just remove this and let me model handle arbitrary 53 | # input sizs 54 | if size_divisible > 0: 55 | import math 56 | 57 | stride = size_divisible 58 | max_size = list(max_size) 59 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 60 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 61 | max_size = tuple(max_size) 62 | 63 | batch_shape = (len(tensors),) + max_size 64 | batched_imgs = tensors[0].new(*batch_shape).zero_() 65 | for img, pad_img in zip(tensors, batched_imgs): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | 68 | image_sizes = [im.shape[-2:] for im in tensors] 69 | 70 | return ImageList(batched_imgs, image_sizes) 71 | else: 72 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 73 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . 
import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_horizontal_prob = cfg.INPUT.HORIZONTAL_FLIP_PROB_TRAIN 10 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 11 | brightness = cfg.INPUT.BRIGHTNESS 12 | contrast = cfg.INPUT.CONTRAST 13 | saturation = cfg.INPUT.SATURATION 14 | hue = cfg.INPUT.HUE 15 | else: 16 | min_size = cfg.INPUT.MIN_SIZE_TEST 17 | max_size = cfg.INPUT.MAX_SIZE_TEST 18 | flip_horizontal_prob = 0.0 19 | flip_vertical_prob = 0.0 20 | brightness = 0.0 21 | contrast = 0.0 22 | saturation = 0.0 23 | hue = 0.0 24 | 25 | to_bgr255 = cfg.INPUT.TO_BGR255 26 | normalize_transform = T.Normalize( 27 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 28 | ) 29 | color_jitter = T.ColorJitter( 30 | brightness=brightness, 31 | contrast=contrast, 32 | saturation=saturation, 33 | hue=hue, 34 | ) 35 | 36 | transform = T.Compose( 37 | [ 38 | color_jitter, 39 | T.Resize(min_size, max_size), 40 | T.RandomHorizontalFlip(flip_horizontal_prob), 41 | T.RandomVerticalFlip(flip_vertical_prob), 42 | T.ToTensor(), 43 | normalize_transform, 44 | ] 45 | ) 46 | return transform 47 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/coco_transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_coco_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_prob = 0.5 10 | else: 11 | min_size = cfg.INPUT.MIN_SIZE_TEST 12 | max_size = cfg.INPUT.MAX_SIZE_TEST 13 | flip_prob = 0 14 | 15 | to_bgr255 = cfg.INPUT.TO_BGR255 16 | normalize_transform = T.Normalize( 17 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, 18 | to_bgr255=to_bgr255) 19 | 20 | transform = T.Compose([ 21 | T.Resize(min_size, max_size), 22 | T.RandomHorizontalFlip(flip_prob), 23 | T.ToTensor(), 24 | normalize_transform, ]) 25 | return transform 26 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
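# Usage sketch (illustrative, not part of the original file; the mean/std
# values are only examples): these transforms act on an (image, target) pair,
# where target is a BoxList that is resized and flipped together with the
# PIL image:
#
#   transform = Compose([
#       Resize(min_size=800, max_size=1333),
#       RandomHorizontalFlip(prob=0.5),
#       ToTensor(),
#       Normalize(mean=[102.98, 115.95, 122.77], std=[1., 1., 1.],
#                 to_bgr255=True),
#   ])
#   image, target = transform(image, target)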
2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | if not isinstance(min_size, (list, tuple)): 30 | min_size = (min_size,) 31 | self.min_size = min_size 32 | self.max_size = max_size 33 | 34 | # modified from torchvision to add support for max size 35 | def get_size(self, image_size): 36 | w, h = image_size 37 | size = random.choice(self.min_size) 38 | max_size = self.max_size 39 | if max_size is not None: 40 | min_original_size = float(min((w, h))) 41 | max_original_size = float(max((w, h))) 42 | if max_original_size / min_original_size * size > max_size: 43 | size = int(round(max_size * min_original_size / max_original_size)) 44 | 45 | if (w <= h and w == size) or (h <= w and h == size): 46 | return (h, w) 47 | 48 | if w < h: 49 | ow = size 50 | oh = int(size * h / w) 51 | else: 52 | oh = size 53 | ow = int(size * w / h) 54 | 55 | return (oh, ow) 56 | 57 | def __call__(self, image, target=None): 58 | size = self.get_size(image.size) 59 | image = F.resize(image, size) 60 | if target is None: 61 | return image 62 | target = target.resize(image.size) 63 | return image, target 64 | 65 | 66 | class RandomHorizontalFlip(object): 67 | def __init__(self, prob=0.5): 68 | self.prob = prob 69 | 70 | def __call__(self, image, target): 71 | if random.random() < self.prob: 72 | image = F.hflip(image) 73 | target = target.transpose(0) 74 | return image, target 75 | 76 | class RandomVerticalFlip(object): 77 | def __init__(self, prob=0.5): 78 | self.prob = prob 79 | 80 | def __call__(self, image, target): 81 | if random.random() < self.prob: 82 | image = F.vflip(image) 83 | target = target.transpose(1) 84 | return image, target 85 | 86 | class ColorJitter(object): 87 | def __init__(self, 88 | brightness=None, 89 | contrast=None, 90 | saturation=None, 91 | hue=None, 92 | ): 93 | self.color_jitter = torchvision.transforms.ColorJitter( 94 | brightness=brightness, 95 | contrast=contrast, 96 | saturation=saturation, 97 | hue=hue,) 98 | 99 | def __call__(self, image, target): 100 | image = self.color_jitter(image) 101 | return image, target 102 | 103 | 104 | class ToTensor(object): 105 | def __call__(self, image, target): 106 | return F.to_tensor(image), target 107 | 108 | 109 | class Normalize(object): 110 | def __init__(self, mean, std, to_bgr255=True): 111 | self.mean = mean 112 | self.std = std 113 | self.to_bgr255 = to_bgr255 114 | 115 | def __call__(self, image, target=None): 116 | if self.to_bgr255: 117 | image = image[[2, 1, 0]] * 255 118 | image = F.normalize(image, mean=self.mean, std=self.std) 119 | if target is None: 120 | return image 121 | return image, target 122 | -------------------------------------------------------------------------------- /dynamic_rcnn/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if 
sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | "train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | self.categories = dict(zip(range(len(cls)), cls)) 61 | 62 | def __getitem__(self, index): 63 | img_id = self.ids[index] 64 | img = Image.open(self._imgpath % img_id).convert("RGB") 65 | 66 | target = self.get_groundtruth(index) 67 | target = target.clip_to_image(remove_empty=True) 68 | 69 | if self.transforms is not None: 70 | img, target = self.transforms(img, target) 71 | 72 | return img, target, index 73 | 74 | def __len__(self): 75 | return len(self.ids) 76 | 77 | def get_groundtruth(self, index): 78 | img_id = self.ids[index] 79 | anno = ET.parse(self._annopath % img_id).getroot() 80 | anno = self._preprocess_annotation(anno) 81 | 82 | height, width = anno["im_info"] 83 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 84 | target.add_field("labels", anno["labels"]) 85 | target.add_field("difficult", anno["difficult"]) 86 | return target 87 | 88 | def _preprocess_annotation(self, target): 89 | boxes = [] 90 | gt_classes = [] 91 | difficult_boxes = [] 92 | TO_REMOVE = 1 93 | 94 | for obj in target.iter("object"): 95 | difficult = int(obj.find("difficult").text) == 1 96 | if not self.keep_difficult and difficult: 97 | continue 98 | name = obj.find("name").text.lower().strip() 99 | bb = obj.find("bndbox") 100 | # Make pixel indexes 0-based 101 | # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211" 102 | box = [ 103 | bb.find("xmin").text, 104 | bb.find("ymin").text, 105 | bb.find("xmax").text, 106 | bb.find("ymax").text, 107 | ] 108 | bndbox = tuple( 109 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 110 | ) 111 | 112 | boxes.append(bndbox) 113 | gt_classes.append(self.class_to_ind[name]) 114 | difficult_boxes.append(difficult) 115 | 116 | size = target.find("size") 117 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 118 | 119 | res = { 120 | "boxes": torch.tensor(boxes, dtype=torch.float32), 121 | "labels": torch.tensor(gt_classes), 122 | "difficult": torch.tensor(difficult_boxes), 123 | "im_info": im_info, 124 | } 125 | return res 126 | 127 | def get_img_info(self, index): 128 | img_id = self.ids[index] 129 | anno = ET.parse(self._annopath % 
img_id).getroot() 130 | size = anno.find("size") 131 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 132 | return {"height": im_info[0], "width": im_info[1]} 133 | 134 | def map_class_id_to_class_name(self, class_id): 135 | return PascalVOCDataset.CLASSES[class_id] 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 
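        Note (illustrative addition, matching the code below): decoding inverts
        the standard Faster R-CNN parameterization. With reference box width
        w_a, height h_a, center (x_a, y_a) and weighted deltas
        (dx, dy, dw, dh) = rel_codes / weights:
            x = dx * w_a + x_a,    y = dy * h_a + y_a,
            w = w_a * exp(dw),     h = h_a * exp(dh),
        where dw and dh are clamped to bbox_xform_clip before the exp.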
60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/fpn/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | from collections import OrderedDict 6 | from dynamic_rcnn.utils.misc import conv_with_kaiming_uniform 7 | 8 | 9 | class FPN(nn.Module): 10 | """ 11 | Module that adds FPN on top of a list of feature maps. 12 | The feature maps are currently supposed to be in increasing depth 13 | order, and must be consecutive 14 | """ 15 | 16 | def __init__( 17 | self, in_channels_list, out_channels, conv_block, top_blocks=None 18 | ): 19 | """ 20 | Arguments: 21 | in_channels_list (list[int]): number of channels for each feature map that 22 | will be fed 23 | out_channels (int): number of channels of the FPN representation 24 | top_blocks (nn.Module or None): if provided, an extra operation will 25 | be performed on the output of the last (smallest resolution) 26 | FPN output, and the result will extend the result list 27 | """ 28 | super(FPN, self).__init__() 29 | self.inner_blocks = [] 30 | self.layer_blocks = [] 31 | for idx, in_channels in enumerate(in_channels_list, 1): 32 | inner_block = "fpn_inner{}".format(idx) 33 | layer_block = "fpn_layer{}".format(idx) 34 | 35 | if in_channels == 0: 36 | continue 37 | inner_block_module = conv_block(in_channels, out_channels, 1) 38 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 39 | self.add_module(inner_block, inner_block_module) 40 | self.add_module(layer_block, layer_block_module) 41 | self.inner_blocks.append(inner_block) 42 | self.layer_blocks.append(layer_block) 43 | self.top_blocks = top_blocks 44 | 45 | def forward(self, x): 46 | """ 47 | Arguments: 48 | x (list[Tensor]): feature maps for each feature level. 49 | Returns: 50 | results (tuple[Tensor]): feature maps after FPN layers. 51 | They are ordered from highest resolution first. 
52 | """ 53 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 54 | results = [] 55 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 56 | for feature, inner_block, layer_block in zip( 57 | x[:-1][::-1], self.inner_blocks[:-1][::-1], 58 | self.layer_blocks[:-1][::-1] 59 | ): 60 | if not inner_block: 61 | continue 62 | inner_top_down = F.interpolate(last_inner, scale_factor=2, 63 | mode="nearest") 64 | inner_lateral = getattr(self, inner_block)(feature) 65 | # TODO use size instead of scale to make it robust to different sizes 66 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 67 | # mode='bilinear', align_corners=False) 68 | last_inner = inner_lateral + inner_top_down 69 | results.insert(0, getattr(self, layer_block)(last_inner)) 70 | 71 | if isinstance(self.top_blocks, LastLevelP6P7): 72 | last_results = self.top_blocks(x[-1], results[-1]) 73 | results.extend(last_results) 74 | elif isinstance(self.top_blocks, LastLevelMaxPool): 75 | last_results = self.top_blocks(results[-1]) 76 | results.extend(last_results) 77 | 78 | return tuple(results) 79 | 80 | 81 | class LastLevelMaxPool(nn.Module): 82 | def forward(self, x): 83 | return [F.max_pool2d(x, 1, 2, 0)] 84 | 85 | 86 | class LastLevelP6P7(nn.Module): 87 | """ 88 | This module is used in RetinaNet to generate extra layers, P6 and P7. 89 | """ 90 | 91 | def __init__(self, in_channels, out_channels): 92 | super(LastLevelP6P7, self).__init__() 93 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 94 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 95 | for module in [self.p6, self.p7]: 96 | nn.init.kaiming_uniform_(module.weight, a=1) 97 | nn.init.constant_(module.bias, 0) 98 | self.use_P5 = in_channels == out_channels 99 | 100 | def forward(self, c5, p5): 101 | x = p5 if self.use_P5 else c5 102 | p6 = self.p6(x) 103 | p7 = self.p7(F.relu(p6)) 104 | return [p6, p7] 105 | 106 | 107 | def build_resnet_fpn_backbone(body, cfg): 108 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 109 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 110 | fpn = FPN( 111 | in_channels_list=[ 112 | in_channels_stage2, 113 | in_channels_stage2 * 2, 114 | in_channels_stage2 * 4, 115 | in_channels_stage2 * 8, 116 | ], 117 | out_channels=out_channels, 118 | conv_block=conv_with_kaiming_uniform(use_relu=cfg.MODEL.FPN.USE_RELU), 119 | top_blocks=LastLevelMaxPool(), 120 | ) 121 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 122 | model.out_channels = out_channels 123 | return model 124 | 125 | 126 | def build_resnet_fpn_p3p7_backbone(body, cfg): 127 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 128 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 129 | in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 130 | else out_channels 131 | fpn = FPN( 132 | in_channels_list=[ 133 | 0, 134 | in_channels_stage2 * 2, 135 | in_channels_stage2 * 4, 136 | in_channels_stage2 * 8, 137 | ], 138 | out_channels=out_channels, 139 | conv_block=conv_with_kaiming_uniform(use_relu=cfg.MODEL.FPN.USE_RELU), 140 | top_blocks=LastLevelP6P7(in_channels_p6p7, out_channels), 141 | ) 142 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 143 | model.out_channels = out_channels 144 | return model 145 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | 7 | from dynamic_rcnn import _C 8 | 9 | 10 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 11 | """ 12 | very similar to the smooth_l1_loss from pytorch, but with 13 | the extra beta parameter 14 | """ 15 | n = torch.abs(input - target) 16 | cond = n < beta 17 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 18 | if size_average: 19 | return loss.mean() 20 | return loss.sum() 21 | 22 | 23 | # TODO: Use JIT to replace CUDA implementation in the future. 24 | class _SigmoidFocalLoss(Function): 25 | @staticmethod 26 | def forward(ctx, logits, targets, gamma, alpha): 27 | ctx.save_for_backward(logits, targets) 28 | num_classes = logits.shape[1] 29 | ctx.num_classes = num_classes 30 | ctx.gamma = gamma 31 | ctx.alpha = alpha 32 | 33 | losses = _C.sigmoid_focalloss_forward( 34 | logits, targets, num_classes, gamma, alpha 35 | ) 36 | return losses 37 | 38 | @staticmethod 39 | @once_differentiable 40 | def backward(ctx, d_loss): 41 | logits, targets = ctx.saved_tensors 42 | num_classes = ctx.num_classes 43 | gamma = ctx.gamma 44 | alpha = ctx.alpha 45 | d_loss = d_loss.contiguous() 46 | d_logits = _C.sigmoid_focalloss_backward( 47 | logits, targets, d_loss, num_classes, gamma, alpha 48 | ) 49 | return d_logits, None, None, None, None 50 | 51 | 52 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 53 | 54 | 55 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 56 | num_classes = logits.shape[1] 57 | gamma = gamma[0] 58 | alpha = alpha[0] 59 | dtype = targets.dtype 60 | device = targets.device 61 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 62 | 63 | t = targets.unsqueeze(1) 64 | p = torch.sigmoid(logits) 65 | term1 = (1 - p) ** gamma * torch.log(p) 66 | term2 = p ** gamma * torch.log(1 - p) 67 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 68 | 69 | 70 | class SigmoidFocalLoss(nn.Module): 71 | def __init__(self, gamma, alpha): 72 | super(SigmoidFocalLoss, self).__init__() 73 | self.gamma = gamma 74 | self.alpha = alpha 75 | 76 | def forward(self, logits, targets): 77 | device = logits.device 78 | if logits.is_cuda: 79 | loss_func = sigmoid_focal_loss_cuda 80 | else: 81 | loss_func = sigmoid_focal_loss_cpu 82 | 83 | loss = loss_func(logits, targets, self.gamma, self.alpha) 84 | return loss.sum() 85 | 86 | def __repr__(self): 87 | tmpstr = self.__class__.__name__ + "(" 88 | tmpstr += "gamma=" + str(self.gamma) 89 | tmpstr += ", alpha=" + str(self.alpha) 90 | tmpstr += ")" 91 | return tmpstr 92 | 93 | 94 | class IOULoss(nn.Module): 95 | def forward(self, pred, target, weight=None, loc_loss_type='log_iou'): 96 | pred_left = pred[:, 0] 97 | pred_top = pred[:, 1] 98 | pred_right = pred[:, 2] 99 | pred_bottom = pred[:, 3] 100 | 101 | target_left = target[:, 0] 102 | target_top = target[:, 1] 103 | target_right = target[:, 2] 104 | target_bottom = target[:, 3] 105 | 106 | target_area = (target_left + target_right) * \ 107 | (target_top + target_bottom) 108 | pred_area = (pred_left + pred_right) * \ 109 | (pred_top + pred_bottom) 110 | 111 | w_intersect = torch.min(pred_left, target_left) + \ 112 | torch.min(pred_right, target_right) 113 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 114 | torch.min(pred_top, target_top) 115 | 116 | area_intersect = 
w_intersect * h_intersect 117 | area_union = target_area + pred_area - area_intersect 118 | 119 | ious = (area_intersect + 1.0) / (area_union + 1.0) 120 | if loc_loss_type == 'log_iou': 121 | losses = -torch.log(ious) 122 | elif loc_loss_type == 'linear_iou': 123 | losses = 1 - ious 124 | else: 125 | raise NotImplementedError 126 | 127 | if weight is not None and weight.sum() > 0: 128 | return (losses * weight).sum() / weight.sum() 129 | else: 130 | assert losses.numel() != 0 131 | return losses.mean() 132 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/matcher.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class Matcher(object): 6 | """ 7 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 8 | element. Each predicted element will have exactly zero or one matches; each 9 | ground-truth element may be assigned to zero or more predicted elements. 10 | 11 | Matching is based on the MxN match_quality_matrix, that characterizes how well 12 | each (ground-truth, predicted)-pair match. For example, if the elements are 13 | boxes, the matrix may contain box IoU overlap values. 14 | 15 | The matcher returns a tensor of size N containing the index of the ground-truth 16 | element m that matches to prediction n. If there is no match, a negative value 17 | is returned. 18 | """ 19 | 20 | BELOW_LOW_THRESHOLD = -1 21 | BETWEEN_THRESHOLDS = -2 22 | 23 | def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False): 24 | """ 25 | Args: 26 | high_threshold (float): quality values greater than or equal to 27 | this value are candidate matches. 28 | low_threshold (float): a lower quality threshold used to stratify 29 | matches into three levels: 30 | 1) matches >= high_threshold 31 | 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 32 | 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) 33 | allow_low_quality_matches (bool): if True, produce additional matches 34 | for predictions that have only low-quality match candidates. See 35 | set_low_quality_matches_ for more details. 36 | """ 37 | assert low_threshold <= high_threshold 38 | self.high_threshold = high_threshold 39 | self.low_threshold = low_threshold 40 | self.allow_low_quality_matches = allow_low_quality_matches 41 | 42 | def __call__(self, match_quality_matrix): 43 | """ 44 | Args: 45 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 46 | pairwise quality between M ground-truth elements and N predicted elements. 47 | 48 | Returns: 49 | matches (Tensor[int64]): an N tensor where N[i] is a matched gt in 50 | [0, M - 1] or a negative value indicating that prediction i could not 51 | be matched. 
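        Example (illustrative): with high_threshold=0.7 and low_threshold=0.3,
        a prediction whose best IoU against any ground truth is 0.8 is matched
        to that ground truth, one at 0.5 gets BETWEEN_THRESHOLDS (-2), and one
        at 0.1 gets BELOW_LOW_THRESHOLD (-1).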
52 | """ 53 | if match_quality_matrix.numel() == 0: 54 | # empty targets or proposals not supported during training 55 | if match_quality_matrix.shape[0] == 0: 56 | raise ValueError( 57 | "No ground-truth boxes available for one of the images " 58 | "during training") 59 | else: 60 | raise ValueError( 61 | "No proposal boxes available for one of the images " 62 | "during training") 63 | 64 | # match_quality_matrix is M (gt) x N (predicted) 65 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 66 | matched_vals, matches = match_quality_matrix.max(dim=0) 67 | if self.allow_low_quality_matches: 68 | all_matches = matches.clone() 69 | 70 | # Assign candidate matches with low quality to negative (unassigned) values 71 | below_low_threshold = matched_vals < self.low_threshold 72 | between_thresholds = (matched_vals >= self.low_threshold) & ( 73 | matched_vals < self.high_threshold 74 | ) 75 | matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD 76 | matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS 77 | 78 | if self.allow_low_quality_matches: 79 | self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) 80 | 81 | return matches 82 | 83 | def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): 84 | """ 85 | Produce additional matches for predictions that have only low-quality matches. 86 | Specifically, for each ground-truth find the set of predictions that have 87 | maximum overlap with it (including ties); for each prediction in that set, if 88 | it is unmatched, then match it to the ground-truth with which it has the highest 89 | quality value. 90 | """ 91 | # For each gt, find the prediction with which it has highest quality 92 | highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1) 93 | # Find highest quality match available, even if it is low, including ties 94 | gt_pred_pairs_of_highest_quality = torch.nonzero( 95 | match_quality_matrix == highest_quality_foreach_gt[:, None] 96 | ) 97 | # Example gt_pred_pairs_of_highest_quality: 98 | # tensor([[ 0, 39796], 99 | # [ 1, 32055], 100 | # [ 1, 32070], 101 | # [ 2, 39190], 102 | # [ 2, 40255], 103 | # [ 3, 40390], 104 | # [ 3, 41455], 105 | # [ 4, 45470], 106 | # [ 5, 45325], 107 | # [ 5, 46390]]) 108 | # Each row is a (gt index, prediction index) 109 | # Note how gt items 1, 2, 3, and 5 each have two ties 110 | 111 | pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1] 112 | matches[pred_inds_to_update] = all_matches[pred_inds_to_update] 113 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/poolers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from dynamic_rcnn.kernels.ops.roi_align import ROIAlign 7 | from dynamic_rcnn.utils.torch_utils import cat 8 | 9 | 10 | class LevelMapper(object): 11 | """Determine which FPN level each RoI in a set of RoIs should map to based 12 | on the heuristic in the FPN paper. 
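    Concretely (see __call__ below), an RoI whose area has square root s is
    assigned to level k = floor(canonical_level + log2(s / canonical_scale + eps)),
    clamped to [k_min, k_max]; with the defaults this is floor(4 + log2(s / 224)),
    i.e. Eqn. (1) of the FPN paper.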
13 | """ 14 | 15 | def __init__(self, k_min, k_max, canonical_scale=224, canonical_level=4, eps=1e-6): 16 | """ 17 | Arguments: 18 | k_min (int) 19 | k_max (int) 20 | canonical_scale (int) 21 | canonical_level (int) 22 | eps (float) 23 | """ 24 | self.k_min = k_min 25 | self.k_max = k_max 26 | self.s0 = canonical_scale 27 | self.lvl0 = canonical_level 28 | self.eps = eps 29 | 30 | def __call__(self, boxlists): 31 | """ 32 | Arguments: 33 | boxlists (list[BoxList]) 34 | """ 35 | # Compute level ids 36 | s = torch.sqrt(cat([boxlist.area() for boxlist in boxlists])) 37 | 38 | # Eqn.(1) in FPN paper 39 | target_lvls = torch.floor(self.lvl0 + torch.log2(s / self.s0 + self.eps)) 40 | target_lvls = torch.clamp(target_lvls, min=self.k_min, max=self.k_max) 41 | return target_lvls.to(torch.int64) - self.k_min 42 | 43 | 44 | class Pooler(nn.Module): 45 | """ 46 | Pooler for Detection with or without FPN. 47 | It currently hard-code ROIAlign in the implementation, 48 | but that can be made more generic later on. 49 | Also, the requirement of passing the scales is not strictly necessary, as they 50 | can be inferred from the size of the feature map / size of original image, 51 | which is available thanks to the BoxList. 52 | """ 53 | 54 | def __init__(self, output_size, scales, sampling_ratio): 55 | """ 56 | Arguments: 57 | output_size (list[tuple[int]] or list[int]): output size for the pooled region 58 | scales (list[float]): scales for each Pooler 59 | sampling_ratio (int): sampling ratio for ROIAlign 60 | """ 61 | super(Pooler, self).__init__() 62 | poolers = [] 63 | for scale in scales: 64 | poolers.append( 65 | ROIAlign( 66 | output_size, spatial_scale=scale, sampling_ratio=sampling_ratio 67 | ) 68 | ) 69 | self.poolers = nn.ModuleList(poolers) 70 | self.output_size = output_size 71 | # get the levels in the feature map by leveraging the fact that the network always 72 | # downsamples by a factor of 2 at each level. 73 | lvl_min = -torch.log2(torch.tensor(scales[0], dtype=torch.float32)).item() 74 | lvl_max = -torch.log2(torch.tensor(scales[-1], dtype=torch.float32)).item() 75 | self.map_levels = LevelMapper(lvl_min, lvl_max) 76 | 77 | def convert_to_roi_format(self, boxes): 78 | concat_boxes = cat([b.bbox for b in boxes], dim=0) 79 | device, dtype = concat_boxes.device, concat_boxes.dtype 80 | ids = cat( 81 | [ 82 | torch.full((len(b), 1), i, dtype=dtype, device=device) 83 | for i, b in enumerate(boxes) 84 | ], 85 | dim=0, 86 | ) 87 | rois = torch.cat([ids, concat_boxes], dim=1) 88 | return rois 89 | 90 | def forward(self, x, boxes): 91 | """ 92 | Arguments: 93 | x (list[Tensor]): feature maps for each level 94 | boxes (list[BoxList]): boxes to be used to perform the pooling operation. 
95 | Returns: 96 | result (Tensor) 97 | """ 98 | num_levels = len(self.poolers) 99 | rois = self.convert_to_roi_format(boxes) 100 | if num_levels == 1: 101 | return self.poolers[0](x[0], rois) 102 | 103 | levels = self.map_levels(boxes) 104 | 105 | num_rois = len(rois) 106 | num_channels = x[0].shape[1] 107 | output_size = self.output_size[0] 108 | 109 | dtype, device = x[0].dtype, x[0].device 110 | result = torch.zeros( 111 | (num_rois, num_channels, output_size, output_size), 112 | dtype=dtype, 113 | device=device, 114 | ) 115 | for level, (per_level_feature, pooler) in enumerate(zip(x, self.poolers)): 116 | idx_in_level = torch.nonzero(levels == level).squeeze(1) 117 | rois_per_level = rois[idx_in_level] 118 | result[idx_in_level] = pooler(per_level_feature, rois_per_level).to(dtype) 119 | 120 | return result 121 | 122 | 123 | def make_pooler(cfg, head_name): 124 | resolution = cfg.MODEL[head_name].POOLER_RESOLUTION 125 | scales = cfg.MODEL[head_name].POOLER_SCALES 126 | sampling_ratio = cfg.MODEL[head_name].POOLER_SAMPLING_RATIO 127 | pooler = Pooler( 128 | output_size=(resolution, resolution), 129 | scales=scales, 130 | sampling_ratio=sampling_ratio, 131 | ) 132 | return pooler 133 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/cascade_rcnn/proposal_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 3 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist 4 | 5 | 6 | # TODO: this should be implemented in RPN, but now a little different 7 | def add_gt_proposals(proposals, targets): 8 | """ 9 | Arguments: 10 | proposals: list[BoxList] 11 | targets: list[BoxList] 12 | """ 13 | # Get the device we're operating on 14 | device = proposals[0].bbox.device 15 | 16 | gt_boxes = [target.copy_with_fields([]) for target in targets] 17 | 18 | # later cat of bbox requires all fields to be present for all bbox 19 | # so we need to add a dummy for objectness that's missing 20 | # check whether the proposal has the "objectness" field first 21 | if "objectness" in proposals[0].fields(): 22 | for gt_box in gt_boxes: 23 | gt_box.add_field( 24 | "objectness", torch.ones(len(gt_box), device=device)) 25 | 26 | proposals = [ 27 | cat_boxlist((proposal, gt_box)) 28 | for proposal, gt_box in zip(proposals, gt_boxes) 29 | ] 30 | 31 | return proposals 32 | 33 | 34 | def add_box_regression( 35 | boxes, box_regression, box_coder, cls_agnostic_bbox_reg=False): 36 | if cls_agnostic_bbox_reg: 37 | box_regression = box_regression[:, -4:] 38 | 39 | boxes_per_image = [len(box) for box in boxes] 40 | concat_boxes = torch.cat([a.bbox for a in boxes], dim=0) 41 | proposals = box_coder.decode( 42 | box_regression.view(sum(boxes_per_image), -1), concat_boxes) 43 | proposals = proposals.split(boxes_per_image, dim=0) 44 | 45 | result = [] 46 | for img_id, proposal in enumerate(proposals): 47 | boxlist = BoxList(proposal, boxes[img_id].size, mode="xyxy") 48 | boxlist = boxlist.clip_to_image(remove_empty=False) 49 | result.append(boxlist) 50 | return result 51 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/mask_head/mask_target_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.det_opr.matcher import Matcher 3 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_iou 4 | 5 | 6 | 
def project_masks_on_boxes(segmentation_masks, proposals, discretization_size): 7 | """ 8 | Given segmentation masks and the bounding boxes corresponding 9 | to the location of the masks in the image, this function 10 | crops and resizes the masks in the position defined by the 11 | boxes. This prepares the masks for them to be fed to the 12 | loss computation as the targets. 13 | 14 | Arguments: 15 | segmentation_masks: an instance of SegmentationMask 16 | proposals: an instance of BoxList 17 | """ 18 | masks = [] 19 | M = discretization_size 20 | device = proposals.bbox.device 21 | proposals = proposals.convert("xyxy") 22 | assert segmentation_masks.size == proposals.size, "{}, {}".format( 23 | segmentation_masks, proposals 24 | ) 25 | 26 | # FIXME: CPU computation bottleneck, this should be parallelized 27 | proposals = proposals.bbox.to(torch.device("cpu")) 28 | for segmentation_mask, proposal in zip(segmentation_masks, proposals): 29 | # crop the masks, resize them to the desired resolution and 30 | # then convert them to the tensor representation. 31 | cropped_mask = segmentation_mask.crop(proposal) 32 | scaled_mask = cropped_mask.resize((M, M)) 33 | mask = scaled_mask.get_mask_tensor() 34 | masks.append(mask) 35 | if len(masks) == 0: 36 | return torch.empty(0, dtype=torch.float32, device=device) 37 | return torch.stack(masks, dim=0).to(device, dtype=torch.float32) 38 | 39 | 40 | def mask_target_opr( 41 | proposals, targets, high_threshold, low_threshold, discretization_size): 42 | """ 43 | Generate proposal targets for computing loss. 44 | 45 | Args: 46 | proposals: (list[BoxList]) 47 | targets: (list[BoxList]) 48 | high_threshold: (float) 49 | low_threshold: (float) 50 | discretization_size: (int) 51 | """ 52 | 53 | matcher = Matcher(high_threshold, low_threshold, 54 | allow_low_quality_matches=False) 55 | 56 | # prepare targets 57 | labels = [] 58 | masks = [] 59 | for proposals_per_image, targets_per_image in zip(proposals, targets): 60 | # match targets to proposals 61 | match_quality_matrix = boxlist_iou( 62 | targets_per_image, proposals_per_image) 63 | matched_idxs = matcher(match_quality_matrix) 64 | # Mask RCNN needs "labels" and "masks "fields for creating the targets 65 | target = targets_per_image.copy_with_fields(["labels", "masks"]) 66 | # get the targets corresponding GT for each proposal 67 | # NB: need to clamp the indices because we can have a single 68 | # GT in the image, and matched_idxs can be -2, which goes 69 | # out of bounds 70 | matched_targets = target[matched_idxs.clamp(min=0)] 71 | matched_targets.add_field("matched_idxs", matched_idxs) 72 | 73 | matched_idxs = matched_targets.get_field("matched_idxs") 74 | labels_per_image = matched_targets.get_field("labels") 75 | labels_per_image = labels_per_image.to(dtype=torch.int64) 76 | 77 | # this can probably be removed, but is left here for clarity 78 | # and completeness 79 | neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 80 | labels_per_image[neg_inds] = 0 81 | 82 | # mask scores are only computed on positive samples 83 | positive_inds = torch.nonzero(labels_per_image > 0).squeeze(1) 84 | 85 | segmentation_masks = matched_targets.get_field("masks") 86 | segmentation_masks = segmentation_masks[positive_inds] 87 | 88 | positive_proposals = proposals_per_image[positive_inds] 89 | 90 | masks_per_image = project_masks_on_boxes( 91 | segmentation_masks, positive_proposals, discretization_size 92 | ) 93 | 94 | labels.append(labels_per_image) 95 | masks.append(masks_per_image) 96 | 97 | return labels, masks 
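
# Illustrative usage sketch: in a Mask R-CNN style head, the targets built
# above are typically consumed as
#
#     labels, mask_targets = mask_target_opr(
#         proposals, targets,
#         high_threshold=0.5, low_threshold=0.5, discretization_size=28)
#
# followed by a binary cross-entropy loss between the predicted M x M masks of
# the positive proposals and mask_targets. The 0.5 thresholds and the 28 x 28
# resolution are common Mask R-CNN defaults, used here only for illustration,
# not values taken from this repository's configs.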
98 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/post_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 5 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_nms, cat_boxlist 6 | 7 | 8 | def filter_results( 9 | boxlist, num_classes, score_thresh, nms_thresh, detections_per_img): 10 | # unwrap the boxlist to avoid additional overhead. 11 | # if we had multi-class NMS, we could perform this directly on the boxlist 12 | boxes = boxlist.bbox.reshape(-1, num_classes * 4) 13 | scores = boxlist.get_field("scores").reshape(-1, num_classes) 14 | 15 | device = scores.device 16 | result = [] 17 | # Apply threshold on detection probabilities and apply NMS 18 | # Skip j = 0, because it's the background class 19 | inds_all = scores > score_thresh 20 | for j in range(1, num_classes): 21 | inds = inds_all[:, j].nonzero().squeeze(1) 22 | scores_j = scores[inds, j] 23 | boxes_j = boxes[inds, j * 4: (j + 1) * 4] 24 | boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") 25 | boxlist_for_class.add_field("scores", scores_j) 26 | boxlist_for_class = boxlist_nms(boxlist_for_class, nms_thresh) 27 | num_labels = len(boxlist_for_class) 28 | boxlist_for_class.add_field( 29 | "labels", 30 | torch.full((num_labels,), j, dtype=torch.int64, device=device) 31 | ) 32 | result.append(boxlist_for_class) 33 | 34 | result = cat_boxlist(result) 35 | number_of_detections = len(result) 36 | 37 | # Limit to max_per_image detections **over all classes** 38 | if number_of_detections > detections_per_img > 0: 39 | cls_scores = result.get_field("scores") 40 | image_thresh, _ = torch.kthvalue( 41 | cls_scores.cpu(), number_of_detections - detections_per_img + 1 42 | ) 43 | keep = cls_scores >= image_thresh.item() 44 | keep = torch.nonzero(keep).squeeze(1) 45 | result = result[keep] 46 | return result 47 | 48 | 49 | # TODO: merge into test 50 | def post_processing_opr(boxes, logits, offsets, box_coder, score_thresh=0.05, 51 | nms_thresh=0.5, detections_per_img=100, 52 | cls_agnostic_bbox_reg=False, bbox_aug_enabled=False): 53 | """ 54 | Compute the post-processed boxes and obtain the final results. 
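    In outline: class probabilities are obtained with a softmax over `logits`,
    per-class boxes are decoded from `offsets` with `box_coder` (and repeated
    across classes when `cls_agnostic_bbox_reg` is set), and each image's boxes
    are clipped to the image and passed through `filter_results` (score
    threshold, per-class NMS, and a cap of `detections_per_img` over all
    classes) unless `bbox_aug_enabled` defers that filtering.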
55 | 56 | Args: 57 | boxes: (list[BoxList]) 58 | logits: (tensor) 59 | offsets: (tensor) 60 | box_coder: (BoxCoder) 61 | score_thresh: (float) 62 | nms_thresh: (float) 63 | detections_per_img: (int) 64 | cls_agnostic_bbox_reg: (bool) 65 | 66 | Returns: 67 | results: (list[BoxList]) 68 | """ 69 | 70 | class_prob = F.softmax(logits, -1) 71 | num_classes = class_prob.shape[1] 72 | 73 | image_shapes = [box.size for box in boxes] 74 | boxes_per_image = [len(box) for box in boxes] 75 | concat_boxes = torch.cat([a.bbox for a in boxes], dim=0) 76 | 77 | if cls_agnostic_bbox_reg: 78 | offsets = offsets[:, -4:] 79 | proposals = box_coder.decode( 80 | offsets.view(sum(boxes_per_image), -1), concat_boxes) 81 | if cls_agnostic_bbox_reg: 82 | proposals = proposals.repeat(1, num_classes) 83 | 84 | proposals = proposals.split(boxes_per_image, dim=0) 85 | class_prob = class_prob.split(boxes_per_image, dim=0) 86 | 87 | results = [] 88 | for prob, proposal, image_shape in zip(class_prob, proposals, image_shapes): 89 | # prepare boxlist 90 | proposal = proposal.reshape(-1, 4) 91 | prob = prob.reshape(-1) 92 | boxlist = BoxList(proposal, image_shape, mode="xyxy") 93 | boxlist.add_field("scores", prob) 94 | 95 | # clip tp image 96 | boxlist = boxlist.clip_to_image(remove_empty=False) 97 | 98 | # filter results 99 | if not bbox_aug_enabled: # If bbox aug is enabled, we will do it later 100 | boxlist = filter_results(boxlist, num_classes, score_thresh, 101 | nms_thresh, detections_per_img) 102 | results.append(boxlist) 103 | return results 104 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rcnn/proposal_target_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.det_opr.matcher import Matcher 3 | from dynamic_rcnn.det_opr.sampler import BalancedPositiveNegativeSampler 4 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_iou 5 | 6 | 7 | def proposal_target_opr( 8 | proposals, targets, box_coder, high_threshold, low_threshold, 9 | batch_size_per_image, positive_fraction, return_ious=False, 10 | return_sample_id=False, return_raw_proposals=False): 11 | """ 12 | Generate proposal targets for computing loss. 
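    In outline: each proposal is matched to a ground-truth box by
    `Matcher(high_threshold, low_threshold)`, background and ignored proposals
    receive labels 0 and -1, box regression targets are encoded with
    `box_coder`, and a fixed batch of `batch_size_per_image` proposals per
    image is drawn by `BalancedPositiveNegativeSampler` with the given
    `positive_fraction`.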
13 | 14 | Args: 15 | proposals: (list[BoxList]) 16 | targets: (list[BoxList]) 17 | box_coder: (BoxCoder) 18 | high_threshold: (float) 19 | low_threshold: (float) 20 | batch_size_per_image: (int) 21 | positive_fraction: (float) 22 | return_ious: (bool) 23 | """ 24 | 25 | matcher = Matcher(high_threshold, low_threshold, 26 | allow_low_quality_matches=False) 27 | fg_bg_sampler = BalancedPositiveNegativeSampler( 28 | batch_size_per_image, positive_fraction) 29 | 30 | # prepare targets 31 | labels = [] 32 | regression_targets = [] 33 | ious = [] 34 | for proposals_per_image, targets_per_image in zip(proposals, targets): 35 | # match targets to proposals 36 | match_quality_matrix = boxlist_iou( 37 | targets_per_image, proposals_per_image) 38 | matched_idxs = matcher(match_quality_matrix) 39 | # Fast RCNN only need "labels" field for selecting the targets 40 | target = targets_per_image.copy_with_fields("labels") 41 | # get the targets corresponding GT for each proposal 42 | # NB: need to clamp the indices because we can have a single 43 | # GT in the image, and matched_idxs can be -2, which goes 44 | # out of bounds 45 | matched_targets = target[matched_idxs.clamp(min=0)] 46 | matched_ious = match_quality_matrix.t()[ 47 | range(proposals_per_image.bbox.shape[0]), matched_idxs.clamp(min=0)] 48 | matched_targets.add_field("matched_idxs", matched_idxs) 49 | 50 | matched_idxs = matched_targets.get_field("matched_idxs") 51 | labels_per_image = matched_targets.get_field("labels") 52 | labels_per_image = labels_per_image.to(dtype=torch.int64) 53 | 54 | # Label background (below the low threshold) 55 | bg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 56 | labels_per_image[bg_inds] = 0 57 | 58 | # Label ignore proposals (between low and high thresholds) 59 | ignore_inds = matched_idxs == Matcher.BETWEEN_THRESHOLDS 60 | labels_per_image[ignore_inds] = -1 # -1 is ignored by sampler 61 | 62 | # compute regression targets 63 | regression_targets_per_image = box_coder.encode( 64 | matched_targets.bbox, proposals_per_image.bbox 65 | ) 66 | 67 | labels.append(labels_per_image) 68 | regression_targets.append(regression_targets_per_image) 69 | ious.append(matched_ious) 70 | 71 | sampled_pos_inds, sampled_neg_inds = fg_bg_sampler(labels) 72 | proposals = list(proposals) 73 | # add corresponding label and regression_targets information to the bounding boxes 74 | for labels_per_image, regression_targets_per_image, ious_per_image, \ 75 | proposals_per_image in zip(labels, regression_targets, ious, proposals): 76 | proposals_per_image.add_field("labels", labels_per_image) 77 | proposals_per_image.add_field( 78 | "regression_targets", regression_targets_per_image 79 | ) 80 | if return_ious: 81 | proposals_per_image.add_field("ious", ious_per_image) 82 | 83 | if return_sample_id: 84 | sample_id = [] 85 | if return_raw_proposals: 86 | raw_proposals = proposals.copy() 87 | # distributed sampled proposals, that were obtained on all feature maps 88 | # concatenated via the fg_bg_sampler, into individual feature map levels 89 | for img_idx, (pos_inds_img, neg_inds_img) in enumerate( 90 | zip(sampled_pos_inds, sampled_neg_inds) 91 | ): 92 | img_sampled_inds = torch.nonzero(pos_inds_img | neg_inds_img).squeeze(1) 93 | proposals_per_image = proposals[img_idx][img_sampled_inds] 94 | proposals[img_idx] = proposals_per_image 95 | 96 | if return_sample_id: 97 | sample_id.append(img_sampled_inds) 98 | 99 | if return_sample_id: 100 | if return_raw_proposals: 101 | return proposals, sample_id, raw_proposals 102 | else: 103 | 
return proposals, sample_id 104 | else: 105 | if return_raw_proposals: 106 | return proposals, raw_proposals 107 | else: 108 | return proposals 109 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/anchor_target_opr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from dynamic_rcnn.det_opr.matcher import Matcher 5 | from dynamic_rcnn.det_opr.sampler import BalancedPositiveNegativeSampler 6 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_iou 7 | 8 | 9 | def anchor_target_opr( 10 | anchors, targets, box_coder, high_threshold, low_threshold, 11 | batch_size_per_image, positive_fraction): 12 | """ 13 | Generate anchor targets for computing loss. 14 | 15 | Args: 16 | anchors: (list[BoxList]) 17 | targets: (list[BoxList]) 18 | box_coder: (BoxCoder) 19 | high_threshold: (float) 20 | low_threshold: (float) 21 | batch_size_per_image: (int) 22 | positive_fraction: (float) 23 | """ 24 | matcher = Matcher( 25 | high_threshold, low_threshold, allow_low_quality_matches=True) 26 | fg_bg_sampler = BalancedPositiveNegativeSampler( 27 | batch_size_per_image, positive_fraction) 28 | 29 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 30 | # prepare targets 31 | labels = [] 32 | regression_targets = [] 33 | for anchors_per_image, targets_per_image in zip(anchors, targets): 34 | # match targets to anchors 35 | match_quality_matrix = boxlist_iou(targets_per_image, anchors_per_image) 36 | matched_idxs = matcher(match_quality_matrix) 37 | targets_per_image = targets_per_image.copy_with_fields([]) 38 | matched_targets = targets_per_image[matched_idxs.clamp(min=0)] 39 | matched_targets.add_field("matched_idxs", matched_idxs) 40 | 41 | matched_idxs = matched_targets.get_field("matched_idxs") 42 | # generate rpn labels 43 | labels_per_image = matched_idxs >= 0 44 | labels_per_image = labels_per_image.to(dtype=torch.float32) 45 | 46 | # Background (negative examples) 47 | bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 48 | labels_per_image[bg_indices] = 0 49 | 50 | # discard anchors that go out of the boundaries of the image 51 | labels_per_image[~anchors_per_image.get_field("visibility")] = -1 52 | 53 | # discard indices that are between thresholds 54 | inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS 55 | labels_per_image[inds_to_discard] = -1 56 | 57 | # compute regression targets 58 | regression_targets_per_image = box_coder.encode( 59 | matched_targets.bbox, anchors_per_image.bbox 60 | ) 61 | 62 | labels.append(labels_per_image) 63 | regression_targets.append(regression_targets_per_image) 64 | 65 | sampled_pos_inds, sampled_neg_inds = fg_bg_sampler(labels) 66 | sampled_pos_inds = torch.nonzero( 67 | torch.cat(sampled_pos_inds, dim=0)).squeeze(1) 68 | sampled_neg_inds = torch.nonzero( 69 | torch.cat(sampled_neg_inds, dim=0)).squeeze(1) 70 | 71 | sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0) 72 | 73 | labels = torch.cat(labels, dim=0) 74 | regression_targets = torch.cat(regression_targets, dim=0) 75 | 76 | return labels, regression_targets, sampled_inds, sampled_pos_inds 77 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/fcos/fcos_target_opr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, 
Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | INF = 100000000 5 | 6 | 7 | def get_sample_region( 8 | gt, strides, num_points_per_level, gt_xs, gt_ys, radius=1): 9 | gt = gt[None].expand(gt_xs.shape[0], gt.shape[0], 4) 10 | center_x = (gt[..., 0] + gt[..., 2]) / 2 11 | center_y = (gt[..., 1] + gt[..., 3]) / 2 12 | center_gt = gt.new_zeros(gt.shape) 13 | # no gt 14 | if center_x[..., 0].sum() == 0: 15 | return gt_xs.new_zeros(gt_xs.shape, dtype=torch.uint8) 16 | start = 0 17 | for level, num_points in enumerate(num_points_per_level): 18 | end = start + num_points 19 | stride = strides[level] * radius 20 | xmin = center_x[start:end] - stride 21 | ymin = center_y[start:end] - stride 22 | xmax = center_x[start:end] + stride 23 | ymax = center_y[start:end] + stride 24 | # limit sample region in gt 25 | center_gt[start:end, :, 0] = torch.where( 26 | xmin > gt[start:end, :, 0], xmin, gt[start:end, :, 0]) 27 | center_gt[start:end, :, 1] = torch.where( 28 | ymin > gt[start:end, :, 1], ymin, gt[start:end, :, 1]) 29 | center_gt[start:end, :, 2] = torch.where( 30 | xmax > gt[start:end, :, 2], gt[start:end, :, 2], xmax) 31 | center_gt[start:end, :, 3] = torch.where( 32 | ymax > gt[start:end, :, 3], gt[start:end, :, 3], ymax) 33 | start = end 34 | left = gt_xs[:, None] - center_gt[..., 0] 35 | right = center_gt[..., 2] - gt_xs[:, None] 36 | top = gt_ys[:, None] - center_gt[..., 1] 37 | bottom = center_gt[..., 3] - gt_ys[:, None] 38 | center_bbox = torch.stack((left, top, right, bottom), -1) 39 | inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0 40 | return inside_gt_bbox_mask 41 | 42 | 43 | def compute_targets_for_locations( 44 | locations, targets, object_sizes_of_interest, center_sample=None): 45 | labels = [] 46 | reg_targets = [] 47 | xs, ys = locations[:, 0], locations[:, 1] 48 | 49 | for im_i in range(len(targets)): 50 | targets_per_im = targets[im_i] 51 | assert targets_per_im.mode == "xyxy" 52 | bboxes = targets_per_im.bbox 53 | labels_per_im = targets_per_im.get_field("labels") 54 | area = targets_per_im.area() 55 | 56 | l = xs[:, None] - bboxes[:, 0][None] 57 | t = ys[:, None] - bboxes[:, 1][None] 58 | r = bboxes[:, 2][None] - xs[:, None] 59 | b = bboxes[:, 3][None] - ys[:, None] 60 | reg_targets_per_im = torch.stack([l, t, r, b], dim=2) 61 | 62 | if center_sample is not None: 63 | fpn_strides = center_sample['fpn_strides'] 64 | pos_radius = center_sample['pos_radius'] 65 | num_points_per_level = center_sample['num_points_per_level'] 66 | is_in_boxes = get_sample_region( 67 | bboxes, fpn_strides, num_points_per_level, xs, ys, 68 | radius=pos_radius) 69 | else: 70 | is_in_boxes = reg_targets_per_im.min(dim=2)[0] > 0 71 | 72 | max_reg_targets_per_im = reg_targets_per_im.max(dim=2)[0] 73 | # limit the regression range for each location 74 | is_cared_in_the_level = \ 75 | (max_reg_targets_per_im >= object_sizes_of_interest[:, [0]]) & \ 76 | (max_reg_targets_per_im <= object_sizes_of_interest[:, [1]]) 77 | 78 | locations_to_gt_area = area[None].repeat(len(locations), 1) 79 | locations_to_gt_area[is_in_boxes == 0] = INF 80 | locations_to_gt_area[is_cared_in_the_level == 0] = INF 81 | 82 | # if there are still more than one objects for a location, 83 | # we choose the one with minimal area 84 | locations_to_min_area, locations_to_gt_inds = \ 85 | locations_to_gt_area.min(dim=1) 86 | 87 | reg_targets_per_im = reg_targets_per_im[ 88 | range(len(locations)), locations_to_gt_inds] 89 | labels_per_im = labels_per_im[locations_to_gt_inds] 90 | labels_per_im[locations_to_min_area == 
INF] = 0 91 | 92 | labels.append(labels_per_im) 93 | reg_targets.append(reg_targets_per_im) 94 | 95 | return labels, reg_targets 96 | 97 | 98 | def fcos_target_opr(locations, targets, center_sample=None): 99 | """ 100 | Generate targets for computing fcos loss. 101 | 102 | Args: 103 | locations: (list[BoxList]) 104 | targets: (list[BoxList]) 105 | center_sample: (dict) 106 | """ 107 | object_sizes_of_interest = [ 108 | [-1, 64], 109 | [64, 128], 110 | [128, 256], 111 | [256, 512], 112 | [512, INF], 113 | ] 114 | expanded_object_sizes_of_interest = [] 115 | for l, points_per_level in enumerate(locations): 116 | object_sizes_of_interest_per_level = \ 117 | points_per_level.new_tensor(object_sizes_of_interest[l]) 118 | expanded_object_sizes_of_interest.append( 119 | object_sizes_of_interest_per_level[None].expand( 120 | len(points_per_level), -1) 121 | ) 122 | 123 | expanded_object_sizes_of_interest = torch.cat( 124 | expanded_object_sizes_of_interest, dim=0) 125 | num_points_per_level = [ 126 | len(points_per_level) for points_per_level in locations] 127 | points_all_level = torch.cat(locations, dim=0) 128 | if center_sample is not None: 129 | center_sample['num_points_per_level'] = num_points_per_level 130 | labels, reg_targets = compute_targets_for_locations( 131 | points_all_level, targets, expanded_object_sizes_of_interest, 132 | center_sample=center_sample) 133 | else: 134 | labels, reg_targets = compute_targets_for_locations( 135 | points_all_level, targets, expanded_object_sizes_of_interest) 136 | 137 | for i in range(len(labels)): 138 | labels[i] = torch.split(labels[i], num_points_per_level, dim=0) 139 | reg_targets[i] = torch.split( 140 | reg_targets[i], num_points_per_level, dim=0) 141 | 142 | labels_level_first = [] 143 | reg_targets_level_first = [] 144 | for level in range(len(locations)): 145 | labels_level_first.append( 146 | torch.cat([labels_per_im[level] for labels_per_im in labels], dim=0) 147 | ) 148 | reg_targets_level_first.append( 149 | torch.cat([reg_targets_per_im[level] for reg_targets_per_im in 150 | reg_targets], dim=0) 151 | ) 152 | return labels_level_first, reg_targets_level_first 153 | 154 | 155 | def compute_centerness_targets(reg_targets): 156 | left_right = reg_targets[:, [0, 2]] 157 | top_bottom = reg_targets[:, [1, 3]] 158 | centerness = (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * \ 159 | (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]) 160 | return torch.sqrt(centerness) 161 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/fcos/post_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 3 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_nms, \ 4 | remove_small_boxes 5 | 6 | 7 | def post_processing_opr( 8 | fcos_locations, cls_logits, bbox_preds, centernesses, image_sizes, 9 | pre_nms_top_n, pre_nms_thresh, nms_thresh, box_min_size, 10 | fpn_post_nms_top_n, num_classes): 11 | """ 12 | Compute the post-processed boxes and obtain the final results. 
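    For a location (x, y) with predicted distances (l, t, r, b), the decoded
    box is (x - l, y - t, x + r, y + b); classification scores are multiplied
    by the sigmoid centerness before thresholding, and per-class NMS plus a
    `fpn_post_nms_top_n` cap are applied per image over all levels.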
13 | 14 | Args: 15 | fcos_locations: (list[BoxList]) 16 | cls_logits: (list[tensor]) 17 | bbox_preds: (list[tensor]) 18 | centernesses: (list[tensor]) 19 | image_sizes: (list[tuple[int, int]]) 20 | pre_nms_top_n: (int) 21 | pre_nms_thresh: (float) 22 | nms_thresh: (float) 23 | box_min_size: (int) 24 | fpn_post_nms_top_n: (int) 25 | num_classes: (int) 26 | """ 27 | sampled_boxes = [] 28 | temp_pre_nms_top_n = pre_nms_top_n 29 | 30 | for locations, box_cls, box_regression, centerness in zip( 31 | fcos_locations, cls_logits, bbox_preds, centernesses): 32 | 33 | N, C, H, W = box_cls.shape 34 | 35 | # put in the same format as locations 36 | box_cls = box_cls.view(N, C, H, W).permute(0, 2, 3, 1) 37 | box_cls = box_cls.reshape(N, -1, C).sigmoid() 38 | box_regression = box_regression.view(N, 4, H, W).permute(0, 2, 3, 1) 39 | box_regression = box_regression.reshape(N, -1, 4) 40 | centerness = centerness.view(N, 1, H, W).permute(0, 2, 3, 1) 41 | centerness = centerness.reshape(N, -1).sigmoid() 42 | 43 | candidate_inds = box_cls > pre_nms_thresh 44 | pre_nms_top_n = candidate_inds.view(N, -1).sum(1) 45 | pre_nms_top_n = pre_nms_top_n.clamp(max=temp_pre_nms_top_n) 46 | 47 | # multiply the classification scores with centerness scores 48 | box_cls = box_cls * centerness[:, :, None] 49 | 50 | results = [] 51 | for i in range(N): 52 | per_box_cls = box_cls[i] 53 | per_candidate_inds = candidate_inds[i] 54 | per_box_cls = per_box_cls[per_candidate_inds] 55 | 56 | per_candidate_nonzeros = per_candidate_inds.nonzero() 57 | per_box_loc = per_candidate_nonzeros[:, 0] 58 | per_class = per_candidate_nonzeros[:, 1] + 1 59 | 60 | per_box_regression = box_regression[i] 61 | per_box_regression = per_box_regression[per_box_loc] 62 | per_locations = locations[per_box_loc] 63 | 64 | per_pre_nms_top_n = pre_nms_top_n[i] 65 | 66 | if per_candidate_inds.sum().item() > per_pre_nms_top_n.item(): 67 | per_box_cls, top_k_indices = \ 68 | per_box_cls.topk(per_pre_nms_top_n, sorted=False) 69 | per_class = per_class[top_k_indices] 70 | per_box_regression = per_box_regression[top_k_indices] 71 | per_locations = per_locations[top_k_indices] 72 | 73 | detections = torch.stack([ 74 | per_locations[:, 0] - per_box_regression[:, 0], 75 | per_locations[:, 1] - per_box_regression[:, 1], 76 | per_locations[:, 0] + per_box_regression[:, 2], 77 | per_locations[:, 1] + per_box_regression[:, 3], 78 | ], dim=1) 79 | 80 | h, w = image_sizes[i] 81 | boxlist = BoxList(detections, (int(w), int(h)), mode="xyxy") 82 | boxlist.add_field("labels", per_class) 83 | boxlist.add_field("scores", per_box_cls) 84 | boxlist = boxlist.clip_to_image(remove_empty=False) 85 | boxlist = remove_small_boxes(boxlist, box_min_size) 86 | results.append(boxlist) 87 | sampled_boxes.append(results) 88 | 89 | boxlists = list(zip(*sampled_boxes)) 90 | boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] 91 | 92 | # select over all levels 93 | num_images = len(boxlists) 94 | results = [] 95 | for i in range(num_images): 96 | scores = boxlists[i].get_field("scores") 97 | labels = boxlists[i].get_field("labels") 98 | boxes = boxlists[i].bbox 99 | boxlist = boxlists[i] 100 | result = [] 101 | # skip the background 102 | for j in range(1, num_classes): 103 | inds = (labels == j).nonzero().view(-1) 104 | 105 | scores_j = scores[inds] 106 | boxes_j = boxes[inds, :].view(-1, 4) 107 | boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") 108 | boxlist_for_class.add_field("scores", scores_j) 109 | boxlist_for_class = boxlist_nms( 110 | boxlist_for_class, nms_thresh, 
111 | score_field="scores" 112 | ) 113 | num_labels = len(boxlist_for_class) 114 | boxlist_for_class.add_field( 115 | "labels", torch.full((num_labels,), j, 116 | dtype=torch.int64, 117 | device=scores.device) 118 | ) 119 | result.append(boxlist_for_class) 120 | 121 | result = cat_boxlist(result) 122 | number_of_detections = len(result) 123 | 124 | # Limit to max_per_image detections **over all classes** 125 | if number_of_detections > fpn_post_nms_top_n > 0: 126 | cls_scores = result.get_field("scores") 127 | image_thresh, _ = torch.kthvalue( 128 | cls_scores.cpu(), 129 | number_of_detections - fpn_post_nms_top_n + 1 130 | ) 131 | keep = cls_scores >= image_thresh.item() 132 | keep = torch.nonzero(keep).squeeze(1) 133 | result = result[keep] 134 | results.append(result) 135 | return results 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/fcos/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class Scale(nn.Module): 6 | def __init__(self, init_value=1.0): 7 | super(Scale, self).__init__() 8 | self.scale = nn.Parameter(torch.FloatTensor([init_value])) 9 | 10 | def forward(self, input): 11 | return input * self.scale 12 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/proposal_opr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.utils.torch_utils import permute_and_flatten 3 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 4 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_nms, \ 5 | remove_small_boxes 6 | 7 | 8 | def proposal_opr( 9 | rpn_anchors, rpn_cls_logits, rpn_bbox_preds, box_coder, pre_nms_top_n, 10 | post_nms_top_n, nms_thresh, box_min_size, fpn_post_nms_top_n, 11 | fpn_post_nms_per_batch=True, is_train=False, targets=None, 12 | proposal_with_gt=True): 13 | """ 14 | Generate proposals for RCNN. 
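    In outline: per FPN level, the `pre_nms_top_n` highest-scoring anchors are
    decoded with `box_coder`, clipped to the image, filtered by `box_min_size`
    and NMS-ed (keeping at most `post_nms_top_n`); proposals are then re-ranked
    across levels and limited to `fpn_post_nms_top_n` (per batch during
    training when `fpn_post_nms_per_batch`, otherwise per image), and
    ground-truth boxes are optionally appended during training.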
15 | 16 | Args: 17 | rpn_anchors: (list[list[BoxList]]) 18 | rpn_cls_logits: (list[tensor]) 19 | rpn_bbox_preds: (list[tensor]) 20 | box_coder: (BoxCoder) 21 | pre_nms_top_n: (int) 22 | post_nms_top_n: (int) 23 | nms_thresh: (float) 24 | box_min_size: (int) 25 | fpn_post_nms_top_n: (int) 26 | fpn_post_nms_per_batch: (bool) 27 | is_train: (bool) 28 | targets: (list[BoxList]) 29 | proposal_with_gt: (bool) 30 | """ 31 | 32 | sampled_boxes = [] 33 | num_levels = len(rpn_cls_logits) 34 | rpn_anchors = list(zip(*rpn_anchors)) 35 | for anchors, objectness, box_regression in zip( 36 | rpn_anchors, rpn_cls_logits, rpn_bbox_preds): 37 | device = objectness.device 38 | N, A, H, W = objectness.shape 39 | 40 | # put in the same format as anchors 41 | objectness = permute_and_flatten(objectness, N, A, 1, H, W).view(N, -1) 42 | objectness = objectness.sigmoid() 43 | 44 | box_regression = permute_and_flatten(box_regression, N, A, 4, H, W) 45 | 46 | num_anchors = A * H * W 47 | 48 | pre_nms_top_n = min(pre_nms_top_n, num_anchors) 49 | objectness, topk_idx = objectness.topk( 50 | pre_nms_top_n, dim=1, sorted=True) 51 | 52 | batch_idx = torch.arange(N, device=device)[:, None] 53 | box_regression = box_regression[batch_idx, topk_idx] 54 | 55 | image_shapes = [box.size for box in anchors] 56 | concat_anchors = torch.cat([a.bbox for a in anchors], dim=0) 57 | concat_anchors = concat_anchors.reshape(N, -1, 4)[batch_idx, topk_idx] 58 | 59 | proposals = box_coder.decode( 60 | box_regression.view(-1, 4), concat_anchors.view(-1, 4) 61 | ) 62 | 63 | proposals = proposals.view(N, -1, 4) 64 | 65 | result = [] 66 | for proposal, score, im_shape in zip(proposals, objectness, 67 | image_shapes): 68 | boxlist = BoxList(proposal, im_shape, mode="xyxy") 69 | boxlist.add_field("objectness", score) 70 | boxlist = boxlist.clip_to_image(remove_empty=False) 71 | boxlist = remove_small_boxes(boxlist, box_min_size) 72 | boxlist = boxlist_nms( 73 | boxlist, 74 | nms_thresh, 75 | max_proposals=post_nms_top_n, 76 | score_field="objectness", 77 | ) 78 | result.append(boxlist) 79 | sampled_boxes.append(result) 80 | 81 | boxlists = list(zip(*sampled_boxes)) 82 | boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] 83 | 84 | # select over all levels 85 | if num_levels > 1: 86 | num_images = len(boxlists) 87 | if is_train and fpn_post_nms_per_batch: 88 | objectness = torch.cat( 89 | [boxlist.get_field("objectness") for boxlist in boxlists], dim=0 90 | ) 91 | box_sizes = [len(boxlist) for boxlist in boxlists] 92 | post_nms_top_n = min(fpn_post_nms_top_n, len(objectness)) 93 | _, inds_sorted = torch.topk(objectness, post_nms_top_n, dim=0, 94 | sorted=True) 95 | inds_mask = torch.zeros_like(objectness, dtype=torch.uint8) 96 | inds_mask[inds_sorted] = 1 97 | inds_mask = inds_mask.split(box_sizes) 98 | for i in range(num_images): 99 | boxlists[i] = boxlists[i][inds_mask[i]] 100 | else: 101 | for i in range(num_images): 102 | objectness = boxlists[i].get_field("objectness") 103 | post_nms_top_n = min(fpn_post_nms_top_n, len(objectness)) 104 | _, inds_sorted = torch.topk( 105 | objectness, post_nms_top_n, dim=0, sorted=True 106 | ) 107 | boxlists[i] = boxlists[i][inds_sorted] 108 | 109 | # append ground-truth bboxes to proposals 110 | if is_train and targets is not None and proposal_with_gt: 111 | # Get the device we're operating on 112 | device = boxlists[0].bbox.device 113 | 114 | gt_boxes = [target.copy_with_fields([]) for target in targets] 115 | 116 | # later cat of bbox requires all fields to be present for all bbox 117 | # so we need 
to add a dummy for objectness that's missing 118 | for gt_box in gt_boxes: 119 | gt_box.add_field("objectness", 120 | torch.ones(len(gt_box), device=device)) 121 | 122 | boxlists = [ 123 | cat_boxlist((proposal, gt_box)) 124 | for proposal, gt_box in zip(boxlists, gt_boxes) 125 | ] 126 | 127 | return boxlists 128 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/retinanet/anchor_target_opr.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from dynamic_rcnn.det_opr.matcher import Matcher 5 | from dynamic_rcnn.datasets.structures.boxlist_ops import boxlist_iou 6 | 7 | 8 | def anchor_target_opr( 9 | anchors, targets, box_coder, high_threshold, low_threshold, 10 | allow_low_quality_matches=True): 11 | """ 12 | Generate anchor targets for computing retinanet loss. 13 | 14 | Args: 15 | anchors: (list[BoxList]) 16 | targets: (list[BoxList]) 17 | box_coder: (BoxCoder) 18 | high_threshold: (float) 19 | low_threshold: (float) 20 | """ 21 | matcher = Matcher(high_threshold, low_threshold, 22 | allow_low_quality_matches=allow_low_quality_matches) 23 | 24 | # prepare targets 25 | labels = [] 26 | regression_targets = [] 27 | for anchors_per_image, targets_per_image in zip(anchors, targets): 28 | # match targets to anchors 29 | match_quality_matrix = boxlist_iou(targets_per_image, anchors_per_image) 30 | matched_idxs = matcher(match_quality_matrix) 31 | targets_per_image = targets_per_image.copy_with_fields(['labels']) 32 | matched_targets = targets_per_image[matched_idxs.clamp(min=0)] 33 | matched_targets.add_field("matched_idxs", matched_idxs) 34 | 35 | matched_idxs = matched_targets.get_field("matched_idxs") 36 | # generate rpn labels 37 | labels_per_image = matched_targets.get_field("labels") 38 | labels_per_image = labels_per_image.to(dtype=torch.float32) 39 | 40 | # Background (negative examples) 41 | bg_indices = matched_idxs == Matcher.BELOW_LOW_THRESHOLD 42 | labels_per_image[bg_indices] = 0 43 | 44 | # discard indices that are between thresholds 45 | inds_to_discard = matched_idxs == Matcher.BETWEEN_THRESHOLDS 46 | labels_per_image[inds_to_discard] = -1 47 | 48 | # compute regression targets 49 | regression_targets_per_image = box_coder.encode( 50 | matched_targets.bbox, anchors_per_image.bbox 51 | ) 52 | 53 | labels.append(labels_per_image) 54 | regression_targets.append(regression_targets_per_image) 55 | 56 | return labels, regression_targets 57 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/rpn/retinanet/post_processing.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from dynamic_rcnn.utils.torch_utils import permute_and_flatten 3 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 4 | from dynamic_rcnn.datasets.structures.boxlist_ops import cat_boxlist, boxlist_nms, \ 5 | remove_small_boxes 6 | 7 | 8 | def post_processing_opr( 9 | retina_anchors, cls_logits, bbox_preds, box_coder, pre_nms_top_n, 10 | pre_nms_thresh, nms_thresh, box_min_size, fpn_post_nms_top_n, 11 | num_classes): 12 | """ 13 | Compute the post-processed boxes and obtain the final results. 
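    Per level, anchor scores above `pre_nms_thresh` are kept (at most
    `pre_nms_top_n` per image), the corresponding boxes are decoded with
    `box_coder`, clipped and size-filtered; results are then merged across
    levels with per-class NMS and capped at `fpn_post_nms_top_n` detections
    per image.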
14 | 15 | Args: 16 | retina_anchors: (list[list[BoxList]]) 17 | cls_logits: (list[tensor]) 18 | bbox_preds: (list[tensor]) 19 | box_coder: (BoxCoder) 20 | pre_nms_top_n: (int) 21 | pre_nms_thresh: (float) 22 | nms_thresh: (float) 23 | box_min_size: (int) 24 | fpn_post_nms_top_n: (int) 25 | num_classes: (int) 26 | """ 27 | 28 | sampled_boxes = [] 29 | num_levels = len(cls_logits) 30 | retina_anchors = list(zip(*retina_anchors)) 31 | temp_pre_nms_top_n = pre_nms_top_n 32 | for anchors, box_cls, box_regression in zip( 33 | retina_anchors, cls_logits, bbox_preds): 34 | device = box_cls.device 35 | N, _, H, W = box_cls.shape 36 | A = box_regression.size(1) // 4 37 | C = box_cls.size(1) // A 38 | 39 | # put in the same format as anchors 40 | box_cls = permute_and_flatten(box_cls, N, A, C, H, W) 41 | box_cls = box_cls.sigmoid() 42 | 43 | box_regression = permute_and_flatten(box_regression, N, A, 4, H, W) 44 | box_regression = box_regression.reshape(N, -1, 4) 45 | 46 | num_anchors = A * H * W 47 | 48 | candidate_inds = box_cls > pre_nms_thresh 49 | 50 | pre_nms_top_n = candidate_inds.view(N, -1).sum(1) 51 | pre_nms_top_n = pre_nms_top_n.clamp(max=temp_pre_nms_top_n) 52 | 53 | results = [] 54 | for per_box_cls, per_box_regression, per_pre_nms_top_n, \ 55 | per_candidate_inds, per_anchors in zip( 56 | box_cls, 57 | box_regression, 58 | pre_nms_top_n, 59 | candidate_inds, 60 | anchors): 61 | # Sort and select TopN 62 | per_box_cls = per_box_cls[per_candidate_inds] 63 | 64 | per_box_cls, top_k_indices = \ 65 | per_box_cls.topk(per_pre_nms_top_n, sorted=False) 66 | 67 | per_candidate_nonzeros = \ 68 | per_candidate_inds.nonzero()[top_k_indices, :] 69 | 70 | per_box_loc = per_candidate_nonzeros[:, 0] 71 | per_class = per_candidate_nonzeros[:, 1] 72 | per_class += 1 73 | 74 | detections = box_coder.decode( 75 | per_box_regression[per_box_loc, :].view(-1, 4), 76 | per_anchors.bbox[per_box_loc, :].view(-1, 4) 77 | ) 78 | 79 | boxlist = BoxList(detections, per_anchors.size, mode="xyxy") 80 | boxlist.add_field("labels", per_class) 81 | boxlist.add_field("scores", per_box_cls) 82 | boxlist = boxlist.clip_to_image(remove_empty=False) 83 | boxlist = remove_small_boxes(boxlist, box_min_size) 84 | results.append(boxlist) 85 | sampled_boxes.append(results) 86 | 87 | boxlists = list(zip(*sampled_boxes)) 88 | boxlists = [cat_boxlist(boxlist) for boxlist in boxlists] 89 | 90 | # select over all levels 91 | if num_levels > 1: 92 | num_images = len(boxlists) 93 | results = [] 94 | for i in range(num_images): 95 | scores = boxlists[i].get_field("scores") 96 | labels = boxlists[i].get_field("labels") 97 | boxes = boxlists[i].bbox 98 | boxlist = boxlists[i] 99 | result = [] 100 | # skip the background 101 | for j in range(1, num_classes): 102 | inds = (labels == j).nonzero().view(-1) 103 | 104 | scores_j = scores[inds] 105 | boxes_j = boxes[inds, :].view(-1, 4) 106 | boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy") 107 | boxlist_for_class.add_field("scores", scores_j) 108 | boxlist_for_class = boxlist_nms( 109 | boxlist_for_class, nms_thresh, 110 | score_field="scores" 111 | ) 112 | num_labels = len(boxlist_for_class) 113 | boxlist_for_class.add_field( 114 | "labels", torch.full((num_labels,), j, 115 | dtype=torch.int64, 116 | device=scores.device) 117 | ) 118 | result.append(boxlist_for_class) 119 | 120 | result = cat_boxlist(result) 121 | number_of_detections = len(result) 122 | 123 | # Limit to max_per_image detections **over all classes** 124 | if number_of_detections > fpn_post_nms_top_n > 0: 125 | 
cls_scores = result.get_field("scores") 126 | image_thresh, _ = torch.kthvalue( 127 | cls_scores.cpu(), 128 | number_of_detections - fpn_post_nms_top_n + 1 129 | ) 130 | keep = cls_scores >= image_thresh.item() 131 | keep = torch.nonzero(keep).squeeze(1) 132 | result = result[keep] 133 | results.append(result) 134 | return results 135 | return boxlists 136 | -------------------------------------------------------------------------------- /dynamic_rcnn/det_opr/sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative example. 34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/bbox_aug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as TT 3 | 4 | from dynamic_rcnn.datasets import transforms as T 5 | from dynamic_rcnn.datasets.structures.image_list import to_image_list 6 | from dynamic_rcnn.datasets.structures.bounding_box import BoxList 7 | from dynamic_rcnn.det_opr.rcnn.post_processing import filter_results 8 | 9 | 10 | def im_detect_bbox_aug(cfg, model, images, device): 11 | # Collect detections computed under different transformations 12 | boxlists_ts = [] 13 | for _ in range(len(images)): 14 | boxlists_ts.append([]) 15 | 16 | def add_preds_t(boxlists_t): 17 | for i, boxlist_t in enumerate(boxlists_t): 18 | if len(boxlists_ts[i]) == 0: 19 | # The first one is identity transform, no need to resize the boxlist 20 | boxlists_ts[i].append(boxlist_t) 21 | else: 22 | # Resize the boxlist as the first one 23 | boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) 24 | 25 | # Compute detections for the original image (identity transform) 26 | boxlists_i = im_detect_bbox(cfg, model, images, cfg.INPUT.MIN_SIZE_TEST, 27 | cfg.INPUT.MAX_SIZE_TEST, device) 28 | add_preds_t(boxlists_i) 29 | 30 | # Perform detection on the horizontally flipped image 31 | if cfg.TEST.BBOX_AUG.H_FLIP: 32 | boxlists_hf = im_detect_bbox_hflip( 33 | cfg, model, images, cfg.INPUT.MIN_SIZE_TEST, 34 | cfg.INPUT.MAX_SIZE_TEST, device) 35 | add_preds_t(boxlists_hf) 36 | 37 | # Compute detections at different scales 38 | for scale in cfg.TEST.BBOX_AUG.SCALES: 39 | max_size = cfg.TEST.BBOX_AUG.MAX_SIZE 40 | boxlists_scl = im_detect_bbox_scale( 41 | cfg, model, images, scale, max_size, device) 42 | add_preds_t(boxlists_scl) 43 | 44 | if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: 45 | boxlists_scl_hf = im_detect_bbox_scale( 46 | cfg, model, images, scale, max_size, device, hflip=True) 47 | add_preds_t(boxlists_scl_hf) 48 | 49 | # Merge boxlists detected by different bbox aug params 50 | boxlists = [] 51 | for i, boxlist_ts in enumerate(boxlists_ts): 52 | bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) 53 | scores = torch.cat( 54 | [boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) 55 | boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) 56 | boxlist.add_field('scores', scores) 57 | boxlists.append(boxlist) 58 | 59 | # Apply NMS and limit the final detections 60 | results = [] 61 | for boxlist in boxlists: 62 | results.append(filter_results( 63 | boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES, 64 | cfg.MODEL.ROI_HEADS.SCORE_THRESH, cfg.MODEL.ROI_HEADS.NMS, 65 | cfg.MODEL.ROI_HEADS.DETECTIONS_PER_IMG)) 66 | 67 | return results 68 | 69 | 70 | def im_detect_bbox(cfg, model, images, target_scale, target_max_size, device): 71 | """ 72 | Performs bbox detection on the original image. 73 | """ 74 | transform = TT.Compose([ 75 | T.Resize(target_scale, target_max_size), 76 | TT.ToTensor(), 77 | T.Normalize( 78 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, 79 | to_bgr255=cfg.INPUT.TO_BGR255 80 | ) 81 | ]) 82 | images = [transform(image) for image in images] 83 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 84 | return model(images.to(device)) 85 | 86 | 87 | def im_detect_bbox_hflip( 88 | cfg, model, images, target_scale, target_max_size, device): 89 | """ 90 | Performs bbox detection on the horizontally flipped image. 
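    Detections are computed on the flipped input and then mapped back to the
    original orientation via `boxlist.transpose(0)`.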
91 | Function signature is the same as for im_detect_bbox. 92 | """ 93 | transform = TT.Compose([ 94 | T.Resize(target_scale, target_max_size), 95 | TT.RandomHorizontalFlip(1.0), 96 | TT.ToTensor(), 97 | T.Normalize( 98 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 99 | ) 100 | ]) 101 | images = [transform(image) for image in images] 102 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 103 | boxlists = model(images.to(device)) 104 | 105 | # Invert the detections computed on the flipped image 106 | boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists] 107 | return boxlists_inv 108 | 109 | 110 | def im_detect_bbox_scale( 111 | cfg, model, images, target_scale, target_max_size, device, hflip=False): 112 | """ 113 | Computes bbox detections at the given scale. 114 | Returns predictions in the scaled image space. 115 | """ 116 | if hflip: 117 | boxlists_scl = im_detect_bbox_hflip( 118 | cfg, model, images, target_scale, target_max_size, device) 119 | else: 120 | boxlists_scl = im_detect_bbox( 121 | cfg, model, images, target_scale, target_max_size, device) 122 | return boxlists_scl 123 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | world_size = get_world_size() 57 | if world_size == 1: 58 | return [data] 59 | 60 | # serialized to a Tensor 61 | buffer = pickle.dumps(data) 62 | storage = torch.ByteStorage.from_buffer(buffer) 63 | tensor = torch.ByteTensor(storage).to("cuda") 64 | 65 | # obtain Tensor size of each rank 66 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 67 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 68 | dist.all_gather(size_list, local_size) 69 | size_list = [int(size.item()) for size in size_list] 70 | max_size = max(size_list) 71 | 72 | # receiving Tensor from all ranks 73 | # we pad the tensor because torch all_gather does not support 74 | # gathering tensors of different shapes 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 80 | tensor = torch.cat((tensor, padding), 
dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Reduce the values in the dictionary from all processes so that the process 94 | with rank 0 has the reduced results. Returns a dict with the same fields as 95 | input_dict, after reduction. 96 | Args: 97 | input_dict (dict): all the values will be reduced 98 | average (bool): whether to average or sum the values 99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | # sort the keys so that they are consistent across processes 107 | for k in sorted(input_dict.keys()): 108 | names.append(k) 109 | values.append(input_dict[k]) 110 | values = torch.stack(values, dim=0) 111 | dist.reduce(values, dst=0) 112 | if dist.get_rank() == 0 and average: 113 | # only main process gets accumulated, so only divide by 114 | # world_size in this case 115 | values /= world_size 116 | reduced_dict = {k: v for k, v in zip(names, values)} 117 | return reduced_dict 118 | -------------------------------------------------------------------------------- /dynamic_rcnn/engine/lr_scheduler.py: --------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from bisect import bisect_right 3 | 4 | import torch 5 | 6 | 7 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 8 | # separating MultiStepLR with WarmupLR 9 | # but the current LRScheduler design doesn't allow it 10 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 11 | def __init__( 12 | self, 13 | optimizer, 14 | milestones, 15 | gamma=0.1, 16 | warmup_factor=1.0 / 3, 17 | warmup_iters=500, 18 | warmup_method="linear", 19 | last_epoch=-1, 20 | ): 21 | if not list(milestones) == sorted(milestones): 22 | raise ValueError( 23 | "Milestones should be a list of increasing integers. " 24 | "Got {}".format(milestones) 25 | ) 26 | 27 | if warmup_method not in ("constant", "linear"): 28 | raise ValueError( 29 | "Only 'constant' or 'linear' warmup_method accepted, " 30 | "got {}".format(warmup_method) 31 | ) 32 | self.milestones = milestones 33 | self.gamma = gamma 34 | self.warmup_factor = warmup_factor 35 | self.warmup_iters = warmup_iters 36 | self.warmup_method = warmup_method 37 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 38 | 39 | def get_lr(self): 40 | warmup_factor = 1 41 | if self.last_epoch < self.warmup_iters: 42 | if self.warmup_method == "constant": 43 | warmup_factor = self.warmup_factor 44 | elif self.warmup_method == "linear": 45 | alpha = float(self.last_epoch) / self.warmup_iters 46 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 47 | return [ 48 | base_lr 49 | * warmup_factor 50 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 51 | for base_lr in self.base_lrs 52 | ] 53 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ROIAlign.h: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, 
targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/SigmoidFocalLoss_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | // This file is modified from https://github.com/pytorch/pytorch/blob/master/modules/detectron/sigmoid_focal_loss_op.cu 3 | // Cheng-Yang Fu 4 | // cyfu@cs.unc.edu 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | 12 | #include 13 | 14 | // TODO make it in a common file 15 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 16 | for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \ 17 | i += blockDim.x * gridDim.x) 18 | 19 | 20 | template 21 | __global__ void SigmoidFocalLossForward(const int nthreads, 22 | const T* logits, 23 | const int* targets, 24 | const int num_classes, 25 | const float gamma, 26 | const float alpha, 27 | const int num, 28 | T* losses) { 29 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 30 | 31 | int n = i / num_classes; 32 | int d = i % num_classes; // current class[0~79]; 33 | int t = targets[n]; // target class [1~80]; 34 | 35 | // Decide it is positive or negative case. 36 | T c1 = (t == (d+1)); 37 | T c2 = (t>=0 & t != (d+1)); 38 | 39 | T zn = (1.0 - alpha); 40 | T zp = (alpha); 41 | 42 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 43 | T p = 1. / (1. + expf(-logits[i])); 44 | 45 | // (1-p)**gamma * log(p) where 46 | T term1 = powf((1. - p), gamma) * logf(max(p, FLT_MIN)); 47 | 48 | // p**gamma * log(1-p) 49 | T term2 = powf(p, gamma) * 50 | (-1. * logits[i] * (logits[i] >= 0) - 51 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))); 52 | 53 | losses[i] = 0.0; 54 | losses[i] += -c1 * term1 * zp; 55 | losses[i] += -c2 * term2 * zn; 56 | 57 | } // CUDA_1D_KERNEL_LOOP 58 | } // SigmoidFocalLossForward 59 | 60 | 61 | template 62 | __global__ void SigmoidFocalLossBackward(const int nthreads, 63 | const T* logits, 64 | const int* targets, 65 | const T* d_losses, 66 | const int num_classes, 67 | const float gamma, 68 | const float alpha, 69 | const int num, 70 | T* d_logits) { 71 | CUDA_1D_KERNEL_LOOP(i, nthreads) { 72 | 73 | int n = i / num_classes; 74 | int d = i % num_classes; // current class[0~79]; 75 | int t = targets[n]; // target class [1~80], 0 is background; 76 | 77 | // Decide it is positive or negative case. 78 | T c1 = (t == (d+1)); 79 | T c2 = (t>=0 & t != (d+1)); 80 | 81 | T zn = (1.0 - alpha); 82 | T zp = (alpha); 83 | // p = 1. / 1. + expf(-x); p = sigmoid(x) 84 | T p = 1. / (1. + expf(-logits[i])); 85 | 86 | // (1-p)**g * (1 - p - g*p*log(p) 87 | T term1 = powf((1. - p), gamma) * 88 | (1. - p - (p * gamma * logf(max(p, FLT_MIN)))); 89 | 90 | // (p**g) * (g*(1-p)*log(1-p) - p) 91 | T term2 = powf(p, gamma) * 92 | ((-1. * logits[i] * (logits[i] >= 0) - 93 | logf(1. + expf(logits[i] - 2. * logits[i] * (logits[i] >= 0)))) * 94 | (1. 
- p) * gamma - p); 95 | d_logits[i] = 0.0; 96 | d_logits[i] += -c1 * term1 * zp; 97 | d_logits[i] += -c2 * term2 * zn; 98 | d_logits[i] = d_logits[i] * d_losses[i]; 99 | 100 | } // CUDA_1D_KERNEL_LOOP 101 | } // SigmoidFocalLossBackward 102 | 103 | 104 | at::Tensor SigmoidFocalLoss_forward_cuda( 105 | const at::Tensor& logits, 106 | const at::Tensor& targets, 107 | const int num_classes, 108 | const float gamma, 109 | const float alpha) { 110 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 111 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 112 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 113 | 114 | const int num_samples = logits.size(0); 115 | 116 | auto losses = at::empty({num_samples, logits.size(1)}, logits.options()); 117 | auto losses_size = num_samples * logits.size(1); 118 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 119 | 120 | dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L)); 121 | 122 | dim3 block(512); 123 | 124 | if (losses.numel() == 0) { 125 | THCudaCheck(cudaGetLastError()); 126 | return losses; 127 | } 128 | 129 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_forward", [&] { 130 | SigmoidFocalLossForward<<>>( 131 | losses_size, 132 | logits.contiguous().data(), 133 | targets.contiguous().data(), 134 | num_classes, 135 | gamma, 136 | alpha, 137 | num_samples, 138 | losses.data()); 139 | }); 140 | THCudaCheck(cudaGetLastError()); 141 | return losses; 142 | } 143 | 144 | 145 | at::Tensor SigmoidFocalLoss_backward_cuda( 146 | const at::Tensor& logits, 147 | const at::Tensor& targets, 148 | const at::Tensor& d_losses, 149 | const int num_classes, 150 | const float gamma, 151 | const float alpha) { 152 | AT_ASSERTM(logits.type().is_cuda(), "logits must be a CUDA tensor"); 153 | AT_ASSERTM(targets.type().is_cuda(), "targets must be a CUDA tensor"); 154 | AT_ASSERTM(d_losses.type().is_cuda(), "d_losses must be a CUDA tensor"); 155 | 156 | AT_ASSERTM(logits.dim() == 2, "logits should be NxClass"); 157 | 158 | const int num_samples = logits.size(0); 159 | AT_ASSERTM(logits.size(1) == num_classes, "logits.size(1) should be num_classes"); 160 | 161 | auto d_logits = at::zeros({num_samples, num_classes}, logits.options()); 162 | auto d_logits_size = num_samples * logits.size(1); 163 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 164 | 165 | dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L)); 166 | dim3 block(512); 167 | 168 | if (d_logits.numel() == 0) { 169 | THCudaCheck(cudaGetLastError()); 170 | return d_logits; 171 | } 172 | 173 | AT_DISPATCH_FLOATING_TYPES(logits.type(), "SigmoidFocalLoss_backward", [&] { 174 | SigmoidFocalLossBackward<<>>( 175 | d_logits_size, 176 | logits.contiguous().data(), 177 | targets.contiguous().data(), 178 | d_losses.contiguous().data(), 179 | num_classes, 180 | gamma, 181 | alpha, 182 | num_samples, 183 | d_logits.data()); 184 | }); 185 | 186 | THCudaCheck(cudaGetLastError()); 187 | return d_logits; 188 | } 189 | 190 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 
9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 
2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | 13 | __device__ inline float devIoU(float const * const a, float const * const b) { 14 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 15 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 16 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 17 | float interS = width * height; 18 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 19 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 20 | return interS / (Sa + Sb - interS); 21 | } 22 | 23 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 24 | const float *dev_boxes, unsigned long long *dev_mask) { 25 | const int row_start = blockIdx.y; 26 | const int col_start = blockIdx.x; 27 | 28 | // if (row_start > col_start) return; 29 | 30 | const int row_size = 31 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 32 | const int col_size = 33 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 34 | 35 | __shared__ float block_boxes[threadsPerBlock * 5]; 36 | if (threadIdx.x < col_size) { 37 | block_boxes[threadIdx.x * 5 + 0] = 38 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 | block_boxes[threadIdx.x * 5 + 1] = 40 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 | block_boxes[threadIdx.x * 5 + 2] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 | block_boxes[threadIdx.x * 5 + 3] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 | block_boxes[threadIdx.x * 5 + 4] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 | } 48 | __syncthreads(); 49 | 50 | if (threadIdx.x < row_size) { 51 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 | const float *cur_box = dev_boxes + cur_box_idx * 5; 53 | int i = 0; 54 | unsigned long long t = 0; 55 | int start = 0; 56 | if (row_start == col_start) { 57 | start = threadIdx.x + 1; 58 | } 59 | for (i = start; i < col_size; i++) { 60 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 61 | t |= 1ULL << i; 62 | } 63 | } 64 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 65 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 66 | } 67 | } 68 | 69 | // boxes is a N x 5 tensor 70 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 71 | using scalar_t = float; 72 | AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor"); 73 | auto scores = boxes.select(1, 4); 74 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 75 | auto boxes_sorted = boxes.index_select(0, order_t); 76 | 77 | int boxes_num 
= boxes.size(0); 78 | 79 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 80 | 81 | scalar_t* boxes_dev = boxes_sorted.data(); 82 | 83 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 84 | 85 | unsigned long long* mask_dev = NULL; 86 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 87 | // boxes_num * col_blocks * sizeof(unsigned long long))); 88 | 89 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 90 | 91 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 92 | THCCeilDiv(boxes_num, threadsPerBlock)); 93 | dim3 threads(threadsPerBlock); 94 | nms_kernel<<>>(boxes_num, 95 | nms_overlap_thresh, 96 | boxes_dev, 97 | mask_dev); 98 | 99 | std::vector mask_host(boxes_num * col_blocks); 100 | THCudaCheck(cudaMemcpy(&mask_host[0], 101 | mask_dev, 102 | sizeof(unsigned long long) * boxes_num * col_blocks, 103 | cudaMemcpyDeviceToHost)); 104 | 105 | std::vector remv(col_blocks); 106 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 107 | 108 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 109 | int64_t* keep_out = keep.data(); 110 | 111 | int num_to_keep = 0; 112 | for (int i = 0; i < boxes_num; i++) { 113 | int nblock = i / threadsPerBlock; 114 | int inblock = i % threadsPerBlock; 115 | 116 | if (!(remv[nblock] & (1ULL << inblock))) { 117 | keep_out[num_to_keep++] = i; 118 | unsigned long long *p = &mask_host[0] + i * col_blocks; 119 | for (int j = nblock; j < col_blocks; j++) { 120 | remv[j] |= p[j]; 121 | } 122 | } 123 | } 124 | 125 | THCudaFree(state, mask_dev); 126 | // TODO improve this part 127 | return std::get<0>(order_t.index({ 128 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 129 | order_t.device(), keep.scalar_type()) 130 | }).sort(0, false)); 131 | } 132 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/cuda/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | 21 | at::Tensor ROIAlign_forward_cuda(const at::Tensor& input, 22 | const at::Tensor& rois, 23 | const float spatial_scale, 24 | const int pooled_height, 25 | const int pooled_width, 26 | const int sampling_ratio); 27 | 28 | at::Tensor ROIAlign_backward_cuda(const at::Tensor& grad, 29 | const at::Tensor& rois, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width, 37 | const int sampling_ratio); 38 | 39 | 40 | std::tuple ROIPool_forward_cuda(const at::Tensor& input, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width); 45 | 46 | at::Tensor ROIPool_backward_cuda(const at::Tensor& grad, 47 | const at::Tensor& input, 48 | const at::Tensor& rois, 49 | const at::Tensor& argmax, 50 | const float spatial_scale, 51 | const int pooled_height, 52 | const int pooled_width, 53 | const int batch_size, 54 | const int channels, 55 | const int height, 56 | const int width); 57 | 58 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 59 | 60 | 61 | int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight, 62 | at::Tensor offset, at::Tensor output, 63 | at::Tensor columns, at::Tensor ones, int kW, 64 | int kH, int dW, int dH, int padW, int padH, 65 | int dilationW, int dilationH, int group, 66 | int deformable_group, int im2col_step); 67 | 68 | int deform_conv_backward_input_cuda(at::Tensor input, at::Tensor offset, 69 | at::Tensor gradOutput, at::Tensor gradInput, 70 | at::Tensor gradOffset, at::Tensor weight, 71 | at::Tensor columns, int kW, int kH, int dW, 72 | int dH, int padW, int padH, int dilationW, 73 | int dilationH, int group, 74 | int deformable_group, int im2col_step); 75 | 76 | int deform_conv_backward_parameters_cuda( 77 | at::Tensor input, at::Tensor offset, at::Tensor gradOutput, 78 | at::Tensor gradWeight, // at::Tensor gradBias, 79 | at::Tensor columns, at::Tensor ones, int kW, int kH, int dW, int dH, 80 | int padW, int padH, int dilationW, int dilationH, int group, 81 | int deformable_group, float scale, int im2col_step); 82 | 83 | void modulated_deform_conv_cuda_forward( 84 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 85 | at::Tensor offset, at::Tensor mask, at::Tensor output, at::Tensor columns, 86 | int kernel_h, int kernel_w, const int stride_h, const int stride_w, 87 | const int pad_h, const int pad_w, const int dilation_h, 88 | const int dilation_w, const int group, const int deformable_group, 89 | const bool with_bias); 90 | 91 | void modulated_deform_conv_cuda_backward( 92 | at::Tensor input, at::Tensor weight, at::Tensor bias, at::Tensor ones, 93 | at::Tensor offset, at::Tensor mask, at::Tensor columns, 94 | at::Tensor grad_input, at::Tensor grad_weight, at::Tensor grad_bias, 95 | at::Tensor grad_offset, at::Tensor grad_mask, at::Tensor grad_output, 96 | int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, 97 | int pad_w, int dilation_h, int dilation_w, int group, int 
deformable_group, 98 | const bool with_bias); 99 | 100 | void deform_psroi_pooling_cuda_forward( 101 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 102 | at::Tensor top_count, const int no_trans, const float spatial_scale, 103 | const int output_dim, const int group_size, const int pooled_size, 104 | const int part_size, const int sample_per_part, const float trans_std); 105 | 106 | void deform_psroi_pooling_cuda_backward( 107 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 108 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 109 | const int no_trans, const float spatial_scale, const int output_dim, 110 | const int group_size, const int pooled_size, const int part_size, 111 | const int sample_per_part, const float trans_std); 112 | 113 | 114 | at::Tensor compute_flow_cuda(const at::Tensor& boxes, 115 | const int height, 116 | const int width); 117 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/deform_conv.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | int deform_conv_forward( 12 | at::Tensor input, 13 | at::Tensor weight, 14 | at::Tensor offset, 15 | at::Tensor output, 16 | at::Tensor columns, 17 | at::Tensor ones, 18 | int kW, 19 | int kH, 20 | int dW, 21 | int dH, 22 | int padW, 23 | int padH, 24 | int dilationW, 25 | int dilationH, 26 | int group, 27 | int deformable_group, 28 | int im2col_step) 29 | { 30 | if (input.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_conv_forward_cuda( 33 | input, weight, offset, output, columns, ones, 34 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 35 | group, deformable_group, im2col_step 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | int deform_conv_backward_input( 46 | at::Tensor input, 47 | at::Tensor offset, 48 | at::Tensor gradOutput, 49 | at::Tensor gradInput, 50 | at::Tensor gradOffset, 51 | at::Tensor weight, 52 | at::Tensor columns, 53 | int kW, 54 | int kH, 55 | int dW, 56 | int dH, 57 | int padW, 58 | int padH, 59 | int dilationW, 60 | int dilationH, 61 | int group, 62 | int deformable_group, 63 | int im2col_step) 64 | { 65 | if (input.type().is_cuda()) { 66 | #ifdef WITH_CUDA 67 | return deform_conv_backward_input_cuda( 68 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 69 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 70 | group, deformable_group, im2col_step 71 | ); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | AT_ERROR("Not implemented on the CPU"); 77 | } 78 | 79 | 80 | int deform_conv_backward_parameters( 81 | at::Tensor input, 82 | at::Tensor offset, 83 | at::Tensor gradOutput, 84 | at::Tensor gradWeight, // at::Tensor gradBias, 85 | at::Tensor columns, 86 | at::Tensor ones, 87 | int kW, 88 | int kH, 89 | int dW, 90 | int dH, 91 | int padW, 92 | int padH, 93 | int dilationW, 94 | int dilationH, 95 | int group, 96 | int deformable_group, 97 | float scale, 98 | int im2col_step) 99 | { 100 | if (input.type().is_cuda()) { 101 | #ifdef WITH_CUDA 102 | return deform_conv_backward_parameters_cuda( 103 | input, offset, gradOutput, gradWeight, columns, ones, 104 | kW, 
kH, dW, dH, padW, padH, dilationW, dilationH, 105 | group, deformable_group, scale, im2col_step 106 | ); 107 | #else 108 | AT_ERROR("Not compiled with GPU support"); 109 | #endif 110 | } 111 | AT_ERROR("Not implemented on the CPU"); 112 | } 113 | 114 | 115 | void modulated_deform_conv_forward( 116 | at::Tensor input, 117 | at::Tensor weight, 118 | at::Tensor bias, 119 | at::Tensor ones, 120 | at::Tensor offset, 121 | at::Tensor mask, 122 | at::Tensor output, 123 | at::Tensor columns, 124 | int kernel_h, 125 | int kernel_w, 126 | const int stride_h, 127 | const int stride_w, 128 | const int pad_h, 129 | const int pad_w, 130 | const int dilation_h, 131 | const int dilation_w, 132 | const int group, 133 | const int deformable_group, 134 | const bool with_bias) 135 | { 136 | if (input.type().is_cuda()) { 137 | #ifdef WITH_CUDA 138 | return modulated_deform_conv_cuda_forward( 139 | input, weight, bias, ones, offset, mask, output, columns, 140 | kernel_h, kernel_w, stride_h, stride_w, 141 | pad_h, pad_w, dilation_h, dilation_w, 142 | group, deformable_group, with_bias 143 | ); 144 | #else 145 | AT_ERROR("Not compiled with GPU support"); 146 | #endif 147 | } 148 | AT_ERROR("Not implemented on the CPU"); 149 | } 150 | 151 | 152 | void modulated_deform_conv_backward( 153 | at::Tensor input, 154 | at::Tensor weight, 155 | at::Tensor bias, 156 | at::Tensor ones, 157 | at::Tensor offset, 158 | at::Tensor mask, 159 | at::Tensor columns, 160 | at::Tensor grad_input, 161 | at::Tensor grad_weight, 162 | at::Tensor grad_bias, 163 | at::Tensor grad_offset, 164 | at::Tensor grad_mask, 165 | at::Tensor grad_output, 166 | int kernel_h, 167 | int kernel_w, 168 | int stride_h, 169 | int stride_w, 170 | int pad_h, 171 | int pad_w, 172 | int dilation_h, 173 | int dilation_w, 174 | int group, 175 | int deformable_group, 176 | const bool with_bias) 177 | { 178 | if (input.type().is_cuda()) { 179 | #ifdef WITH_CUDA 180 | return modulated_deform_conv_cuda_backward( 181 | input, weight, bias, ones, offset, mask, columns, 182 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 183 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 184 | group, deformable_group, with_bias 185 | ); 186 | #else 187 | AT_ERROR("Not compiled with GPU support"); 188 | #endif 189 | } 190 | AT_ERROR("Not implemented on the CPU"); 191 | } -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn) 3 | # 4 | 5 | from .deform_conv_func import deform_conv, modulated_deform_conv 6 | from .deform_conv_module import DeformConv, ModulatedDeformConv, \ 7 | ModulatedDeformConvPack 8 | from .deform_pool_func import deform_roi_pooling 9 | from .deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, \ 10 | ModulatedDeformRoIPoolingPack 11 | 12 | __all__ = [ 13 | 'deform_conv', 14 | 'modulated_deform_conv', 15 | 'DeformConv', 16 | 'ModulatedDeformConv', 17 | 'ModulatedDeformConvPack', 18 | 'deform_roi_pooling', 19 | 'DeformRoIPooling', 20 | 'DeformRoIPoolingPack', 21 | 'ModulatedDeformRoIPoolingPack', 22 | ] 23 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/deform_conv_module.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.nn.modules.utils import _pair 6 | 7 | from .deform_conv_func import deform_conv, modulated_deform_conv 8 | 9 | 10 | class DeformConv(nn.Module): 11 | 12 | def __init__( 13 | self, 14 | in_channels, 15 | out_channels, 16 | kernel_size, 17 | stride=1, 18 | padding=0, 19 | dilation=1, 20 | groups=1, 21 | deformable_groups=1, 22 | bias=False 23 | ): 24 | assert not bias 25 | super(DeformConv, self).__init__() 26 | self.with_bias = bias 27 | 28 | assert in_channels % groups == 0, \ 29 | 'in_channels {} cannot be divisible by groups {}'.format( 30 | in_channels, groups) 31 | assert out_channels % groups == 0, \ 32 | 'out_channels {} cannot be divisible by groups {}'.format( 33 | out_channels, groups) 34 | self.in_channels = in_channels 35 | self.out_channels = out_channels 36 | self.kernel_size = _pair(kernel_size) 37 | self.stride = _pair(stride) 38 | self.padding = _pair(padding) 39 | self.dilation = _pair(dilation) 40 | self.groups = groups 41 | self.deformable_groups = deformable_groups 42 | 43 | self.weight = nn.Parameter( 44 | torch.Tensor(out_channels, in_channels // self.groups, 45 | *self.kernel_size)) 46 | 47 | self.reset_parameters() 48 | 49 | def reset_parameters(self): 50 | n = self.in_channels 51 | for k in self.kernel_size: 52 | n *= k 53 | stdv = 1. 
/ math.sqrt(n) 54 | self.weight.data.uniform_(-stdv, stdv) 55 | 56 | def forward(self, input, offset): 57 | return deform_conv(input, offset, self.weight, self.stride, 58 | self.padding, self.dilation, self.groups, 59 | self.deformable_groups) 60 | 61 | def __repr__(self): 62 | return "".join([ 63 | "{}(".format(self.__class__.__name__), 64 | "in_channels={}, ".format(self.in_channels), 65 | "out_channels={}, ".format(self.out_channels), 66 | "kernel_size={}, ".format(self.kernel_size), 67 | "stride={}, ".format(self.stride), 68 | "dilation={}, ".format(self.dilation), 69 | "padding={}, ".format(self.padding), 70 | "groups={}, ".format(self.groups), 71 | "deformable_groups={}, ".format(self.deformable_groups), 72 | "bias={})".format(self.with_bias), 73 | ]) 74 | 75 | 76 | class ModulatedDeformConv(nn.Module): 77 | 78 | def __init__( 79 | self, 80 | in_channels, 81 | out_channels, 82 | kernel_size, 83 | stride=1, 84 | padding=0, 85 | dilation=1, 86 | groups=1, 87 | deformable_groups=1, 88 | bias=True 89 | ): 90 | super(ModulatedDeformConv, self).__init__() 91 | self.in_channels = in_channels 92 | self.out_channels = out_channels 93 | self.kernel_size = _pair(kernel_size) 94 | self.stride = stride 95 | self.padding = padding 96 | self.dilation = dilation 97 | self.groups = groups 98 | self.deformable_groups = deformable_groups 99 | self.with_bias = bias 100 | 101 | self.weight = nn.Parameter(torch.Tensor( 102 | out_channels, 103 | in_channels // groups, 104 | *self.kernel_size 105 | )) 106 | if bias: 107 | self.bias = nn.Parameter(torch.Tensor(out_channels)) 108 | else: 109 | self.register_parameter('bias', None) 110 | self.reset_parameters() 111 | 112 | def reset_parameters(self): 113 | n = self.in_channels 114 | for k in self.kernel_size: 115 | n *= k 116 | stdv = 1. 
/ math.sqrt(n) 117 | self.weight.data.uniform_(-stdv, stdv) 118 | if self.bias is not None: 119 | self.bias.data.zero_() 120 | 121 | def forward(self, input, offset, mask): 122 | return modulated_deform_conv( 123 | input, offset, mask, self.weight, self.bias, self.stride, 124 | self.padding, self.dilation, self.groups, self.deformable_groups) 125 | 126 | def __repr__(self): 127 | return "".join([ 128 | "{}(".format(self.__class__.__name__), 129 | "in_channels={}, ".format(self.in_channels), 130 | "out_channels={}, ".format(self.out_channels), 131 | "kernel_size={}, ".format(self.kernel_size), 132 | "stride={}, ".format(self.stride), 133 | "dilation={}, ".format(self.dilation), 134 | "padding={}, ".format(self.padding), 135 | "groups={}, ".format(self.groups), 136 | "deformable_groups={}, ".format(self.deformable_groups), 137 | "bias={})".format(self.with_bias), 138 | ]) 139 | 140 | class ModulatedDeformConvPack(ModulatedDeformConv): 141 | 142 | def __init__(self, 143 | in_channels, 144 | out_channels, 145 | kernel_size, 146 | stride=1, 147 | padding=0, 148 | dilation=1, 149 | groups=1, 150 | deformable_groups=1, 151 | bias=True): 152 | super(ModulatedDeformConvPack, self).__init__( 153 | in_channels, out_channels, kernel_size, stride, padding, dilation, 154 | groups, deformable_groups, bias) 155 | 156 | self.conv_offset_mask = nn.Conv2d( 157 | self.in_channels // self.groups, 158 | self.deformable_groups * 3 * self.kernel_size[0] * 159 | self.kernel_size[1], 160 | kernel_size=self.kernel_size, 161 | stride=_pair(self.stride), 162 | padding=_pair(self.padding), 163 | bias=True) 164 | self.init_offset() 165 | 166 | def init_offset(self): 167 | self.conv_offset_mask.weight.data.zero_() 168 | self.conv_offset_mask.bias.data.zero_() 169 | 170 | def forward(self, input): 171 | out = self.conv_offset_mask(input) 172 | o1, o2, mask = torch.chunk(out, 3, dim=1) 173 | offset = torch.cat((o1, o2), dim=1) 174 | mask = torch.sigmoid(mask) 175 | return modulated_deform_conv( 176 | input, offset, mask, self.weight, self.bias, self.stride, 177 | self.padding, self.dilation, self.groups, self.deformable_groups) 178 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from dynamic_rcnn import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | 
ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 | ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/dcn/deform_pool_module.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from .deform_pool_func import deform_roi_pooling 4 | 5 | 6 | class DeformRoIPooling(nn.Module): 7 | 8 | def __init__(self, 9 | spatial_scale, 10 | out_size, 11 | out_channels, 12 | no_trans, 13 | group_size=1, 14 | part_size=None, 15 | sample_per_part=4, 16 | trans_std=.0): 17 | super(DeformRoIPooling, self).__init__() 18 | self.spatial_scale = spatial_scale 19 | self.out_size = out_size 20 | self.out_channels = out_channels 21 | self.no_trans = no_trans 22 | self.group_size = group_size 23 | self.part_size = out_size if part_size is None else part_size 24 | self.sample_per_part = sample_per_part 25 | self.trans_std = trans_std 26 | 27 | def forward(self, data, rois, offset): 28 | if self.no_trans: 29 | offset = data.new_empty(0) 30 | return deform_roi_pooling( 31 | data, rois, offset, self.spatial_scale, self.out_size, 32 | self.out_channels, self.no_trans, self.group_size, self.part_size, 33 | self.sample_per_part, self.trans_std) 34 | 35 | 36 | class DeformRoIPoolingPack(DeformRoIPooling): 37 | 38 | def __init__(self, 39 | spatial_scale, 40 | out_size, 41 | out_channels, 42 | no_trans, 43 | group_size=1, 44 | part_size=None, 45 | sample_per_part=4, 46 | trans_std=.0, 47 | deform_fc_channels=1024): 48 | super(DeformRoIPoolingPack, 49 | self).__init__(spatial_scale, out_size, out_channels, no_trans, 50 | group_size, part_size, sample_per_part, trans_std) 51 | 52 | self.deform_fc_channels = deform_fc_channels 53 | 54 | if not no_trans: 55 | self.offset_fc = nn.Sequential( 56 | nn.Linear(self.out_size * self.out_size * self.out_channels, 57 | self.deform_fc_channels), 58 | nn.ReLU(inplace=True), 59 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 60 | nn.ReLU(inplace=True), 61 | nn.Linear(self.deform_fc_channels, 62 | self.out_size * self.out_size * 2)) 63 | self.offset_fc[-1].weight.data.zero_() 64 | self.offset_fc[-1].bias.data.zero_() 65 | 66 | def forward(self, data, rois): 67 | assert data.size(1) == self.out_channels 68 | if self.no_trans: 69 | offset = data.new_empty(0) 70 | return deform_roi_pooling( 71 | data, rois, offset, self.spatial_scale, self.out_size, 72 | self.out_channels, self.no_trans, 
self.group_size, 73 | self.part_size, self.sample_per_part, self.trans_std) 74 | else: 75 | n = rois.shape[0] 76 | offset = data.new_empty(0) 77 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 78 | self.out_size, self.out_channels, True, 79 | self.group_size, self.part_size, 80 | self.sample_per_part, self.trans_std) 81 | offset = self.offset_fc(x.view(n, -1)) 82 | offset = offset.view(n, 2, self.out_size, self.out_size) 83 | return deform_roi_pooling( 84 | data, rois, offset, self.spatial_scale, self.out_size, 85 | self.out_channels, self.no_trans, self.group_size, 86 | self.part_size, self.sample_per_part, self.trans_std) 87 | 88 | 89 | class ModulatedDeformRoIPoolingPack(DeformRoIPooling): 90 | 91 | def __init__(self, 92 | spatial_scale, 93 | out_size, 94 | out_channels, 95 | no_trans, 96 | group_size=1, 97 | part_size=None, 98 | sample_per_part=4, 99 | trans_std=.0, 100 | deform_fc_channels=1024): 101 | super(ModulatedDeformRoIPoolingPack, self).__init__( 102 | spatial_scale, out_size, out_channels, no_trans, group_size, 103 | part_size, sample_per_part, trans_std) 104 | 105 | self.deform_fc_channels = deform_fc_channels 106 | 107 | if not no_trans: 108 | self.offset_fc = nn.Sequential( 109 | nn.Linear(self.out_size * self.out_size * self.out_channels, 110 | self.deform_fc_channels), 111 | nn.ReLU(inplace=True), 112 | nn.Linear(self.deform_fc_channels, self.deform_fc_channels), 113 | nn.ReLU(inplace=True), 114 | nn.Linear(self.deform_fc_channels, 115 | self.out_size * self.out_size * 2)) 116 | self.offset_fc[-1].weight.data.zero_() 117 | self.offset_fc[-1].bias.data.zero_() 118 | self.mask_fc = nn.Sequential( 119 | nn.Linear(self.out_size * self.out_size * self.out_channels, 120 | self.deform_fc_channels), 121 | nn.ReLU(inplace=True), 122 | nn.Linear(self.deform_fc_channels, 123 | self.out_size * self.out_size * 1), 124 | nn.Sigmoid()) 125 | self.mask_fc[2].weight.data.zero_() 126 | self.mask_fc[2].bias.data.zero_() 127 | 128 | def forward(self, data, rois): 129 | assert data.size(1) == self.out_channels 130 | if self.no_trans: 131 | offset = data.new_empty(0) 132 | return deform_roi_pooling( 133 | data, rois, offset, self.spatial_scale, self.out_size, 134 | self.out_channels, self.no_trans, self.group_size, 135 | self.part_size, self.sample_per_part, self.trans_std) 136 | else: 137 | n = rois.shape[0] 138 | offset = data.new_empty(0) 139 | x = deform_roi_pooling(data, rois, offset, self.spatial_scale, 140 | self.out_size, self.out_channels, True, 141 | self.group_size, self.part_size, 142 | self.sample_per_part, self.trans_std) 143 | offset = self.offset_fc(x.view(n, -1)) 144 | offset = offset.view(n, 2, self.out_size, self.out_size) 145 | mask = self.mask_fc(x.view(n, -1)) 146 | mask = mask.view(n, 1, self.out_size, self.out_size) 147 | return deform_roi_pooling( 148 | data, rois, offset, self.spatial_scale, self.out_size, 149 | self.out_channels, self.no_trans, self.group_size, 150 | self.part_size, self.sample_per_part, self.trans_std) * mask 151 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
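A minimal usage sketch for the `nms` binding defined just below (illustrative only: it assumes the `_C` extension has been compiled through the project's `setup.py`, and that boxes are `[x1, y1, x2, y2]` rows with a matching 1-D score tensor):

import torch
from dynamic_rcnn.kernels.ops.nms import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, 0.5)  # indices of the boxes that survive suppression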
2 | # from ._utils import _C 3 | from dynamic_rcnn import _C 4 | 5 | nms = _C.nms 6 | 7 | # nms.__doc__ = """ 8 | # This function performs Non-maximum suppresion""" 9 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from dynamic_rcnn import _C 9 | 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | def forward(self, input, rois): 57 | return roi_align( 58 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 59 | ) 60 | 61 | def __repr__(self): 62 | tmpstr = self.__class__.__name__ + "(" 63 | tmpstr += "output_size=" + str(self.output_size) 64 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 65 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 66 | tmpstr += ")" 67 | return tmpstr 68 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/ops/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
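For orientation, a rough sketch of how the `ROIAlign` module above is typically driven (the shapes and the `[batch_idx, x1, y1, x2, y2]` RoI layout are assumptions for illustration, not taken from this file):

import torch
from dynamic_rcnn.kernels.ops.roi_align import ROIAlign

pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
features = torch.randn(2, 256, 50, 50)             # NCHW feature map
rois = torch.tensor([[0., 16., 16., 160., 160.]])  # one RoI on image 0, in image coordinates
pooled = pooler(features, rois)                    # -> (num_rois, 256, 7, 7)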
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from dynamic_rcnn import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /dynamic_rcnn/kernels/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("nms", &nms, "non-maximum suppression"); 11 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 12 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 13 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 14 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 15 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 16 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 17 | // dcn-v2 18 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 19 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 20 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 21 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 22 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 23 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 24 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 25 | } -------------------------------------------------------------------------------- /dynamic_rcnn/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter( 16 | "%(asctime)s %(name)s %(levelname)s: %(message)s") 17 | ch.setFormatter(formatter) 18 | logger.addHandler(ch) 19 | 20 | if save_dir: 21 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 22 | fh.setLevel(logging.DEBUG) 23 | fh.setFormatter(formatter) 24 | logger.addHandler(fh) 25 | 26 | return logger 27 | -------------------------------------------------------------------------------- /dynamic_rcnn/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
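    For example (illustrative): with window_size=2, after update(1.0), update(2.0)
    and update(4.0) the window holds [2.0, 4.0], so avg is 3.0 and median is 2.0
    (torch.median returns the lower of the two middle values), while global_avg
    is 7.0 / 3.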
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /dynamic_rcnn/utils/pyt_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | import cv2 5 | 6 | 7 | def mkdir(path): 8 | try: 9 | os.makedirs(path) 10 | except OSError as e: 11 | if e.errno != errno.EEXIST: 12 | raise 13 | 14 | 15 | def link_file(src, target): 16 | """symbol link the source directories to target.""" 17 | if os.path.isdir(target) or os.path.isfile(target): 18 | os.remove(target) 19 | os.system('ln -s {} {}'.format(src, target)) 20 | 21 | 22 | def findContours(*args, **kwargs): 23 | """ 24 | Wraps cv2.findContours to maintain compatiblity between versions 25 | 3 and 4 26 | 27 | Returns: 28 | contours, hierarchy 29 | """ 30 | if cv2.__version__.startswith('4'): 31 | contours, hierarchy = cv2.findContours(*args, **kwargs) 32 | elif cv2.__version__.startswith('3'): 33 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 34 | else: 35 | raise AssertionError( 36 | 'cv2 must be either version 3 or 4 to call this method') 37 | 38 | return contours, hierarchy 39 | 40 | 41 | def draw_box(image, box, label, color=(0, 0, 255), score=None, linewidth=2): 42 | """Draw a bounding box with label on the image.""" 43 | if score is not None: 44 | text = "{}: {:.4f}".format(label, score) 45 | else: 46 | text = str(label) 47 | 48 | cv2.rectangle(image, (int(box[0]), int(box[1])), 49 | (int(box[2]), int(box[3])), color, linewidth) 50 | cx = box[0] + (box[2] - box[0]) / 2 - 5 51 | cy = box[1] + 12 52 | cv2.putText(image, text, (int(cx), int(cy)), 53 | cv2.FONT_HERSHEY_DUPLEX, 0.5, (255, 255, 255)) -------------------------------------------------------------------------------- /dynamic_rcnn/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for registering and managing modules. It extends a dictionary 12 | and provides a register function. 13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is simply to call the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): use it as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Modules are then accessed just like dictionary entries, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /dynamic_rcnn/utils/torch_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in the list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | 18 | 19 | def permute_and_flatten(layer, N, A, C, H, W): 20 | layer = layer.view(N, -1, C, H, W) 21 | layer = layer.permute(0, 3, 4, 1, 2) 22 | layer = layer.reshape(N, -1, C) 23 | return layer 24 | 25 | 26 | def concat_box_prediction_layers(box_cls, box_regression): 27 | box_cls_flattened = [] 28 | box_regression_flattened = [] 29 | # for each feature level, permute the outputs to put them in the 30 | # same format as the labels. 
Note that the labels are computed for 31 | # all feature levels concatenated, so we keep the same representation 32 | # for the objectness and the box_regression 33 | for box_cls_per_level, box_regression_per_level in zip( 34 | box_cls, box_regression 35 | ): 36 | N, AxC, H, W = box_cls_per_level.shape 37 | Ax4 = box_regression_per_level.shape[1] 38 | A = Ax4 // 4 39 | C = AxC // A 40 | box_cls_per_level = permute_and_flatten( 41 | box_cls_per_level, N, A, C, H, W 42 | ) 43 | box_cls_flattened.append(box_cls_per_level) 44 | 45 | box_regression_per_level = permute_and_flatten( 46 | box_regression_per_level, N, A, 4, H, W 47 | ) 48 | box_regression_flattened.append(box_regression_per_level) 49 | # concatenate on the first dimension (representing the feature levels), to 50 | # take into account the way the labels were generated (with all feature maps 51 | # being concatenated as well) 52 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 53 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 54 | return box_cls, box_regression 55 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_dcnv2_fpn_mstrain_3x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_fpn_1x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_fpn_2x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r101_fpn_mstrain_3x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r50_fpn_1x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /models/zhanghongkai/dynamic_rcnn/coco/dynamic_rcnn_r50_fpn_2x/dataset.py: -------------------------------------------------------------------------------- 1 | from config import config as cfg 2 | 3 | import torch.utils.data 4 | from dynamic_rcnn.datasets.coco import COCODataset 5 | from dynamic_rcnn.datasets.concat_dataset import ConcatDataset 6 | from dynamic_rcnn.datasets.transforms import build_transforms 7 | from dynamic_rcnn.datasets import samplers 8 | from dynamic_rcnn.datasets.collate_batch import BatchCollator, BBoxAugCollator 9 | 10 | 11 | def make_data_loader( 12 | num_gpus, is_train=True, is_distributed=False, start_iter=0, 13 | return_raw=False): 14 | # If bbox aug is enabled in testing, simply set transforms to None and we will apply transforms later 15 | transforms = None if not is_train and cfg.TEST.BBOX_AUG.ENABLED else \ 16 | build_transforms(cfg, is_train) 17 | images_per_gpu = cfg.SOLVER.IMS_PER_GPU if is_train else cfg.TEST.IMS_PER_GPU 18 | images_per_batch = images_per_gpu * num_gpus 19 | 20 | if is_train: 21 | shuffle = True 22 | num_iters = cfg.SOLVER.MAX_ITER 23 | # scale, only suppose images_per_batch < SOLVER.IMS_PER_BATCH 24 | if images_per_batch < cfg.SOLVER.IMS_PER_BATCH: 25 | assert cfg.SOLVER.IMS_PER_BATCH % images_per_batch == 0 26 | num_iters *= (cfg.SOLVER.IMS_PER_BATCH // images_per_batch) 27 | else: 28 | shuffle = False if not is_distributed else True 29 | num_iters = None 30 | start_iter = 0 31 | 32 | # group images which have similar aspect ratio. 
In this case, we only 33 | # group in two cases: those with width / height > 1, and the other way around, 34 | # but the code supports more general grouping strategy 35 | aspect_grouping = [1] if cfg.DATALOADER.ASPECT_RATIO_GROUPING else [] 36 | dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST 37 | datasets = [] 38 | for d_key, d_val in dataset_list.items(): 39 | dataset = COCODataset( 40 | d_val['ann_file'], d_val['img_dir'], 41 | remove_images_without_annotations=is_train, 42 | transforms=transforms, return_raw=return_raw) 43 | datasets.append(dataset) 44 | dataset = datasets[0] if len(datasets) == 1 else ConcatDataset(datasets) 45 | 46 | # make data sampler 47 | if is_distributed: 48 | sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) 49 | elif shuffle: 50 | sampler = torch.utils.data.sampler.RandomSampler(dataset) 51 | else: 52 | sampler = torch.utils.data.sampler.SequentialSampler(dataset) 53 | 54 | # make batch data sampler 55 | if aspect_grouping: 56 | if not isinstance(aspect_grouping, (list, tuple)): 57 | aspect_grouping = [aspect_grouping] 58 | batch_sampler = samplers.GroupedBatchSampler( 59 | sampler, dataset, aspect_grouping, images_per_gpu, 60 | drop_uneven=False) 61 | else: 62 | batch_sampler = torch.utils.data.sampler.BatchSampler( 63 | sampler, images_per_gpu, drop_last=False) 64 | 65 | if num_iters is not None: 66 | batch_sampler = samplers.IterationBasedBatchSampler( 67 | batch_sampler, num_iters, start_iter) 68 | 69 | collator = BBoxAugCollator() if not is_train and cfg.TEST.BBOX_AUG.ENABLED \ 70 | else BatchCollator(cfg.DATALOADER.SIZE_DIVISIBILITY, 71 | return_raw=return_raw) 72 | data_loader = torch.utils.data.DataLoader( 73 | dataset, 74 | num_workers=cfg.DATALOADER.NUM_WORKERS, 75 | batch_sampler=batch_sampler, 76 | collate_fn=collator 77 | ) 78 | if not is_train: 79 | data_loader = [data_loader] 80 | return data_loader 81 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # !/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = [ 15 | "torch==1.0.1.post2", "torchvision==0.2.2.post3", "cython", "matplotlib", 16 | "tqdm", "easydict", "pycocotools", "opencv-python"] 17 | 18 | 19 | def get_extensions(): 20 | this_dir = os.path.dirname(os.path.abspath(__file__)) 21 | extensions_dir = os.path.join(this_dir, "dynamic_rcnn", "kernels") 22 | 23 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 24 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 25 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 26 | 27 | sources = main_file + source_cpu 28 | extension = CppExtension 29 | 30 | extra_compile_args = {"cxx": []} 31 | define_macros = [] 32 | 33 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv( 34 | "FORCE_CUDA", "0") == "1": 35 | extension = CUDAExtension 36 | sources += source_cuda 37 | define_macros += [("WITH_CUDA", None)] 38 | extra_compile_args["nvcc"] = [ 39 | "-DCUDA_HAS_FP16=1", 40 | "-D__CUDA_NO_HALF_OPERATORS__", 41 | "-D__CUDA_NO_HALF_CONVERSIONS__", 42 | "-D__CUDA_NO_HALF2_OPERATORS__", 43 | ] 44 | 45 | sources = [os.path.join(extensions_dir, s) for s in sources] 46 | 47 | include_dirs = [extensions_dir] 48 | 49 | ext_modules = [ 50 | extension( 51 | "dynamic_rcnn._C", 52 | sources, 53 | include_dirs=include_dirs, 54 | define_macros=define_macros, 55 | extra_compile_args=extra_compile_args, 56 | ) 57 | ] 58 | 59 | return ext_modules 60 | 61 | 62 | setup( 63 | name="DynamicRCNN", 64 | version="0.1", 65 | author="fmassa, hkzhang95", 66 | url="https://github.com/hkzhang95/DynamicRCNN", 67 | description="object detection in pytorch", 68 | # packages=find_packages(exclude=("configs", "tests",)), 69 | install_requires=requirements, 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | --------------------------------------------------------------------------------
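A minimal build-and-smoke-test sketch, assuming the C++/CUDA kernels have been compiled into the `dynamic_rcnn._C` extension by the setup script above (for example via `python setup.py build_ext --inplace` or `pip install -e .`). The snippet below only checks that the compiled CPU ops load and run; the box coordinates, feature shape and spatial_scale are arbitrary assumptions, not values taken from this repository.

import torch
from dynamic_rcnn import _C  # the same extension module imported by kernels/ops/nms.py

# Non-maximum suppression on three boxes; box 1 overlaps box 0 with IoU > 0.5,
# so only boxes 0 and 2 survive.
boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = _C.nms(boxes, scores, 0.5)

# ROIAlign forward pass: each rois row is (batch_index, x1, y1, x2, y2) in image
# coordinates, mapped onto the stride-4 feature map by spatial_scale=0.25.
features = torch.randn(1, 256, 32, 32)
rois = torch.tensor([[0., 0., 0., 64., 64.]])
pooled = _C.roi_align_forward(features, rois, 0.25, 7, 7, 2)

print(keep.tolist(), tuple(pooled.shape))  # expected: [0, 2] (1, 256, 7, 7)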