├── hetsgg ├── __init__.py ├── engine │ ├── __init__.py │ ├── trainer.py │ └── bbox_aug.py ├── modeling │ ├── __init__.py │ ├── rpn │ │ ├── __init__.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ └── loss.py │ │ └── utils.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── roi_box_predictors.py │ │ │ └── loss.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── roi_mask_predictors.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ ├── mask_head.py │ │ │ └── loss.py │ │ ├── attribute_head │ │ │ ├── __init__.py │ │ │ ├── roi_attribute_predictors.py │ │ │ ├── attribute_head.py │ │ │ └── loss.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── roi_keypoint_predictors.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ ├── keypoint_head.py │ │ │ └── inference.py │ │ └── relation_head │ │ │ ├── __init__.py │ │ │ └── rel_proposal_network │ │ │ └── __init__.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── backbone │ │ ├── __init__.py │ │ ├── vgg.py │ │ ├── backbone.py │ │ └── fpn.py │ ├── utils.py │ ├── registry.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── make_layers.py │ └── matcher.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── cv2_util.py │ ├── imports.py │ ├── env.py │ ├── timer.py │ ├── registry.py │ ├── model_zoo.py │ ├── global_buffer.py │ ├── metric_logger.py │ ├── visualize_graph.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── comm.py │ └── logger.py ├── structures │ ├── __init__.py │ └── image_list.py ├── layers │ ├── dcn │ │ ├── __init__.py │ │ └── deform_pool_func.py │ ├── nms.py │ ├── entropy_loss.py │ ├── kl_div_loss.py │ ├── smooth_l1_loss.py │ ├── batch_norm.py │ ├── _utils.py │ ├── __init__.py │ ├── roi_pool.py │ ├── roi_align.py │ ├── sigmoid_focal_loss.py │ └── label_smoothing_loss.py ├── config │ └── __init__.py ├── data │ ├── __init__.py │ ├── datasets │ │ ├── evaluation │ │ │ ├── vg │ │ │ │ ├── zeroshot_triplet.pytorch │ │ │ │ ├── __init__.py │ │ │ │ └── vg_stage_eval_utils.py │ │ │ ├── voc │ │ │ │ └── __init__.py │ │ │ ├── coco │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── concat_dataset.py │ │ ├── list_dataset.py │ │ ├── coco.py │ │ └── voc.py │ ├── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── iteration_based_batch_sampler.py │ │ ├── distributed.py │ │ └── grouped_batch_sampler.py │ └── collate_batch.py ├── solver │ ├── __init__.py │ └── build.py └── csrc │ ├── cpu │ ├── vision.h │ └── nms_cpu.cpp │ ├── nms.h │ ├── SigmoidFocalLoss.h │ ├── vision.cpp │ ├── ROIPool.h │ ├── ROIAlign.h │ ├── deform_pool.h │ ├── cuda │ ├── deform_pool_cuda.cu │ └── nms.cu │ └── deform_conv.h ├── hetsgg.egg-info ├── dependency_links.txt ├── top_level.txt └── PKG-INFO ├── .gitignore ├── shell ├── hetsgg_test.sh ├── hetsgg_train_sggen_oi.sh ├── hetsgg_train_sggen_vg.sh ├── hetsgg_train_sgcls_vg.sh └── hetsgg_train_predcls_vg.sh ├── tools ├── runner.py ├── cityscapes │ └── instances2dict_with_polygons.py ├── detector_pretest_net.py └── relation_test_net.py ├── setup.py └── Datasets └── OI-V4 └── Category_Type_Info.json /hetsgg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/engine/__init__.py: -------------------------------------------------------------------------------- 1 | 
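The tree above, together with the package __init__ files in this dump, fixes the toolkit's public entry points: cfg from hetsgg.config, make_data_loader from hetsgg.data, and build_detection_model from hetsgg.modeling.detector. A minimal driver sketch, assuming a placeholder config path and assuming make_data_loader keeps the maskrcnn-benchmark-style signature this codebase inherits:

import torch

from hetsgg.config import cfg
from hetsgg.data import make_data_loader
from hetsgg.modeling.detector import build_detection_model

# Placeholder path; the shell scripts below pass configs such as configs/relHetSGG_vg.yaml.
cfg.merge_from_file("configs/relHetSGG_vg.yaml")
cfg.freeze()

# The meta-architecture is looked up from cfg.MODEL.META_ARCHITECTURE (see detectors.py).
model = build_detection_model(cfg)
model.to("cuda" if torch.cuda.is_available() else "cpu")

# Assumed signature, following the maskrcnn-benchmark lineage.
train_loader = make_data_loader(cfg, is_train=True)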
-------------------------------------------------------------------------------- /hetsgg/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/structures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /hetsgg.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | hetsgg 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/relation_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/relation_head/rel_proposal_network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/data/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .build import make_data_loader, get_dataset_statistics 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors import build_detection_model 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone 2 | from . import fbnet 3 | -------------------------------------------------------------------------------- /hetsgg/layers/nms.py: -------------------------------------------------------------------------------- 1 | 2 | from hetsgg import _C 3 | 4 | from apex import amp 5 | 6 | nms = amp.float_function(_C.nms) 7 | 8 | 9 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/vg/zeroshot_triplet.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KanghoonYoon/hetsgg-torch/HEAD/hetsgg/data/datasets/evaluation/vg/zeroshot_triplet.pytorch -------------------------------------------------------------------------------- /hetsgg/solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import make_optimizer 2 | from .build import make_lr_scheduler 3 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau 4 | 5 | -------------------------------------------------------------------------------- /hetsgg.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: hetsgg 3 | Version: 0.1 4 | Summary: A Toolkit for Scene Graph Generation 5 | Home-page: 6 | Author: Anonymous 7 | License: UNKNOWN 8 | Platform: UNKNOWN 9 | 10 | UNKNOWN 11 | 12 | -------------------------------------------------------------------------------- /hetsgg/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | from .transforms import Resize 3 | from .transforms import RandomHorizontalFlip 4 | from .transforms import ToTensor 5 | from .transforms import Normalize 6 | 7 | from .build import build_transforms 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | Datasets/VG/VG-SGG-with-attri.h5 3 | Datasets/VG/VG-SGG-dicts-with-attri.json 4 | Datasets/VG/image_data.json 5 | Datasets/VG/VG_100k 6 | Datasets/Glove/*.txt 7 | Datasets/Glove/*.pt 8 | configs/ 9 | shell/ 10 | checkpoints/ 11 | apex/ 12 | cocoapi/ 13 | __pycache__ 14 | *.so -------------------------------------------------------------------------------- /hetsgg/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed import DistributedSampler 2 | from .grouped_batch_sampler import GroupedBatchSampler 3 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 4 | 5 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 6 | -------------------------------------------------------------------------------- 
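The transform classes exported by hetsgg/data/transforms/__init__.py above are composed in data/transforms/build.py further down; a hand-rolled test-time pipeline, with the cfg-driven values swapped for illustrative placeholders, would look like:

from hetsgg.data.transforms import Compose, Resize, RandomHorizontalFlip, ToTensor, Normalize

# Mirrors build_transforms(cfg, is_train=False) from build.py below; the numeric
# values here are placeholders for the cfg-driven ones.
transform = Compose(
    [
        Resize(600, 1000),          # (min_size, max_size)
        RandomHorizontalFlip(0.0),  # no flipping at test time
        ToTensor(),
        Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_bgr255=True),
    ]
)

# Each transform maps (image, target) -> (image, target), which is how
# ListDataset.__getitem__ below applies it: img, target = transform(img, target)
--------------------------------------------------------------------------------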
/hetsgg/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco import COCODataset 2 | from .voc import PascalVOCDataset 3 | from .concat_dataset import ConcatDataset 4 | from .visual_genome import VGDataset 5 | from .open_image import OIDataset 6 | 7 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "VGDataset", "OIDataset"] 8 | -------------------------------------------------------------------------------- /hetsgg/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | from .generalized_rcnn import GeneralizedRCNN 2 | 3 | 4 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 5 | 6 | 7 | def build_detection_model(cfg): 8 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 9 | return meta_arch(cfg) 10 | -------------------------------------------------------------------------------- /hetsgg/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import PIL 2 | 3 | from torch.utils.collect_env import get_pretty_env_info 4 | 5 | 6 | def get_pil_version(): 7 | return "\n Pillow ({})".format(PIL.__version__) 8 | 9 | 10 | def collect_env_info(): 11 | env_str = get_pretty_env_info() 12 | env_str += get_pil_version() 13 | return env_str 14 | -------------------------------------------------------------------------------- /hetsgg/layers/entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def entropy_loss(input, e=1e-9, reduction='sum'): 5 | assert len(input.shape) == 2 6 | loss = - (input * (input + e).log()) 7 | 8 | if reduction == 'sum': 9 | loss = loss.sum(-1) 10 | elif reduction == 'mean': 11 | loss = loss.mean(-1) 12 | 13 | return loss.mean() -------------------------------------------------------------------------------- /hetsgg/modeling/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Miscellaneous utility functions 3 | """ 4 | 5 | import torch 6 | 7 | 8 | def cat(tensors, dim=0): 9 | """ 10 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 11 | """ 12 | assert isinstance(tensors, (list, tuple)) 13 | if len(tensors) == 1: 14 | return tensors[0] 15 | return torch.cat(tensors, dim) 16 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/vg/__init__.py: -------------------------------------------------------------------------------- 1 | from .vg_eval import do_vg_evaluation 2 | 3 | 4 | def vg_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | iou_types, 11 | **_ 12 | ): 13 | return do_vg_evaluation( 14 | cfg=cfg, 15 | dataset=dataset, 16 | predictions=predictions, 17 | output_folder=output_folder, 18 | logger=logger, 19 | iou_types=iou_types, 20 | ) 21 | -------------------------------------------------------------------------------- /hetsgg/layers/kl_div_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def kl_div_loss(input, target, e=1e-9, reduction='sum'): 5 | assert len(input.shape) == 2 6 | assert len(target.shape) == 2 7 | 8 | log_target = (target + e).log() 9 | log_input = (input + e).log() 10 | 11 | loss = target.detach() * (log_target.detach() - log_input) 12 | 13 | if reduction == 'sum': 14 | loss = loss.sum(-1) 15 
| elif reduction == 'mean': 16 | loss = loss.mean(-1) 17 | 18 | return loss.mean() -------------------------------------------------------------------------------- /hetsgg/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | # TODO maybe push this to nn? 5 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 6 | """ 7 | very similar to the smooth_l1_loss from pytorch, but with 8 | the extra beta parameter 9 | """ 10 | n = torch.abs(input - target) 11 | cond = n < beta 12 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 13 | if size_average: 14 | return loss.mean() 15 | return loss.sum() 16 | 17 | 18 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(cfg, dataset, predictions, output_folder, logger, box_only, **_): 7 | if box_only: 8 | logger.warning("voc evaluation doesn't support box_only, ignored.") 9 | logger.info("performing voc evaluation, ignored iou_types.") 10 | return do_voc_evaluation( 11 | dataset=dataset, 12 | predictions=predictions, 13 | output_folder=output_folder, 14 | logger=logger, 15 | ) 16 | -------------------------------------------------------------------------------- /hetsgg/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | 5 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 6 | const at::Tensor& rois, 7 | const float spatial_scale, 8 | const int pooled_height, 9 | const int pooled_width, 10 | const int sampling_ratio); 11 | 12 | 13 | at::Tensor nms_cpu(const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const float threshold); 16 | -------------------------------------------------------------------------------- /hetsgg/modeling/registry.py: -------------------------------------------------------------------------------- 1 | 2 | from hetsgg.utils.registry import Registry 3 | 4 | BACKBONES = Registry() 5 | RPN_HEADS = Registry() 6 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 7 | ROI_BOX_PREDICTOR = Registry() 8 | ROI_ATTRIBUTE_FEATURE_EXTRACTORS = Registry() 9 | ROI_ATTRIBUTE_PREDICTOR = Registry() 10 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 11 | ROI_KEYPOINT_PREDICTOR = Registry() 12 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 13 | ROI_MASK_PREDICTOR = Registry() 14 | ROI_RELATION_FEATURE_EXTRACTORS = Registry() 15 | ROI_RELATION_PREDICTOR = Registry() 16 | RELATION_CONFIDENCE_AWARE_MODULES = Registry() 17 | -------------------------------------------------------------------------------- /hetsgg/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | 4 | 5 | def findContours(*args, **kwargs): 6 | """ 7 | Wraps cv2.findContours to maintain compatibility between versions 8 | 3 and 4 9 | 10 | Returns: 11 | contours, hierarchy 12 | """ 13 | if cv2.__version__.startswith('4'): 14 | contours, hierarchy = cv2.findContours(*args, **kwargs) 15 | elif cv2.__version__.startswith('3'): 16 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 17 | else: 18 | raise AssertionError( 19 | 'cv2 must be either version 3 or 4 to call this method') 20 | 21 | return contours, hierarchy 22 |
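The registries declared in modeling/registry.py above are consumed through the decorator-plus-lookup pattern used by the roi_heads predictors later in this dump (e.g. roi_keypoint_predictors.py). A sketch of wiring a custom box predictor into ROI_BOX_PREDICTOR; the class and the make_* helper are hypothetical stand-ins:

from torch import nn

from hetsgg.modeling import registry


@registry.ROI_BOX_PREDICTOR.register("MyBoxPredictor")  # hypothetical name
class MyBoxPredictor(nn.Module):
    def __init__(self, cfg, in_channels):
        super(MyBoxPredictor, self).__init__()
        # NUM_CLASSES is the usual maskrcnn-benchmark config key (an assumption here).
        self.cls_score = nn.Linear(in_channels, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)

    def forward(self, x):
        return self.cls_score(x)


def make_roi_box_predictor(cfg, in_channels):
    # A Registry is just a dict keyed by the registered name; this mirrors
    # make_roi_keypoint_predictor / make_roi_attribute_predictor below.
    func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR]
    return func(cfg, in_channels)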
-------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | return do_coco_evaluation( 16 | dataset=dataset, 17 | predictions=predictions, 18 | box_only=box_only, 19 | output_folder=output_folder, 20 | logger=logger, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | -------------------------------------------------------------------------------- /hetsgg/utils/imports.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | if torch._six.PY37: 4 | import importlib 5 | import importlib.util 6 | import sys 7 | 8 | 9 | def import_file(module_name, file_path, make_importable=False): 10 | spec = importlib.util.spec_from_file_location(module_name, file_path) 11 | module = importlib.util.module_from_spec(spec) 12 | spec.loader.exec_module(module) 13 | if make_importable: 14 | sys.modules[module_name] = module 15 | return module 16 | else: 17 | import imp 18 | 19 | def import_file(module_name, file_path, make_importable=None): 20 | module = imp.load_source(module_name, file_path) 21 | return module 22 | -------------------------------------------------------------------------------- /hetsgg/csrc/nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #endif 7 | 8 | 9 | at::Tensor nms(const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float threshold) { 12 | 13 | if (dets.type().is_cuda()) { 14 | #ifdef WITH_CUDA 15 | // TODO raise error if not compiled with CUDA 16 | if (dets.numel() == 0) 17 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 18 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 19 | return nms_cuda(b, threshold); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | 25 | at::Tensor result = nms_cpu(dets, scores, threshold); 26 | return result; 27 | } 28 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | 3 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 4 | 5 | 6 | class ConcatDataset(_ConcatDataset): 7 | """ 8 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 9 | method for querying the sizes of the image 10 | """ 11 | 12 | def get_idxs(self, idx): 13 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 14 | if dataset_idx == 0: 15 | sample_idx = idx 16 | else: 17 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 18 | return dataset_idx, sample_idx 19 | 20 | def get_img_info(self, idx): 21 | dataset_idx, sample_idx = self.get_idxs(idx) 22 | return self.datasets[dataset_idx].get_img_info(sample_idx) 23 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/vgg.py: -------------------------------------------------------------------------------- 1 | 2 | from 
collections import namedtuple 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import nn 7 | 8 | import torchvision.models as models 9 | from hetsgg.layers import FrozenBatchNorm2d 10 | from hetsgg.layers import Conv2d 11 | from hetsgg.layers import DFConv2d 12 | from hetsgg.modeling.make_layers import group_norm 13 | from hetsgg.utils.registry import Registry 14 | 15 | 16 | class VGG16(nn.Module): 17 | def __init__(self, cfg): 18 | super(VGG16, self).__init__() 19 | vgg = models.vgg16(pretrained=True) 20 | self.conv_body = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 21 | 22 | def forward(self, x): 23 | output = [] 24 | output.append(self.conv_body(x)) 25 | return output 26 | 27 | -------------------------------------------------------------------------------- /hetsgg/utils/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from hetsgg.utils.imports import import_file 4 | 5 | 6 | def setup_environment(): 7 | 8 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 9 | if custom_module_path: 10 | setup_custom_environment(custom_module_path) 11 | else: 12 | pass 13 | 14 | 15 | def setup_custom_environment(custom_module_path): 16 | 17 | module = import_file("hetsgg.utils.env.custom_module", custom_module_path) 18 | assert hasattr(module, "setup_environment") and callable( 19 | module.setup_environment 20 | ), ( 21 | "Custom environment module defined in {} does not have the " 22 | "required callable attribute 'setup_environment'." 23 | ).format( 24 | custom_module_path 25 | ) 26 | module.setup_environment() 27 | 28 | 29 | setup_environment() 30 | -------------------------------------------------------------------------------- /hetsgg/engine/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | 4 | from hetsgg.utils.comm import get_world_size 5 | 6 | 7 | def reduce_loss_dict(loss_dict): 8 | """ 9 | Reduce the loss dictionary from all processes so that process with rank 10 | 0 has the averaged results. Returns a dict with the same fields as 11 | loss_dict, after reduction. 12 | """ 13 | world_size = get_world_size() 14 | if world_size < 2: 15 | return loss_dict 16 | with torch.no_grad(): 17 | loss_names = [] 18 | all_losses = [] 19 | for k in sorted(loss_dict.keys()): 20 | loss_names.append(k) 21 | all_losses.append(loss_dict[k]) 22 | all_losses = torch.stack(all_losses, dim=0) 23 | dist.reduce(all_losses, dst=0) 24 | if dist.get_rank() == 0: 25 | all_losses /= world_size 26 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 27 | return reduced_losses 28 | -------------------------------------------------------------------------------- /hetsgg/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | from hetsgg.structures.image_list import to_image_list 2 | 3 | 4 | class BatchCollator(object): 5 | """ 6 | From a list of samples from the dataset, 7 | returns the batched images and targets. 
8 | This should be passed to the DataLoader 9 | """ 10 | 11 | def __init__(self, size_divisible=0): 12 | self.size_divisible = size_divisible 13 | 14 | def __call__(self, batch): 15 | transposed_batch = list(zip(*batch)) 16 | images = to_image_list(transposed_batch[0], self.size_divisible) 17 | targets = transposed_batch[1] 18 | img_ids = transposed_batch[2] 19 | return images, targets, img_ids 20 | 21 | 22 | class BBoxAugCollator(object): 23 | """ 24 | From a list of samples from the dataset, 25 | returns the images and targets. 26 | Images should be converted to batched images in `im_detect_bbox_aug` 27 | """ 28 | 29 | def __call__(self, batch): 30 | return list(zip(*batch)) 31 | 32 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple dataset class that wraps a list of path names 3 | """ 4 | 5 | from PIL import Image 6 | 7 | from hetsgg.structures.bounding_box import BoxList 8 | 9 | 10 | class ListDataset(object): 11 | def __init__(self, image_lists, transforms=None): 12 | self.image_lists = image_lists 13 | self.transforms = transforms 14 | 15 | def __getitem__(self, item): 16 | img = Image.open(self.image_lists[item]).convert("RGB") 17 | 18 | # dummy target 19 | w, h = img.size 20 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 21 | 22 | if self.transforms is not None: 23 | img, target = self.transforms(img, target) 24 | 25 | return img, target 26 | 27 | def __len__(self): 28 | return len(self.image_lists) 29 | 30 | def get_img_info(self, item): 31 | """ 32 | Return the image dimensions for the image, without 33 | loading and pre-processing it 34 | """ 35 | pass 36 | -------------------------------------------------------------------------------- /shell/hetsgg_test.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="6" 2 | export num_gpu=2 3 | export use_multi_gpu=false 4 | export task='sgcls' 5 | 6 | export test_list=('0045000') # checkpoint 7 | 8 | export save_result=False 9 | export output_dir="/checkpoints/" # Please input the checkpoint directory 10 | 11 | if $use_multi_gpu;then 12 | for name in ${test_list[@]} 13 | do 14 | python -m torch.distributed.launch --master_port 10025 --nproc_per_node=${num_gpu} tools/relation_test_net.py --config-file "${output_dir}/config.yml" \ 15 | TEST.IMS_PER_BATCH 16 \ 16 | TEST.SAVE_RESULT ${save_result} \ 17 | OUTPUT_DIR ${output_dir} \ 18 | MODEL.WEIGHT "${output_dir}/model_${name}.pth" 19 | done 20 | else 21 | for name in ${test_list[@]} 22 | do 23 | python tools/relation_test_net.py --config-file "${output_dir}/config.yml" \ 24 | TEST.IMS_PER_BATCH 8 \ 25 | TEST.SAVE_RESULT ${save_result} \ 26 | OUTPUT_DIR ${output_dir} \ 27 | MODEL.WEIGHT "${output_dir}/model_${name}.pth" 28 | done 29 | fi -------------------------------------------------------------------------------- /hetsgg/utils/timer.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import time 4 | import datetime 5 | 6 | 7 | class Timer(object): 8 | def __init__(self): 9 | self.reset() 10 | 11 | @property 12 | def average_time(self): 13 | return self.total_time / self.calls if self.calls > 0 else 0.0 14 | 15 | def tic(self): 16 | self.start_time = time.time() 17 | 18 | def toc(self, average=True): 19 | self.add(time.time() - self.start_time) 20 | if average: 21 | return self.average_time 22 |
else: 23 | return self.diff 24 | 25 | def add(self, time_diff): 26 | self.diff = time_diff 27 | self.total_time += self.diff 28 | self.calls += 1 29 | 30 | def reset(self): 31 | self.total_time = 0.0 32 | self.calls = 0 33 | self.start_time = 0.0 34 | self.diff = 0.0 35 | 36 | def avg_time_str(self): 37 | time_str = str(datetime.timedelta(seconds=self.average_time)) 38 | return time_str 39 | 40 | 41 | def get_time_str(time_diff): 42 | time_str = str(datetime.timedelta(seconds=time_diff)) 43 | return time_str 44 | -------------------------------------------------------------------------------- /hetsgg/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class FrozenBatchNorm2d(nn.Module): 6 | """ 7 | BatchNorm2d where the batch statistics and the affine parameters 8 | are fixed 9 | """ 10 | 11 | def __init__(self, n): 12 | super(FrozenBatchNorm2d, self).__init__() 13 | self.register_buffer("weight", torch.ones(n)) 14 | self.register_buffer("bias", torch.zeros(n)) 15 | self.register_buffer("running_mean", torch.zeros(n)) 16 | self.register_buffer("running_var", torch.ones(n)) 17 | 18 | def forward(self, x): 19 | # Cast all fixed parameters to half() if necessary 20 | if x.dtype == torch.float16: 21 | self.weight = self.weight.half() 22 | self.bias = self.bias.half() 23 | self.running_mean = self.running_mean.half() 24 | self.running_var = self.running_var.half() 25 | 26 | scale = self.weight * self.running_var.rsqrt() 27 | bias = self.bias - self.running_mean * scale 28 | scale = scale.reshape(1, -1, 1, 1) 29 | bias = bias.reshape(1, -1, 1, 1) 30 | return x * scale + bias 31 | -------------------------------------------------------------------------------- /hetsgg/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /hetsgg/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import BatchSampler 2 | 3 | 4 | class IterationBasedBatchSampler(BatchSampler): 5 | """ 6 | Wraps a BatchSampler, resampling from it until 7 | a specified number of iterations have been sampled 8 | """ 9 | 10 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 11 | 
self.batch_sampler = batch_sampler 12 | self.num_iterations = num_iterations 13 | self.start_iter = start_iter 14 | 15 | def __iter__(self): 16 | iteration = self.start_iter 17 | while iteration <= self.num_iterations: 18 | # if the underlying sampler has a set_epoch method, like 19 | # DistributedSampler, used for making each process see 20 | # a different split of the dataset, then set it 21 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 22 | self.batch_sampler.sampler.set_epoch(iteration) 23 | for batch in self.batch_sampler: 24 | iteration += 1 25 | if iteration > self.num_iterations: 26 | break 27 | yield batch 28 | 29 | def __len__(self): 30 | return self.num_iterations 31 | -------------------------------------------------------------------------------- /hetsgg/layers/_utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os.path 3 | 4 | import torch 5 | 6 | try: 7 | from torch.utils.cpp_extension import load as load_ext 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | except ImportError: 10 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 11 | 12 | 13 | def _load_C_extensions(): 14 | this_dir = os.path.dirname(os.path.abspath(__file__)) 15 | this_dir = os.path.dirname(this_dir) 16 | this_dir = os.path.join(this_dir, "csrc") 17 | 18 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 19 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 21 | 22 | source = main_file + source_cpu 23 | 24 | extra_cflags = [] 25 | if torch.cuda.is_available() and CUDA_HOME is not None: 26 | source.extend(source_cuda) 27 | extra_cflags = ["-DWITH_CUDA"] 28 | source = [os.path.join(this_dir, s) for s in source] 29 | extra_include_paths = [this_dir] 30 | return load_ext( 31 | "torchvision", 32 | source, 33 | extra_cflags=extra_cflags, 34 | extra_include_paths=extra_include_paths, 35 | ) 36 | 37 | 38 | _C = _load_C_extensions() 39 | -------------------------------------------------------------------------------- /tools/runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import time 6 | 7 | import gpustat 8 | import torch 9 | import numpy as np 10 | 11 | 12 | def start(): 13 | mem = None 14 | gpu_id = int(os.environ["CUDA_VISIBLE_DEVICES"].split(",")[0]) 15 | 16 | while True: 17 | info = gpustat.core.GPUStatCollection.new_query() 18 | gpu_info = info.jsonify()['gpus'][gpu_id] 19 | u_ratio = gpu_info['utilization.gpu'] 20 | mem_ratio = gpu_info['memory.used'] / gpu_info['memory.total'] 21 | # Grow a dummy allocation to keep this GPU's memory and utilization up. 22 | if mem is None: 23 | mem = torch.rand((25000, 8196), device=torch.device("cuda")) 24 | 25 | if u_ratio < 30: 26 | if mem_ratio < 0.50: 27 | mem = torch.cat((mem, torch.rand((25000, 8196), device=torch.device("cuda")))).cuda() 28 | elif mem_ratio < 0.95: 29 | mem = torch.cat((mem, torch.rand((10000, 8196), device=torch.device("cuda")))).cuda() 30 | 31 | else: 32 | if mem is not None: 33 | for _ in range(100): 34 | mem *= mem 35 | mem /= mem 36 | time.sleep(0.001) 37 | 38 | 39 | 40 | if __name__ == "__main__": 41 | start() -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions manipulating the prediction
layers 3 | """ 4 | 5 | from ..utils import cat 6 | 7 | import torch 8 | 9 | def permute_and_flatten(layer, N, A, C, H, W): 10 | layer = layer.view(N, -1, C, H, W) 11 | layer = layer.permute(0, 3, 4, 1, 2) 12 | layer = layer.reshape(N, -1, C) 13 | return layer 14 | 15 | 16 | def concat_box_prediction_layers(box_cls, box_regression): 17 | box_cls_flattened = [] 18 | box_regression_flattened = [] 19 | 20 | for box_cls_per_level, box_regression_per_level in zip( 21 | box_cls, box_regression 22 | ): 23 | N, AxC, H, W = box_cls_per_level.shape 24 | Ax4 = box_regression_per_level.shape[1] 25 | A = Ax4 // 4 26 | C = AxC // A 27 | box_cls_per_level = permute_and_flatten( 28 | box_cls_per_level, N, A, C, H, W 29 | ) 30 | box_cls_flattened.append(box_cls_per_level) 31 | 32 | box_regression_per_level = permute_and_flatten( 33 | box_regression_per_level, N, A, 4, H, W 34 | ) 35 | box_regression_flattened.append(box_regression_per_level) 36 | 37 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 38 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 39 | return box_cls, box_regression 40 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from hetsgg.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .oi import oi_evaluation 5 | from .voc import voc_evaluation 6 | from .vg import vg_evaluation 7 | 8 | 9 | def evaluate(cfg, dataset, predictions, output_folder, logger, **kwargs): 10 | """evaluate dataset using different methods based on dataset type. 11 | Args: 12 | dataset: Dataset object 13 | predictions(list[BoxList]): each item in the list represents the 14 | prediction results for one image. 15 | output_folder: output folder, to save evaluation files or results. 16 | **kwargs: other args. 
17 | Returns: 18 | evaluation result 19 | """ 20 | args = dict( 21 | cfg=cfg, dataset=dataset, predictions=predictions, output_folder=output_folder, logger=logger, **kwargs 22 | ) 23 | if isinstance(dataset, datasets.COCODataset): 24 | return coco_evaluation(**args) 25 | elif isinstance(dataset, datasets.PascalVOCDataset): 26 | return voc_evaluation(**args) 27 | elif isinstance(dataset, datasets.VGDataset): 28 | return vg_evaluation(**args) 29 | elif isinstance(dataset, datasets.OIDataset): 30 | return oi_evaluation(**args) 31 | else: 32 | dataset_name = dataset.__class__.__name__ 33 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 34 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from hetsgg import layers 4 | from hetsgg.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel = 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /hetsgg/utils/registry.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def _register_generic(module_dict, module_name, module): 4 | assert module_name not in module_dict 5 | module_dict[module_name] = module 6 | 7 | 8 | class Registry(dict): 9 | ''' 10 | A helper class for registering and managing modules; it extends a dictionary 11 | and provides a register function. 12 | 13 | E.g., creating a registry: 14 | some_registry = Registry({"default": default_module}) 15 | 16 | There are two ways of registering new modules: 17 | 1): the normal way is just calling the register function: 18 | def foo(): 19 | ... 20 | some_registry.register("foo_module", foo) 21 | 2): used as a decorator when declaring the module: 22 | @some_registry.register("foo_module") 23 | @some_registry.register("foo_module_nickname") 24 | def foo(): 25 | ...
26 | 27 | Accessing a module is just like using a dictionary, e.g.: 28 | f = some_registry["foo_module"] 29 | ''' 30 | def __init__(self, *args, **kwargs): 31 | super(Registry, self).__init__(*args, **kwargs) 32 | 33 | def register(self, module_name, module=None): 34 | # used as function call 35 | if module is not None: 36 | _register_generic(self, module_name, module) 37 | return 38 | 39 | # used as decorator 40 | def register_fn(fn): 41 | _register_generic(self, module_name, fn) 42 | return fn 43 | 44 | return register_fn 45 | -------------------------------------------------------------------------------- /hetsgg/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "nms.h" 2 | #include "ROIAlign.h" 3 | #include "ROIPool.h" 4 | #include "SigmoidFocalLoss.h" 5 | #include "deform_conv.h" 6 | #include "deform_pool.h" 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 11 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 12 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 13 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 14 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 15 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 16 | // dcn-v2 17 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 18 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 19 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 20 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 21 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 22 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 23 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 24 | } -------------------------------------------------------------------------------- /hetsgg/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | from .
import transforms as T 2 | 3 | 4 | def build_transforms(cfg, is_train=True): 5 | if is_train: 6 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 7 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 8 | flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 9 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 10 | brightness = cfg.INPUT.BRIGHTNESS 11 | contrast = cfg.INPUT.CONTRAST 12 | saturation = cfg.INPUT.SATURATION 13 | hue = cfg.INPUT.HUE 14 | else: 15 | min_size = cfg.INPUT.MIN_SIZE_TEST 16 | max_size = cfg.INPUT.MAX_SIZE_TEST 17 | flip_horizontal_prob = 0.0 18 | flip_vertical_prob = 0.0 19 | brightness = 0.0 20 | contrast = 0.0 21 | saturation = 0.0 22 | hue = 0.0 23 | 24 | to_bgr255 = cfg.INPUT.TO_BGR255 25 | normalize_transform = T.Normalize( 26 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 27 | ) 28 | color_jitter = T.ColorJitter( 29 | brightness=brightness, 30 | contrast=contrast, 31 | saturation=saturation, 32 | hue=hue, 33 | ) 34 | 35 | transform = T.Compose( 36 | [ 37 | color_jitter, 38 | T.Resize(min_size, max_size), 39 | T.RandomHorizontalFlip(flip_horizontal_prob), 40 | T.RandomVerticalFlip(flip_vertical_prob), 41 | T.ToTensor(), 42 | normalize_transform, 43 | ] 44 | ) 45 | return transform 46 | -------------------------------------------------------------------------------- /shell/hetsgg_train_sggen_oi.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="4" 2 | export num_gpu=2 3 | export use_multi_gpu=false 4 | export use_obj_refine=False 5 | export task='sggen' 6 | 7 | export config=oi_v6 # oi_v4, oi_v6 8 | export output_dir="checkpoints/${task}-HetSGGPredictor-${config}" 9 | 10 | export path_faster_rcnn='' 11 | 12 | if $use_multi_gpu;then 13 | python -m torch.distributed.launch --master_port 10023 --nproc_per_node=${num_gpu} tools/relation_train_net.py --config-file "configs/relHetSGG_${config}.yaml" \ 14 | SOLVER.IMS_PER_BATCH 18 \ 15 | TEST.IMS_PER_BATCH 12 \ 16 | OUTPUT_DIR ${output_dir} \ 17 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 18 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 19 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 20 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 21 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 22 | else 23 | python tools/relation_train_net.py --config-file "configs/relHetSGG_${config}.yaml" \ 24 | SOLVER.IMS_PER_BATCH 9 \ 25 | TEST.IMS_PER_BATCH 6 \ 26 | OUTPUT_DIR ${output_dir} \ 27 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 28 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 29 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 30 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 31 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 32 | fi 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /hetsgg/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | try: 5 | from torch.hub import _download_url_to_file 6 | from torch.hub import urlparse 7 | from torch.hub import HASH_REGEX 8 | except ImportError: 9 | from torch.utils.model_zoo import _download_url_to_file 10 | from torch.utils.model_zoo import urlparse 11 | from torch.utils.model_zoo import HASH_REGEX 12 | 13 | from hetsgg.utils.comm import is_main_process 14 | from hetsgg.utils.comm import synchronize 15 | 16 | 17 | def cache_url(url, model_dir=None, 
progress=True): 18 | 19 | if model_dir is None: 20 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 21 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 22 | if not os.path.exists(model_dir): 23 | os.makedirs(model_dir) 24 | parts = urlparse(url) 25 | filename = os.path.basename(parts.path) 26 | if filename == "model_final.pkl": 27 | filename = parts.path.replace("/", "_") 28 | cached_file = os.path.join(model_dir, filename) 29 | if not os.path.exists(cached_file) and is_main_process(): 30 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 31 | hash_prefix = HASH_REGEX.search(filename) 32 | if hash_prefix is not None: 33 | hash_prefix = hash_prefix.group(1) 34 | if len(hash_prefix) < 6: 35 | hash_prefix = None 36 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 37 | synchronize() 38 | return cached_file 39 | -------------------------------------------------------------------------------- /shell/hetsgg_train_sggen_vg.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="4" 2 | export num_gpu=1 3 | export use_multi_gpu=false 4 | export use_obj_refine=False 5 | export task='sggen' 6 | 7 | export output_dir="checkpoints/${task}-HetSGGPredictor-vg" 8 | 9 | export model_config="relHetSGG_vg" # relHetSGG_vg, relHetSGGp_vg 10 | 11 | 12 | if $use_multi_gpu;then 13 | # Multi GPU -sgcls Task 14 | python -m torch.distributed.launch --master_port 10023 --nproc_per_node=${num_gpu} tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 15 | SOLVER.IMS_PER_BATCH 18 \ 16 | TEST.IMS_PER_BATCH 12 \ 17 | OUTPUT_DIR ${output_dir} \ 18 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 19 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 20 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 21 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 22 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 23 | else 24 | # Single GPU 25 | python tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 26 | SOLVER.IMS_PER_BATCH 9 \ 27 | TEST.IMS_PER_BATCH 6 \ 28 | OUTPUT_DIR ${output_dir} \ 29 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 30 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 31 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 32 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 33 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 34 | fi 35 | -------------------------------------------------------------------------------- /shell/hetsgg_train_sgcls_vg.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="0" 2 | export num_gpu=1 3 | export use_multi_gpu=false 4 | export use_obj_refine=True 5 | export task='sgcls' 6 | 7 | export model_config="relHetSGG_vg" # relHetSGG_vg, relHetSGGp_vg 8 | export output_dir="checkpoints/${task}-HetSGGPredictor-vg" 9 | 10 | export path_faster_rcnn='' 11 | 12 | 13 | if $use_multi_gpu;then 14 | # Multi GPU -sgcls Task 15 | python -m torch.distributed.launch --master_port 10032 --nproc_per_node=$num_gpu tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 16 | SOLVER.IMS_PER_BATCH 9 \ 17 | TEST.IMS_PER_BATCH 6 \ 18 | OUTPUT_DIR ${output_dir} \ 19 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 20 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 21 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS 
${use_obj_refine} \ 22 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 23 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 24 | else 25 | # Single GPU 26 | python tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 27 | SOLVER.IMS_PER_BATCH 9 \ 28 | TEST.IMS_PER_BATCH 6 \ 29 | OUTPUT_DIR ${output_dir} \ 30 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 31 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 32 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 33 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 34 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 35 | fi -------------------------------------------------------------------------------- /hetsgg/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .batch_norm import FrozenBatchNorm2d 4 | from .misc import Conv2d 5 | from .misc import DFConv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import BatchNorm2d 8 | from .misc import interpolate 9 | from .nms import nms 10 | from .roi_align import ROIAlign 11 | from .roi_align import roi_align 12 | from .roi_pool import ROIPool 13 | from .roi_pool import roi_pool 14 | from .entropy_loss import entropy_loss 15 | from .kl_div_loss import kl_div_loss 16 | from .smooth_l1_loss import smooth_l1_loss 17 | from .sigmoid_focal_loss import SigmoidFocalLoss 18 | from .label_smoothing_loss import Label_Smoothing_Regression 19 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 20 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 21 | from .dcn.deform_pool_func import deform_roi_pooling 22 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 23 | 24 | 25 | __all__ = [ 26 | "nms", 27 | "roi_align", 28 | "ROIAlign", 29 | "roi_pool", 30 | "ROIPool", 31 | "smooth_l1_loss", 32 | "entropy_loss", 33 | "kl_div_loss", 34 | "Conv2d", 35 | "DFConv2d", 36 | "ConvTranspose2d", 37 | "interpolate", 38 | "BatchNorm2d", 39 | "FrozenBatchNorm2d", 40 | "SigmoidFocalLoss", 41 | "Label_Smoothing_Regression", 42 | 'deform_conv', 43 | 'modulated_deform_conv', 44 | 'DeformConv', 45 | 'ModulatedDeformConv', 46 | 'ModulatedDeformConvPack', 47 | 'deform_roi_pooling', 48 | 'DeformRoIPooling', 49 | 'DeformRoIPoolingPack', 50 | 'ModulatedDeformRoIPoolingPack', 51 | ] 52 | 53 | -------------------------------------------------------------------------------- /hetsgg/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 11 | const at::Tensor& rois, 12 | const float spatial_scale, 13 | const int pooled_height, 14 | const int pooled_width) { 15 | if (input.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 18 | #else 19 | AT_ERROR("Not compiled with GPU support"); 20 | #endif 21 | } 22 | AT_ERROR("Not implemented on the CPU"); 23 | } 24 | 25 | at::Tensor ROIPool_backward(const at::Tensor& grad, 26 | const at::Tensor& input, 27 | const at::Tensor& rois, 28 | const at::Tensor& argmax, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 |
const int height, 35 | const int width) { 36 | if (grad.type().is_cuda()) { 37 | #ifdef WITH_CUDA 38 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 39 | #else 40 | AT_ERROR("Not compiled with GPU support"); 41 | #endif 42 | } 43 | AT_ERROR("Not implemented on the CPU"); 44 | } 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /hetsgg/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor ROIAlign_forward(const at::Tensor& input, 11 | const at::Tensor& rois, 12 | const float spatial_scale, 13 | const int pooled_height, 14 | const int pooled_width, 15 | const int sampling_ratio) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 24 | } 25 | 26 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 27 | const at::Tensor& rois, 28 | const float spatial_scale, 29 | const int pooled_height, 30 | const int pooled_width, 31 | const int batch_size, 32 | const int channels, 33 | const int height, 34 | const int width, 35 | const int sampling_ratio) { 36 | if (grad.type().is_cuda()) { 37 | #ifdef WITH_CUDA 38 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 39 | #else 40 | AT_ERROR("Not compiled with GPU support"); 41 | #endif 42 | } 43 | AT_ERROR("Not implemented on the CPU"); 44 | } 45 | 46 | -------------------------------------------------------------------------------- /hetsgg/utils/global_buffer.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import pickle 3 | import torch 4 | import os 5 | 6 | from hetsgg.utils.comm import is_main_process, get_world_size, all_gather, synchronize 7 | from hetsgg.config import cfg 8 | 9 | def singleton(cls): 10 | _instance = {} 11 | 12 | def inner(): 13 | if cls not in _instance: 14 | _instance[cls] = cls() 15 | return _instance[cls] 16 | return inner 17 | 18 | 19 | @singleton 20 | class _GlobalBuffer(): 21 | """A singleton buffer for storing data from anywhere in the program 22 | """ 23 | def __init__(self): 24 | self.multi_proc = (get_world_size() > 1) 25 | self.data = defaultdict(list) 26 | 27 | def add_data(self, key, val): 28 | if not isinstance(val, torch.Tensor): 29 | val = torch.Tensor(val) 30 | else: 31 | val = val.detach() 32 | 33 | val = torch.cat(all_gather(val)) 34 | 35 | if not is_main_process(): 36 | del val 37 | return 38 | self.data[key].append(val.cpu().numpy()) 39 | 40 | def __str__(self): 41 | ret_str = f"Buffer contains data: (key, value type)\n" 42 | for k, v in self.data.items(): 43 | ret_str += f" {k}, {type(v).__name__}\n" 44 | ret_str += f"id {id(self)}" 45 | return ret_str 46 | 47 | 48 | def store_data(k, v): 49 | if cfg.GLOBAL_BUFFER_ON: 50 | buffer = _GlobalBuffer() 51 | buffer.add_data(k, v) 52 | synchronize() 53 | 54 | 55 | def save_buffer(output_dir): 56 | if cfg.GLOBAL_BUFFER_ON: 57 | if is_main_process(): 58 | buffer =
_GlobalBuffer() 59 | with open(os.path.join(output_dir, "inter_data_buffer.pkl"), 'wb') as f: 60 | pickle.dump(buffer.data, f) 61 | 62 | print("save buffer:", str(buffer)) 63 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/roi_attribute_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from hetsgg.modeling import registry 4 | 5 | 6 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, cfg, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_attributes = cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.att_score = nn.Linear(num_inputs, num_attributes) 16 | 17 | nn.init.normal_(self.att_score.weight, mean=0, std=0.01) 18 | nn.init.constant_(self.att_score.bias, 0) 19 | 20 | def forward(self, x): 21 | x = self.avgpool(x) 22 | x = x.view(x.size(0), -1) 23 | att_logit = self.att_score(x) 24 | 25 | return att_logit 26 | 27 | 28 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FPNPredictor") 29 | class FPNPredictor(nn.Module): 30 | def __init__(self, cfg, in_channels): 31 | super(FPNPredictor, self).__init__() 32 | num_attributes = cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES 33 | representation_size = in_channels 34 | 35 | self.att_score = nn.Linear(representation_size, num_attributes) 36 | 37 | nn.init.normal_(self.att_score.weight, std=0.01) 38 | nn.init.constant_(self.att_score.bias, 0) 39 | 40 | def forward(self, x): 41 | if x.ndimension() == 4: 42 | assert list(x.shape[2:]) == [1, 1] 43 | x = x.view(x.size(0), -1) 44 | 45 | att_logit = self.att_score(x) 46 | 47 | return att_logit 48 | 49 | 50 | def make_roi_attribute_predictor(cfg, in_channels): 51 | func = registry.ROI_ATTRIBUTE_PREDICTOR[cfg.MODEL.ROI_ATTRIBUTE_HEAD.PREDICTOR] 52 | return func(cfg, in_channels) 53 | -------------------------------------------------------------------------------- /hetsgg/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from collections import deque 3 | 4 | import torch 5 | 6 | 7 | class SmoothedValue(object): 8 | 9 | def __init__(self, window_size=20): 10 | self.deque = deque(maxlen=window_size) 11 | self.series = [] 12 | self.total = 0.0 13 | self.count = 0 14 | 15 | def update(self, value): 16 | self.deque.append(value) 17 | self.series.append(value) 18 | self.count += 1 19 | self.total += value 20 | 21 | @property 22 | def median(self): 23 | d = torch.tensor(list(self.deque)) 24 | return d.median().item() 25 | 26 | @property 27 | def avg(self): 28 | d = torch.tensor(list(self.deque)) 29 | return d.mean().item() 30 | 31 | @property 32 | def global_avg(self): 33 | return self.total / self.count 34 | 35 | 36 | class MetricLogger(object): 37 | def __init__(self, delimiter="\t"): 38 | self.meters = defaultdict(SmoothedValue) 39 | self.delimiter = delimiter 40 | 41 | def update(self, **kwargs): 42 | for k, v in kwargs.items(): 43 | if isinstance(v, torch.Tensor): 44 | v = v.item() 45 | assert isinstance(v, (float, int)) 46 | self.meters[k].update(v) 47 | 48 | def __getattr__(self, attr): 49 | if attr in self.meters: 50 | return self.meters[attr] 51 | if attr in self.__dict__: 52 | return self.__dict__[attr] 53 | raise AttributeError("'{}' object has no 
attribute '{}'".format( 54 | type(self).__name__, attr)) 55 | 56 | def __str__(self): 57 | loss_str = [] 58 | for name, meter in self.meters.items(): 59 | loss_str.append( 60 | "{}: {:.4f} ({:.4f})\n".format(name, meter.median, meter.global_avg) 61 | ) 62 | return self.delimiter.join(loss_str) 63 | -------------------------------------------------------------------------------- /hetsgg/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #endif 7 | 8 | 9 | // Interface for Python 10 | void deform_psroi_pooling_forward( 11 | at::Tensor input, 12 | at::Tensor bbox, 13 | at::Tensor trans, 14 | at::Tensor out, 15 | at::Tensor top_count, 16 | const int no_trans, 17 | const float spatial_scale, 18 | const int output_dim, 19 | const int group_size, 20 | const int pooled_size, 21 | const int part_size, 22 | const int sample_per_part, 23 | const float trans_std) 24 | { 25 | if (input.type().is_cuda()) { 26 | #ifdef WITH_CUDA 27 | return deform_psroi_pooling_cuda_forward( 28 | input, bbox, trans, out, top_count, 29 | no_trans, spatial_scale, output_dim, group_size, 30 | pooled_size, part_size, sample_per_part, trans_std 31 | ); 32 | #else 33 | AT_ERROR("Not compiled with GPU support"); 34 | #endif 35 | } 36 | AT_ERROR("Not implemented on the CPU"); 37 | } 38 | 39 | 40 | void deform_psroi_pooling_backward( 41 | at::Tensor out_grad, 42 | at::Tensor input, 43 | at::Tensor bbox, 44 | at::Tensor trans, 45 | at::Tensor top_count, 46 | at::Tensor input_grad, 47 | at::Tensor trans_grad, 48 | const int no_trans, 49 | const float spatial_scale, 50 | const int output_dim, 51 | const int group_size, 52 | const int pooled_size, 53 | const int part_size, 54 | const int sample_per_part, 55 | const float trans_std) 56 | { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return deform_psroi_pooling_cuda_backward( 60 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 61 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 62 | part_size, sample_per_part, trans_std 63 | ); 64 | #else 65 | AT_ERROR("Not compiled with GPU support"); 66 | #endif 67 | } 68 | AT_ERROR("Not implemented on the CPU"); 69 | } 70 | -------------------------------------------------------------------------------- /shell/hetsgg_train_predcls_vg.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="6" 2 | export num_gpu=1 3 | export use_multi_gpu=false 4 | export use_obj_refine=False 5 | export task='predcls' 6 | 7 | export REPEAT_FACTOR=0.13 8 | export INSTANCE_DROP_RATE=1.6 9 | 10 | export model_config="relHetSGGp_vg" # relHetSGG_vg, relHetSGGp_vg 11 | export output_dir="checkpoints/${task}-HetSGGPredictor-vg" 12 | 13 | # export path_faster_rcnn='' # Put faster r-cnn path 14 | 15 | if $use_multi_gpu;then 16 | python -m torch.distributed.launch --master_port 10029 --nproc_per_node=${num_gpu} tools/relation_train_net.py \ 17 | --config-file "configs/${model_config}.yaml" \ 18 | SOLVER.IMS_PER_BATCH 18 \ 19 | TEST.IMS_PER_BATCH 12 \ 20 | OUTPUT_DIR ${output_dir} \ 21 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL True \ 22 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 23 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 24 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 25 | MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.REPEAT_FACTOR ${REPEAT_FACTOR} \ 26 
| MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.INSTANCE_DROP_RATE ${INSTANCE_DROP_RATE} \ 27 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 28 | else 29 | python tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 30 | SOLVER.IMS_PER_BATCH 9 \ 31 | TEST.IMS_PER_BATCH 6 \ 32 | OUTPUT_DIR ${output_dir} \ 33 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL True \ 34 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 35 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 36 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 37 | MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.REPEAT_FACTOR ${REPEAT_FACTOR} \ 38 | MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.INSTANCE_DROP_RATE ${INSTANCE_DROP_RATE} 39 | # MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 40 | fi 41 | -------------------------------------------------------------------------------- /hetsgg/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from hetsgg import _C 8 | 9 | from apex import amp 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | @amp.float_function 56 | def forward(self, input, rois): 57 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 58 | 59 | def __repr__(self): 60 | tmpstr = self.__class__.__name__ + "(" 61 | tmpstr += "output_size=" + str(self.output_size) 62 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 63 | tmpstr += ")" 64 | return tmpstr 65 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.modeling import registry 5 | from hetsgg.modeling.poolers import Pooler 6 | 7 | from hetsgg.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = 
cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.layers import Conv2d 5 | from hetsgg.layers import ConvTranspose2d 6 | from hetsgg.modeling import registry 7 | 8 | 9 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 10 | class MaskRCNNC4Predictor(nn.Module): 11 | def __init__(self, cfg, in_channels): 12 | super(MaskRCNNC4Predictor, self).__init__() 13 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 14 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 15 | num_inputs = in_channels 16 | 17 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 18 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 19 | 20 | for name, param in self.named_parameters(): 21 | if "bias" in name: 22 | nn.init.constant_(param, 0) 23 | elif "weight" in name: 24 | 25 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 26 | 27 | def forward(self, x): 28 | x = F.relu(self.conv5_mask(x)) 29 | return self.mask_fcn_logits(x) 30 | 31 | 32 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 33 | class MaskRCNNConv1x1Predictor(nn.Module): 34 | def __init__(self, cfg, in_channels): 35 | super(MaskRCNNConv1x1Predictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | num_inputs = in_channels 38 | 39 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 40 | 41 | for name, param in self.named_parameters(): 42 | if "bias" in name: 43 | nn.init.constant_(param, 0) 44 | elif "weight" in name: 45 | 46 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 47 | 48 | def forward(self, x): 49 | return self.mask_fcn_logits(x) 50 | 51 | 52 | def make_roi_mask_predictor(cfg, in_channels): 53 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 54 | return func(cfg, in_channels) 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | 
import os 4 | 5 | import torch 6 | from setuptools import find_packages 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | requirements = ["torch", "torchvision"] 13 | 14 | 15 | def get_extensions(): 16 | this_dir = os.path.dirname(os.path.abspath(__file__)) 17 | extensions_dir = os.path.join(this_dir, "hetsgg", "csrc") 18 | 19 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 22 | 23 | sources = main_file + source_cpu 24 | extension = CppExtension 25 | 26 | extra_compile_args = {"cxx": []} 27 | define_macros = [] 28 | 29 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 30 | extension = CUDAExtension 31 | sources += source_cuda 32 | define_macros += [("WITH_CUDA", None)] 33 | extra_compile_args["nvcc"] = [ 34 | "-DCUDA_HAS_FP16=1", 35 | "-D__CUDA_NO_HALF_OPERATORS__", 36 | "-D__CUDA_NO_HALF_CONVERSIONS__", 37 | "-D__CUDA_NO_HALF2_OPERATORS__", 38 | ] 39 | 40 | sources = [os.path.join(extensions_dir, s) for s in sources] 41 | 42 | include_dirs = [extensions_dir] 43 | 44 | ext_modules = [ 45 | extension( 46 | "hetsgg._C", 47 | sources, 48 | include_dirs=include_dirs, 49 | define_macros=define_macros, 50 | extra_compile_args=extra_compile_args, 51 | ) 52 | ] 53 | 54 | return ext_modules 55 | 56 | 57 | setup( 58 | name="hetsgg", 59 | version="0.1", 60 | author="Anonymous", 61 | url="", 62 | description="A Toolkit for Scene Graph Generation", 63 | packages=find_packages(exclude=("configs", "tests",)), 64 | # install_requires=requirements, 65 | ext_modules=get_extensions(), 66 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 67 | ) 68 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg, in_channels): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 14 | self.predictor = make_roi_keypoint_predictor( 15 | cfg, self.feature_extractor.out_channels) 16 | self.post_processor = make_roi_keypoint_post_processor(cfg) 17 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 18 | 19 | def forward(self, features, proposals, targets=None): 20 | """ 21 | Arguments: 22 | features (list[Tensor]): feature-maps from possibly several levels 23 | proposals (list[BoxList]): proposal boxes 24 | targets (list[BoxList], optional): the ground-truth targets. 25 | 26 | Returns: 27 | x (Tensor): the result of the feature extractor 28 | proposals (list[BoxList]): during training, the original proposals 29 | are returned. During testing, the predicted boxlists are returned 30 | with the `mask` field set 31 | losses (dict[Tensor]): During training, returns the losses for the 32 | head. 
During testing, returns an empty dict. 33 | """ 34 | if self.training: 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.subsample(proposals, targets) 37 | 38 | x = self.feature_extractor(features, proposals) 39 | kp_logits = self.predictor(x) 40 | 41 | if not self.training: 42 | result = self.post_processor(kp_logits, proposals) 43 | return x, result, {} 44 | 45 | loss_kp = self.loss_evaluator(proposals, kp_logits) 46 | 47 | return x, proposals, dict(loss_kp=loss_kp) 48 | 49 | 50 | def build_roi_keypoint_head(cfg, in_channels): 51 | return ROIKeypointHead(cfg, in_channels) 52 | -------------------------------------------------------------------------------- /hetsgg/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch import nn 4 | 5 | from hetsgg.structures.image_list import to_image_list 6 | 7 | from ..backbone import build_backbone 8 | from ..rpn.rpn import build_rpn 9 | from ..roi_heads.roi_heads import build_roi_heads 10 | 11 | 12 | class GeneralizedRCNN(nn.Module): 13 | 14 | 15 | def __init__(self, cfg): 16 | super(GeneralizedRCNN, self).__init__() 17 | self.cfg = cfg.clone() 18 | self.backbone = build_backbone(cfg) # ResNet 19 | self.rpn = build_rpn(cfg, self.backbone.out_channels) # 256 20 | self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels) 21 | 22 | def forward(self, images, targets=None, logger=None): 23 | """ 24 | Arguments: 25 | images (list[Tensor] or ImageList): images to be processed 26 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 27 | 28 | Returns: 29 | result (list[BoxList] or dict[Tensor]): the output from the model. 30 | During training, it returns a dict[Tensor] which contains the losses. 31 | During testing, it returns list[BoxList] contains additional fields 32 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 33 | 34 | """ 35 | if self.training and targets is None: 36 | raise ValueError("In training mode, targets should be passed") 37 | images = to_image_list(images) 38 | features = self.backbone(images.tensors) 39 | proposals, proposal_losses = self.rpn(images, features, targets) 40 | if self.roi_heads: 41 | x, result, detector_losses = self.roi_heads(features, proposals, targets, logger) 42 | else: 43 | # RPN-only models don't have roi_heads 44 | x = features 45 | result = proposals 46 | detector_losses = {} 47 | 48 | if self.training: # True 49 | losses = {} 50 | losses.update(detector_losses) 51 | if not self.cfg.MODEL.RELATION_ON: # True 52 | # During the relationship training stage, the rpn_head should be fixed, and no loss. 
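# (Editor's comments, hedged:) concretely, during detector pre-training the returned
# dict mixes ROI-head and RPN terms, e.g. {"loss_classifier", "loss_box_reg",
# "loss_objectness", "loss_rpn_box_reg"}; with MODEL.RELATION_ON=True the RPN terms
# below are skipped because the proposal network is frozen. These key names follow
# maskrcnn-benchmark conventions and are illustrative, not taken from this file.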
53 | losses.update(proposal_losses) 54 | return losses 55 | 56 | return result 57 | -------------------------------------------------------------------------------- /hetsgg/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from hetsgg import _C 8 | 9 | from apex import amp 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | @amp.float_function 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function, absolute_import, division 3 | import os, sys 4 | 5 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' 
, 'helpers' ) ) ) 6 | from csHelpers import * 7 | 8 | from cityscapesscripts.evaluation.instance import * 9 | from cityscapesscripts.helpers.csHelpers import * 10 | import cv2 11 | from hetsgg.utils import cv2_util 12 | 13 | 14 | def instances2dict_with_polygons(imageFileList, verbose=False): 15 | imgCount = 0 16 | instanceDict = {} 17 | 18 | if not isinstance(imageFileList, list): 19 | imageFileList = [imageFileList] 20 | 21 | if verbose: 22 | print("Processing {} images...".format(len(imageFileList))) 23 | 24 | for imageFileName in imageFileList: 25 | img = Image.open(imageFileName) 26 | 27 | imgNp = np.array(img) 28 | 29 | instances = {} 30 | for label in labels: 31 | instances[label.name] = [] 32 | 33 | for instanceId in np.unique(imgNp): 34 | if instanceId < 1000: 35 | continue 36 | instanceObj = Instance(imgNp, instanceId) 37 | instanceObj_dict = instanceObj.toDict() 38 | 39 | if id2label[instanceObj.labelID].hasInstances: 40 | mask = (imgNp == instanceId).astype(np.uint8) 41 | contour, hier = cv2_util.findContours( 42 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 43 | 44 | polygons = [c.reshape(-1).tolist() for c in contour] 45 | instanceObj_dict['contours'] = polygons 46 | 47 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 48 | 49 | imgKey = os.path.abspath(imageFileName) 50 | instanceDict[imgKey] = instances 51 | imgCount += 1 52 | 53 | if verbose: 54 | print("\rImages Processed: {}".format(imgCount), end=' ') 55 | sys.stdout.flush() 56 | 57 | if verbose: 58 | print("") 59 | 60 | return instanceDict 61 | 62 | def main(argv): 63 | fileList = [] 64 | if (len(argv) > 2): 65 | for arg in argv: 66 | if ("png" in arg): 67 | fileList.append(arg) 68 | instances2dict_with_polygons(fileList, True) 69 | 70 | if __name__ == "__main__": 71 | main(sys.argv[1:]) 72 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 5 | from hetsgg.modeling import registry 6 | from hetsgg.modeling.poolers import Pooler 7 | from hetsgg.modeling.make_layers import make_conv3x3 8 | 9 | 10 | registry.ROI_MASK_FEATURE_EXTRACTORS.register( 11 | "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor 12 | ) 13 | 14 | 15 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor") 16 | class MaskRCNNFPNFeatureExtractor(nn.Module): 17 | """ 18 | Heads for FPN for classification 19 | """ 20 | 21 | def __init__(self, cfg, in_channels): 22 | 23 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 24 | 25 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 26 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 27 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 28 | pooler = Pooler( 29 | output_size=(resolution, resolution), 30 | scales=scales, 31 | sampling_ratio=sampling_ratio, 32 | ) 33 | input_size = in_channels 34 | self.pooler = pooler 35 | 36 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 37 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 38 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 39 | 40 | next_feature = input_size 41 | self.blocks = [] 42 | for layer_idx, layer_features in enumerate(layers, 1): 43 | layer_name = "mask_fcn{}".format(layer_idx) 44 | module = make_conv3x3( 45 | next_feature, 
layer_features, 46 | dilation=dilation, stride=1, use_gn=use_gn 47 | ) 48 | self.add_module(layer_name, module) 49 | next_feature = layer_features 50 | self.blocks.append(layer_name) 51 | self.out_channels = layer_features 52 | 53 | def forward(self, x, proposals): 54 | x = self.pooler(x, proposals) 55 | 56 | for layer_name in self.blocks: 57 | x = F.relu(getattr(self, layer_name)(x)) 58 | 59 | return x 60 | 61 | 62 | def make_roi_mask_feature_extractor(cfg, in_channels): 63 | func = registry.ROI_MASK_FEATURE_EXTRACTORS[ 64 | cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR 65 | ] 66 | return func(cfg, in_channels) 67 | -------------------------------------------------------------------------------- /hetsgg/solver/build.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau 4 | 5 | 6 | def make_optimizer(cfg, model, logger, slow_heads=None, except_weight_decay=None, slow_ratio=5.0, rl_factor=1.0): 7 | params = [] 8 | for key, value in model.named_parameters(): 9 | if not value.requires_grad: 10 | continue 11 | lr = cfg.SOLVER.BASE_LR 12 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 13 | if "bias" in key: 14 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 15 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 16 | 17 | if except_weight_decay is not None: 18 | for item in except_weight_decay: 19 | if item in key: 20 | weight_decay = 0.0 21 | logger.info("NO WEIGHT DECAY: {}.".format(key)) 22 | 23 | if slow_heads is not None: 24 | for item in slow_heads: 25 | if item in key: 26 | logger.info("SLOW HEADS: {} is slow down by ratio of {}.".format(key, str(slow_ratio))) 27 | lr = lr / slow_ratio 28 | break 29 | params += [{"params": [value], "lr": lr * rl_factor, "weight_decay": weight_decay}] 30 | 31 | optimizer = torch.optim.SGD(params, lr=cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM) 32 | return optimizer 33 | 34 | 35 | def make_lr_scheduler(cfg, optimizer, logger=None): 36 | if cfg.SOLVER.SCHEDULE.TYPE == "WarmupMultiStepLR": 37 | return WarmupMultiStepLR( 38 | optimizer, 39 | cfg.SOLVER.STEPS, 40 | cfg.SOLVER.GAMMA, 41 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 42 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 43 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 44 | ) 45 | 46 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupReduceLROnPlateau": 47 | return WarmupReduceLROnPlateau( 48 | optimizer, 49 | cfg.SOLVER.SCHEDULE.FACTOR, 50 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 51 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 52 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 53 | patience=cfg.SOLVER.SCHEDULE.PATIENCE, 54 | threshold=cfg.SOLVER.SCHEDULE.THRESHOLD, 55 | cooldown=cfg.SOLVER.SCHEDULE.COOLDOWN, 56 | logger=logger, 57 | ) 58 | 59 | else: 60 | raise ValueError("Invalid Schedule Type") 61 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from hetsgg.modeling import registry 4 | 5 | 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.cls_score = nn.Linear(num_inputs, num_classes) 16 | 
num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 17 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 34 | class FPNPredictor(nn.Module): 35 | def __init__(self, cfg, in_channels): 36 | super(FPNPredictor, self).__init__() 37 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 38 | representation_size = in_channels 39 | 40 | self.cls_score = nn.Linear(representation_size, num_classes) # Pretrained 41 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes # False 42 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) # Pretrained 43 | 44 | nn.init.normal_(self.cls_score.weight, std=0.01) 45 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 46 | for l in [self.cls_score, self.bbox_pred]: 47 | nn.init.constant_(l.bias, 0) 48 | 49 | def forward(self, x): 50 | if x.ndimension() == 4: 51 | assert list(x.shape[2:]) == [1, 1] 52 | x = x.view(x.size(0), -1) 53 | cls_logit = self.cls_score(x) 54 | bbox_pred = self.bbox_pred(x) 55 | 56 | return cls_logit, bbox_pred 57 | 58 | 59 | def make_roi_box_predictor(cfg, in_channels): 60 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] # FPN 61 | return func(cfg, in_channels) 62 | -------------------------------------------------------------------------------- /hetsgg/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from hetsgg import _C 7 | 8 | class _SigmoidFocalLoss(Function): 9 | @staticmethod 10 | def forward(ctx, logits, targets, gamma, alpha): 11 | ctx.save_for_backward(logits, targets) 12 | num_classes = logits.shape[1] 13 | ctx.num_classes = num_classes 14 | ctx.gamma = gamma 15 | ctx.alpha = alpha 16 | 17 | losses = _C.sigmoid_focalloss_forward( 18 | logits, targets, num_classes, gamma, alpha 19 | ) 20 | return losses 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | logits, targets = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_logits = _C.sigmoid_focalloss_backward( 31 | logits, targets, d_loss, num_classes, gamma, alpha 32 | ) 33 | return d_logits, None, None, None, None 34 | 35 | 36 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 37 | 38 | 39 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 40 | num_classes = logits.shape[1] 41 | gamma = gamma[0] 42 | alpha = alpha[0] 43 | dtype = targets.dtype 44 | device = targets.device 45 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 46 | 47 | t = targets.unsqueeze(1) 48 | p = torch.sigmoid(logits) 49 | term1 = (1 - p) ** gamma * torch.log(p) 50 | term2 = p ** gamma * torch.log(1 - p) 51 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 52 | 53 | 54 | 
class SigmoidFocalLoss(nn.Module): 55 | def __init__(self, gamma, alpha): 56 | super(SigmoidFocalLoss, self).__init__() 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | 60 | def forward(self, logits, targets): 61 | device = logits.device 62 | if logits.is_cuda: 63 | loss_func = sigmoid_focal_loss_cuda 64 | else: 65 | loss_func = sigmoid_focal_loss_cpu 66 | 67 | loss = loss_func(logits, targets, self.gamma, self.alpha) 68 | return loss.sum() 69 | 70 | def __repr__(self): 71 | tmpstr = self.__class__.__name__ + "(" 72 | tmpstr += "gamma=" + str(self.gamma) 73 | tmpstr += ", alpha=" + str(self.alpha) 74 | tmpstr += ")" 75 | return tmpstr 76 | -------------------------------------------------------------------------------- /hetsgg/structures/image_list.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | 5 | 6 | class ImageList(object): 7 | """ 8 | Structure that holds a list of images (of possibly 9 | varying sizes) as a single tensor. 10 | This works by padding the images to the same size, 11 | and storing in a field the original sizes of each image 12 | """ 13 | 14 | def __init__(self, tensors, image_sizes): 15 | """ 16 | Arguments: 17 | tensors (tensor) 18 | image_sizes (list[tuple[int, int]]) 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, *args, **kwargs): 24 | cast_tensor = self.tensors.to(*args, **kwargs) 25 | return ImageList(cast_tensor, self.image_sizes) 26 | 27 | 28 | def to_image_list(tensors, size_divisible=0): 29 | """ 30 | tensors can be an ImageList, a torch.Tensor or 31 | an iterable of Tensors. It can't be a numpy array. 32 | When tensors is an iterable of Tensors, it pads 33 | the Tensors with zeros so that they have the same 34 | shape 35 | """ 36 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 37 | tensors = [tensors] 38 | 39 | if isinstance(tensors, ImageList): 40 | return tensors 41 | elif isinstance(tensors, torch.Tensor): 42 | # single tensor shape can be inferred 43 | if tensors.dim() == 3: 44 | tensors = tensors[None] 45 | assert tensors.dim() == 4 46 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 47 | return ImageList(tensors, image_sizes) 48 | elif isinstance(tensors, (tuple, list)): 49 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 50 | 51 | if size_divisible > 0: 52 | import math 53 | 54 | stride = size_divisible 55 | max_size = list(max_size) 56 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 57 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 58 | max_size = tuple(max_size) 59 | 60 | batch_shape = (len(tensors),) + max_size 61 | batched_imgs = tensors[0].new(*batch_shape).zero_() 62 | for img, pad_img in zip(tensors, batched_imgs): 63 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 64 | 65 | image_sizes = [im.shape[-2:] for im in tensors] 66 | 67 | return ImageList(batched_imgs, image_sizes) 68 | else: 69 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 70 | -------------------------------------------------------------------------------- /hetsgg/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | 2 | import math 3 | import torch 4 | import torch.distributed as dist 5 | from torch.utils.data.sampler import Sampler 6 | 7 | 8 | class DistributedSampler(Sampler): 9 | """Sampler that restricts data loading to a subset of the 
dataset. 10 | It is especially useful in conjunction with 11 | :class:`torch.nn.parallel.DistributedDataParallel`. In such a case, each 12 | process can pass a DistributedSampler instance as a DataLoader sampler, 13 | and load a subset of the original dataset that is exclusive to it. 14 | .. note:: 15 | Dataset is assumed to be of constant size. 16 | Arguments: 17 | dataset: Dataset used for sampling. 18 | num_replicas (optional): Number of processes participating in 19 | distributed training. 20 | rank (optional): Rank of the current process within num_replicas. 21 | """ 22 | 23 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 24 | if num_replicas is None: 25 | if not dist.is_available(): 26 | raise RuntimeError("Requires distributed package to be available") 27 | num_replicas = dist.get_world_size() 28 | if rank is None: 29 | if not dist.is_available(): 30 | raise RuntimeError("Requires distributed package to be available") 31 | rank = dist.get_rank() 32 | self.dataset = dataset 33 | self.num_replicas = num_replicas 34 | self.rank = rank 35 | self.epoch = 0 36 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 37 | self.total_size = self.num_samples * self.num_replicas 38 | self.shuffle = shuffle 39 | 40 | def __iter__(self): 41 | if self.shuffle: 42 | # deterministically shuffle based on epoch 43 | g = torch.Generator() 44 | g.manual_seed(self.epoch) 45 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 46 | else: 47 | indices = torch.arange(len(self.dataset)).tolist() 48 | 49 | # add extra samples to make it evenly divisible 50 | indices += indices[: (self.total_size - len(indices))] 51 | assert len(indices) == self.total_size 52 | 53 | # subsample 54 | offset = self.num_samples * self.rank 55 | indices = indices[offset : offset + self.num_samples] 56 | assert len(indices) == self.num_samples 57 | 58 | return iter(indices) 59 | 60 | def __len__(self): 61 | return self.num_samples 62 | 63 | def set_epoch(self, epoch): 64 | self.epoch = epoch 65 |
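# --- Editor's usage sketch (not a file from this repo): how the DistributedSampler
# --- above is typically driven. Assumes the class is importable (e.g. from
# --- hetsgg.data.samplers); the toy dataset and epoch count are made up.
import torch
import torch.utils.data

dataset = torch.utils.data.TensorDataset(torch.arange(10).float())
sampler = DistributedSampler(dataset, num_replicas=4, rank=0, shuffle=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=2, sampler=sampler)
for epoch in range(2):
    sampler.set_epoch(epoch)  # re-seeds the shuffle so every rank permutes identically
    for batch in loader:
        pass  # rank 0 consumes its own ceil(10/4) = 3-sample slice of the padded index list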
-------------------------------------------------------------------------------- /hetsgg/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include "cpu/vision.h" 2 | 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 6 | const at::Tensor& scores, 7 | const float threshold) { 8 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 9 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 10 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 11 | 12 | if (dets.numel() == 0) { 13 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 14 | } 15 | 16 | auto x1_t = dets.select(1, 0).contiguous(); 17 | auto y1_t = dets.select(1, 1).contiguous(); 18 | auto x2_t = dets.select(1, 2).contiguous(); 19 | auto y2_t = dets.select(1, 3).contiguous(); 20 | 21 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 22 | 23 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 24 | 25 | auto ndets = dets.size(0); 26 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 27 | 28 | auto suppressed = suppressed_t.data<uint8_t>(); 29 | auto order = order_t.data<int64_t>(); 30 | auto x1 = x1_t.data<scalar_t>(); 31 | auto y1 = y1_t.data<scalar_t>(); 32 | auto x2 = x2_t.data<scalar_t>(); 33 | auto y2 = y2_t.data<scalar_t>(); 34 | auto areas = areas_t.data<scalar_t>(); 35 | 36 | for (int64_t _i = 0; _i < ndets; _i++) { 37 | auto i = order[_i]; 38 | if (suppressed[i] == 1) 39 | continue; 40 | auto ix1 = x1[i]; 41 | auto iy1 = y1[i]; 42 | auto ix2 = x2[i]; 43 | auto iy2 = y2[i]; 44 | auto iarea = areas[i]; 45 | 46 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 47 | auto j = order[_j]; 48 | if (suppressed[j] == 1) 49 | continue; 50 | auto xx1 = std::max(ix1, x1[j]); 51 | auto yy1 = std::max(iy1, y1[j]); 52 | auto xx2 = std::min(ix2, x2[j]); 53 | auto yy2 = std::min(iy2, y2[j]); 54 | 55 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 56 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 57 | auto inter = w * h; 58 | auto ovr = inter / (iarea + areas[j] - inter); 59 | if (ovr >= threshold) 60 | suppressed[j] = 1; 61 | } 62 | } 63 | return at::nonzero(suppressed_t == 0).squeeze(1); 64 | } 65 | 66 | at::Tensor nms_cpu(const at::Tensor& dets, 67 | const at::Tensor& scores, 68 | const float threshold) { 69 | at::Tensor result; 70 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 71 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 72 | }); 73 | return result; 74 | } 75 | -------------------------------------------------------------------------------- /hetsgg/utils/visualize_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import torch 5 | from graphviz import Digraph 6 | 7 | 8 | def visual_computation_graph(var, params, output_dir, graph_name='network'): 9 | """ Produces Graphviz representation of PyTorch autograd graph. 10 | 11 | Blue nodes are trainable Variables (weights, bias). 12 | Orange nodes are saved tensors for the backward pass. 13 | 14 | Args: 15 | var: output Variable 16 | params: list of (name, Parameters) 17 | """ 18 | 19 | param_map = {id(v): k for k, v in params} 20 | 21 | node_attr = dict(style='filled', 22 | shape='box', 23 | align='left', 24 | fontsize='12', 25 | ranksep='0.1', 26 | height='0.2') 27 | 28 | comp_graph = Digraph(filename=os.path.join(output_dir, graph_name), 29 | format='pdf', 30 | node_attr=node_attr, 31 | graph_attr=dict(size="256,512")) 32 | seen = set() 33 | 34 | 35 | 36 | def get_color(): 37 | pallet = ['#8B0000', "#FF8C00", "#556B2F", "#8FBC8F", "#2F4F4F", "#4682B4", 38 | "#191970", "#8A2BE2", "#C71585", "#000000", "#808080"] 39 | 40 | idx = random.randint(0, len(pallet)-1) 41 | return pallet[idx] 42 | 43 | 44 | def add_nodes(var): 45 | if var not in seen: 46 | 47 | node_id = str(id(var)) 48 | 49 | if torch.is_tensor(var): 50 | node_label = "saved tensor\n{}".format(tuple(var.size())) 51 | comp_graph.node(node_id, node_label, fillcolor='orange') 52 | 53 | elif hasattr(var, 'variable'): 54 | # weights 55 | variable_name = param_map.get(id(var.variable)) 56 | variable_size = tuple(var.variable.size()) 57 | node_name = "{}\n{}".format(variable_name, variable_size) 58 | comp_graph.node(node_id, node_name, fillcolor='lightblue') 59 | 60 | else: 61 | # operation 62 | node_label = type(var).__name__.replace('Backward', '') 63 | comp_graph.node(node_id, node_label) 64 | 65 | seen.add(var) 66 | if hasattr(var, 'next_functions'): 67 | for u in var.next_functions: 68 | if u[0] is not None: 69 | comp_graph.edge(str(id(u[0])), str(id(var)), color=get_color()) 70 | add_nodes(u[0]) 71 | 72 | if hasattr(var, 'saved_tensors'): 73 | for t in var.saved_tensors: 74 | comp_graph.edge(str(id(t)), str(id(var)), color=get_color()) 75 | add_nodes(t) 76 | 77 | add_nodes(var.grad_fn) 78 | 79 | return comp_graph 80 |
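# --- Editor's usage sketch (not a repo file): rendering the autograd graph of a toy
# --- module with visual_computation_graph above. The module and file names are
# --- illustrative; rendering needs the graphviz system binary.
import torch
from torch import nn

model = nn.Linear(4, 2)
out = model(torch.randn(3, 4)).sum()  # scalar output, so out.grad_fn exists
graph = visual_computation_graph(out, list(model.named_parameters()),
                                 output_dir=".", graph_name="toy_graph")
graph.render()  # writes ./toy_graph.pdf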
-------------------------------------------------------------------------------- /hetsgg/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import json 3 | import logging 4 | import os 5 | from .comm import is_main_process 6 | import numpy as np 7 | 8 | from hetsgg.structures.bounding_box import BoxList 9 | from hetsgg.structures.boxlist_ops import boxlist_iou 10 | 11 | def mkdir(path): 12 | try: 13 | os.makedirs(path) 14 | except OSError as e: 15 | if e.errno != errno.EEXIST: 16 | raise 17 | 18 | 19 | def save_labels(dataset_list, output_dir): 20 | if is_main_process(): 21 | logger = logging.getLogger(__name__) 22 | 23 | ids_to_labels = {} 24 | for dataset in dataset_list: 25 | if hasattr(dataset, 'categories'): 26 | ids_to_labels.update(dataset.categories) 27 | else: 28 | logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format( 29 | dataset.__class__.__name__)) 30 | 31 | if ids_to_labels: 32 | labels_file = os.path.join(output_dir, 'labels.json') 33 | logger.info("Saving labels mapping into {}".format(labels_file)) 34 | with open(labels_file, 'w') as f: 35 | json.dump(ids_to_labels, f, indent=2) 36 | 37 | 38 | def save_config(cfg, path): 39 | if is_main_process(): 40 | with open(path, 'w') as f: 41 | f.write(cfg.dump()) 42 | 43 | 44 | def intersect_2d(x1, x2): 45 | """ 46 | Given two arrays [m1, n], [m2,n], returns a [m1, m2] array where each entry is True if those 47 | rows match. 48 | :param x1: [m1, n] numpy array 49 | :param x2: [m2, n] numpy array 50 | :return: [m1, m2] bool array of the intersections 51 | """ 52 | if x1.shape[1] != x2.shape[1]: 53 | raise ValueError("Input arrays must have same #columns") 54 | 55 | # This performs a matrix multiplication-esque thing between the two arrays 56 | # Instead of summing, we want the equality, so we reduce in that way 57 | res = (x1[..., None] == x2.T[None, ...]).all(1) 58 | return res 59 | 60 | def argsort_desc(scores): 61 | """ 62 | Returns the indices that sort scores in descending order 63 | :param scores: Numpy array of arbitrary size 64 | :return: an array of size [numel(scores), dim(scores)] where each row is the index you'd 65 | need to get the score.
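Example (worked case, added for clarity): for scores = np.array([[1, 3], [2, 0]]) this returns [[0, 1], [1, 0], [0, 0], [1, 1]], i.e. the (row, col) positions of the values 3, 2, 1, 0 in descending order.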
66 | """ 67 | return np.column_stack(np.unravel_index(np.argsort(-scores.ravel()), scores.shape)) 68 | 69 | def bbox_overlaps(boxes1, boxes2): 70 | """ 71 | Parameters: 72 | boxes1 (m, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 73 | boxes2 (n, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 74 | Return: 75 | iou (m, n) [np.array] 76 | """ 77 | boxes1 = BoxList(boxes1, (0, 0), 'xyxy') 78 | boxes2 = BoxList(boxes2, (0, 0), 'xyxy') 79 | iou = boxlist_iou(boxes1, boxes2).cpu().numpy() 80 | return iou 81 | -------------------------------------------------------------------------------- /hetsgg/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BalancedPositiveNegativeSampler(object): 5 | """ 6 | This class samples batches, ensuring that they contain a fixed proportion of positives 7 | """ 8 | 9 | def __init__(self, batch_size_per_image, positive_fraction): 10 | """ 11 | Arguments: 12 | batch_size_per_image (int): number of elements to be selected per image 13 | positive_fraction (float): percentace of positive elements per batch 14 | """ 15 | self.batch_size_per_image = batch_size_per_image 16 | self.positive_fraction = positive_fraction 17 | 18 | def __call__(self, matched_idxs): 19 | """ 20 | Arguments: 21 | matched idxs: list of tensors containing -1, 0 or positive values. 22 | Each tensor corresponds to a specific image. 23 | -1 values are ignored, 0 are considered as negatives and > 0 as 24 | positives. 25 | 26 | Returns: 27 | pos_idx (list[tensor]) 28 | neg_idx (list[tensor]) 29 | 30 | Returns two lists of binary masks for each image. 31 | The first list contains the positive elements that were selected, 32 | and the second list the negative example. 
33 | """ 34 | pos_idx = [] 35 | neg_idx = [] 36 | for matched_idxs_per_image in matched_idxs: 37 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 38 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 39 | 40 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 41 | # protect against not enough positive examples 42 | num_pos = min(positive.numel(), num_pos) 43 | num_neg = self.batch_size_per_image - num_pos 44 | # protect against not enough negative examples 45 | num_neg = min(negative.numel(), num_neg) 46 | 47 | # randomly select positive and negative examples 48 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 49 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 50 | 51 | pos_idx_per_image = positive[perm1] 52 | neg_idx_per_image = negative[perm2] 53 | 54 | # create binary mask from indices 55 | pos_idx_per_image_mask = torch.zeros_like( 56 | matched_idxs_per_image, dtype=torch.uint8 57 | ) 58 | neg_idx_per_image_mask = torch.zeros_like( 59 | matched_idxs_per_image, dtype=torch.uint8 60 | ) 61 | pos_idx_per_image_mask[pos_idx_per_image] = 1 62 | neg_idx_per_image_mask[neg_idx_per_image] = 1 63 | 64 | pos_idx.append(pos_idx_per_image_mask) 65 | neg_idx.append(neg_idx_per_image_mask) 66 | 67 | return pos_idx, neg_idx 68 | -------------------------------------------------------------------------------- /hetsgg/layers/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from hetsgg import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 
| ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /hetsgg/layers/label_smoothing_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Label_Smoothing_Regression(nn.Module): 6 | 7 | def __init__(self, e=0.01, reduction='mean'): 8 | super().__init__() 9 | 10 | self.log_softmax = nn.LogSoftmax(dim=1) 11 | self.e = e 12 | self.reduction = reduction 13 | 14 | def _one_hot(self, labels, classes, value=1): 15 | """ 16 | Convert labels to one-hot vectors 17 | 18 | Args: 19 | labels: torch tensor in format [label1, label2, label3, ...] 20 | classes: int, number of classes 21 | value: label value in one hot vector, default to 1 22 | 23 | Returns: 24 | return one hot format labels in shape [batchsize, classes] 25 | """ 26 | 27 | one_hot = torch.zeros(labels.size(0), classes) 28 | 29 | # labels and value_added sizes must match 30 | labels = labels.view(labels.size(0), -1) 31 | value_added = torch.Tensor(labels.size(0), 1).fill_(value) 32 | 33 | value_added = value_added.to(labels.device) 34 | one_hot = one_hot.to(labels.device) 35 | 36 | one_hot.scatter_add_(1, labels, value_added) 37 | 38 | return one_hot 39 | 40 | def _smooth_label(self, target, length, smooth_factor): 41 | """Convert targets to one-hot format and smooth 42 | them. 43 | Args: 44 | target: target in the form [label1, label2, ..., label_batchsize] 45 | length: length of one-hot format (number of classes) 46 | smooth_factor: smooth factor for label smoothing 47 | 48 | Returns: 49 | smoothed labels in one hot format 50 | """ 51 | one_hot = self._one_hot(target, length, value=1 - smooth_factor) 52 | one_hot += smooth_factor / length 53 | 54 | return one_hot.to(target.device) 55 | 56 | def forward(self, x, target): 57 | 58 | if x.size(0) != target.size(0): 59 | raise ValueError('Expected input batchsize ({}) to match target batch_size ({})' 60 | .format(x.size(0), target.size(0))) 61 | 62 | if x.dim() < 2: 63 | raise ValueError('Expected input tensor to have at least 2 dimensions (got {})' 64 | .format(x.size(0))) 65 | 66 | if x.dim() != 2: 67 | raise ValueError('Only 2-dimensional tensors are implemented (got {})' 68 | .format(x.size())) 69 | 70 | 71 | smoothed_target = self._smooth_label(target, x.size(1), self.e) 72 | x = self.log_softmax(x) 73 | loss = torch.sum(- x * smoothed_target, dim=1) 74 | 75 | if self.reduction == 'none': 76 | return loss 77 | 78 | elif self.reduction == 'sum': 79 | return torch.sum(loss) 80 | 81 | elif self.reduction == 'mean': 82 | return torch.mean(loss) 83 | 84 | else: 85 | raise ValueError('unrecognized option; expected reduction to be one of none, mean, sum') 86 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/attribute_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .roi_attribute_feature_extractors import make_roi_attribute_feature_extractor 5 | from .roi_attribute_predictors import make_roi_attribute_predictor 6 | from .loss import make_roi_attribute_loss_evaluator 7 | 8 |
def add_attribute_logits(proposals, attri_logits): 9 | slice_idxs = [0] 10 | for i in range(len(proposals)): 11 | slice_idxs.append(len(proposals[i])+slice_idxs[-1]) 12 | proposals[i].add_field("attribute_logits", attri_logits[slice_idxs[i]:slice_idxs[i+1]]) 13 | return proposals 14 | 15 | class ROIAttributeHead(torch.nn.Module): 16 | """ 17 | Generic ATTRIBUTE Head class. 18 | """ 19 | 20 | def __init__(self, cfg, in_channels): 21 | super(ROIAttributeHead, self).__init__() 22 | self.cfg = cfg.clone() 23 | self.feature_extractor = make_roi_attribute_feature_extractor(cfg, in_channels, half_out=self.cfg.MODEL.ATTRIBUTE_ON) 24 | self.predictor = make_roi_attribute_predictor(cfg, self.feature_extractor.out_channels) 25 | self.loss_evaluator = make_roi_attribute_loss_evaluator(cfg) 26 | 27 | def forward(self, features, proposals, targets=None): 28 | """ 29 | features: extracted from box_head 30 | """ 31 | # Attribute head is fixed when we train the relation head 32 | if self.cfg.MODEL.RELATION_ON: 33 | if self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_BOX and self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL: 34 | # mode==predcls 35 | # no need to predict attributes, use the ground truth 36 | x = self.feature_extractor(features, proposals) 37 | return x, proposals, {} 38 | # mode==sgcls or sgdet 39 | else: 40 | x = self.feature_extractor(features, proposals) 41 | attri_logits = self.predictor(x) 42 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 43 | proposals = add_attribute_logits(proposals, attri_logits) 44 | return x, proposals, {} 45 | 46 | # Train/Test the attribute head 47 | x = self.feature_extractor(features, proposals) 48 | attri_logits = self.predictor(x) 49 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 50 | proposals = add_attribute_logits(proposals, attri_logits) 51 | 52 | if not self.training: 53 | return x, proposals, {} 54 | 55 | # proposals need to contain the attribute fields 56 | loss_attribute = self.loss_evaluator(proposals, attri_logits) 57 | return x, proposals, dict(loss_attribute=loss_attribute) 58 | 59 | def build_roi_attribute_head(cfg, in_channels): 60 | """ 61 | Constructs a new attribute head. 62 | By default, uses ROIAttributeHead, but if it turns out not to be enough, just register a new class 63 | and make it a parameter in the config 64 | """ 65 | return ROIAttributeHead(cfg, in_channels) 66 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/box_head/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.layers import smooth_l1_loss 5 | from hetsgg.modeling.box_coder import BoxCoder 6 | from hetsgg.modeling.matcher import Matcher 7 | from hetsgg.structures.boxlist_ops import boxlist_iou 8 | from hetsgg.modeling.balanced_positive_negative_sampler import ( 9 | BalancedPositiveNegativeSampler 10 | ) 11 | from hetsgg.modeling.utils import cat 12 | 13 | 14 | class FastRCNNLossComputation(object): 15 | """ 16 | Computes the loss for Faster R-CNN. 17 | Also supports FPN.
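Example (hedged sketch of the calling convention, inferred from __call__ below): each proposal BoxList must already carry the "labels" and "regression_targets" fields; then `cls_loss, box_loss = loss_evaluator([class_logits], [box_regression], proposals)` returns the two scalar loss terms.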
18 | """ 19 | 20 | def __init__(self, proposal_matcher, cls_agnostic_bbox_reg=False): 21 | self.proposal_matcher = proposal_matcher  # used by assign_label_to_proposals below 22 | self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg 23 | def assign_label_to_proposals(self, proposals, targets): 24 | for img_idx, (target, proposal) in enumerate(zip(targets, proposals)): 25 | match_quality_matrix = boxlist_iou(target, proposal) 26 | matched_idxs = self.proposal_matcher(match_quality_matrix) 27 | # Fast RCNN only needs the "labels" field for selecting the targets 28 | target = target.copy_with_fields(["labels", "attributes"]) 29 | matched_targets = target[matched_idxs.clamp(min=0)] 30 | 31 | labels_per_image = matched_targets.get_field("labels").to(dtype=torch.int64) 32 | attris_per_image = matched_targets.get_field("attributes").to(dtype=torch.int64) 33 | 34 | labels_per_image[matched_idxs < 0] = 0 35 | attris_per_image[matched_idxs < 0, :] = 0 36 | proposals[img_idx].add_field("labels", labels_per_image) 37 | proposals[img_idx].add_field("attributes", attris_per_image) 38 | return proposals 39 | 40 | 41 | def __call__(self, class_logits, box_regression, proposals): 42 | 43 | class_logits = cat(class_logits, dim=0) 44 | box_regression = cat(box_regression, dim=0) 45 | device = class_logits.device 46 | 47 | labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0) 48 | regression_targets = cat([proposal.get_field("regression_targets") for proposal in proposals], dim=0) 49 | 50 | classification_loss = F.cross_entropy(class_logits, labels.long()) 51 | 52 | 53 | sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) 54 | labels_pos = labels[sampled_pos_inds_subset] 55 | if self.cls_agnostic_bbox_reg: 56 | map_inds = torch.tensor([4, 5, 6, 7], device=device) 57 | else: 58 | map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device) 59 | 60 | box_loss = smooth_l1_loss( 61 | box_regression[sampled_pos_inds_subset[:, None], map_inds], 62 | regression_targets[sampled_pos_inds_subset], 63 | size_average=False, 64 | beta=1, 65 | ) 66 | box_loss = box_loss / labels.numel() 67 | 68 | return classification_loss, box_loss 69 | 70 | 71 | def make_roi_box_loss_evaluator(cfg): 72 | matcher = Matcher(cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD, cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD, allow_low_quality_matches=False) 73 | cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG 74 | loss_evaluator = FastRCNNLossComputation(matcher, cls_agnostic_bbox_reg) 75 | 76 | return loss_evaluator 77 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from torch import nn 4 | 5 | from hetsgg.modeling import registry 6 | from hetsgg.modeling.make_layers import conv_with_kaiming_uniform 7 | from . import fpn as fpn_module 8 | from . import resnet 9 | from .
import vgg 10 | 11 | 12 | @registry.BACKBONES.register("VGG-16") 13 | def build_vgg_fpn_backbone(cfg): 14 | body = vgg.VGG16(cfg) 15 | out_channels = cfg.MODEL.VGG.VGG16_OUT_CHANNELS 16 | model = nn.Sequential(OrderedDict([("body", body)])) 17 | model.out_channels = out_channels 18 | return model 19 | 20 | 21 | @registry.BACKBONES.register("R-50-C4") 22 | @registry.BACKBONES.register("R-50-C5") 23 | @registry.BACKBONES.register("R-101-C4") 24 | @registry.BACKBONES.register("R-101-C5") 25 | def build_resnet_backbone(cfg): 26 | body = resnet.ResNet(cfg) 27 | model = nn.Sequential(OrderedDict([("body", body)])) 28 | model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 29 | return model 30 | 31 | 32 | @registry.BACKBONES.register("R-50-FPN") 33 | @registry.BACKBONES.register("R-101-FPN") 34 | @registry.BACKBONES.register("R-152-FPN") 35 | def build_resnet_fpn_backbone(cfg): 36 | body = resnet.ResNet(cfg) 37 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 38 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 39 | fpn = fpn_module.FPN( 40 | in_channels_list=[ 41 | in_channels_stage2, 42 | in_channels_stage2 * 2, 43 | in_channels_stage2 * 4, 44 | in_channels_stage2 * 8, 45 | ], 46 | out_channels=out_channels, 47 | conv_block=conv_with_kaiming_uniform( 48 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 49 | ), 50 | top_blocks=fpn_module.LastLevelMaxPool(), 51 | ) 52 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 53 | model.out_channels = out_channels 54 | return model 55 | 56 | 57 | @registry.BACKBONES.register("R-50-FPN-RETINANET") 58 | @registry.BACKBONES.register("R-101-FPN-RETINANET") 59 | def build_resnet_fpn_p3p7_backbone(cfg): 60 | body = resnet.ResNet(cfg) 61 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 62 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 63 | in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 64 | else out_channels 65 | fpn = fpn_module.FPN( 66 | in_channels_list=[ 67 | 0, 68 | in_channels_stage2 * 2, 69 | in_channels_stage2 * 4, 70 | in_channels_stage2 * 8, 71 | ], 72 | out_channels=out_channels, 73 | conv_block=conv_with_kaiming_uniform( 74 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 75 | ), 76 | top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels), 77 | ) 78 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 79 | model.out_channels = out_channels 80 | return model 81 | 82 | 83 | def build_backbone(cfg): 84 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 85 | "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in the backbone registry".format( 86 | cfg.MODEL.BACKBONE.CONV_BODY 87 | ) 88 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 89 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from hetsgg.structures.bounding_box import BoxList 5 | 6 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 7 | from .roi_mask_predictors import make_roi_mask_predictor 8 | from .inference import make_roi_mask_post_processor 9 | from .loss import make_roi_mask_loss_evaluator 10 | 11 | 12 | def keep_only_positive_boxes(boxes): 13 | """ 14 | Given a set of BoxList containing the `labels` field, 15 | return a set of BoxList for which `labels > 0`. 
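This is done at training time so that the mask branch only computes a loss on proposals that were actually matched to a ground-truth object.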
16 | 17 | Arguments: 18 | boxes (list of BoxList) 19 | """ 20 | assert isinstance(boxes, (list, tuple)) 21 | assert isinstance(boxes[0], BoxList) 22 | assert boxes[0].has_field("labels") 23 | positive_boxes = [] 24 | positive_inds = [] 25 | num_boxes = 0 26 | for boxes_per_image in boxes: 27 | labels = boxes_per_image.get_field("labels") 28 | inds_mask = labels > 0 29 | inds = inds_mask.nonzero().squeeze(1) 30 | positive_boxes.append(boxes_per_image[inds]) 31 | positive_inds.append(inds_mask) 32 | return positive_boxes, positive_inds 33 | 34 | 35 | class ROIMaskHead(torch.nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(ROIMaskHead, self).__init__() 38 | self.cfg = cfg.clone() 39 | self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels) 40 | self.predictor = make_roi_mask_predictor( 41 | cfg, self.feature_extractor.out_channels) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | all_proposals = proposals 63 | proposals, positive_inds = keep_only_positive_boxes(proposals) 64 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 65 | x = features 66 | x = x[torch.cat(positive_inds, dim=0)] 67 | else: 68 | x = self.feature_extractor(features, proposals) 69 | mask_logits = self.predictor(x) 70 | 71 | if not self.training: 72 | result = self.post_processor(mask_logits, proposals) 73 | return x, result, {} 74 | 75 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 76 | 77 | return x, all_proposals, dict(loss_mask=loss_mask) 78 | 79 | 80 | def build_roi_mask_head(cfg, in_channels): 81 | return ROIMaskHead(cfg, in_channels) 82 | -------------------------------------------------------------------------------- /hetsgg/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping): 8 | logger = logging.getLogger(__name__) 9 | current_keys = sorted(list(model_state_dict.keys())) 10 | loaded_keys = sorted(list(loaded_state_dict.keys())) 11 | 12 | mapped_current_keys = current_keys.copy() 13 | for i, key in enumerate(mapped_current_keys): 14 | for source_key, target_key in load_mapping.items(): 15 | if source_key in key: 16 | mapped_current_keys[i] = key.replace(source_key, target_key) 17 | logger.info("MAPPING {} in current model to {} in loaded model.".format(key, mapped_current_keys[i])) 18 | 19 | match_matrix = [ 20 | len(j) if i.endswith(j) else 0 for i in mapped_current_keys for j in loaded_keys 21 | ] 22 | match_matrix = torch.as_tensor(match_matrix).view( 23 | len(current_keys), len(loaded_keys) 24 | ) 25 | max_match_size, idxs = match_matrix.max(1) 26 | # 
remove indices that correspond to no-match 27 | idxs[max_match_size == 0] = -1 28 | 29 | # used for logging 30 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 31 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 32 | log_str_template = "REMATCHING! {: <{}} loaded from {: <{}} of shape {}" 33 | for idx_new, idx_old in enumerate(idxs.tolist()): 34 | if idx_old == -1: 35 | key = current_keys[idx_new] 36 | logger.info("NO-MATCHING of current module: {} of shape {}".format(key, 37 | tuple(model_state_dict[key].shape))) 38 | continue 39 | key = current_keys[idx_new] 40 | key_old = loaded_keys[idx_old] 41 | model_state_dict[key] = loaded_state_dict[key_old] 42 | if ((not key.startswith('module.')) and key != key_old) or (key.startswith('module.') and key[7:] != key_old): 43 | logger.info( 44 | log_str_template.format( 45 | key, 46 | max_size, 47 | key_old, 48 | max_size_loaded, 49 | tuple(loaded_state_dict[key_old].shape), 50 | ) 51 | ) 52 | logger.info('Mapping All') 53 | 54 | 55 | def strip_prefix_if_present(state_dict, prefix): 56 | keys = sorted(state_dict.keys()) 57 | if not all(key.startswith(prefix) for key in keys): 58 | return state_dict 59 | stripped_state_dict = OrderedDict() 60 | for key, value in state_dict.items(): 61 | stripped_state_dict[key.replace(prefix, "")] = value 62 | return stripped_state_dict 63 | 64 | 65 | def load_state_dict(model, loaded_state_dict, load_mapping): 66 | model_state_dict = model.state_dict() 67 | # if the state_dict comes from a model that was wrapped in a 68 | # DataParallel or DistributedDataParallel during serialization, 69 | # remove the "module" prefix before performing the matching 70 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 71 | align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping) 72 | 73 | # non-strict loading: model_state_dict was already aligned above, and unmatched keys keep their initialized values 74 | model.load_state_dict(model_state_dict, strict=False) 75 | -------------------------------------------------------------------------------- /hetsgg/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | 5 | 6 | class BoxCoder(object): 7 | """ 8 | This class encodes and decodes a set of bounding boxes into 9 | the representation used for training the regressors. 10 | """ 11 | 12 | def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)): 13 | """ 14 | Arguments: 15 | weights (4-element tuple) 16 | bbox_xform_clip (float) 17 | """ 18 | self.weights = weights 19 | self.bbox_xform_clip = bbox_xform_clip 20 | 21 | def encode(self, reference_boxes, proposals): 22 | """ 23 | Encode a set of proposals with respect to some 24 | reference boxes 25 | 26 | Arguments: 27 | reference_boxes (Tensor): reference boxes 28 | proposals (Tensor): boxes to be encoded 29 | """ 30 | 31 | TO_REMOVE = 1 # TODO remove 32 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 33 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 34 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 35 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 36 | 37 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 38 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 39 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 40 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 41 | 42 | wx, wy, ww, wh = self.weights 43 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 44 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 45 | targets_dw = ww * torch.log(gt_widths / ex_widths) 46 | targets_dh = wh * torch.log(gt_heights / ex_heights) 47 | 48 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 49 | return targets 50 | 51 | def decode(self, rel_codes, boxes): 52 | """ 53 | From a set of original boxes and encoded relative box offsets, 54 | get the decoded boxes. 55 | 56 | Arguments: 57 | rel_codes (Tensor): encoded boxes 58 | boxes (Tensor): reference boxes. 59 | """ 60 | 61 | boxes = boxes.to(rel_codes.dtype) 62 | 63 | TO_REMOVE = 1 # TODO remove 64 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 65 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 66 | ctr_x = boxes[:, 0] + 0.5 * widths 67 | ctr_y = boxes[:, 1] + 0.5 * heights 68 | 69 | wx, wy, ww, wh = self.weights 70 | dx = rel_codes[:, 0::4] / wx 71 | dy = rel_codes[:, 1::4] / wy 72 | dw = rel_codes[:, 2::4] / ww 73 | dh = rel_codes[:, 3::4] / wh 74 | 75 | # Prevent sending too large values into torch.exp() 76 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 77 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 78 | 79 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 80 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 81 | pred_w = torch.exp(dw) * widths[:, None] 82 | pred_h = torch.exp(dh) * heights[:, None] 83 | 84 | pred_boxes = torch.zeros_like(rel_codes) 85 | # x1 86 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 87 | # y1 88 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 89 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 90 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 91 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 92 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 93 | 94 | return pred_boxes 95 | -------------------------------------------------------------------------------- /hetsgg/utils/comm.py: -------------------------------------------------------------------------------- 1 | 2 | import pickle 3 | import time 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | 9 | def get_world_size(): 10 | if not dist.is_available(): 11 | return 1 12 | if not dist.is_initialized(): 13 | return 1 14 | return dist.get_world_size() 15 | 16 | 17 | def get_rank(): 18 | if not dist.is_available(): 19 | return 0 20 | if not dist.is_initialized(): 21 | return 0 22 | return dist.get_rank() 23 | 24 | 25 | def is_main_process(): 26 | return get_rank() == 0 
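# A typical pattern is to guard filesystem and logging side effects with
# this helper so they run once per job rather than once per process, e.g.:
#
#     if is_main_process():
#         torch.save(model.state_dict(), "model_final.pth")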
27 | 28 | 29 | def synchronize(): 30 | """ 31 | Helper function to synchronize (barrier) among all processes when 32 | using distributed training 33 | """ 34 | if not dist.is_available(): 35 | return 36 | if not dist.is_initialized(): 37 | return 38 | world_size = dist.get_world_size() 39 | if world_size == 1: 40 | return 41 | dist.barrier() 42 | 43 | 44 | def all_gather(data): 45 | """ 46 | Run all_gather on arbitrary picklable data (not necessarily tensors) 47 | Args: 48 | data: any picklable object 49 | Returns: 50 | list[data]: list of data gathered from each rank 51 | """ 52 | to_device = "cuda" 53 | 54 | world_size = get_world_size() 55 | if world_size == 1: 56 | return [data] 57 | 58 | # serialized to a Tensor 59 | buffer = pickle.dumps(data) 60 | storage = torch.ByteStorage.from_buffer(buffer) 61 | tensor = torch.ByteTensor(storage).to(to_device) 62 | 63 | # obtain the serialized Tensor size of each rank; dist.all_gather 64 | # requires every participating tensor to have the same shape, so each 65 | # rank pads its byte tensor up to the maximum size across ranks before 66 | # the gather, and the padding is trimmed again after deserialization 67 | # below 68 | 69 | local_size = torch.LongTensor([tensor.view(-1).shape[0]]).to(to_device) 70 | size_list = [torch.LongTensor([0]).to(to_device) for _ in range(world_size)] 71 | dist.all_gather(size_list, local_size) 72 | size_list = [int(size.item()) for size in size_list] 73 | max_size = max(size_list) 74 | 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to(to_device)) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to(to_device) 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 
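Note that only the process with rank 0 is guaranteed to hold the reduced values after this call; other processes should not rely on the contents of the returned dict.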
99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | for k in sorted(input_dict.keys()): 107 | names.append(k) 108 | values.append(input_dict[k]) 109 | values = torch.stack(values, dim=0) 110 | dist.reduce(values, dst=0) 111 | if dist.get_rank() == 0 and average: 112 | values /= world_size 113 | reduced_dict = {k: v for k, v in zip(names, values)} 114 | return reduced_dict 115 | -------------------------------------------------------------------------------- /hetsgg/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include <ATen/ATen.h> 4 | #include <ATen/cuda/CUDAContext.h> 5 | 6 | #include <THC/THC.h> 7 | #include <THC/THCDeviceUtils.cuh> 8 | 9 | #include <vector> 10 | #include <iostream> 11 | #include <cmath> 12 | 13 | 14 | void DeformablePSROIPoolForward( 15 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 16 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 17 | const int height, const int width, const int num_bbox, 18 | const int channels_trans, const int no_trans, const float spatial_scale, 19 | const int output_dim, const int group_size, const int pooled_size, 20 | const int part_size, const int sample_per_part, const float trans_std); 21 | 22 | void DeformablePSROIPoolBackwardAcc( 23 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 24 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 25 | at::Tensor trans_grad, const int batch, const int channels, 26 | const int height, const int width, const int num_bbox, 27 | const int channels_trans, const int no_trans, const float spatial_scale, 28 | const int output_dim, const int group_size, const int pooled_size, 29 | const int part_size, const int sample_per_part, const float trans_std); 30 | 31 | void deform_psroi_pooling_cuda_forward( 32 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 33 | at::Tensor top_count, const int no_trans, const float spatial_scale, 34 | const int output_dim, const int group_size, const int pooled_size, 35 | const int part_size, const int sample_per_part, const float trans_std) 36 | { 37 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 38 | 39 | const int batch = input.size(0); 40 | const int channels = input.size(1); 41 | const int height = input.size(2); 42 | const int width = input.size(3); 43 | const int channels_trans = no_trans ? 
2 : trans.size(1); 44 | 45 | const int num_bbox = bbox.size(0); 46 | if (num_bbox != out.size(0)) 47 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 48 | out.size(0), num_bbox); 49 | 50 | DeformablePSROIPoolForward( 51 | input, bbox, trans, out, top_count, batch, channels, height, width, 52 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 53 | pooled_size, part_size, sample_per_part, trans_std); 54 | } 55 | 56 | void deform_psroi_pooling_cuda_backward( 57 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 58 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 59 | const int no_trans, const float spatial_scale, const int output_dim, 60 | const int group_size, const int pooled_size, const int part_size, 61 | const int sample_per_part, const float trans_std) 62 | { 63 | TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 64 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 65 | 66 | const int batch = input.size(0); 67 | const int channels = input.size(1); 68 | const int height = input.size(2); 69 | const int width = input.size(3); 70 | const int channels_trans = no_trans ? 2 : trans.size(1); 71 | 72 | const int num_bbox = bbox.size(0); 73 | if (num_bbox != out_grad.size(0)) 74 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 75 | out_grad.size(0), num_bbox); 76 | 77 | DeformablePSROIPoolBackwardAcc( 78 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 79 | channels, height, width, num_bbox, channels_trans, no_trans, 80 | spatial_scale, output_dim, group_size, pooled_size, part_size, 81 | sample_per_part, trans_std); 82 | } 83 | -------------------------------------------------------------------------------- /hetsgg/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Miscellaneous utility functions 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | from hetsgg.config import cfg 9 | from hetsgg.layers import Conv2d 10 | 11 | 12 | def get_group_gn(dim, dim_per_gp, num_groups): 13 | """get number of groups used by GroupNorm, based on number of channels.""" 14 | assert dim_per_gp == -1 or num_groups == -1, \ 15 | "GroupNorm: can only specify G or C/G." 
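# exactly one of the two may be set: either the number of groups (num_groups)
# is fixed and the channels per group are derived from dim, or the channels
# per group (dim_per_gp) is fixed and the number of groups is derived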
16 | 17 | if dim_per_gp > 0: 18 | assert dim % dim_per_gp == 0, \ 19 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 20 | group_gn = dim // dim_per_gp 21 | else: 22 | assert dim % num_groups == 0, \ 23 | "dim: {}, num_groups: {}".format(dim, num_groups) 24 | group_gn = num_groups 25 | 26 | return group_gn 27 | 28 | 29 | def group_norm(out_channels, affine=True, divisor=1): 30 | out_channels = out_channels // divisor 31 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 32 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 33 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 34 | return torch.nn.GroupNorm( 35 | get_group_gn(out_channels, dim_per_gp, num_groups), 36 | out_channels, 37 | eps, 38 | affine 39 | ) 40 | 41 | 42 | def make_conv3x3( 43 | in_channels, 44 | out_channels, 45 | dilation=1, 46 | stride=1, 47 | use_gn=False, 48 | use_relu=False, 49 | kaiming_init=True 50 | ): 51 | conv = Conv2d( 52 | in_channels, 53 | out_channels, 54 | kernel_size=3, 55 | stride=stride, 56 | padding=dilation, 57 | dilation=dilation, 58 | bias=False if use_gn else True 59 | ) 60 | if kaiming_init: 61 | nn.init.kaiming_normal_( 62 | conv.weight, mode="fan_out", nonlinearity="relu" 63 | ) 64 | else: 65 | torch.nn.init.normal_(conv.weight, std=0.01) 66 | if not use_gn: 67 | nn.init.constant_(conv.bias, 0) 68 | module = [conv,] 69 | if use_gn: 70 | module.append(group_norm(out_channels)) 71 | if use_relu: 72 | module.append(nn.ReLU(inplace=True)) 73 | if len(module) > 1: 74 | return nn.Sequential(*module) 75 | return conv 76 | 77 | 78 | def make_fc(dim_in, hidden_dim, use_gn=False): 79 | 80 | if use_gn: 81 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 82 | nn.init.kaiming_uniform_(fc.weight, a=1) 83 | return nn.Sequential(fc, group_norm(hidden_dim)) 84 | fc = nn.Linear(dim_in, hidden_dim) 85 | nn.init.kaiming_uniform_(fc.weight, a=1) 86 | nn.init.constant_(fc.bias, 0) 87 | return fc 88 | 89 | 90 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 91 | def make_conv( 92 | in_channels, out_channels, kernel_size, stride=1, dilation=1 93 | ): 94 | conv = Conv2d( 95 | in_channels, 96 | out_channels, 97 | kernel_size=kernel_size, 98 | stride=stride, 99 | padding=dilation * (kernel_size - 1) // 2, 100 | dilation=dilation, 101 | bias=False if use_gn else True 102 | ) 103 | nn.init.kaiming_uniform_(conv.weight, a=1) 104 | if not use_gn: 105 | nn.init.constant_(conv.bias, 0) 106 | module = [conv,] 107 | if use_gn: 108 | module.append(group_norm(out_channels)) 109 | if use_relu: 110 | module.append(nn.ReLU(inplace=True)) 111 | if len(module) > 1: 112 | return nn.Sequential(*module) 113 | return conv 114 | 115 | return make_conv 116 | -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | from torch.nn import functional as F 5 | 6 | from ..utils import concat_box_prediction_layers 7 | 8 | from hetsgg.layers import smooth_l1_loss 9 | from hetsgg.layers import SigmoidFocalLoss 10 | from hetsgg.modeling.matcher import Matcher 11 | from hetsgg.modeling.utils import cat 12 | from hetsgg.structures.boxlist_ops import boxlist_iou 13 | from hetsgg.structures.boxlist_ops import cat_boxlist 14 | from hetsgg.modeling.rpn.loss import RPNLossComputation 15 | 16 | class RetinaNetLossComputation(RPNLossComputation): 17 | """ 18 | This class computes the RetinaNet loss. 
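Classification uses a sigmoid focal loss over all anchors, while box regression uses a smooth-L1 loss over the positive anchors only.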
19 | """ 20 | 21 | def __init__(self, proposal_matcher, box_coder, 22 | generate_labels_func, 23 | sigmoid_focal_loss, 24 | bbox_reg_beta=0.11, 25 | regress_norm=1.0): 26 | """ 27 | Arguments: 28 | proposal_matcher (Matcher) 29 | box_coder (BoxCoder) 30 | """ 31 | self.proposal_matcher = proposal_matcher 32 | self.box_coder = box_coder 33 | self.box_cls_loss_func = sigmoid_focal_loss 34 | self.bbox_reg_beta = bbox_reg_beta 35 | self.copied_fields = ['labels'] 36 | self.generate_labels_func = generate_labels_func 37 | self.discard_cases = ['between_thresholds'] 38 | self.regress_norm = regress_norm 39 | 40 | def __call__(self, anchors, box_cls, box_regression, targets): 41 | """ 42 | Arguments: 43 | anchors (list[BoxList]) 44 | box_cls (list[Tensor]) 45 | box_regression (list[Tensor]) 46 | targets (list[BoxList]) 47 | 48 | Returns: 49 | retinanet_cls_loss (Tensor) 50 | retinanet_regression_loss (Tensor) 51 | """ 52 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 53 | labels, regression_targets = self.prepare_targets(anchors, targets) 54 | 55 | N = len(labels) 56 | box_cls, box_regression = \ 57 | concat_box_prediction_layers(box_cls, box_regression) 58 | 59 | labels = torch.cat(labels, dim=0) 60 | regression_targets = torch.cat(regression_targets, dim=0) 61 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 62 | 63 | retinanet_regression_loss = smooth_l1_loss( 64 | box_regression[pos_inds], 65 | regression_targets[pos_inds], 66 | beta=self.bbox_reg_beta, 67 | size_average=False, 68 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 69 | 70 | labels = labels.int() 71 | 72 | retinanet_cls_loss = self.box_cls_loss_func( 73 | box_cls, 74 | labels 75 | ) / (pos_inds.numel() + N) # normalized by num positive anchors + batch size (N images) 76 | 77 | return retinanet_cls_loss, retinanet_regression_loss 78 | 79 | 80 | def generate_retinanet_labels(matched_targets): 81 | labels_per_image = matched_targets.get_field("labels") 82 | return labels_per_image 83 | 84 | 85 | def make_retinanet_loss_evaluator(cfg, box_coder): 86 | matcher = Matcher( 87 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 88 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 89 | allow_low_quality_matches=True, 90 | ) 91 | sigmoid_focal_loss = SigmoidFocalLoss( 92 | cfg.MODEL.RETINANET.LOSS_GAMMA, 93 | cfg.MODEL.RETINANET.LOSS_ALPHA 94 | ) 95 | 96 | loss_evaluator = RetinaNetLossComputation( 97 | matcher, 98 | box_coder, 99 | generate_retinanet_labels, 100 | sigmoid_focal_loss, 101 | bbox_reg_beta = cfg.MODEL.RETINANET.BBOX_REG_BETA, 102 | regress_norm = cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 103 | ) 104 | return loss_evaluator 105 | -------------------------------------------------------------------------------- /tools/detector_pretest_net.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import os 4 | 5 | import torch 6 | 7 | from hetsgg.config import cfg 8 | from hetsgg.data import make_data_loader 9 | from hetsgg.engine.inference import inference 10 | from hetsgg.modeling.detector import build_detection_model 11 | from hetsgg.utils.checkpoint import DetectronCheckpointer 12 | from hetsgg.utils.collect_env import collect_env_info 13 | from hetsgg.utils.comm import synchronize, get_rank 14 | from hetsgg.utils.logger import setup_logger 15 | from hetsgg.utils.miscellaneous import mkdir 16 | 17 | try: 18 | from apex import amp 19 | except ImportError: 20 | raise ImportError('Use APEX for mixed precision via apex.amp') 21 | 22 | 23 | def main(): 24 | parser = 
argparse.ArgumentParser(description="PyTorch Object Detection Inference") 25 | parser.add_argument( 26 | "--config-file", 27 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 28 | metavar="FILE", 29 | help="path to config file", 30 | ) 31 | parser.add_argument("--local_rank", type=int, default=0) 32 | parser.add_argument( 33 | "opts", 34 | help="Modify config options using the command-line", 35 | default=None, 36 | nargs=argparse.REMAINDER, 37 | ) 38 | 39 | args = parser.parse_args() 40 | 41 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 42 | distributed = num_gpus > 1 43 | 44 | if distributed: 45 | torch.cuda.set_device(args.local_rank) 46 | torch.distributed.init_process_group( 47 | backend="nccl", init_method="env://" 48 | ) 49 | synchronize() 50 | 51 | cfg.merge_from_file(args.config_file) 52 | cfg.merge_from_list(args.opts) 53 | cfg.freeze() 54 | 55 | save_dir = "" 56 | logger = setup_logger("hetsgg", save_dir, get_rank()) 57 | logger.info("Using {} GPUs".format(num_gpus)) 58 | logger.info(cfg) 59 | 60 | 61 | model = build_detection_model(cfg) 62 | model.to(cfg.MODEL.DEVICE) 63 | 64 | output_dir = cfg.OUTPUT_DIR 65 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 66 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 67 | 68 | use_mixed_precision = cfg.DTYPE == 'float16' 69 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 70 | 71 | 72 | iou_types = ("bbox",) 73 | if cfg.MODEL.MASK_ON: 74 | iou_types = iou_types + ("segm",) 75 | if cfg.MODEL.KEYPOINT_ON: 76 | iou_types = iou_types + ("keypoints",) 77 | if cfg.MODEL.RELATION_ON: 78 | iou_types = iou_types + ("relations", ) 79 | if cfg.MODEL.ATTRIBUTE_ON: 80 | iou_types = iou_types + ("attributes", ) 81 | 82 | output_folders = [None] * len(cfg.DATASETS.TEST) 83 | dataset_names = cfg.DATASETS.TEST 84 | if cfg.OUTPUT_DIR: 85 | for idx, dataset_name in enumerate(dataset_names): 86 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 87 | mkdir(output_folder) 88 | output_folders[idx] = output_folder 89 | data_loaders_val = make_data_loader(cfg, mode='test', is_distributed=distributed) 90 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 91 | inference( 92 | cfg, 93 | model, 94 | data_loader_val, 95 | dataset_name=dataset_name, 96 | iou_types=iou_types, 97 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 98 | device=cfg.MODEL.DEVICE, 99 | expected_results=cfg.TEST.EXPECTED_RESULTS, 100 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 101 | output_folder=output_folder, 102 | ) 103 | synchronize() 104 | 105 | 106 | if __name__ == "__main__": 107 | main() 108 | -------------------------------------------------------------------------------- /hetsgg/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | import torchvision 5 | from torchvision.transforms import functional as F 6 | 7 | 8 | class Compose(object): 9 | def __init__(self, transforms): 10 | self.transforms = transforms 11 | 12 | def __call__(self, image, target): 13 | for t in self.transforms: 14 | image, target = t(image, target) 15 | return image, target 16 | 17 | def __repr__(self): 18 | format_string = self.__class__.__name__ + "(" 19 | for t in self.transforms: 20 | format_string += "\n" 21 | format_string += " {0}".format(t) 22 | 
format_string += "\n)" 23 | return format_string 24 | 25 | 26 | class Resize(object): 27 | def __init__(self, min_size, max_size): 28 | if not isinstance(min_size, (list, tuple)): 29 | min_size = (min_size,) 30 | self.min_size = min_size 31 | self.max_size = max_size 32 | 33 | # modified from torchvision to add support for max size 34 | def get_size(self, image_size): 35 | w, h = image_size 36 | size = random.choice(self.min_size) 37 | max_size = self.max_size 38 | if max_size is not None: 39 | min_original_size = float(min((w, h))) 40 | max_original_size = float(max((w, h))) 41 | if max_original_size / min_original_size * size > max_size: 42 | size = int(round(max_size * min_original_size / max_original_size)) 43 | 44 | if (w <= h and w == size) or (h <= w and h == size): 45 | return (h, w) 46 | 47 | if w < h: 48 | ow = size 49 | oh = int(size * h / w) 50 | else: 51 | oh = size 52 | ow = int(size * w / h) 53 | 54 | return (oh, ow) 55 | 56 | def __call__(self, image, target=None): 57 | size = self.get_size(image.size) 58 | image = F.resize(image, size) 59 | if target is None: 60 | return image 61 | target = target.resize(image.size) 62 | return image, target 63 | 64 | 65 | class RandomHorizontalFlip(object): 66 | def __init__(self, prob=0.5): 67 | self.prob = prob 68 | 69 | def __call__(self, image, target): 70 | if random.random() < self.prob: 71 | image = F.hflip(image) 72 | target = target.transpose(0) 73 | return image, target 74 | 75 | class RandomVerticalFlip(object): 76 | def __init__(self, prob=0.5): 77 | self.prob = prob 78 | 79 | def __call__(self, image, target): 80 | if random.random() < self.prob: 81 | image = F.vflip(image) 82 | target = target.transpose(1) 83 | return image, target 84 | 85 | class ColorJitter(object): 86 | def __init__(self, 87 | brightness=None, 88 | contrast=None, 89 | saturation=None, 90 | hue=None, 91 | ): 92 | self.color_jitter = torchvision.transforms.ColorJitter( 93 | brightness=brightness, 94 | contrast=contrast, 95 | saturation=saturation, 96 | hue=hue,) 97 | 98 | def __call__(self, image, target): 99 | image = self.color_jitter(image) 100 | return image, target 101 | 102 | 103 | class ToTensor(object): 104 | def __call__(self, image, target): 105 | return F.to_tensor(image), target 106 | 107 | 108 | class Normalize(object): 109 | def __init__(self, mean, std, to_bgr255=True): 110 | self.mean = mean 111 | self.std = std 112 | self.to_bgr255 = to_bgr255 113 | 114 | def __call__(self, image, target=None): 115 | if self.to_bgr255: 116 | image = image[[2, 1, 0]] * 255 117 | image = F.normalize(image, mean=self.mean, std=self.std) 118 | if target is None: 119 | return image 120 | return image, target 121 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class FPN(nn.Module): 7 | """ 8 | Module that adds FPN on top of a list of feature maps. 
9 | The feature maps are currently supposed to be in increasing depth 10 | order, and must be consecutive 11 | """ 12 | 13 | def __init__( 14 | self, in_channels_list, out_channels, conv_block, top_blocks=None 15 | ): 16 | """ 17 | Arguments: 18 | in_channels_list (list[int]): number of channels for each feature map that 19 | will be fed 20 | out_channels (int): number of channels of the FPN representation 21 | top_blocks (nn.Module or None): if provided, an extra operation will 22 | be performed on the output of the last (smallest resolution) 23 | FPN output, and the result will extend the result list 24 | """ 25 | super(FPN, self).__init__() 26 | self.inner_blocks = [] 27 | self.layer_blocks = [] 28 | for idx, in_channels in enumerate(in_channels_list, 1): 29 | inner_block = "fpn_inner{}".format(idx) 30 | layer_block = "fpn_layer{}".format(idx) 31 | 32 | if in_channels == 0: 33 | continue 34 | inner_block_module = conv_block(in_channels, out_channels, 1) 35 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 36 | self.add_module(inner_block, inner_block_module) 37 | self.add_module(layer_block, layer_block_module) 38 | self.inner_blocks.append(inner_block) 39 | self.layer_blocks.append(layer_block) 40 | self.top_blocks = top_blocks 41 | 42 | def forward(self, x): 43 | """ 44 | Arguments: 45 | x (list[Tensor]): feature maps for each feature level. 46 | Returns: 47 | results (tuple[Tensor]): feature maps after FPN layers. 48 | They are ordered from highest resolution first. 49 | """ 50 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 51 | results = [] 52 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 53 | for feature, inner_block, layer_block in zip( 54 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 55 | ): 56 | if not inner_block: 57 | continue 58 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 59 | inner_lateral = getattr(self, inner_block)(feature) 60 | 61 | last_inner = inner_lateral + inner_top_down 62 | results.insert(0, getattr(self, layer_block)(last_inner)) 63 | 64 | if isinstance(self.top_blocks, LastLevelP6P7): 65 | last_results = self.top_blocks(x[-1], results[-1]) 66 | results.extend(last_results) 67 | elif isinstance(self.top_blocks, LastLevelMaxPool): 68 | last_results = self.top_blocks(results[-1]) 69 | results.extend(last_results) 70 | 71 | return tuple(results) 72 | 73 | 74 | class LastLevelMaxPool(nn.Module): 75 | def forward(self, x): 76 | return [F.max_pool2d(x, 1, 2, 0)] 77 | 78 | 79 | class LastLevelP6P7(nn.Module): 80 | """ 81 | This module is used in RetinaNet to generate extra layers, P6 and P7. 
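P6 is computed from C5 (or from P5 when the input and output channel counts match) by a stride-2 3x3 convolution, and P7 by a ReLU followed by another stride-2 3x3 convolution on P6.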
82 | """ 83 | def __init__(self, in_channels, out_channels): 84 | super(LastLevelP6P7, self).__init__() 85 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 86 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 87 | for module in [self.p6, self.p7]: 88 | nn.init.kaiming_uniform_(module.weight, a=1) 89 | nn.init.constant_(module.bias, 0) 90 | self.use_P5 = in_channels == out_channels 91 | 92 | def forward(self, c5, p5): 93 | x = p5 if self.use_P5 else c5 94 | p6 = self.p6(x) 95 | p7 = self.p7(F.relu(p6)) 96 | return [p6, p7] 97 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | from hetsgg.structures.bounding_box import BoxList 5 | from hetsgg.structures.segmentation_mask import SegmentationMask 6 | from hetsgg.structures.keypoint import PersonKeypoints 7 | 8 | 9 | min_keypoints_per_image = 10 10 | 11 | 12 | def _count_visible_keypoints(anno): 13 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 14 | 15 | 16 | def _has_only_empty_bbox(anno): 17 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 18 | 19 | 20 | def has_valid_annotation(anno): 21 | # if it's empty, there is no annotation 22 | if len(anno) == 0: 23 | return False 24 | # if all boxes have close to zero area, there is no annotation 25 | if _has_only_empty_bbox(anno): 26 | return False 27 | # keypoint tasks have a slightly different criterion for deciding 28 | # whether an annotation is valid 29 | if "keypoints" not in anno[0]: 30 | return True 31 | # for keypoint detection tasks, only consider images valid if they 32 | # contain at least min_keypoints_per_image visible keypoints 33 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 34 | return True 35 | return False 36 | 37 | 38 | class COCODataset(torchvision.datasets.coco.CocoDetection): 39 | def __init__( 40 | self, ann_file, root, remove_images_without_annotations, transforms=None 41 | ): 42 | super(COCODataset, self).__init__(root, ann_file) 43 | # sort indices for reproducible results 44 | self.ids = sorted(self.ids) 45 | 46 | # filter images without detection annotations 47 | if remove_images_without_annotations: 48 | ids = [] 49 | for img_id in self.ids: 50 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 51 | anno = self.coco.loadAnns(ann_ids) 52 | if has_valid_annotation(anno): 53 | ids.append(img_id) 54 | self.ids = ids 55 | 56 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 57 | 58 | self.json_category_id_to_contiguous_id = { 59 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 60 | } 61 | self.contiguous_category_id_to_json_id = { 62 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 63 | } 64 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 65 | self._transforms = transforms 66 | 67 | def __getitem__(self, idx): 68 | img, anno = super(COCODataset, self).__getitem__(idx) 69 | 70 | # filter crowd annotations 71 | # TODO might be better to add an extra field 72 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 73 | 74 | boxes = [obj["bbox"] for obj in anno] 75 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 76 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 77 | 78 | classes = [obj["category_id"] for obj in anno] 79 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 80 | classes = 
torch.tensor(classes) 81 | target.add_field("labels", classes) 82 | 83 | if anno and "segmentation" in anno[0]: 84 | masks = [obj["segmentation"] for obj in anno] 85 | masks = SegmentationMask(masks, img.size, mode='poly') 86 | target.add_field("masks", masks) 87 | 88 | if anno and "keypoints" in anno[0]: 89 | keypoints = [obj["keypoints"] for obj in anno] 90 | keypoints = PersonKeypoints(keypoints, img.size) 91 | target.add_field("keypoints", keypoints) 92 | 93 | target = target.clip_to_image(remove_empty=True) 94 | 95 | if self._transforms is not None: 96 | img, target = self._transforms(img, target) 97 | 98 | return img, target, idx 99 | 100 | def get_img_info(self, index): 101 | img_id = self.id_to_img_map[index] 102 | img_data = self.coco.imgs[img_id] 103 | return img_data 104 | -------------------------------------------------------------------------------- /hetsgg/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import shutil 5 | 6 | import ipdb 7 | from tensorboardX import SummaryWriter 8 | from termcolor import colored 9 | 10 | from hetsgg.utils.comm import get_rank 11 | from hetsgg.utils.metric_logger import SmoothedValue 12 | 13 | DEBUG_PRINT_ON = True 14 | 15 | 16 | TFBoardHandler_LEVEL = 4 17 | 18 | 19 | 20 | class _ColorfulFormatter(logging.Formatter): 21 | def __init__(self, *args, **kwargs): 22 | self._root_name = kwargs.pop("root_name") + "." 23 | self._abbrev_name = kwargs.pop("abbrev_name", "") 24 | if len(self._abbrev_name): 25 | self._abbrev_name = self._abbrev_name + "." 26 | super(_ColorfulFormatter, self).__init__(*args, **kwargs) 27 | 28 | def formatMessage(self, record): 29 | record.name = record.name.replace(self._root_name, self._abbrev_name) 30 | log = super(_ColorfulFormatter, self).formatMessage(record) 31 | if record.levelno == logging.WARNING: 32 | prefix = colored("WARNING", "red", attrs=["blink"]) 33 | elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: 34 | prefix = colored("ERROR", "red", attrs=["blink", "underline"]) 35 | else: 36 | return log 37 | return prefix + " " + log 38 | 39 | 40 | 41 | def debug_print(logger, info): 42 | if DEBUG_PRINT_ON: 43 | logger.info('#'*20+' '+info+' '+'#'*20) 44 | 45 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 46 | logger = logging.getLogger(name) 47 | 48 | for each in logger.handlers: 49 | logger.removeHandler(each) 50 | 51 | logger.setLevel(TFBoardHandler_LEVEL) 52 | if distributed_rank > 0: 53 | return logger 54 | 55 | ch = logging.StreamHandler(stream=sys.stdout) 56 | ch.setLevel(logging.DEBUG) 57 | formatter = _ColorfulFormatter( 58 | colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", 59 | datefmt="%m/%d %H:%M:%S", 60 | root_name=name, 61 | ) 62 | ch.setFormatter(formatter) 63 | logger.addHandler(ch) 64 | 65 | 66 | if save_dir: 67 | 68 | tf = TFBoardHandler(TFBoardWriter(save_dir)) 69 | tf.setLevel(TFBoardHandler_LEVEL) 70 | logger.addHandler(tf) 71 | 72 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 73 | fh.setLevel(logging.DEBUG) 74 | fh.setFormatter(formatter) 75 | logger.addHandler(fh) 76 | 77 | return logger 78 | 79 | 80 | 81 | class TFBoardWriter: 82 | def __init__(self, log_dir): 83 | if log_dir and get_rank() == 0: 84 | tfbd_dir = os.path.join(log_dir, 'tfboard') 85 | if os.path.exists(tfbd_dir): 86 | shutil.rmtree(tfbd_dir) 87 | os.makedirs(tfbd_dir) 88 | 89 | self.tf_writer = SummaryWriter(log_dir=tfbd_dir, flush_secs=10) 90 | 
self.enable = True 91 | else: 92 | self.enable = False 93 | self.tf_writer = None 94 | 95 | def write_data(self, meter, iter): 96 | if isinstance(iter, str): 97 | model = meter[0] 98 | input = meter[1] 99 | 100 | self.tf_writer.add_graph(model, input) 101 | else: 102 | for each in meter.keys(): 103 | val = meter[each] 104 | if isinstance(val, SmoothedValue): 105 | val = val.avg 106 | self.tf_writer.add_scalar(each, val, iter) 107 | 108 | def close(self): 109 | if self.tf_writer is not None: 110 | self.tf_writer.close() 111 | 112 | 113 | class TFBoardHandler(logging.Handler): 114 | def __init__(self, writer): 115 | logging.Handler.__init__(self, TFBoardHandler_LEVEL) 116 | self.tf_writer = writer 117 | 118 | def emit(self, record): 119 | if record.levelno <= TFBoardHandler_LEVEL: 120 | self.tf_writer.write_data(record.msg[0], record.msg[1]) 121 | return 122 | 123 | def close(self): 124 | self.tf_writer.close() -------------------------------------------------------------------------------- /tools/relation_test_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | from hetsgg.config import cfg 5 | from hetsgg.data import make_data_loader 6 | from hetsgg.engine.inference import inference 7 | from hetsgg.modeling.detector import build_detection_model 8 | from hetsgg.utils.checkpoint import DetectronCheckpointer 9 | from hetsgg.utils.collect_env import collect_env_info 10 | from hetsgg.utils.comm import synchronize, get_rank 11 | from hetsgg.utils.logger import setup_logger 12 | from hetsgg.utils.miscellaneous import mkdir 13 | 14 | try: 15 | from apex import amp 16 | except ImportError: 17 | raise ImportError('Use APEX for mixed precision via apex.amp') 18 | 19 | torch.set_num_threads(2) 20 | 21 | def main(): 22 | 23 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 24 | parser.add_argument( 25 | "--config-file", 26 | default="checkpoints/config.yml", 27 | metavar="FILE", 28 | help="path to config file", 29 | ) 30 | parser.add_argument("--local_rank", type=int, default=0) 31 | parser.add_argument( 32 | "opts", 33 | help="Modify config options using the command-line", 34 | default=None, 35 | nargs=argparse.REMAINDER, 36 | ) 37 | 38 | args = parser.parse_args() 39 | 40 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 41 | distributed = num_gpus > 1 42 | 43 | if distributed: 44 | torch.cuda.set_device(args.local_rank) 45 | torch.distributed.init_process_group( 46 | backend="nccl", init_method="env://" 47 | ) 48 | synchronize() 49 | cfg.set_new_allowed(True) 50 | cfg.merge_from_file(args.config_file) 51 | cfg.merge_from_list(args.opts) 52 | cfg.set_new_allowed(True) 53 | 54 | 55 | save_dir = "" 56 | logger = setup_logger("hetsgg", save_dir, get_rank()) 57 | logger.info("Using {} GPUs".format(num_gpus)) 58 | 59 | logger.info("Collecting env info (might take some time)") 60 | 61 | model = build_detection_model(cfg) 62 | model.to(cfg.MODEL.DEVICE) 63 | 64 | 65 | use_mixed_precision = cfg.DTYPE == 'float16' 66 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 67 | 68 | output_dir = cfg.OUTPUT_DIR 69 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 70 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 71 | 72 | 73 | # make sure checkpoint loading has finished on every rank before 74 | # running inference 75 | 76 | torch.cuda.synchronize(get_rank()) 77 | synchronize() 78 | 79 | iou_types = 
("bbox",) 80 | if cfg.MODEL.MASK_ON: 81 | iou_types = iou_types + ("segm",) 82 | if cfg.MODEL.KEYPOINT_ON: 83 | iou_types = iou_types + ("keypoints",) 84 | if cfg.MODEL.RELATION_ON: 85 | iou_types = iou_types + ("relations", ) 86 | if cfg.MODEL.ATTRIBUTE_ON: 87 | iou_types = iou_types + ("attributes", ) 88 | output_folders = [None] * len(cfg.DATASETS.TEST) 89 | dataset_names = cfg.DATASETS.TEST 90 | 91 | if cfg.OUTPUT_DIR: 92 | for idx, dataset_name in enumerate(dataset_names): 93 | output_folder = os.path.join(cfg.OUTPUT_DIR, f"inference_refine_{cfg.MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE}", dataset_name) 94 | mkdir(output_folder) 95 | output_folders[idx] = output_folder 96 | data_loaders_val = make_data_loader(cfg, mode="test", is_distributed=distributed) 97 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 98 | inference( 99 | cfg, 100 | model, 101 | data_loader_val, 102 | dataset_name=dataset_name, 103 | iou_types=iou_types, 104 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 105 | device=cfg.MODEL.DEVICE, 106 | expected_results=cfg.TEST.EXPECTED_RESULTS, 107 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 108 | output_folder=output_folder, 109 | ) 110 | synchronize() 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.layers import smooth_l1_loss 5 | from hetsgg.modeling.box_coder import BoxCoder 6 | from hetsgg.modeling.matcher import Matcher 7 | from hetsgg.structures.boxlist_ops import boxlist_iou 8 | from hetsgg.modeling.utils import cat 9 | 10 | 11 | class AttributeHeadLossComputation(object): 12 | """ 13 | Computes the loss for attribute head 14 | """ 15 | 16 | def __init__( 17 | self, 18 | loss_weight=0.1, 19 | num_attri_cat=201, 20 | max_num_attri=10, 21 | attribute_sampling=True, 22 | attribute_bgfg_ratio=5, 23 | use_binary_loss=True, 24 | pos_weight=1, 25 | ): 26 | self.loss_weight = loss_weight 27 | self.num_attri_cat = num_attri_cat 28 | self.max_num_attri = max_num_attri 29 | self.attribute_sampling = attribute_sampling 30 | self.attribute_bgfg_ratio = attribute_bgfg_ratio 31 | self.use_binary_loss = use_binary_loss 32 | self.pos_weight = pos_weight 33 | 34 | def __call__(self, proposals, attri_logits): 35 | """ 36 | Calculcate attribute loss 37 | """ 38 | attributes = cat([proposal.get_field("attributes") for proposal in proposals], dim=0) 39 | assert attributes.shape[0] == attri_logits.shape[0] 40 | 41 | # generate attribute targets 42 | attribute_targets, selected_idxs = self.generate_attributes_target(attributes) 43 | 44 | attri_logits = attri_logits[selected_idxs] 45 | attribute_targets = attribute_targets[selected_idxs] 46 | 47 | attribute_loss = self.attribute_loss(attri_logits, attribute_targets) 48 | 49 | return attribute_loss * self.loss_weight 50 | 51 | 52 | def generate_attributes_target(self, attributes): 53 | """ 54 | from list of attribute indexs to [1,0,1,0,0,1] form 55 | """ 56 | assert self.max_num_attri == attributes.shape[1] 57 | num_obj = attributes.shape[0] 58 | 59 | with_attri_idx = (attributes.sum(-1) > 0).long() 60 | without_attri_idx = 1 - with_attri_idx 61 | num_pos = int(with_attri_idx.sum()) 62 | num_neg = int(without_attri_idx.sum()) 63 | 
assert num_pos + num_neg == num_obj 64 | 65 | if self.attribute_sampling: 66 | num_neg = min(num_neg, num_pos * self.attribute_bgfg_ratio) if num_pos > 0 else 1 67 | 68 | attribute_targets = torch.zeros((num_obj, self.num_attri_cat), device=attributes.device).float() 69 | if not self.use_binary_loss: 70 | attribute_targets[without_attri_idx > 0, 0] = 1.0 71 | 72 | pos_idxs = torch.nonzero(with_attri_idx).squeeze(1) 73 | perm = torch.randperm(num_obj - num_pos, device=attributes.device)[:num_neg] 74 | neg_idxs = torch.nonzero(without_attri_idx).squeeze(1)[perm] 75 | selected_idxs = torch.cat((pos_idxs, neg_idxs), dim=0) 76 | assert selected_idxs.shape[0] == num_neg + num_pos 77 | 78 | for idx in torch.nonzero(with_attri_idx).squeeze(1).tolist(): 79 | for k in range(self.max_num_attri): 80 | att_id = int(attributes[idx, k]) 81 | if att_id == 0: 82 | break 83 | else: 84 | attribute_targets[idx, att_id] = 1 85 | 86 | return attribute_targets, selected_idxs 87 | 88 | def attribute_loss(self, logits, labels): 89 | if self.use_binary_loss: 90 | all_loss = F.binary_cross_entropy_with_logits(logits, labels, pos_weight=torch.FloatTensor([self.pos_weight] * self.num_attri_cat).cuda()) 91 | return all_loss 92 | else: 93 | all_loss = -F.softmax(logits, dim=-1).log() 94 | all_loss = (all_loss * labels).sum(-1) / labels.sum(-1) 95 | return all_loss.mean() 96 | 97 | 98 | def make_roi_attribute_loss_evaluator(cfg): 99 | loss_evaluator = AttributeHeadLossComputation( 100 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_LOSS_WEIGHT, 101 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES, 102 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.MAX_ATTRIBUTES, 103 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_SAMPLE, 104 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_RATIO, 105 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.USE_BINARY_LOSS, 106 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.POS_WEIGHT, 107 | ) 108 | 109 | return loss_evaluator 110 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class KeypointPostProcessor(nn.Module): 6 | def __init__(self, keypointer=None): 7 | super(KeypointPostProcessor, self).__init__() 8 | self.keypointer = keypointer 9 | 10 | def forward(self, x, boxes): 11 | mask_prob = x 12 | 13 | scores = None 14 | if self.keypointer: 15 | mask_prob, scores = self.keypointer(x, boxes) 16 | 17 | assert len(boxes) == 1, "Only non-batched inference supported for now" 18 | boxes_per_image = [box.bbox.size(0) for box in boxes] 19 | mask_prob = mask_prob.split(boxes_per_image, dim=0) 20 | scores = scores.split(boxes_per_image, dim=0) 21 | 22 | results = [] 23 | for prob, box, score in zip(mask_prob, boxes, scores): 24 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 25 | for field in box.fields(): 26 | bbox.add_field(field, box.get_field(field)) 27 | prob = PersonKeypoints(prob, box.size) 28 | prob.add_field("logits", score) 29 | bbox.add_field("keypoints", prob) 30 | results.append(bbox) 31 | 32 | return results 33 | 34 | 35 | import numpy as np 36 | import cv2 37 | 38 | 39 | def heatmaps_to_keypoints(maps, rois): 40 | """Extract predicted keypoint locations from heatmaps. Returns an array of 41 | shape (#rois, #keypoints, 3) holding (x, y, 1) for each keypoint, together 42 | with a (#rois, #keypoints) array of per-keypoint scores. 
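Each ROI heatmap is resized back to the ROI resolution with bicubic interpolation, the per-keypoint argmax is taken, and the resulting coordinates are shifted by the ROI offset into image space.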
43 | """ 44 | 45 | offset_x = rois[:, 0] 46 | offset_y = rois[:, 1] 47 | 48 | widths = rois[:, 2] - rois[:, 0] 49 | heights = rois[:, 3] - rois[:, 1] 50 | widths = np.maximum(widths, 1) 51 | heights = np.maximum(heights, 1) 52 | widths_ceil = np.ceil(widths) 53 | heights_ceil = np.ceil(heights) 54 | 55 | maps = np.transpose(maps, [0, 2, 3, 1]) 56 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE 57 | num_keypoints = maps.shape[3] 58 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32) 59 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32) 60 | for i in range(len(rois)): 61 | if min_size > 0: 62 | roi_map_width = int(np.maximum(widths_ceil[i], min_size)) 63 | roi_map_height = int(np.maximum(heights_ceil[i], min_size)) 64 | else: 65 | roi_map_width = widths_ceil[i] 66 | roi_map_height = heights_ceil[i] 67 | width_correction = widths[i] / roi_map_width 68 | height_correction = heights[i] / roi_map_height 69 | roi_map = cv2.resize( 70 | maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC 71 | ) 72 | # Bring back to CHW 73 | roi_map = np.transpose(roi_map, [2, 0, 1]) 74 | # roi_map_probs = scores_to_probs(roi_map.copy()) 75 | w = roi_map.shape[2] 76 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1) 77 | x_int = pos % w 78 | y_int = (pos - x_int) // w 79 | # assert (roi_map_probs[k, y_int, x_int] == 80 | # roi_map_probs[k, :, :].max()) 81 | x = (x_int + 0.5) * width_correction 82 | y = (y_int + 0.5) * height_correction 83 | xy_preds[i, 0, :] = x + offset_x[i] 84 | xy_preds[i, 1, :] = y + offset_y[i] 85 | xy_preds[i, 2, :] = 1 86 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int] 87 | 88 | return np.transpose(xy_preds, [0, 2, 1]), end_scores 89 | 90 | 91 | from hetsgg.structures.bounding_box import BoxList 92 | from hetsgg.structures.keypoint import PersonKeypoints 93 | 94 | 95 | class Keypointer(object): 96 | """ 97 | Projects a set of masks in an image on the locations 98 | specified by the bounding boxes 99 | """ 100 | 101 | def __init__(self, padding=0): 102 | self.padding = padding 103 | 104 | def __call__(self, masks, boxes): 105 | # TODO do this properly 106 | if isinstance(boxes, BoxList): 107 | boxes = [boxes] 108 | assert len(boxes) == 1 109 | 110 | result, scores = heatmaps_to_keypoints( 111 | masks.detach().cpu().numpy(), boxes[0].bbox.cpu().numpy() 112 | ) 113 | return torch.from_numpy(result).to(masks.device), torch.as_tensor(scores, device=masks.device) 114 | 115 | 116 | def make_roi_keypoint_post_processor(cfg): 117 | keypointer = Keypointer() 118 | keypoint_post_processor = KeypointPostProcessor(keypointer) 119 | return keypoint_post_processor 120 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from hetsgg.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | 
"train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | self.categories = dict(zip(range(len(cls)), cls)) 61 | 62 | def __getitem__(self, index): 63 | img_id = self.ids[index] 64 | img = Image.open(self._imgpath % img_id).convert("RGB") 65 | 66 | target = self.get_groundtruth(index) 67 | target = target.clip_to_image(remove_empty=True) 68 | 69 | if self.transforms is not None: 70 | img, target = self.transforms(img, target) 71 | 72 | return img, target, index 73 | 74 | def __len__(self): 75 | return len(self.ids) 76 | 77 | def get_groundtruth(self, index): 78 | img_id = self.ids[index] 79 | anno = ET.parse(self._annopath % img_id).getroot() 80 | anno = self._preprocess_annotation(anno) 81 | 82 | height, width = anno["im_info"] 83 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 84 | target.add_field("labels", anno["labels"]) 85 | target.add_field("difficult", anno["difficult"]) 86 | return target 87 | 88 | def _preprocess_annotation(self, target): 89 | boxes = [] 90 | gt_classes = [] 91 | difficult_boxes = [] 92 | TO_REMOVE = 1 93 | 94 | for obj in target.iter("object"): 95 | difficult = int(obj.find("difficult").text) == 1 96 | if not self.keep_difficult and difficult: 97 | continue 98 | name = obj.find("name").text.lower().strip() 99 | bb = obj.find("bndbox") 100 | box = [ 101 | bb.find("xmin").text, 102 | bb.find("ymin").text, 103 | bb.find("xmax").text, 104 | bb.find("ymax").text, 105 | ] 106 | bndbox = tuple( 107 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 108 | ) 109 | 110 | boxes.append(bndbox) 111 | gt_classes.append(self.class_to_ind[name]) 112 | difficult_boxes.append(difficult) 113 | 114 | size = target.find("size") 115 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 116 | 117 | res = { 118 | "boxes": torch.tensor(boxes, dtype=torch.float32), 119 | "labels": torch.tensor(gt_classes), 120 | "difficult": torch.tensor(difficult_boxes), 121 | "im_info": im_info, 122 | } 123 | return res 124 | 125 | def get_img_info(self, index): 126 | img_id = self.ids[index] 127 | anno = ET.parse(self._annopath % img_id).getroot() 128 | size = anno.find("size") 129 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 130 | return {"height": im_info[0], "width": im_info[1]} 131 | 132 | def map_class_id_to_class_name(self, class_id): 133 | return PascalVOCDataset.CLASSES[class_id] 134 | -------------------------------------------------------------------------------- /Datasets/OI-V4/Category_Type_Info.json: -------------------------------------------------------------------------------- 1 | {"class_to_category": {"Piano": "product", "Boy": "human", "Tennis ball": "product", "Van": "product", "Football": "product", "Beer": "product", "Camera": "product", "Suitcase": "product", "Man": "human", "Bench": 
"product", "Dolphin": "animal", "Motorcycle": "product", "Mug": "product", "Tennis racket": "product", "Drum": "product", "Spoon": "product", "Horse": "animal", "Surfboard": "product", "Bicycle": "product", "Knife": "product", "Rugby ball": "product", "Woman": "human", "Handbag": "product", "Microwave oven": "product", "Flute": "product", "Girl": "human", "Taxi": "product", "Hamster": "animal", "Wine glass": "product", "Backpack": "product", "Racket": "product", "Table": "product", "Pretzel": "product", "Bed": "product", "Snowboard": "product", "Car": "product", "Chair": "product", "Microphone": "product", "Coffee cup": "product", "Table tennis racket": "product", "Bottle": "product", "Guitar": "product", "Desk": "product", "Ski": "product", "Coffee table": "product", "Dog": "animal", "Cat": "animal", "Chopsticks": "product", "Elephant": "animal", "Mobile phone": "product", "Monkey": "animal", "Snake": "animal", "Sofa bed": "product", "Violin": "product", "Fork": "product", "Oven": "product", "Briefcase": "product"}, "category_idx": {"product": 0, "animal": 1, "human": 2}, "label_to_catidx": {"Piano": 0, "Boy": 2, "Tennis ball": 0, "Van": 0, "Football": 0, "Beer": 0, "Camera": 0, "Suitcase": 0, "Man": 2, "Bench": 0, "Dolphin": 1, "Motorcycle": 0, "Mug": 0, "Tennis racket": 0, "Drum": 0, "Spoon": 0, "Horse": 1, "Surfboard": 0, "Bicycle": 0, "Knife": 0, "Rugby ball": 0, "Woman": 2, "Handbag": 0, "Microwave oven": 0, "Flute": 0, "Girl": 2, "Taxi": 0, "Hamster": 1, "Wine glass": 0, "Backpack": 0, "Racket": 0, "Table": 0, "Pretzel": 0, "Bed": 0, "Snowboard": 0, "Car": 0, "Chair": 0, "Microphone": 0, "Coffee cup": 0, "Table tennis racket": 0, "Bottle": 0, "Guitar": 0, "Desk": 0, "Ski": 0, "Coffee table": 0, "Dog": 1, "Cat": 1, "Chopsticks": 0, "Elephant": 1, "Mobile phone": 0, "Monkey": 1, "Snake": 1, "Sofa bed": 0, "Violin": 0, "Fork": 0, "Oven": 0, "Briefcase": 0, "__background__": 0}, "labelidx_to_catidx": {"1": 0, "2": 2, "3": 0, "4": 0, "5": 0, "6": 0, "7": 0, "8": 0, "9": 2, "10": 0, "11": 1, "12": 0, "13": 0, "14": 0, "15": 0, "16": 0, "17": 1, "18": 0, "19": 0, "20": 0, "21": 0, "22": 2, "23": 0, "24": 0, "25": 0, "26": 2, "27": 0, "28": 1, "29": 0, "30": 0, "31": 0, "32": 0, "33": 0, "34": 0, "35": 0, "36": 0, "37": 0, "38": 0, "39": 0, "40": 0, "41": 0, "42": 0, "43": 0, "44": 0, "45": 0, "46": 1, "47": 1, "48": 0, "49": 1, "50": 0, "51": 1, "52": 1, "53": 0, "54": 0, "55": 0, "56": 0, "57": 0, "0": 0}, "idx_to_label": {"1": "Piano", "2": "Boy", "3": "Tennis ball", "4": "Van", "5": "Football", "6": "Beer", "7": "Camera", "8": "Suitcase", "9": "Man", "10": "Bench", "11": "Dolphin", "12": "Motorcycle", "13": "Mug", "14": "Tennis racket", "15": "Drum", "16": "Spoon", "17": "Horse", "18": "Surfboard", "19": "Bicycle", "20": "Knife", "21": "Rugby ball", "22": "Woman", "23": "Handbag", "24": "Microwave oven", "25": "Flute", "26": "Girl", "27": "Taxi", "28": "Hamster", "29": "Wine glass", "30": "Backpack", "31": "Racket", "32": "Table", "33": "Pretzel", "34": "Bed", "35": "Snowboard", "36": "Car", "37": "Chair", "38": "Microphone", "39": "Coffee cup", "40": "Table tennis racket", "41": "Bottle", "42": "Guitar", "43": "Desk", "44": "Ski", "45": "Coffee table", "46": "Dog", "47": "Cat", "48": "Chopsticks", "49": "Elephant", "50": "Mobile phone", "51": "Monkey", "52": "Snake", "53": "Sofa bed", "54": "Violin", "55": "Fork", "56": "Oven", "57": "Briefcase", "0": "__background__"}, "label_to_idx": {"Piano": 1, "Boy": 2, "Tennis ball": 3, "Van": 4, "Football": 5, "Beer": 6, "Camera": 7, "Suitcase": 
8, "Man": 9, "Bench": 10, "Dolphin": 11, "Motorcycle": 12, "Mug": 13, "Tennis racket": 14, "Drum": 15, "Spoon": 16, "Horse": 17, "Surfboard": 18, "Bicycle": 19, "Knife": 20, "Rugby ball": 21, "Woman": 22, "Handbag": 23, "Microwave oven": 24, "Flute": 25, "Girl": 26, "Taxi": 27, "Hamster": 28, "Wine glass": 29, "Backpack": 30, "Racket": 31, "Table": 32, "Pretzel": 33, "Bed": 34, "Snowboard": 35, "Car": 36, "Chair": 37, "Microphone": 38, "Coffee cup": 39, "Table tennis racket": 40, "Bottle": 41, "Guitar": 42, "Desk": 43, "Ski": 44, "Coffee table": 45, "Dog": 46, "Cat": 47, "Chopsticks": 48, "Elephant": 49, "Mobile phone": 50, "Monkey": 51, "Snake": 52, "Sofa bed": 53, "Violin": 54, "Fork": 55, "Oven": 56, "Briefcase": 57, "__background__": 0}, "catidx_labelgroup": {"0": [1, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 15, 16, 18, 19, 20, 21, 23, 24, 25, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 50, 53, 54, 55, 56, 57, 0], "1": [11, 17, 28, 46, 47, 49, 51, 52], "2": [2, 9, 22, 26]}} -------------------------------------------------------------------------------- /hetsgg/engine/bbox_aug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as TT 3 | 4 | from hetsgg.config import cfg 5 | from hetsgg.data import transforms as T 6 | from hetsgg.structures.image_list import to_image_list 7 | from hetsgg.structures.bounding_box import BoxList 8 | from hetsgg.modeling.roi_heads.box_head.inference import make_roi_box_post_processor 9 | 10 | 11 | def im_detect_bbox_aug(model, images, device): 12 | # Collect detections computed under different transformations 13 | boxlists_ts = [] 14 | for _ in range(len(images)): 15 | boxlists_ts.append([]) 16 | 17 | def add_preds_t(boxlists_t): 18 | for i, boxlist_t in enumerate(boxlists_t): 19 | if len(boxlists_ts[i]) == 0: 20 | # The first one is identity transform, no need to resize the boxlist 21 | boxlists_ts[i].append(boxlist_t) 22 | else: 23 | # Resize the boxlist as the first one 24 | boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) 25 | 26 | # Compute detections for the original image (identity transform) 27 | boxlists_i = im_detect_bbox( 28 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 29 | ) 30 | add_preds_t(boxlists_i) 31 | 32 | # Perform detection on the horizontally flipped image 33 | if cfg.TEST.BBOX_AUG.H_FLIP: 34 | boxlists_hf = im_detect_bbox_hflip( 35 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 36 | ) 37 | add_preds_t(boxlists_hf) 38 | 39 | # Compute detections at different scales 40 | for scale in cfg.TEST.BBOX_AUG.SCALES: 41 | max_size = cfg.TEST.BBOX_AUG.MAX_SIZE 42 | boxlists_scl = im_detect_bbox_scale( 43 | model, images, scale, max_size, device 44 | ) 45 | add_preds_t(boxlists_scl) 46 | 47 | if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: 48 | boxlists_scl_hf = im_detect_bbox_scale( 49 | model, images, scale, max_size, device, hflip=True 50 | ) 51 | add_preds_t(boxlists_scl_hf) 52 | 53 | # Merge boxlists detected by different bbox aug params 54 | boxlists = [] 55 | for i, boxlist_ts in enumerate(boxlists_ts): 56 | bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) 57 | scores = torch.cat([boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) 58 | boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) 59 | boxlist.add_field('scores', scores) 60 | boxlists.append(boxlist) 61 | 62 | # Apply NMS and limit the final detections 63 | results = [] 64 | 
post_processor = make_roi_box_post_processor(cfg) 65 | for boxlist in boxlists: 66 | results.append(post_processor.filter_results(boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)) 67 | 68 | return results 69 | 70 | 71 | def im_detect_bbox(model, images, target_scale, target_max_size, device): 72 | """ 73 | Performs bbox detection on the original image. 74 | """ 75 | transform = TT.Compose([ 76 | T.Resize(target_scale, target_max_size), 77 | TT.ToTensor(), 78 | T.Normalize( 79 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 80 | ) 81 | ]) 82 | images = [transform(image) for image in images] 83 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 84 | return model(images.to(device)) 85 | 86 | 87 | def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device): 88 | """ 89 | Performs bbox detection on the horizontally flipped image. 90 | Function signature is the same as for im_detect_bbox. 91 | """ 92 | transform = TT.Compose([ 93 | T.Resize(target_scale, target_max_size), 94 | TT.RandomHorizontalFlip(1.0), 95 | TT.ToTensor(), 96 | T.Normalize( 97 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 98 | ) 99 | ]) 100 | images = [transform(image) for image in images] 101 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 102 | boxlists = model(images.to(device)) 103 | 104 | # Invert the detections computed on the flipped image 105 | boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists] 106 | return boxlists_inv 107 | 108 | 109 | def im_detect_bbox_scale(model, images, target_scale, target_max_size, device, hflip=False): 110 | """ 111 | Computes bbox detections at the given scale. 112 | Returns predictions in the scaled image space. 113 | """ 114 | if hflip: 115 | boxlists_scl = im_detect_bbox_hflip(model, images, target_scale, target_max_size, device) 116 | else: 117 | boxlists_scl = im_detect_bbox(model, images, target_scale, target_max_size, device) 118 | return boxlists_scl 119 | -------------------------------------------------------------------------------- /hetsgg/modeling/matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Matcher(object): 5 | """ 6 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 7 | element. Each predicted element will have exactly zero or one matches; each 8 | ground-truth element may be assigned to zero or more predicted elements. 9 | 10 | Matching is based on the MxN match_quality_matrix, that characterizes how well 11 | each (ground-truth, predicted)-pair match. For example, if the elements are 12 | boxes, the matrix may contain box IoU overlap values. 13 | 14 | The matcher returns a tensor of size N containing the index of the ground-truth 15 | element m that matches to prediction n. If there is no match, a negative value 16 | is returned. 17 | """ 18 | 19 | BELOW_LOW_THRESHOLD = -1 20 | BETWEEN_THRESHOLDS = -2 21 | 22 | def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False): 23 | """ 24 | Args: 25 | high_threshold (float): quality values greater than or equal to 26 | this value are candidate matches. 
27 | low_threshold (float): a lower quality threshold used to stratify 28 | matches into three levels: 29 | 1) matches >= high_threshold 30 | 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 31 | 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) 32 | allow_low_quality_matches (bool): if True, produce additional matches 33 | for predictions that have only low-quality match candidates. See 34 | set_low_quality_matches_ for more details. 35 | """ 36 | assert low_threshold <= high_threshold 37 | self.high_threshold = high_threshold 38 | self.low_threshold = low_threshold 39 | self.allow_low_quality_matches = allow_low_quality_matches 40 | 41 | def __call__(self, match_quality_matrix): 42 | """ 43 | Args: 44 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 45 | pairwise quality between M ground-truth elements and N predicted elements. 46 | 47 | Returns: 48 | matches (Tensor[int64]): an N tensor where N[i] is a matched gt in 49 | [0, M - 1] or a negative value indicating that prediction i could not 50 | be matched. 51 | """ 52 | if match_quality_matrix.numel() == 0: 53 | # empty targets or proposals not supported during training 54 | if match_quality_matrix.shape[0] == 0: 55 | raise ValueError( 56 | "No ground-truth boxes available for one of the images " 57 | "during training") 58 | else: 59 | raise ValueError( 60 | "No proposal boxes available for one of the images " 61 | "during training") 62 | 63 | # match_quality_matrix is M (gt) x N (predicted) 64 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 65 | matched_vals, matches = match_quality_matrix.max(dim=0) 66 | if self.allow_low_quality_matches: 67 | all_matches = matches.clone() 68 | 69 | # Assign candidate matches with low quality to negative (unassigned) values 70 | below_low_threshold = matched_vals < self.low_threshold 71 | between_thresholds = (matched_vals >= self.low_threshold) & ( 72 | matched_vals < self.high_threshold 73 | ) 74 | matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD 75 | matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS 76 | 77 | if self.allow_low_quality_matches: 78 | self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) 79 | 80 | return matches 81 | 82 | def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): 83 | """ 84 | Produce additional matches for predictions that have only low-quality matches. 85 | Specifically, for each ground-truth find the set of predictions that have 86 | maximum overlap with it (including ties); for each prediction in that set, if 87 | it is unmatched, then match it to the ground-truth with which it has the highest 88 | quality value. 
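        Illustrative example (editorial addition, not in the original source):
        with high_threshold=0.7, low_threshold=0.3 and the 2x2 (gt x pred)
        quality matrix [[0.25, 0.10], [0.10, 0.60]], thresholding alone would
        leave both predictions unmatched (-1 and -2 respectively); this method
        restores matches = [0, 1], since each prediction is the best available
        candidate for one of the ground truths.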
89 |         """
90 |         # For each gt, find the prediction with which it has highest quality
91 |         highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
92 |         # Find highest quality match available, even if it is low, including ties
93 |         gt_pred_pairs_of_highest_quality = torch.nonzero(
94 |             match_quality_matrix == highest_quality_foreach_gt[:, None]
95 |         )
96 | 
97 | 
98 |         pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]
99 |         matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
100 | 
--------------------------------------------------------------------------------
/hetsgg/data/datasets/evaluation/vg/vg_stage_eval_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | from hetsgg.utils.miscellaneous import intersect_2d
5 | 
6 | 
7 | def boxlist_iou(boxlist1, boxlist2, to_cuda=True):
8 | 
9 |     if boxlist1.size != boxlist2.size:
10 |         raise RuntimeError(
11 |             "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2))
12 | 
13 |     N = len(boxlist1)
14 |     M = len(boxlist2)
15 | 
16 |     if to_cuda:
17 |         if boxlist1.bbox.device.type != 'cuda':
18 |             boxlist1.bbox = boxlist1.bbox.cuda()
19 |         if boxlist2.bbox.device.type != 'cuda':
20 |             boxlist2.bbox = boxlist2.bbox.cuda()
21 | 
22 |     box1 = boxlist1.bbox
23 |     box2 = boxlist2.bbox
24 | 
25 |     area1 = boxlist1.area()
26 |     area2 = boxlist2.area()
27 | 
28 |     lt = torch.max(box1[:, None, :2], box2[:, :2])  # [N,M,2]
29 |     rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # [N,M,2]
30 | 
31 |     TO_REMOVE = 1
32 | 
33 |     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,2]
34 |     inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
35 | 
36 |     iou = inter / (area1[:, None] + area2 - inter)
37 |     return iou
38 | 
39 | 
40 | def intersect_2d_torch_tensor(x1, x2):
41 |     return torch.from_numpy(intersect_2d(x1.numpy(), x2.numpy()))
42 | 
43 | 
44 | def dump_hit_indx_dict_to_tensor(pred_pair_mat, gt_box_hit_idx_dict):
45 |     """
46 |     To compare the predictions with the ground truth easily, we expand the
47 |     N-to-M box match results into an array.
48 |     Given the relationship prediction pair matrix, we expand gt_box_hit_idx_dict
49 |     by taking the full cross product of the hit gt box indices of each prediction pair.
50 |     :param pred_pair_mat: relationship prediction pair matrix
51 |     :param gt_box_hit_idx_dict: the hit gt box indices of each prediction box
52 |     :return:
53 |         to_cmp_pair_mat: expanded relationship pair result (N, 2), storing the gt box indices.
54 |             N is larger than the initial number of prediction pairs.
55 |         initial_pred_idx_seg: segment marker for each prediction pair. A pair that
56 |             hits multiple detection gts expands to more than one row, so this marks
57 |             which rows refer to the same initial prediction.
58 |     """
59 |     to_cmp_pair_mat = []
60 |     initial_pred_idx_seg = []
61 |     # write the expanded results into the pair matrix
62 |     for pred_idx, pred_pair in enumerate(pred_pair_mat):
63 |         sub_pred_hit_idx_set = gt_box_hit_idx_dict[pred_pair[0].item()]
64 |         obj_pred_hit_idx_set = gt_box_hit_idx_dict[pred_pair[1].item()]
65 |         # expand each prediction pair by the full combination of its hit gt indices
66 |         for each_sub_hit_idx in sub_pred_hit_idx_set:
67 |             for each_obj_hit_idx in obj_pred_hit_idx_set:
68 |                 to_cmp_pair_mat.append([each_sub_hit_idx, each_obj_hit_idx])
69 |                 initial_pred_idx_seg.append(pred_idx)  # one segment id per expanded row
70 |     if len(to_cmp_pair_mat) == 0:
71 |         to_cmp_pair_mat = torch.zeros((0, 2), dtype=torch.int64)
72 |     else:
73 |         to_cmp_pair_mat = torch.from_numpy(np.array(to_cmp_pair_mat, dtype=np.int64))
74 | 
75 |     initial_pred_idx_seg = torch.from_numpy(np.array(initial_pred_idx_seg, dtype=np.int64))
76 |     return to_cmp_pair_mat, initial_pred_idx_seg
77 | 
78 | 
79 | LONGTAIL_CATE_IDS_DICT = {
80 |     'head': [31, 20, 22, 30, 48],
81 |     'body': [29, 50, 1, 21, 8, 43, 40, 49, 41, 23, 7, 6, 19, 33, 16, 38],
82 |     'tail': [11, 14, 46, 37, 13, 24, 4, 47, 5, 10, 9, 34, 3, 25, 17, 35, 42, 27, 12, 28,
83 |              39, 36, 2, 15, 44, 32, 26, 18, 45]
84 | }
85 | 
86 | LONGTAIL_CATE_IDS_QUERY = {}
87 | for long_name, cate_id in LONGTAIL_CATE_IDS_DICT.items():
88 |     for each_cate_id in cate_id:
89 |         LONGTAIL_CATE_IDS_QUERY[each_cate_id] = long_name
90 | 
91 | PREDICATE_CLUSTER = [[50, 20, 9], [22, 48, 49], [31], [31, 41, 1], [31, 30]]
92 | ENTITY_CLUSTER = [[91, 149, 53, 78, 20, 79, 90, 56, 68]]
93 | 
94 | 
95 | def get_cluster_id(cluster, cate_id):
96 |     for idx, each in enumerate(cluster):
97 |         if cate_id in each:
98 |             return each[0]
99 |     return -1
100 | 
101 | 
102 | def transform_cateid_into_cluster_id(cate_list, cluster):
103 |     for idx in range(len(cate_list)):
104 |         cluster_id = get_cluster_id(cluster, cate_list[idx].item())
105 | 
106 |         if cluster_id != -1:
107 |             cate_list[idx] = cluster_id
108 |     return cate_list
109 | 
110 | 
111 | def trans_cluster_label(pred_pred_cate_list, gt_pred_cate_list, cluster):
112 |     """
113 |     Transform category labels into cluster labels to avoid label overlapping.
114 |     :param pred_pred_cate_list: predicted predicate category labels
115 |     :param gt_pred_cate_list: ground-truth predicate category labels
116 |     :return: both lists with each category id mapped to its cluster's first id
117 |     """
118 |     cluster_ref_pred_cate = transform_cateid_into_cluster_id(pred_pred_cate_list, cluster)
119 |     cluster_ref_gt_cate = transform_cateid_into_cluster_id(gt_pred_cate_list, cluster)
120 | 
121 |     return cluster_ref_pred_cate, cluster_ref_gt_cate
122 | 
--------------------------------------------------------------------------------
/hetsgg/modeling/roi_heads/mask_head/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import functional as F
3 | 
4 | from hetsgg.layers import smooth_l1_loss
5 | from hetsgg.modeling.matcher import Matcher
6 | from hetsgg.structures.boxlist_ops import boxlist_iou
7 | from hetsgg.modeling.utils import cat
8 | 
9 | 
10 | def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
11 | 
12 |     masks = []
13 |     M = discretization_size
14 |     device = proposals.bbox.device
15 |     proposals = proposals.convert("xyxy")
16 |     assert segmentation_masks.size == proposals.size, "{}, {}".format(
17 |         segmentation_masks, proposals
18 |     )
19 | 
20 |     proposals = proposals.bbox.to(torch.device("cpu"))
21 |     for segmentation_mask, proposal in zip(segmentation_masks, proposals):
22 |         # crop the masks, resize them to the desired resolution and
23 |         # then convert them to the tensor representation.
24 |         cropped_mask = segmentation_mask.crop(proposal)
25 |         scaled_mask = cropped_mask.resize((M, M))
26 |         mask = scaled_mask.get_mask_tensor()
27 |         masks.append(mask)
28 |     if len(masks) == 0:
29 |         return torch.empty(0, dtype=torch.float32, device=device)
30 |     return torch.stack(masks, dim=0).to(device, dtype=torch.float32)
31 | 
32 | 
33 | class MaskRCNNLossComputation(object):
34 |     def __init__(self, proposal_matcher, discretization_size):
35 |         """
36 |         Arguments:
37 |             proposal_matcher (Matcher)
38 |             discretization_size (int)
39 |         """
40 |         self.proposal_matcher = proposal_matcher
41 |         self.discretization_size = discretization_size
42 | 
43 |     def match_targets_to_proposals(self, proposal, target):
44 |         match_quality_matrix = boxlist_iou(target, proposal)
45 |         matched_idxs = self.proposal_matcher(match_quality_matrix)
46 |         # Mask RCNN needs "labels" and "masks" fields for creating the targets
47 |         target = target.copy_with_fields(["labels", "masks"])
48 | 
49 |         matched_targets = target[matched_idxs.clamp(min=0)]
50 |         matched_targets.add_field("matched_idxs", matched_idxs)
51 |         return matched_targets
52 | 
53 |     def prepare_targets(self, proposals, targets):
54 |         labels = []
55 |         masks = []
56 |         for proposals_per_image, targets_per_image in zip(proposals, targets):
57 |             matched_targets = self.match_targets_to_proposals(
58 |                 proposals_per_image, targets_per_image
59 |             )
60 |             matched_idxs = matched_targets.get_field("matched_idxs")
61 | 
62 |             labels_per_image = matched_targets.get_field("labels")
63 |             labels_per_image = labels_per_image.to(dtype=torch.int64)
64 | 
65 |             neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
66 |             labels_per_image[neg_inds] = 0
67 | 
68 |             # mask scores are only computed on positive samples
69 |             positive_inds = torch.nonzero(labels_per_image > 0).squeeze(1)
70 | 
71 |             segmentation_masks = matched_targets.get_field("masks")
72 |             segmentation_masks = segmentation_masks[positive_inds]
73 | 
74 |             positive_proposals = proposals_per_image[positive_inds]
75 | 
76 |             masks_per_image = project_masks_on_boxes(
77 |                 segmentation_masks, positive_proposals, self.discretization_size
78 |             )
79 | 
80 |             labels.append(labels_per_image)
81 |             masks.append(masks_per_image)
82 | 
83 |         return labels, masks
84 | 
85 |     def __call__(self, proposals, mask_logits, targets):
86 |         """
87 |         Arguments:
88 |             proposals (list[BoxList])
89 |             mask_logits (Tensor)
90 |             targets (list[BoxList])
91 | 
92 |         Return:
93 |             mask_loss (Tensor): scalar tensor containing the loss
94 |         """
95 |         labels, mask_targets = self.prepare_targets(proposals, targets)
96 | 
97 |         labels = cat(labels, dim=0)
98 |         mask_targets = cat(mask_targets, dim=0)
99 | 
100 |         positive_inds = torch.nonzero(labels > 0).squeeze(1)
101 |         labels_pos = labels[positive_inds]
102 | 
103 |         # torch.mean (in binary_cross_entropy_with_logits) doesn't
104 |         # accept empty tensors, so handle it separately
105 |         if mask_targets.numel() == 0:
106 |             return mask_logits.sum() * 0  # zero-valued loss that keeps the graph connected
107 | 
108 |         mask_loss = F.binary_cross_entropy_with_logits(
109 |             mask_logits[positive_inds, labels_pos], mask_targets
110 |         )
111 |         return mask_loss
112 | 
113 | 
114 | def make_roi_mask_loss_evaluator(cfg):
115 |     matcher = Matcher(
116 |         cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
117 |         cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
118 |         allow_low_quality_matches=False,
119 |     )
120 | 
121 |     loss_evaluator = MaskRCNNLossComputation(
122 |         matcher,
cfg.MODEL.ROI_MASK_HEAD.RESOLUTION 123 | ) 124 | 125 | return loss_evaluator 126 | -------------------------------------------------------------------------------- /hetsgg/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import torch 4 | from torch.utils.data.sampler import BatchSampler 5 | from torch.utils.data.sampler import Sampler 6 | 7 | 8 | class GroupedBatchSampler(BatchSampler): 9 | """ 10 | Wraps another sampler to yield a mini-batch of indices. 11 | It enforces that elements from the same group should appear in groups of batch_size. 12 | It also tries to provide mini-batches which follows an ordering which is 13 | as close as possible to the ordering from the original sampler. 14 | 15 | Arguments: 16 | sampler (Sampler): Base sampler. 17 | batch_size (int): Size of mini-batch. 18 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 19 | size is less than ``batch_size`` 20 | 21 | """ 22 | 23 | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): 24 | if not isinstance(sampler, Sampler): 25 | raise ValueError( 26 | "sampler should be an instance of " 27 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 28 | ) 29 | self.sampler = sampler 30 | self.group_ids = torch.as_tensor(group_ids) 31 | assert self.group_ids.dim() == 1 32 | self.batch_size = batch_size 33 | self.drop_uneven = drop_uneven 34 | 35 | self.groups = torch.unique(self.group_ids).sort(0)[0] 36 | 37 | self._can_reuse_batches = False 38 | 39 | def _prepare_batches(self): 40 | dataset_size = len(self.group_ids) 41 | # get the sampled indices from the sampler 42 | sampled_ids = torch.as_tensor(list(self.sampler)) 43 | # potentially not all elements of the dataset were sampled 44 | # by the sampler (e.g., DistributedSampler). 45 | # construct a tensor which contains -1 if the element was 46 | # not sampled, and a non-negative number indicating the 47 | # order where the element was sampled. 48 | # for example. if sampled_ids = [3, 1] and dataset_size = 5, 49 | # the order is [-1, 1, -1, 0, -1] 50 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 51 | order[sampled_ids] = torch.arange(len(sampled_ids)) 52 | 53 | # get a mask with the elements that were sampled 54 | mask = order >= 0 55 | 56 | # find the elements that belong to each individual cluster 57 | clusters = [(self.group_ids == i) & mask for i in self.groups] 58 | # get relative order of the elements inside each cluster 59 | # that follows the order from the sampler 60 | relative_order = [order[cluster] for cluster in clusters] 61 | # with the relative order, find the absolute order in the 62 | # sampled space 63 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 64 | # permute each cluster so that they follow the order from 65 | # the sampler 66 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 67 | 68 | # splits each cluster in batch_size, and merge as a list of tensors 69 | splits = [c.split(self.batch_size) for c in permuted_clusters] 70 | merged = tuple(itertools.chain.from_iterable(splits)) 71 | 72 | # now each batch internally has the right order, but 73 | # they are grouped by clusters. Find the permutation between 74 | # different batches that brings them as close as possible to 75 | # the order that we have in the sampler. 
For that, we will consider the 76 | # ordering as coming from the first element of each batch, and sort 77 | # correspondingly 78 | first_element_of_batch = [t[0].item() for t in merged] 79 | # get and inverse mapping from sampled indices and the position where 80 | # they occur (as returned by the sampler) 81 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 82 | # from the first element in each batch, get a relative ordering 83 | first_index_of_batch = torch.as_tensor( 84 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 85 | ) 86 | 87 | # permute the batches so that they approximately follow the order 88 | # from the sampler 89 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 90 | # finally, permute the batches 91 | batches = [merged[i].tolist() for i in permutation_order] 92 | 93 | if self.drop_uneven: 94 | kept = [] 95 | for batch in batches: 96 | if len(batch) == self.batch_size: 97 | kept.append(batch) 98 | batches = kept 99 | return batches 100 | 101 | def __iter__(self): 102 | if self._can_reuse_batches: 103 | batches = self._batches 104 | self._can_reuse_batches = False 105 | else: 106 | batches = self._prepare_batches() 107 | self._batches = batches 108 | return iter(batches) 109 | 110 | def __len__(self): 111 | if not hasattr(self, "_batches"): 112 | self._batches = self._prepare_batches() 113 | self._can_reuse_batches = True 114 | return len(self._batches) 115 | -------------------------------------------------------------------------------- /hetsgg/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #endif 7 | 8 | 9 | // Interface for Python 10 | int deform_conv_forward( 11 | at::Tensor input, 12 | at::Tensor weight, 13 | at::Tensor offset, 14 | at::Tensor output, 15 | at::Tensor columns, 16 | at::Tensor ones, 17 | int kW, 18 | int kH, 19 | int dW, 20 | int dH, 21 | int padW, 22 | int padH, 23 | int dilationW, 24 | int dilationH, 25 | int group, 26 | int deformable_group, 27 | int im2col_step) 28 | { 29 | if (input.type().is_cuda()) { 30 | #ifdef WITH_CUDA 31 | return deform_conv_forward_cuda( 32 | input, weight, offset, output, columns, ones, 33 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 34 | group, deformable_group, im2col_step 35 | ); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | 43 | 44 | int deform_conv_backward_input( 45 | at::Tensor input, 46 | at::Tensor offset, 47 | at::Tensor gradOutput, 48 | at::Tensor gradInput, 49 | at::Tensor gradOffset, 50 | at::Tensor weight, 51 | at::Tensor columns, 52 | int kW, 53 | int kH, 54 | int dW, 55 | int dH, 56 | int padW, 57 | int padH, 58 | int dilationW, 59 | int dilationH, 60 | int group, 61 | int deformable_group, 62 | int im2col_step) 63 | { 64 | if (input.type().is_cuda()) { 65 | #ifdef WITH_CUDA 66 | return deform_conv_backward_input_cuda( 67 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 68 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 69 | group, deformable_group, im2col_step 70 | ); 71 | #else 72 | AT_ERROR("Not compiled with GPU support"); 73 | #endif 74 | } 75 | AT_ERROR("Not implemented on the CPU"); 76 | } 77 | 78 | 79 | int deform_conv_backward_parameters( 80 | at::Tensor input, 81 | at::Tensor offset, 82 | at::Tensor gradOutput, 83 | at::Tensor gradWeight, // at::Tensor gradBias, 84 | at::Tensor 
columns,
85 |     at::Tensor ones,
86 |     int kW,
87 |     int kH,
88 |     int dW,
89 |     int dH,
90 |     int padW,
91 |     int padH,
92 |     int dilationW,
93 |     int dilationH,
94 |     int group,
95 |     int deformable_group,
96 |     float scale,
97 |     int im2col_step)
98 | {
99 |     if (input.type().is_cuda()) {
100 | #ifdef WITH_CUDA
101 |         return deform_conv_backward_parameters_cuda(
102 |             input, offset, gradOutput, gradWeight, columns, ones,
103 |             kW, kH, dW, dH, padW, padH, dilationW, dilationH,
104 |             group, deformable_group, scale, im2col_step
105 |         );
106 | #else
107 |         AT_ERROR("Not compiled with GPU support");
108 | #endif
109 |     }
110 |     AT_ERROR("Not implemented on the CPU");
111 | }
112 | 
113 | 
114 | void modulated_deform_conv_forward(
115 |     at::Tensor input,
116 |     at::Tensor weight,
117 |     at::Tensor bias,
118 |     at::Tensor ones,
119 |     at::Tensor offset,
120 |     at::Tensor mask,
121 |     at::Tensor output,
122 |     at::Tensor columns,
123 |     int kernel_h,
124 |     int kernel_w,
125 |     const int stride_h,
126 |     const int stride_w,
127 |     const int pad_h,
128 |     const int pad_w,
129 |     const int dilation_h,
130 |     const int dilation_w,
131 |     const int group,
132 |     const int deformable_group,
133 |     const bool with_bias)
134 | {
135 |     if (input.type().is_cuda()) {
136 | #ifdef WITH_CUDA
137 |         return modulated_deform_conv_cuda_forward(
138 |             input, weight, bias, ones, offset, mask, output, columns,
139 |             kernel_h, kernel_w, stride_h, stride_w,
140 |             pad_h, pad_w, dilation_h, dilation_w,
141 |             group, deformable_group, with_bias
142 |         );
143 | #else
144 |         AT_ERROR("Not compiled with GPU support");
145 | #endif
146 |     }
147 |     AT_ERROR("Not implemented on the CPU");
148 | }
149 | 
150 | 
151 | void modulated_deform_conv_backward(
152 |     at::Tensor input,
153 |     at::Tensor weight,
154 |     at::Tensor bias,
155 |     at::Tensor ones,
156 |     at::Tensor offset,
157 |     at::Tensor mask,
158 |     at::Tensor columns,
159 |     at::Tensor grad_input,
160 |     at::Tensor grad_weight,
161 |     at::Tensor grad_bias,
162 |     at::Tensor grad_offset,
163 |     at::Tensor grad_mask,
164 |     at::Tensor grad_output,
165 |     int kernel_h,
166 |     int kernel_w,
167 |     int stride_h,
168 |     int stride_w,
169 |     int pad_h,
170 |     int pad_w,
171 |     int dilation_h,
172 |     int dilation_w,
173 |     int group,
174 |     int deformable_group,
175 |     const bool with_bias)
176 | {
177 |     if (input.type().is_cuda()) {
178 | #ifdef WITH_CUDA
179 |         return modulated_deform_conv_cuda_backward(
180 |             input, weight, bias, ones, offset, mask, columns,
181 |             grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output,
182 |             kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
183 |             group, deformable_group, with_bias
184 |         );
185 | #else
186 |         AT_ERROR("Not compiled with GPU support");
187 | #endif
188 |     }
189 |     AT_ERROR("Not implemented on the CPU");
190 | }
--------------------------------------------------------------------------------
/hetsgg/csrc/cuda/nms.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | 
4 | #include <THC/THC.h>
5 | #include <THC/THCDeviceUtils.cuh>
6 | 
7 | #include <vector>
8 | #include <iostream>
9 | 
10 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
11 | 
12 | __device__ inline float devIoU(float const * const a, float const * const b) {
13 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
14 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
15 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
16 |   float interS = width * height;
17 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
18 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
19 |   return interS / (Sa + Sb - interS);
20 | }
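// Editorial note (added): threadsPerBlock is 64, the number of bits in an
// unsigned long long. Each thread block below compares one 64-box row chunk
// against one 64-box column chunk; each thread packs the ">threshold" overlaps
// of its row box with the column chunk into a single 64-bit word, so dev_mask
// holds n_boxes * ceil(n_boxes / 64) bitmask words.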
21 | 
22 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
23 |                            const float *dev_boxes, unsigned long long *dev_mask) {
24 |   const int row_start = blockIdx.y;
25 |   const int col_start = blockIdx.x;
26 | 
27 |   // if (row_start > col_start) return;
28 | 
29 |   const int row_size =
30 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
31 |   const int col_size =
32 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
33 | 
34 |   __shared__ float block_boxes[threadsPerBlock * 5];
35 |   if (threadIdx.x < col_size) {
36 |     block_boxes[threadIdx.x * 5 + 0] =
37 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
38 |     block_boxes[threadIdx.x * 5 + 1] =
39 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
40 |     block_boxes[threadIdx.x * 5 + 2] =
41 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
42 |     block_boxes[threadIdx.x * 5 + 3] =
43 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
44 |     block_boxes[threadIdx.x * 5 + 4] =
45 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
46 |   }
47 |   __syncthreads();
48 | 
49 |   if (threadIdx.x < row_size) {
50 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
51 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
52 |     int i = 0;
53 |     unsigned long long t = 0;
54 |     int start = 0;
55 |     if (row_start == col_start) {
56 |       start = threadIdx.x + 1;
57 |     }
58 |     for (i = start; i < col_size; i++) {
59 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
60 |         t |= 1ULL << i;
61 |       }
62 |     }
63 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
64 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
65 |   }
66 | }
67 | 
68 | // boxes is a N x 5 tensor
69 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
70 |   using scalar_t = float;
71 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
72 |   auto scores = boxes.select(1, 4);
73 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
74 |   auto boxes_sorted = boxes.index_select(0, order_t);
75 | 
76 |   int boxes_num = boxes.size(0);
77 | 
78 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
79 | 
80 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
81 | 
82 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
83 | 
84 |   unsigned long long* mask_dev = NULL;
85 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
86 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
87 | 
88 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
89 | 
90 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
91 |               THCCeilDiv(boxes_num, threadsPerBlock));
92 |   dim3 threads(threadsPerBlock);
93 |   nms_kernel<<<blocks, threads>>>(boxes_num,
94 |                                   nms_overlap_thresh,
95 |                                   boxes_dev,
96 |                                   mask_dev);
97 | 
98 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
99 |   THCudaCheck(cudaMemcpy(&mask_host[0],
100 |                         mask_dev,
101 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
102 |                         cudaMemcpyDeviceToHost));
103 | 
104 |   std::vector<unsigned long long> remv(col_blocks);
105 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
106 | 
107 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
108 |   int64_t* keep_out = keep.data<int64_t>();
109 | 
110 |   int num_to_keep = 0;
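// Editorial note (added): the host-side sweep below visits boxes in descending
// score order. A box is kept only if no previously kept box has flagged it in
// mask_host; each kept box then ORs its bitmask row into remv so that the
// boxes it suppresses are skipped on later iterations.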
111 |   for (int i = 0; i < boxes_num; i++) {
112 |     int nblock = i / threadsPerBlock;
113 |     int inblock = i % threadsPerBlock;
114 | 
115 |     if (!(remv[nblock] & (1ULL << inblock))) {
116 |       keep_out[num_to_keep++] = i;
117 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
118 |       for (int j = nblock; j < col_blocks; j++) {
119 |         remv[j] |= p[j];
120 |       }
121 |     }
122 |   }
123 | 
124 |   THCudaFree(state, mask_dev);
125 |   // TODO improve this part
126 |   return std::get<0>(order_t.index({
127 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
128 |                          order_t.device(), keep.scalar_type())
129 |                      }).sort(0, false));
130 | }
131 | 
--------------------------------------------------------------------------------
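Editor's addendum: below is a minimal pure-PyTorch sketch of the greedy NMS
semantics implemented by nms_cuda above, assuming the same (N, 5) input layout
of (x1, y1, x2, y2, score) with inclusive pixel coordinates (hence the + 1 in
the area terms). The name nms_reference and its standalone form are editorial,
intended only for sanity-checking the CUDA kernel on small inputs, not as part
of the repository.

import torch


def nms_reference(boxes: torch.Tensor, iou_threshold: float) -> torch.Tensor:
    """Greedy NMS over an (N, 5) float tensor of (x1, y1, x2, y2, score).

    Returns the kept original indices in ascending order, mirroring the
    final sort in nms_cuda.
    """
    order = boxes[:, 4].sort(descending=True)[1]
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # Inclusive coordinates, matching devIoU in the CUDA kernel.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    keep = []
    suppressed = torch.zeros(boxes.size(0), dtype=torch.bool)
    for i in order.tolist():
        if suppressed[i]:
            continue
        keep.append(i)
        # IoU of the newly kept box against every box; mark the overlaps.
        w = (torch.min(x2[i], x2) - torch.max(x1[i], x1) + 1).clamp(min=0)
        h = (torch.min(y2[i], y2) - torch.max(y1[i], y1) + 1).clamp(min=0)
        inter = w * h
        iou = inter / (areas[i] + areas - inter)
        suppressed |= iou > iou_threshold
    return torch.tensor(sorted(keep), dtype=torch.long)

On a GPU build, nms_reference(boxes, 0.7) should agree with the indices kept by
nms_cuda(boxes.cuda(), 0.7) up to floating-point ties in the scores.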