├── hetsgg ├── __init__.py ├── engine │ ├── __init__.py │ ├── trainer.py │ └── bbox_aug.py ├── modeling │ ├── __init__.py │ ├── rpn │ │ ├── __init__.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ └── loss.py │ │ └── utils.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── roi_box_predictors.py │ │ │ └── loss.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── roi_mask_predictors.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ ├── mask_head.py │ │ │ └── loss.py │ │ ├── attribute_head │ │ │ ├── __init__.py │ │ │ ├── roi_attribute_predictors.py │ │ │ ├── attribute_head.py │ │ │ └── loss.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── roi_keypoint_predictors.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ ├── keypoint_head.py │ │ │ └── inference.py │ │ └── relation_head │ │ │ ├── __init__.py │ │ │ └── rel_proposal_network │ │ │ └── __init__.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── backbone │ │ ├── __init__.py │ │ ├── vgg.py │ │ ├── backbone.py │ │ └── fpn.py │ ├── utils.py │ ├── registry.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── make_layers.py │ └── matcher.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── cv2_util.py │ ├── imports.py │ ├── env.py │ ├── timer.py │ ├── registry.py │ ├── model_zoo.py │ ├── global_buffer.py │ ├── metric_logger.py │ ├── visualize_graph.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── comm.py │ └── logger.py ├── structures │ ├── __init__.py │ └── image_list.py ├── layers │ ├── dcn │ │ ├── __init__.py │ │ └── deform_pool_func.py │ ├── nms.py │ ├── entropy_loss.py │ ├── kl_div_loss.py │ ├── smooth_l1_loss.py │ ├── batch_norm.py │ ├── _utils.py │ ├── __init__.py │ ├── roi_pool.py │ ├── roi_align.py │ ├── sigmoid_focal_loss.py │ └── label_smoothing_loss.py ├── config │ └── __init__.py ├── data │ ├── __init__.py │ ├── datasets │ │ ├── evaluation │ │ │ ├── vg │ │ │ │ ├── zeroshot_triplet.pytorch │ │ │ │ ├── __init__.py │ │ │ │ └── vg_stage_eval_utils.py │ │ │ ├── voc │ │ │ │ └── __init__.py │ │ │ ├── coco │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── __init__.py │ │ ├── concat_dataset.py │ │ ├── list_dataset.py │ │ ├── coco.py │ │ └── voc.py │ ├── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py │ ├── samplers │ │ ├── __init__.py │ │ ├── iteration_based_batch_sampler.py │ │ ├── distributed.py │ │ └── grouped_batch_sampler.py │ └── collate_batch.py ├── solver │ ├── __init__.py │ └── build.py └── csrc │ ├── cpu │ ├── vision.h │ └── nms_cpu.cpp │ ├── nms.h │ ├── SigmoidFocalLoss.h │ ├── vision.cpp │ ├── ROIPool.h │ ├── ROIAlign.h │ ├── deform_pool.h │ ├── cuda │ ├── deform_pool_cuda.cu │ └── nms.cu │ └── deform_conv.h ├── hetsgg.egg-info ├── dependency_links.txt ├── top_level.txt └── PKG-INFO ├── .gitignore ├── shell ├── hetsgg_test.sh ├── hetsgg_train_sggen_oi.sh ├── hetsgg_train_sggen_vg.sh ├── hetsgg_train_sgcls_vg.sh └── hetsgg_train_predcls_vg.sh ├── tools ├── runner.py ├── cityscapes │ └── instances2dict_with_polygons.py ├── detector_pretest_net.py └── relation_test_net.py ├── setup.py └── Datasets └── OI-V4 └── Category_Type_Info.json /hetsgg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/engine/__init__.py: -------------------------------------------------------------------------------- 1 | 
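The tree above, together with the package __init__ files in this dump, fixes the toolkit's public entry points: cfg from hetsgg.config, make_data_loader from hetsgg.data, and build_detection_model from hetsgg.modeling.detector. A minimal driver sketch, assuming a placeholder config path and assuming make_data_loader keeps the maskrcnn-benchmark-style signature this codebase inherits:

import torch

from hetsgg.config import cfg
from hetsgg.data import make_data_loader
from hetsgg.modeling.detector import build_detection_model

# Placeholder path; the shell scripts below pass configs such as configs/relHetSGG_vg.yaml.
cfg.merge_from_file("configs/relHetSGG_vg.yaml")
cfg.freeze()

# The meta-architecture is looked up from cfg.MODEL.META_ARCHITECTURE (see detectors.py).
model = build_detection_model(cfg)
model.to("cuda" if torch.cuda.is_available() else "cpu")

# Assumed signature, following the maskrcnn-benchmark lineage.
train_loader = make_data_loader(cfg, is_train=True)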
-------------------------------------------------------------------------------- /hetsgg/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/structures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/layers/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /hetsgg.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | hetsgg 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/relation_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/relation_head/rel_proposal_network/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /hetsgg/data/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .build import make_data_loader, get_dataset_statistics 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors import build_detection_model 2 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone 2 | from . import fbnet 3 | -------------------------------------------------------------------------------- /hetsgg/layers/nms.py: -------------------------------------------------------------------------------- 1 | 2 | from hetsgg import _C 3 | 4 | from apex import amp 5 | 6 | nms = amp.float_function(_C.nms) 7 | 8 | 9 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/vg/zeroshot_triplet.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KanghoonYoon/hetsgg-torch/HEAD/hetsgg/data/datasets/evaluation/vg/zeroshot_triplet.pytorch -------------------------------------------------------------------------------- /hetsgg/solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import make_optimizer 2 | from .build import make_lr_scheduler 3 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau 4 | 5 | -------------------------------------------------------------------------------- /hetsgg.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 2.1 2 | Name: hetsgg 3 | Version: 0.1 4 | Summary: A Toolkit for Scene Graph Generation 5 | Home-page: 6 | Author: Anonymous 7 | License: UNKNOWN 8 | Platform: UNKNOWN 9 | 10 | UNKNOWN 11 | 12 | -------------------------------------------------------------------------------- /hetsgg/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | from .transforms import Resize 3 | from .transforms import RandomHorizontalFlip 4 | from .transforms import ToTensor 5 | from .transforms import Normalize 6 | 7 | from .build import build_transforms 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | Datasets/VG/VG-SGG-with-attri.h5 3 | Datasets/VG/VG-SGG-dicts-with-attri.json 4 | Datasets/VG/image_data.json 5 | Datasets/VG/VG_100k 6 | Datasets/Glove/*.txt 7 | Datasets/Glove/*.pt 8 | configs/ 9 | shell/ 10 | checkpoints/ 11 | apex/ 12 | cocoapi/ 13 | __pycache__ 14 | *.so -------------------------------------------------------------------------------- /hetsgg/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed import DistributedSampler 2 | from .grouped_batch_sampler import GroupedBatchSampler 3 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 4 | 5 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 6 | -------------------------------------------------------------------------------- 
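The transform classes exported by hetsgg/data/transforms/__init__.py above are composed in data/transforms/build.py further down; a hand-rolled test-time pipeline, with the cfg-driven values swapped for illustrative placeholders, would look like:

from hetsgg.data.transforms import Compose, Resize, RandomHorizontalFlip, ToTensor, Normalize

# Mirrors build_transforms(cfg, is_train=False) from build.py below; the numeric
# values here are placeholders for the cfg-driven ones.
transform = Compose(
    [
        Resize(600, 1000),          # (min_size, max_size)
        RandomHorizontalFlip(0.0),  # no flipping at test time
        ToTensor(),
        Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_bgr255=True),
    ]
)

# Each transform maps (image, target) -> (image, target), which is how
# ListDataset.__getitem__ below applies it: img, target = transform(img, target)
--------------------------------------------------------------------------------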
/hetsgg/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco import COCODataset 2 | from .voc import PascalVOCDataset 3 | from .concat_dataset import ConcatDataset 4 | from .visual_genome import VGDataset 5 | from .open_image import OIDataset 6 | 7 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "VGDataset", "OIDataset"] 8 | -------------------------------------------------------------------------------- /hetsgg/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | from .generalized_rcnn import GeneralizedRCNN 2 | 3 | 4 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 5 | 6 | 7 | def build_detection_model(cfg): 8 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 9 | return meta_arch(cfg) 10 | -------------------------------------------------------------------------------- /hetsgg/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | import PIL 2 | 3 | from torch.utils.collect_env import get_pretty_env_info 4 | 5 | 6 | def get_pil_version(): 7 | return "\n Pillow ({})".format(PIL.__version__) 8 | 9 | 10 | def collect_env_info(): 11 | env_str = get_pretty_env_info() 12 | env_str += get_pil_version() 13 | return env_str 14 | -------------------------------------------------------------------------------- /hetsgg/layers/entropy_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def entropy_loss(input, e=1e-9, reduction='sum'): 5 | assert len(input.shape) == 2 6 | loss = - (input * (input + e).log()) 7 | 8 | if reduction == 'sum': 9 | loss = loss.sum(-1) 10 | elif reduction == 'mean': 11 | loss = loss.mean(-1) 12 | 13 | return loss.mean() -------------------------------------------------------------------------------- /hetsgg/modeling/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Miscellaneous utility functions 3 | """ 4 | 5 | import torch 6 | 7 | 8 | def cat(tensors, dim=0): 9 | """ 10 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 11 | """ 12 | assert isinstance(tensors, (list, tuple)) 13 | if len(tensors) == 1: 14 | return tensors[0] 15 | return torch.cat(tensors, dim) 16 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/vg/__init__.py: -------------------------------------------------------------------------------- 1 | from .vg_eval import do_vg_evaluation 2 | 3 | 4 | def vg_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | iou_types, 11 | **_ 12 | ): 13 | return do_vg_evaluation( 14 | cfg=cfg, 15 | dataset=dataset, 16 | predictions=predictions, 17 | output_folder=output_folder, 18 | logger=logger, 19 | iou_types=iou_types, 20 | ) 21 | -------------------------------------------------------------------------------- /hetsgg/layers/kl_div_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def kl_div_loss(input, target, e=1e-9, reduction='sum'): 5 | assert len(input.shape) == 2 6 | assert len(target.shape) == 2 7 | 8 | log_target = (target + e).log() 9 | log_input = (input + e).log() 10 | 11 | loss = target.detach() * (log_target.detach() - log_input) 12 | 13 | if reduction == 'sum': 14 | loss = loss.sum(-1) 15 
| elif reduction == 'mean': 16 | loss = loss.mean(-1) 17 | 18 | return loss.mean() -------------------------------------------------------------------------------- /hetsgg/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | # TODO maybe push this to nn? 5 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 6 | """ 7 | very similar to the smooth_l1_loss from pytorch, but with 8 | the extra beta parameter 9 | """ 10 | n = torch.abs(input - target) 11 | cond = n < beta 12 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 13 | if size_average: 14 | return loss.mean() 15 | return loss.sum() 16 | 17 | 18 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(cfg, dataset, predictions, output_folder, logger, box_only, **_): 7 | if box_only: 8 | logger.warning("voc evaluation doesn't support box_only, ignored.") 9 | logger.info("performing voc evaluation, ignored iou_types.") 10 | return do_voc_evaluation( 11 | dataset=dataset, 12 | predictions=predictions, 13 | output_folder=output_folder, 14 | logger=logger, 15 | ) 16 | -------------------------------------------------------------------------------- /hetsgg/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | 5 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 6 | const at::Tensor& rois, 7 | const float spatial_scale, 8 | const int pooled_height, 9 | const int pooled_width, 10 | const int sampling_ratio); 11 | 12 | 13 | at::Tensor nms_cpu(const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const float threshold); 16 | -------------------------------------------------------------------------------- /hetsgg/modeling/registry.py: -------------------------------------------------------------------------------- 1 | 2 | from hetsgg.utils.registry import Registry 3 | 4 | BACKBONES = Registry() 5 | RPN_HEADS = Registry() 6 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 7 | ROI_BOX_PREDICTOR = Registry() 8 | ROI_ATTRIBUTE_FEATURE_EXTRACTORS = Registry() 9 | ROI_ATTRIBUTE_PREDICTOR = Registry() 10 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 11 | ROI_KEYPOINT_PREDICTOR = Registry() 12 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 13 | ROI_MASK_PREDICTOR = Registry() 14 | ROI_RELATION_FEATURE_EXTRACTORS = Registry() 15 | ROI_RELATION_PREDICTOR = Registry() 16 | RELATION_CONFIDENCE_AWARE_MODULES = Registry() 17 | -------------------------------------------------------------------------------- /hetsgg/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | 2 | import cv2 3 | 4 | 5 | def findContours(*args, **kwargs): 6 | """ 7 | Wraps cv2.findContours to maintain compatibility between versions 8 | 3 and 4 9 | 10 | Returns: 11 | contours, hierarchy 12 | """ 13 | if cv2.__version__.startswith('4'): 14 | contours, hierarchy = cv2.findContours(*args, **kwargs) 15 | elif cv2.__version__.startswith('3'): 16 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 17 | else: 18 | raise AssertionError( 19 | 'cv2 must be either version 3 or 4 to call this method') 20 | 21 | return contours, hierarchy 22 |
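The registries declared in modeling/registry.py above are consumed through the decorator-plus-lookup pattern used by the roi_heads predictors later in this dump (e.g. roi_keypoint_predictors.py). A sketch of wiring a custom box predictor into ROI_BOX_PREDICTOR; the class and the make_* helper are hypothetical stand-ins:

from torch import nn

from hetsgg.modeling import registry


@registry.ROI_BOX_PREDICTOR.register("MyBoxPredictor")  # hypothetical name
class MyBoxPredictor(nn.Module):
    def __init__(self, cfg, in_channels):
        super(MyBoxPredictor, self).__init__()
        # NUM_CLASSES is the usual maskrcnn-benchmark config key (an assumption here).
        self.cls_score = nn.Linear(in_channels, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)

    def forward(self, x):
        return self.cls_score(x)


def make_roi_box_predictor(cfg, in_channels):
    # A Registry is just a dict keyed by the registered name; this mirrors
    # make_roi_keypoint_predictor / make_roi_attribute_predictor below.
    func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR]
    return func(cfg, in_channels)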
-------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | return do_coco_evaluation( 16 | dataset=dataset, 17 | predictions=predictions, 18 | box_only=box_only, 19 | output_folder=output_folder, 20 | logger=logger, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | -------------------------------------------------------------------------------- /hetsgg/utils/imports.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | if torch._six.PY37: 4 | import importlib 5 | import importlib.util 6 | import sys 7 | 8 | 9 | def import_file(module_name, file_path, make_importable=False): 10 | spec = importlib.util.spec_from_file_location(module_name, file_path) 11 | module = importlib.util.module_from_spec(spec) 12 | spec.loader.exec_module(module) 13 | if make_importable: 14 | sys.modules[module_name] = module 15 | return module 16 | else: 17 | import imp 18 | 19 | def import_file(module_name, file_path, make_importable=None): 20 | module = imp.load_source(module_name, file_path) 21 | return module 22 | -------------------------------------------------------------------------------- /hetsgg/csrc/nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #endif 7 | 8 | 9 | at::Tensor nms(const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float threshold) { 12 | 13 | if (dets.type().is_cuda()) { 14 | #ifdef WITH_CUDA 15 | // TODO raise error if not compiled with CUDA 16 | if (dets.numel() == 0) 17 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 18 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 19 | return nms_cuda(b, threshold); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | 25 | at::Tensor result = nms_cpu(dets, scores, threshold); 26 | return result; 27 | } 28 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | import bisect 2 | 3 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 4 | 5 | 6 | class ConcatDataset(_ConcatDataset): 7 | """ 8 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 9 | method for querying the sizes of the image 10 | """ 11 | 12 | def get_idxs(self, idx): 13 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 14 | if dataset_idx == 0: 15 | sample_idx = idx 16 | else: 17 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 18 | return dataset_idx, sample_idx 19 | 20 | def get_img_info(self, idx): 21 | dataset_idx, sample_idx = self.get_idxs(idx) 22 | return self.datasets[dataset_idx].get_img_info(sample_idx) 23 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/vgg.py: -------------------------------------------------------------------------------- 1 | 2 | from 
collections import namedtuple 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | from torch import nn 7 | 8 | import torchvision.models as models 9 | from hetsgg.layers import FrozenBatchNorm2d 10 | from hetsgg.layers import Conv2d 11 | from hetsgg.layers import DFConv2d 12 | from hetsgg.modeling.make_layers import group_norm 13 | from hetsgg.utils.registry import Registry 14 | 15 | 16 | class VGG16(nn.Module): 17 | def __init__(self, cfg): 18 | super(VGG16, self).__init__() 19 | vgg = models.vgg16(pretrained=True) 20 | self.conv_body = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 21 | 22 | def forward(self, x): 23 | output = [] 24 | output.append(self.conv_body(x)) 25 | return output 26 | 27 | -------------------------------------------------------------------------------- /hetsgg/utils/env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from hetsgg.utils.imports import import_file 4 | 5 | 6 | def setup_environment(): 7 | 8 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 9 | if custom_module_path: 10 | setup_custom_environment(custom_module_path) 11 | else: 12 | pass 13 | 14 | 15 | def setup_custom_environment(custom_module_path): 16 | 17 | module = import_file("hetsgg.utils.env.custom_module", custom_module_path) 18 | assert hasattr(module, "setup_environment") and callable( 19 | module.setup_environment 20 | ), ( 21 | "Custom environment module defined in {} does not have the " 22 | "required callable attribute 'setup_environment'." 23 | ).format( 24 | custom_module_path 25 | ) 26 | module.setup_environment() 27 | 28 | 29 | setup_environment() 30 | -------------------------------------------------------------------------------- /hetsgg/engine/trainer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | 4 | from hetsgg.utils.comm import get_world_size 5 | 6 | 7 | def reduce_loss_dict(loss_dict): 8 | """ 9 | Reduce the loss dictionary from all processes so that process with rank 10 | 0 has the averaged results. Returns a dict with the same fields as 11 | loss_dict, after reduction. 12 | """ 13 | world_size = get_world_size() 14 | if world_size < 2: 15 | return loss_dict 16 | with torch.no_grad(): 17 | loss_names = [] 18 | all_losses = [] 19 | for k in sorted(loss_dict.keys()): 20 | loss_names.append(k) 21 | all_losses.append(loss_dict[k]) 22 | all_losses = torch.stack(all_losses, dim=0) 23 | dist.reduce(all_losses, dst=0) 24 | if dist.get_rank() == 0: 25 | all_losses /= world_size 26 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 27 | return reduced_losses 28 | -------------------------------------------------------------------------------- /hetsgg/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | from hetsgg.structures.image_list import to_image_list 2 | 3 | 4 | class BatchCollator(object): 5 | """ 6 | From a list of samples from the dataset, 7 | returns the batched images and targets. 
8 | This should be passed to the DataLoader 9 | """ 10 | 11 | def __init__(self, size_divisible=0): 12 | self.size_divisible = size_divisible 13 | 14 | def __call__(self, batch): 15 | transposed_batch = list(zip(*batch)) 16 | images = to_image_list(transposed_batch[0], self.size_divisible) 17 | targets = transposed_batch[1] 18 | img_ids = transposed_batch[2] 19 | return images, targets, img_ids 20 | 21 | 22 | class BBoxAugCollator(object): 23 | """ 24 | From a list of samples from the dataset, 25 | returns the images and targets. 26 | Images should be converted to batched images in `im_detect_bbox_aug` 27 | """ 28 | 29 | def __call__(self, batch): 30 | return list(zip(*batch)) 31 | 32 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Simple dataset class that wraps a list of path names 3 | """ 4 | 5 | from PIL import Image 6 | 7 | from hetsgg.structures.bounding_box import BoxList 8 | 9 | 10 | class ListDataset(object): 11 | def __init__(self, image_lists, transforms=None): 12 | self.image_lists = image_lists 13 | self.transforms = transforms 14 | 15 | def __getitem__(self, item): 16 | img = Image.open(self.image_lists[item]).convert("RGB") 17 | 18 | # dummy target 19 | w, h = img.size 20 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 21 | 22 | if self.transforms is not None: 23 | img, target = self.transforms(img, target) 24 | 25 | return img, target 26 | 27 | def __len__(self): 28 | return len(self.image_lists) 29 | 30 | def get_img_info(self, item): 31 | """ 32 | Return the image dimensions for the image, without 33 | loading and pre-processing it 34 | """ 35 | pass 36 | -------------------------------------------------------------------------------- /shell/hetsgg_test.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="6" 2 | export num_gpu=2 3 | export use_multi_gpu=false 4 | export task='sgcls' 5 | 6 | export test_list=('0045000') # checkpoint 7 | 8 | export save_result=False 9 | export output_dir="/checkpoints/" # Please input the checkpoint directory 10 | 11 | if $use_multi_gpu;then 12 | for name in ${test_list[@]} 13 | do 14 | python -m torch.distributed.launch --master_port 10025 --nproc_per_node=${num_gpu} tools/relation_test_net.py --config-file "${output_dir}/config.yml" \ 15 | TEST.IMS_PER_BATCH 16 \ 16 | TEST.SAVE_RESULT ${save_result} \ 17 | OUTPUT_DIR ${output_dir} \ 18 | MODEL.WEIGHT "${output_dir}/model_${name}.pth" 19 | done 20 | else 21 | for name in ${test_list[@]} 22 | do 23 | python tools/relation_test_net.py --config-file "${output_dir}/config.yml" \ 24 | TEST.IMS_PER_BATCH 8 \ 25 | TEST.SAVE_RESULT ${save_result} \ 26 | OUTPUT_DIR ${output_dir} \ 27 | MODEL.WEIGHT "${output_dir}/model_${name}.pth" 28 | done 29 | fi -------------------------------------------------------------------------------- /hetsgg/utils/timer.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import time 4 | import datetime 5 | 6 | 7 | class Timer(object): 8 | def __init__(self): 9 | self.reset() 10 | 11 | @property 12 | def average_time(self): 13 | return self.total_time / self.calls if self.calls > 0 else 0.0 14 | 15 | def tic(self): 16 | self.start_time = time.time() 17 | 18 | def toc(self, average=True): 19 | self.add(time.time() - self.start_time) 20 | if average: 21 | return self.average_time 22 |
else: 23 | return self.diff 24 | 25 | def add(self, time_diff): 26 | self.diff = time_diff 27 | self.total_time += self.diff 28 | self.calls += 1 29 | 30 | def reset(self): 31 | self.total_time = 0.0 32 | self.calls = 0 33 | self.start_time = 0.0 34 | self.diff = 0.0 35 | 36 | def avg_time_str(self): 37 | time_str = str(datetime.timedelta(seconds=self.average_time)) 38 | return time_str 39 | 40 | 41 | def get_time_str(time_diff): 42 | time_str = str(datetime.timedelta(seconds=time_diff)) 43 | return time_str 44 | -------------------------------------------------------------------------------- /hetsgg/layers/batch_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class FrozenBatchNorm2d(nn.Module): 6 | """ 7 | BatchNorm2d where the batch statistics and the affine parameters 8 | are fixed 9 | """ 10 | 11 | def __init__(self, n): 12 | super(FrozenBatchNorm2d, self).__init__() 13 | self.register_buffer("weight", torch.ones(n)) 14 | self.register_buffer("bias", torch.zeros(n)) 15 | self.register_buffer("running_mean", torch.zeros(n)) 16 | self.register_buffer("running_var", torch.ones(n)) 17 | 18 | def forward(self, x): 19 | # Cast all fixed parameters to half() if necessary 20 | if x.dtype == torch.float16: 21 | self.weight = self.weight.half() 22 | self.bias = self.bias.half() 23 | self.running_mean = self.running_mean.half() 24 | self.running_var = self.running_var.half() 25 | 26 | scale = self.weight * self.running_var.rsqrt() 27 | bias = self.bias - self.running_mean * scale 28 | scale = scale.reshape(1, -1, 1, 1) 29 | bias = bias.reshape(1, -1, 1, 1) 30 | return x * scale + bias 31 | -------------------------------------------------------------------------------- /hetsgg/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /hetsgg/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data.sampler import BatchSampler 2 | 3 | 4 | class IterationBasedBatchSampler(BatchSampler): 5 | """ 6 | Wraps a BatchSampler, resampling from it until 7 | a specified number of iterations have been sampled 8 | """ 9 | 10 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 11 | 
self.batch_sampler = batch_sampler 12 | self.num_iterations = num_iterations 13 | self.start_iter = start_iter 14 | 15 | def __iter__(self): 16 | iteration = self.start_iter 17 | while iteration <= self.num_iterations: 18 | # if the underlying sampler has a set_epoch method, like 19 | # DistributedSampler, used for making each process see 20 | # a different split of the dataset, then set it 21 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 22 | self.batch_sampler.sampler.set_epoch(iteration) 23 | for batch in self.batch_sampler: 24 | iteration += 1 25 | if iteration > self.num_iterations: 26 | break 27 | yield batch 28 | 29 | def __len__(self): 30 | return self.num_iterations 31 | -------------------------------------------------------------------------------- /hetsgg/layers/_utils.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os.path 3 | 4 | import torch 5 | 6 | try: 7 | from torch.utils.cpp_extension import load as load_ext 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | except ImportError: 10 | raise ImportError("The cpp layer extensions require PyTorch 0.4 or higher") 11 | 12 | 13 | def _load_C_extensions(): 14 | this_dir = os.path.dirname(os.path.abspath(__file__)) 15 | this_dir = os.path.dirname(this_dir) 16 | this_dir = os.path.join(this_dir, "csrc") 17 | 18 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 19 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 21 | 22 | source = main_file + source_cpu 23 | 24 | extra_cflags = [] 25 | if torch.cuda.is_available() and CUDA_HOME is not None: 26 | source.extend(source_cuda) 27 | extra_cflags = ["-DWITH_CUDA"] 28 | source = [os.path.join(this_dir, s) for s in source] 29 | extra_include_paths = [this_dir] 30 | return load_ext( 31 | "torchvision", 32 | source, 33 | extra_cflags=extra_cflags, 34 | extra_include_paths=extra_include_paths, 35 | ) 36 | 37 | 38 | _C = _load_C_extensions() 39 | -------------------------------------------------------------------------------- /tools/runner.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | import os 4 | import random 5 | import time 6 | 7 | import gpustat 8 | import torch 9 | import numpy as np 10 | 11 | 12 | def start(): 13 | mem = None 14 | gpu_id = int(os.environ["CUDA_VISIBLE_DEVICES"].split(",")[0]) 15 | 16 | while True: 17 | info = gpustat.core.GPUStatCollection.new_query() 18 | gpu_info = info.jsonify()['gpus'][gpu_id] 19 | u_ratio = gpu_info['utilization.gpu'] 20 | mem_ratio = gpu_info['memory.used'] / gpu_info['memory.total'] 21 | # Grow a dummy allocation to keep this GPU's memory and utilization up. 22 | if mem is None: 23 | mem = torch.rand((25000, 8196), device=torch.device("cuda")) 24 | 25 | if u_ratio < 30: 26 | if mem_ratio < 0.50: 27 | mem = torch.cat((mem, torch.rand((25000, 8196), device=torch.device("cuda")))).cuda() 28 | elif mem_ratio < 0.95: 29 | mem = torch.cat((mem, torch.rand((10000, 8196), device=torch.device("cuda")))).cuda() 30 | 31 | else: 32 | if mem is not None: 33 | for _ in range(100): 34 | mem *= mem 35 | mem /= mem 36 | time.sleep(0.001) 37 | 38 | 39 | 40 | if __name__ == "__main__": 41 | start() -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility functions manipulating the prediction
layers 3 | """ 4 | 5 | from ..utils import cat 6 | 7 | import torch 8 | 9 | def permute_and_flatten(layer, N, A, C, H, W): 10 | layer = layer.view(N, -1, C, H, W) 11 | layer = layer.permute(0, 3, 4, 1, 2) 12 | layer = layer.reshape(N, -1, C) 13 | return layer 14 | 15 | 16 | def concat_box_prediction_layers(box_cls, box_regression): 17 | box_cls_flattened = [] 18 | box_regression_flattened = [] 19 | 20 | for box_cls_per_level, box_regression_per_level in zip( 21 | box_cls, box_regression 22 | ): 23 | N, AxC, H, W = box_cls_per_level.shape 24 | Ax4 = box_regression_per_level.shape[1] 25 | A = Ax4 // 4 26 | C = AxC // A 27 | box_cls_per_level = permute_and_flatten( 28 | box_cls_per_level, N, A, C, H, W 29 | ) 30 | box_cls_flattened.append(box_cls_per_level) 31 | 32 | box_regression_per_level = permute_and_flatten( 33 | box_regression_per_level, N, A, 4, H, W 34 | ) 35 | box_regression_flattened.append(box_regression_per_level) 36 | 37 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 38 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 39 | return box_cls, box_regression 40 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from hetsgg.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .oi import oi_evaluation 5 | from .voc import voc_evaluation 6 | from .vg import vg_evaluation 7 | 8 | 9 | def evaluate(cfg, dataset, predictions, output_folder, logger, **kwargs): 10 | """evaluate dataset using different methods based on dataset type. 11 | Args: 12 | dataset: Dataset object 13 | predictions(list[BoxList]): each item in the list represents the 14 | prediction results for one image. 15 | output_folder: output folder, to save evaluation files or results. 16 | **kwargs: other args. 
17 | Returns: 18 | evaluation result 19 | """ 20 | args = dict( 21 | cfg=cfg, dataset=dataset, predictions=predictions, output_folder=output_folder, logger=logger, **kwargs 22 | ) 23 | if isinstance(dataset, datasets.COCODataset): 24 | return coco_evaluation(**args) 25 | elif isinstance(dataset, datasets.PascalVOCDataset): 26 | return voc_evaluation(**args) 27 | elif isinstance(dataset, datasets.VGDataset): 28 | return vg_evaluation(**args) 29 | elif isinstance(dataset, datasets.OIDataset): 30 | return oi_evaluation(**args) 31 | else: 32 | dataset_name = dataset.__class__.__name__ 33 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 34 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from hetsgg import layers 4 | from hetsgg.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel = 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /hetsgg/utils/registry.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | def _register_generic(module_dict, module_name, module): 4 | assert module_name not in module_dict 5 | module_dict[module_name] = module 6 | 7 | 8 | class Registry(dict): 9 | ''' 10 | A helper class for registering and managing modules; it extends a dictionary 11 | and provides a register function. 12 | 13 | E.g., creating a registry: 14 | some_registry = Registry({"default": default_module}) 15 | 16 | There are two ways of registering new modules: 17 | 1): the normal way is just calling the register function: 18 | def foo(): 19 | ... 20 | some_registry.register("foo_module", foo) 21 | 2): used as a decorator when declaring the module: 22 | @some_registry.register("foo_module") 23 | @some_registry.register("foo_module_nickname") 24 | def foo(): 25 | ...
26 | 27 | Accessing a module is just like using a dictionary, e.g.: 28 | f = some_registry["foo_module"] 29 | ''' 30 | def __init__(self, *args, **kwargs): 31 | super(Registry, self).__init__(*args, **kwargs) 32 | 33 | def register(self, module_name, module=None): 34 | # used as function call 35 | if module is not None: 36 | _register_generic(self, module_name, module) 37 | return 38 | 39 | # used as decorator 40 | def register_fn(fn): 41 | _register_generic(self, module_name, fn) 42 | return fn 43 | 44 | return register_fn 45 | -------------------------------------------------------------------------------- /hetsgg/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | #include "nms.h" 2 | #include "ROIAlign.h" 3 | #include "ROIPool.h" 4 | #include "SigmoidFocalLoss.h" 5 | #include "deform_conv.h" 6 | #include "deform_pool.h" 7 | 8 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 9 | m.def("nms", &nms, "non-maximum suppression"); 10 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 11 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 12 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 13 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 14 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 15 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 16 | // dcn-v2 17 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 18 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 19 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 20 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 21 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 22 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 23 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 24 | } -------------------------------------------------------------------------------- /hetsgg/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | from .
import transforms as T 2 | 3 | 4 | def build_transforms(cfg, is_train=True): 5 | if is_train: 6 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 7 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 8 | flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 9 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 10 | brightness = cfg.INPUT.BRIGHTNESS 11 | contrast = cfg.INPUT.CONTRAST 12 | saturation = cfg.INPUT.SATURATION 13 | hue = cfg.INPUT.HUE 14 | else: 15 | min_size = cfg.INPUT.MIN_SIZE_TEST 16 | max_size = cfg.INPUT.MAX_SIZE_TEST 17 | flip_horizontal_prob = 0.0 18 | flip_vertical_prob = 0.0 19 | brightness = 0.0 20 | contrast = 0.0 21 | saturation = 0.0 22 | hue = 0.0 23 | 24 | to_bgr255 = cfg.INPUT.TO_BGR255 25 | normalize_transform = T.Normalize( 26 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 27 | ) 28 | color_jitter = T.ColorJitter( 29 | brightness=brightness, 30 | contrast=contrast, 31 | saturation=saturation, 32 | hue=hue, 33 | ) 34 | 35 | transform = T.Compose( 36 | [ 37 | color_jitter, 38 | T.Resize(min_size, max_size), 39 | T.RandomHorizontalFlip(flip_horizontal_prob), 40 | T.RandomVerticalFlip(flip_vertical_prob), 41 | T.ToTensor(), 42 | normalize_transform, 43 | ] 44 | ) 45 | return transform 46 | -------------------------------------------------------------------------------- /shell/hetsgg_train_sggen_oi.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="4" 2 | export num_gpu=2 3 | export use_multi_gpu=false 4 | export use_obj_refine=False 5 | export task='sggen' 6 | 7 | export config=oi_v6 # oi_v4, oi_v6 8 | export output_dir="checkpoints/${task}-HetSGGPredictor-${config}" 9 | 10 | export path_faster_rcnn='' 11 | 12 | if $use_multi_gpu;then 13 | python -m torch.distributed.launch --master_port 10023 --nproc_per_node=${num_gpu} tools/relation_train_net.py --config-file "configs/relHetSGG_${config}.yaml" \ 14 | SOLVER.IMS_PER_BATCH 18 \ 15 | TEST.IMS_PER_BATCH 12 \ 16 | OUTPUT_DIR ${output_dir} \ 17 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 18 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 19 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 20 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 21 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 22 | else 23 | python tools/relation_train_net.py --config-file "configs/relHetSGG_${config}.yaml" \ 24 | SOLVER.IMS_PER_BATCH 9 \ 25 | TEST.IMS_PER_BATCH 6 \ 26 | OUTPUT_DIR ${output_dir} \ 27 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 28 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 29 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 30 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 31 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 32 | fi 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /hetsgg/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | try: 5 | from torch.hub import _download_url_to_file 6 | from torch.hub import urlparse 7 | from torch.hub import HASH_REGEX 8 | except ImportError: 9 | from torch.utils.model_zoo import _download_url_to_file 10 | from torch.utils.model_zoo import urlparse 11 | from torch.utils.model_zoo import HASH_REGEX 12 | 13 | from hetsgg.utils.comm import is_main_process 14 | from hetsgg.utils.comm import synchronize 15 | 16 | 17 | def cache_url(url, model_dir=None, 
progress=True): 18 | 19 | if model_dir is None: 20 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 21 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 22 | if not os.path.exists(model_dir): 23 | os.makedirs(model_dir) 24 | parts = urlparse(url) 25 | filename = os.path.basename(parts.path) 26 | if filename == "model_final.pkl": 27 | filename = parts.path.replace("/", "_") 28 | cached_file = os.path.join(model_dir, filename) 29 | if not os.path.exists(cached_file) and is_main_process(): 30 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 31 | hash_prefix = HASH_REGEX.search(filename) 32 | if hash_prefix is not None: 33 | hash_prefix = hash_prefix.group(1) 34 | if len(hash_prefix) < 6: 35 | hash_prefix = None 36 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 37 | synchronize() 38 | return cached_file 39 | -------------------------------------------------------------------------------- /shell/hetsgg_train_sggen_vg.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="4" 2 | export num_gpu=1 3 | export use_multi_gpu=false 4 | export use_obj_refine=False 5 | export task='sggen' 6 | 7 | export output_dir="checkpoints/${task}-HetSGGPredictor-vg" 8 | 9 | export model_config="relHetSGG_vg" # relHetSGG_vg, relHetSGGp_vg 10 | 11 | 12 | if $use_multi_gpu;then 13 | # Multi GPU -sgcls Task 14 | python -m torch.distributed.launch --master_port 10023 --nproc_per_node=${num_gpu} tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 15 | SOLVER.IMS_PER_BATCH 18 \ 16 | TEST.IMS_PER_BATCH 12 \ 17 | OUTPUT_DIR ${output_dir} \ 18 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 19 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 20 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 21 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 22 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 23 | else 24 | # Single GPU 25 | python tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 26 | SOLVER.IMS_PER_BATCH 9 \ 27 | TEST.IMS_PER_BATCH 6 \ 28 | OUTPUT_DIR ${output_dir} \ 29 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 30 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX False \ 31 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 32 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 33 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 34 | fi 35 | -------------------------------------------------------------------------------- /shell/hetsgg_train_sgcls_vg.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="0" 2 | export num_gpu=1 3 | export use_multi_gpu=false 4 | export use_obj_refine=True 5 | export task='sgcls' 6 | 7 | export model_config="relHetSGG_vg" # relHetSGG_vg, relHetSGGp_vg 8 | export output_dir="checkpoints/${task}-HetSGGPredictor-vg" 9 | 10 | export path_faster_rcnn='' 11 | 12 | 13 | if $use_multi_gpu;then 14 | # Multi GPU -sgcls Task 15 | python -m torch.distributed.launch --master_port 10032 --nproc_per_node=$num_gpu tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 16 | SOLVER.IMS_PER_BATCH 9 \ 17 | TEST.IMS_PER_BATCH 6 \ 18 | OUTPUT_DIR ${output_dir} \ 19 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 20 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 21 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS 
${use_obj_refine} \ 22 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 23 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 24 | else 25 | # Single GPU 26 | python tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 27 | SOLVER.IMS_PER_BATCH 9 \ 28 | TEST.IMS_PER_BATCH 6 \ 29 | OUTPUT_DIR ${output_dir} \ 30 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL False \ 31 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 32 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 33 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 34 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 35 | fi -------------------------------------------------------------------------------- /hetsgg/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .batch_norm import FrozenBatchNorm2d 4 | from .misc import Conv2d 5 | from .misc import DFConv2d 6 | from .misc import ConvTranspose2d 7 | from .misc import BatchNorm2d 8 | from .misc import interpolate 9 | from .nms import nms 10 | from .roi_align import ROIAlign 11 | from .roi_align import roi_align 12 | from .roi_pool import ROIPool 13 | from .roi_pool import roi_pool 14 | from .entropy_loss import entropy_loss 15 | from .kl_div_loss import kl_div_loss 16 | from .smooth_l1_loss import smooth_l1_loss 17 | from .sigmoid_focal_loss import SigmoidFocalLoss 18 | from .label_smoothing_loss import Label_Smoothing_Regression 19 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 20 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 21 | from .dcn.deform_pool_func import deform_roi_pooling 22 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 23 | 24 | 25 | __all__ = [ 26 | "nms", 27 | "roi_align", 28 | "ROIAlign", 29 | "roi_pool", 30 | "ROIPool", 31 | "smooth_l1_loss", 32 | "entropy_loss", 33 | "kl_div_loss", 34 | "Conv2d", 35 | "DFConv2d", 36 | "ConvTranspose2d", 37 | "interpolate", 38 | "BatchNorm2d", 39 | "FrozenBatchNorm2d", 40 | "SigmoidFocalLoss", 41 | "Label_Smoothing_Regression", 42 | 'deform_conv', 43 | 'modulated_deform_conv', 44 | 'DeformConv', 45 | 'ModulatedDeformConv', 46 | 'ModulatedDeformConvPack', 47 | 'deform_roi_pooling', 48 | 'DeformRoIPooling', 49 | 'DeformRoIPoolingPack', 50 | 'ModulatedDeformRoIPoolingPack', 51 | ] 52 | 53 | -------------------------------------------------------------------------------- /hetsgg/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 11 | const at::Tensor& rois, 12 | const float spatial_scale, 13 | const int pooled_height, 14 | const int pooled_width) { 15 | if (input.type().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 18 | #else 19 | AT_ERROR("Not compiled with GPU support"); 20 | #endif 21 | } 22 | AT_ERROR("Not implemented on the CPU"); 23 | } 24 | 25 | at::Tensor ROIPool_backward(const at::Tensor& grad, 26 | const at::Tensor& input, 27 | const at::Tensor& rois, 28 | const at::Tensor& argmax, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 |
const int height, 35 | const int width) { 36 | if (grad.type().is_cuda()) { 37 | #ifdef WITH_CUDA 38 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 39 | #else 40 | AT_ERROR("Not compiled with GPU support"); 41 | #endif 42 | } 43 | AT_ERROR("Not implemented on the CPU"); 44 | } 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /hetsgg/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor ROIAlign_forward(const at::Tensor& input, 11 | const at::Tensor& rois, 12 | const float spatial_scale, 13 | const int pooled_height, 14 | const int pooled_width, 15 | const int sampling_ratio) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 24 | } 25 | 26 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 27 | const at::Tensor& rois, 28 | const float spatial_scale, 29 | const int pooled_height, 30 | const int pooled_width, 31 | const int batch_size, 32 | const int channels, 33 | const int height, 34 | const int width, 35 | const int sampling_ratio) { 36 | if (grad.type().is_cuda()) { 37 | #ifdef WITH_CUDA 38 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 39 | #else 40 | AT_ERROR("Not compiled with GPU support"); 41 | #endif 42 | } 43 | AT_ERROR("Not implemented on the CPU"); 44 | } 45 | 46 | -------------------------------------------------------------------------------- /hetsgg/utils/global_buffer.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | import pickle 3 | import torch 4 | import os 5 | 6 | from hetsgg.utils.comm import is_main_process, get_world_size, all_gather, synchronize 7 | from hetsgg.config import cfg 8 | 9 | def singleton(cls): 10 | _instance = {} 11 | 12 | def inner(): 13 | if cls not in _instance: 14 | _instance[cls] = cls() 15 | return _instance[cls] 16 | return inner 17 | 18 | 19 | @singleton 20 | class _GlobalBuffer(): 21 | """A singleton buffer for storing data from anywhere in the program 22 | """ 23 | def __init__(self): 24 | self.multi_proc = (get_world_size() > 1) 25 | self.data = defaultdict(list) 26 | 27 | def add_data(self, key, val): 28 | if not isinstance(val, torch.Tensor): 29 | val = torch.Tensor(val) 30 | else: 31 | val = val.detach() 32 | 33 | val = torch.cat(all_gather(val)) 34 | 35 | if not is_main_process(): 36 | del val 37 | return 38 | self.data[key].append(val.cpu().numpy()) 39 | 40 | def __str__(self): 41 | ret_str = f"Buffer contains data: (key, value type)\n" 42 | for k, v in self.data.items(): 43 | ret_str += f" {k}, {type(v).__name__}\n" 44 | ret_str += f"id {id(self)}" 45 | return ret_str 46 | 47 | 48 | def store_data(k, v): 49 | if cfg.GLOBAL_BUFFER_ON: 50 | buffer = _GlobalBuffer() 51 | buffer.add_data(k, v) 52 | synchronize() 53 | 54 | 55 | def save_buffer(output_dir): 56 | if cfg.GLOBAL_BUFFER_ON: 57 | if is_main_process(): 58 | buffer =
_GlobalBuffer() 59 | with open(os.path.join(output_dir, "inter_data_buffer.pkl"), 'wb') as f: 60 | pickle.dump(buffer.data, f) 61 | 62 | print("save buffer:", str(buffer)) 63 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/roi_attribute_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from hetsgg.modeling import registry 4 | 5 | 6 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, cfg, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_attributes = cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.att_score = nn.Linear(num_inputs, num_attributes) 16 | 17 | nn.init.normal_(self.att_score.weight, mean=0, std=0.01) 18 | nn.init.constant_(self.att_score.bias, 0) 19 | 20 | def forward(self, x): 21 | x = self.avgpool(x) 22 | x = x.view(x.size(0), -1) 23 | att_logit = self.att_score(x) 24 | 25 | return att_logit 26 | 27 | 28 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FPNPredictor") 29 | class FPNPredictor(nn.Module): 30 | def __init__(self, cfg, in_channels): 31 | super(FPNPredictor, self).__init__() 32 | num_attributes = cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES 33 | representation_size = in_channels 34 | 35 | self.att_score = nn.Linear(representation_size, num_attributes) 36 | 37 | nn.init.normal_(self.att_score.weight, std=0.01) 38 | nn.init.constant_(self.att_score.bias, 0) 39 | 40 | def forward(self, x): 41 | if x.ndimension() == 4: 42 | assert list(x.shape[2:]) == [1, 1] 43 | x = x.view(x.size(0), -1) 44 | 45 | att_logit = self.att_score(x) 46 | 47 | return att_logit 48 | 49 | 50 | def make_roi_attribute_predictor(cfg, in_channels): 51 | func = registry.ROI_ATTRIBUTE_PREDICTOR[cfg.MODEL.ROI_ATTRIBUTE_HEAD.PREDICTOR] 52 | return func(cfg, in_channels) 53 | -------------------------------------------------------------------------------- /hetsgg/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict 2 | from collections import deque 3 | 4 | import torch 5 | 6 | 7 | class SmoothedValue(object): 8 | 9 | def __init__(self, window_size=20): 10 | self.deque = deque(maxlen=window_size) 11 | self.series = [] 12 | self.total = 0.0 13 | self.count = 0 14 | 15 | def update(self, value): 16 | self.deque.append(value) 17 | self.series.append(value) 18 | self.count += 1 19 | self.total += value 20 | 21 | @property 22 | def median(self): 23 | d = torch.tensor(list(self.deque)) 24 | return d.median().item() 25 | 26 | @property 27 | def avg(self): 28 | d = torch.tensor(list(self.deque)) 29 | return d.mean().item() 30 | 31 | @property 32 | def global_avg(self): 33 | return self.total / self.count 34 | 35 | 36 | class MetricLogger(object): 37 | def __init__(self, delimiter="\t"): 38 | self.meters = defaultdict(SmoothedValue) 39 | self.delimiter = delimiter 40 | 41 | def update(self, **kwargs): 42 | for k, v in kwargs.items(): 43 | if isinstance(v, torch.Tensor): 44 | v = v.item() 45 | assert isinstance(v, (float, int)) 46 | self.meters[k].update(v) 47 | 48 | def __getattr__(self, attr): 49 | if attr in self.meters: 50 | return self.meters[attr] 51 | if attr in self.__dict__: 52 | return self.__dict__[attr] 53 | raise AttributeError("'{}' object has no 
attribute '{}'".format( 54 | type(self).__name__, attr)) 55 | 56 | def __str__(self): 57 | loss_str = [] 58 | for name, meter in self.meters.items(): 59 | loss_str.append( 60 | "{}: {:.4f} ({:.4f})\n".format(name, meter.median, meter.global_avg) 61 | ) 62 | return self.delimiter.join(loss_str) 63 | -------------------------------------------------------------------------------- /hetsgg/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #endif 7 | 8 | 9 | // Interface for Python 10 | void deform_psroi_pooling_forward( 11 | at::Tensor input, 12 | at::Tensor bbox, 13 | at::Tensor trans, 14 | at::Tensor out, 15 | at::Tensor top_count, 16 | const int no_trans, 17 | const float spatial_scale, 18 | const int output_dim, 19 | const int group_size, 20 | const int pooled_size, 21 | const int part_size, 22 | const int sample_per_part, 23 | const float trans_std) 24 | { 25 | if (input.type().is_cuda()) { 26 | #ifdef WITH_CUDA 27 | return deform_psroi_pooling_cuda_forward( 28 | input, bbox, trans, out, top_count, 29 | no_trans, spatial_scale, output_dim, group_size, 30 | pooled_size, part_size, sample_per_part, trans_std 31 | ); 32 | #else 33 | AT_ERROR("Not compiled with GPU support"); 34 | #endif 35 | } 36 | AT_ERROR("Not implemented on the CPU"); 37 | } 38 | 39 | 40 | void deform_psroi_pooling_backward( 41 | at::Tensor out_grad, 42 | at::Tensor input, 43 | at::Tensor bbox, 44 | at::Tensor trans, 45 | at::Tensor top_count, 46 | at::Tensor input_grad, 47 | at::Tensor trans_grad, 48 | const int no_trans, 49 | const float spatial_scale, 50 | const int output_dim, 51 | const int group_size, 52 | const int pooled_size, 53 | const int part_size, 54 | const int sample_per_part, 55 | const float trans_std) 56 | { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return deform_psroi_pooling_cuda_backward( 60 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 61 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 62 | part_size, sample_per_part, trans_std 63 | ); 64 | #else 65 | AT_ERROR("Not compiled with GPU support"); 66 | #endif 67 | } 68 | AT_ERROR("Not implemented on the CPU"); 69 | } 70 | -------------------------------------------------------------------------------- /shell/hetsgg_train_predcls_vg.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES="6" 2 | export num_gpu=1 3 | export use_multi_gpu=false 4 | export use_obj_refine=False 5 | export task='predcls' 6 | 7 | export REPEAT_FACTOR=0.13 8 | export INSTANCE_DROP_RATE=1.6 9 | 10 | export model_config="relHetSGGp_vg" # relHetSGG_vg, relHetSGGp_vg 11 | export output_dir="checkpoints/${task}-HetSGGPredictor-vg" 12 | 13 | # export path_faster_rcnn='' # Put faster r-cnn path 14 | 15 | if $use_multi_gpu;then 16 | python -m torch.distributed.launch --master_port 10029 --nproc_per_node=${num_gpu} tools/relation_train_net.py \ 17 | --config-file "configs/${model_config}.yaml" \ 18 | SOLVER.IMS_PER_BATCH 18 \ 19 | TEST.IMS_PER_BATCH 12 \ 20 | OUTPUT_DIR ${output_dir} \ 21 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL True \ 22 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 23 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 24 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 25 | MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.REPEAT_FACTOR ${REPEAT_FACTOR} \ 26 
| MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.INSTANCE_DROP_RATE ${INSTANCE_DROP_RATE} \ 27 | MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 28 | else 29 | python tools/relation_train_net.py --config-file "configs/${model_config}.yaml" \ 30 | SOLVER.IMS_PER_BATCH 9 \ 31 | TEST.IMS_PER_BATCH 6 \ 32 | OUTPUT_DIR ${output_dir} \ 33 | MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL True \ 34 | MODEL.ROI_RELATION_HEAD.USE_GT_BOX True \ 35 | MODEL.ROI_RELATION_HEAD.REL_OBJ_MULTI_TASK_LOSS ${use_obj_refine} \ 36 | MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE ${use_obj_refine} \ 37 | MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.REPEAT_FACTOR ${REPEAT_FACTOR} \ 38 | MODEL.ROI_RELATION_HEAD.DATA_RESAMPLING_PARAM.INSTANCE_DROP_RATE ${INSTANCE_DROP_RATE} 39 | # MODEL.PRETRAINED_DETECTOR_CKPT ${path_faster_rcnn} 40 | fi 41 | -------------------------------------------------------------------------------- /hetsgg/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from hetsgg import _C 8 | 9 | from apex import amp 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | @amp.float_function 56 | def forward(self, input, rois): 57 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 58 | 59 | def __repr__(self): 60 | tmpstr = self.__class__.__name__ + "(" 61 | tmpstr += "output_size=" + str(self.output_size) 62 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 63 | tmpstr += ")" 64 | return tmpstr 65 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.modeling import registry 5 | from hetsgg.modeling.poolers import Pooler 6 | 7 | from hetsgg.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = 
cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.layers import Conv2d 5 | from hetsgg.layers import ConvTranspose2d 6 | from hetsgg.modeling import registry 7 | 8 | 9 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 10 | class MaskRCNNC4Predictor(nn.Module): 11 | def __init__(self, cfg, in_channels): 12 | super(MaskRCNNC4Predictor, self).__init__() 13 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 14 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 15 | num_inputs = in_channels 16 | 17 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 18 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 19 | 20 | for name, param in self.named_parameters(): 21 | if "bias" in name: 22 | nn.init.constant_(param, 0) 23 | elif "weight" in name: 24 | 25 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 26 | 27 | def forward(self, x): 28 | x = F.relu(self.conv5_mask(x)) 29 | return self.mask_fcn_logits(x) 30 | 31 | 32 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 33 | class MaskRCNNConv1x1Predictor(nn.Module): 34 | def __init__(self, cfg, in_channels): 35 | super(MaskRCNNConv1x1Predictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | num_inputs = in_channels 38 | 39 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 40 | 41 | for name, param in self.named_parameters(): 42 | if "bias" in name: 43 | nn.init.constant_(param, 0) 44 | elif "weight" in name: 45 | 46 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 47 | 48 | def forward(self, x): 49 | return self.mask_fcn_logits(x) 50 | 51 | 52 | def make_roi_mask_predictor(cfg, in_channels): 53 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 54 | return func(cfg, in_channels) 55 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | import glob 3 | 
import os 4 | 5 | import torch 6 | from setuptools import find_packages 7 | from setuptools import setup 8 | from torch.utils.cpp_extension import CUDA_HOME 9 | from torch.utils.cpp_extension import CppExtension 10 | from torch.utils.cpp_extension import CUDAExtension 11 | 12 | requirements = ["torch", "torchvision"] 13 | 14 | 15 | def get_extensions(): 16 | this_dir = os.path.dirname(os.path.abspath(__file__)) 17 | extensions_dir = os.path.join(this_dir, "hetsgg", "csrc") 18 | 19 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 20 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 22 | 23 | sources = main_file + source_cpu 24 | extension = CppExtension 25 | 26 | extra_compile_args = {"cxx": []} 27 | define_macros = [] 28 | 29 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 30 | extension = CUDAExtension 31 | sources += source_cuda 32 | define_macros += [("WITH_CUDA", None)] 33 | extra_compile_args["nvcc"] = [ 34 | "-DCUDA_HAS_FP16=1", 35 | "-D__CUDA_NO_HALF_OPERATORS__", 36 | "-D__CUDA_NO_HALF_CONVERSIONS__", 37 | "-D__CUDA_NO_HALF2_OPERATORS__", 38 | ] 39 | 40 | sources = [os.path.join(extensions_dir, s) for s in sources] 41 | 42 | include_dirs = [extensions_dir] 43 | 44 | ext_modules = [ 45 | extension( 46 | "hetsgg._C", 47 | sources, 48 | include_dirs=include_dirs, 49 | define_macros=define_macros, 50 | extra_compile_args=extra_compile_args, 51 | ) 52 | ] 53 | 54 | return ext_modules 55 | 56 | 57 | setup( 58 | name="hetsgg", 59 | version="0.1", 60 | author="Anonymous", 61 | url="", 62 | description="A Toolkit for Scene Graph Generation", 63 | packages=find_packages(exclude=("configs", "tests",)), 64 | # install_requires=requirements, 65 | ext_modules=get_extensions(), 66 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 67 | ) 68 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg, in_channels): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 14 | self.predictor = make_roi_keypoint_predictor( 15 | cfg, self.feature_extractor.out_channels) 16 | self.post_processor = make_roi_keypoint_post_processor(cfg) 17 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 18 | 19 | def forward(self, features, proposals, targets=None): 20 | """ 21 | Arguments: 22 | features (list[Tensor]): feature-maps from possibly several levels 23 | proposals (list[BoxList]): proposal boxes 24 | targets (list[BoxList], optional): the ground-truth targets. 25 | 26 | Returns: 27 | x (Tensor): the result of the feature extractor 28 | proposals (list[BoxList]): during training, the original proposals 29 | are returned. During testing, the predicted boxlists are returned 30 | with the `mask` field set 31 | losses (dict[Tensor]): During training, returns the losses for the 32 | head. 
During testing, returns an empty dict. 33 | """ 34 | if self.training: 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.subsample(proposals, targets) 37 | 38 | x = self.feature_extractor(features, proposals) 39 | kp_logits = self.predictor(x) 40 | 41 | if not self.training: 42 | result = self.post_processor(kp_logits, proposals) 43 | return x, result, {} 44 | 45 | loss_kp = self.loss_evaluator(proposals, kp_logits) 46 | 47 | return x, proposals, dict(loss_kp=loss_kp) 48 | 49 | 50 | def build_roi_keypoint_head(cfg, in_channels): 51 | return ROIKeypointHead(cfg, in_channels) 52 | -------------------------------------------------------------------------------- /hetsgg/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch import nn 4 | 5 | from hetsgg.structures.image_list import to_image_list 6 | 7 | from ..backbone import build_backbone 8 | from ..rpn.rpn import build_rpn 9 | from ..roi_heads.roi_heads import build_roi_heads 10 | 11 | 12 | class GeneralizedRCNN(nn.Module): 13 | 14 | 15 | def __init__(self, cfg): 16 | super(GeneralizedRCNN, self).__init__() 17 | self.cfg = cfg.clone() 18 | self.backbone = build_backbone(cfg) # ResNet 19 | self.rpn = build_rpn(cfg, self.backbone.out_channels) # 256 20 | self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels) 21 | 22 | def forward(self, images, targets=None, logger=None): 23 | """ 24 | Arguments: 25 | images (list[Tensor] or ImageList): images to be processed 26 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 27 | 28 | Returns: 29 | result (list[BoxList] or dict[Tensor]): the output from the model. 30 | During training, it returns a dict[Tensor] which contains the losses. 31 | During testing, it returns list[BoxList] contains additional fields 32 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 33 | 34 | """ 35 | if self.training and targets is None: 36 | raise ValueError("In training mode, targets should be passed") 37 | images = to_image_list(images) 38 | features = self.backbone(images.tensors) 39 | proposals, proposal_losses = self.rpn(images, features, targets) 40 | if self.roi_heads: 41 | x, result, detector_losses = self.roi_heads(features, proposals, targets, logger) 42 | else: 43 | # RPN-only models don't have roi_heads 44 | x = features 45 | result = proposals 46 | detector_losses = {} 47 | 48 | if self.training: # True 49 | losses = {} 50 | losses.update(detector_losses) 51 | if not self.cfg.MODEL.RELATION_ON: # True 52 | # During the relationship training stage, the rpn_head should be fixed, and no loss. 
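# (Editor's comments, hedged:) concretely, during detector pre-training the returned
# dict mixes ROI-head and RPN terms, e.g. {"loss_classifier", "loss_box_reg",
# "loss_objectness", "loss_rpn_box_reg"}; with MODEL.RELATION_ON=True the RPN terms
# below are skipped because the proposal network is frozen. These key names follow
# maskrcnn-benchmark conventions and are illustrative, not taken from this file.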
53 | losses.update(proposal_losses) 54 | return losses 55 | 56 | return result 57 | -------------------------------------------------------------------------------- /hetsgg/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from hetsgg import _C 8 | 9 | from apex import amp 10 | 11 | class _ROIAlign(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ) 44 | return grad_input, None, None, None, None 45 | 46 | 47 | roi_align = _ROIAlign.apply 48 | 49 | class ROIAlign(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | super(ROIAlign, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | self.sampling_ratio = sampling_ratio 55 | 56 | @amp.float_function 57 | def forward(self, input, rois): 58 | return roi_align( 59 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 60 | ) 61 | 62 | def __repr__(self): 63 | tmpstr = self.__class__.__name__ + "(" 64 | tmpstr += "output_size=" + str(self.output_size) 65 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 66 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function, absolute_import, division 3 | import os, sys 4 | 5 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' 
, 'helpers' ) ) ) 6 | from csHelpers import * 7 | 8 | from cityscapesscripts.evaluation.instance import * 9 | from cityscapesscripts.helpers.csHelpers import * 10 | import cv2 11 | from hetsgg.utils import cv2_util 12 | 13 | 14 | def instances2dict_with_polygons(imageFileList, verbose=False): 15 | imgCount = 0 16 | instanceDict = {} 17 | 18 | if not isinstance(imageFileList, list): 19 | imageFileList = [imageFileList] 20 | 21 | if verbose: 22 | print("Processing {} images...".format(len(imageFileList))) 23 | 24 | for imageFileName in imageFileList: 25 | img = Image.open(imageFileName) 26 | 27 | imgNp = np.array(img) 28 | 29 | instances = {} 30 | for label in labels: 31 | instances[label.name] = [] 32 | 33 | for instanceId in np.unique(imgNp): 34 | if instanceId < 1000: 35 | continue 36 | instanceObj = Instance(imgNp, instanceId) 37 | instanceObj_dict = instanceObj.toDict() 38 | 39 | if id2label[instanceObj.labelID].hasInstances: 40 | mask = (imgNp == instanceId).astype(np.uint8) 41 | contour, hier = cv2_util.findContours( 42 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 43 | 44 | polygons = [c.reshape(-1).tolist() for c in contour] 45 | instanceObj_dict['contours'] = polygons 46 | 47 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 48 | 49 | imgKey = os.path.abspath(imageFileName) 50 | instanceDict[imgKey] = instances 51 | imgCount += 1 52 | 53 | if verbose: 54 | print("\rImages Processed: {}".format(imgCount), end=' ') 55 | sys.stdout.flush() 56 | 57 | if verbose: 58 | print("") 59 | 60 | return instanceDict 61 | 62 | def main(argv): 63 | fileList = [] 64 | if (len(argv) > 2): 65 | for arg in argv: 66 | if ("png" in arg): 67 | fileList.append(arg) 68 | instances2dict_with_polygons(fileList, True) 69 | 70 | if __name__ == "__main__": 71 | main(sys.argv[1:]) 72 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 5 | from hetsgg.modeling import registry 6 | from hetsgg.modeling.poolers import Pooler 7 | from hetsgg.modeling.make_layers import make_conv3x3 8 | 9 | 10 | registry.ROI_MASK_FEATURE_EXTRACTORS.register( 11 | "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor 12 | ) 13 | 14 | 15 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor") 16 | class MaskRCNNFPNFeatureExtractor(nn.Module): 17 | """ 18 | Heads for FPN for classification 19 | """ 20 | 21 | def __init__(self, cfg, in_channels): 22 | 23 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 24 | 25 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 26 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 27 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 28 | pooler = Pooler( 29 | output_size=(resolution, resolution), 30 | scales=scales, 31 | sampling_ratio=sampling_ratio, 32 | ) 33 | input_size = in_channels 34 | self.pooler = pooler 35 | 36 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 37 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 38 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 39 | 40 | next_feature = input_size 41 | self.blocks = [] 42 | for layer_idx, layer_features in enumerate(layers, 1): 43 | layer_name = "mask_fcn{}".format(layer_idx) 44 | module = make_conv3x3( 45 | next_feature, 
layer_features, 46 | dilation=dilation, stride=1, use_gn=use_gn 47 | ) 48 | self.add_module(layer_name, module) 49 | next_feature = layer_features 50 | self.blocks.append(layer_name) 51 | self.out_channels = layer_features 52 | 53 | def forward(self, x, proposals): 54 | x = self.pooler(x, proposals) 55 | 56 | for layer_name in self.blocks: 57 | x = F.relu(getattr(self, layer_name)(x)) 58 | 59 | return x 60 | 61 | 62 | def make_roi_mask_feature_extractor(cfg, in_channels): 63 | func = registry.ROI_MASK_FEATURE_EXTRACTORS[ 64 | cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR 65 | ] 66 | return func(cfg, in_channels) 67 | -------------------------------------------------------------------------------- /hetsgg/solver/build.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau 4 | 5 | 6 | def make_optimizer(cfg, model, logger, slow_heads=None, except_weight_decay=None, slow_ratio=5.0, rl_factor=1.0): 7 | params = [] 8 | for key, value in model.named_parameters(): 9 | if not value.requires_grad: 10 | continue 11 | lr = cfg.SOLVER.BASE_LR 12 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 13 | if "bias" in key: 14 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 15 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 16 | 17 | if except_weight_decay is not None: 18 | for item in except_weight_decay: 19 | if item in key: 20 | weight_decay = 0.0 21 | logger.info("NO WEIGHT DECAY: {}.".format(key)) 22 | 23 | if slow_heads is not None: 24 | for item in slow_heads: 25 | if item in key: 26 | logger.info("SLOW HEADS: {} is slow down by ratio of {}.".format(key, str(slow_ratio))) 27 | lr = lr / slow_ratio 28 | break 29 | params += [{"params": [value], "lr": lr * rl_factor, "weight_decay": weight_decay}] 30 | 31 | optimizer = torch.optim.SGD(params, lr=cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM) 32 | return optimizer 33 | 34 | 35 | def make_lr_scheduler(cfg, optimizer, logger=None): 36 | if cfg.SOLVER.SCHEDULE.TYPE == "WarmupMultiStepLR": 37 | return WarmupMultiStepLR( 38 | optimizer, 39 | cfg.SOLVER.STEPS, 40 | cfg.SOLVER.GAMMA, 41 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 42 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 43 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 44 | ) 45 | 46 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupReduceLROnPlateau": 47 | return WarmupReduceLROnPlateau( 48 | optimizer, 49 | cfg.SOLVER.SCHEDULE.FACTOR, 50 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 51 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 52 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 53 | patience=cfg.SOLVER.SCHEDULE.PATIENCE, 54 | threshold=cfg.SOLVER.SCHEDULE.THRESHOLD, 55 | cooldown=cfg.SOLVER.SCHEDULE.COOLDOWN, 56 | logger=logger, 57 | ) 58 | 59 | else: 60 | raise ValueError("Invalid Schedule Type") 61 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from hetsgg.modeling import registry 4 | 5 | 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.cls_score = nn.Linear(num_inputs, num_classes) 16 | 
num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 17 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 34 | class FPNPredictor(nn.Module): 35 | def __init__(self, cfg, in_channels): 36 | super(FPNPredictor, self).__init__() 37 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 38 | representation_size = in_channels 39 | 40 | self.cls_score = nn.Linear(representation_size, num_classes) # Pretrained 41 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes # False 42 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) # Pretrained 43 | 44 | nn.init.normal_(self.cls_score.weight, std=0.01) 45 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 46 | for l in [self.cls_score, self.bbox_pred]: 47 | nn.init.constant_(l.bias, 0) 48 | 49 | def forward(self, x): 50 | if x.ndimension() == 4: 51 | assert list(x.shape[2:]) == [1, 1] 52 | x = x.view(x.size(0), -1) 53 | cls_logit = self.cls_score(x) 54 | bbox_pred = self.bbox_pred(x) 55 | 56 | return cls_logit, bbox_pred 57 | 58 | 59 | def make_roi_box_predictor(cfg, in_channels): 60 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] # FPN 61 | return func(cfg, in_channels) 62 | -------------------------------------------------------------------------------- /hetsgg/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from hetsgg import _C 7 | 8 | class _SigmoidFocalLoss(Function): 9 | @staticmethod 10 | def forward(ctx, logits, targets, gamma, alpha): 11 | ctx.save_for_backward(logits, targets) 12 | num_classes = logits.shape[1] 13 | ctx.num_classes = num_classes 14 | ctx.gamma = gamma 15 | ctx.alpha = alpha 16 | 17 | losses = _C.sigmoid_focalloss_forward( 18 | logits, targets, num_classes, gamma, alpha 19 | ) 20 | return losses 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | logits, targets = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_logits = _C.sigmoid_focalloss_backward( 31 | logits, targets, d_loss, num_classes, gamma, alpha 32 | ) 33 | return d_logits, None, None, None, None 34 | 35 | 36 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 37 | 38 | 39 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 40 | num_classes = logits.shape[1] 41 | gamma = gamma[0] 42 | alpha = alpha[0] 43 | dtype = targets.dtype 44 | device = targets.device 45 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 46 | 47 | t = targets.unsqueeze(1) 48 | p = torch.sigmoid(logits) 49 | term1 = (1 - p) ** gamma * torch.log(p) 50 | term2 = p ** gamma * torch.log(1 - p) 51 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 52 | 53 | 54 | 
class SigmoidFocalLoss(nn.Module): 55 | def __init__(self, gamma, alpha): 56 | super(SigmoidFocalLoss, self).__init__() 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | 60 | def forward(self, logits, targets): 61 | device = logits.device 62 | if logits.is_cuda: 63 | loss_func = sigmoid_focal_loss_cuda 64 | else: 65 | loss_func = sigmoid_focal_loss_cpu 66 | 67 | loss = loss_func(logits, targets, self.gamma, self.alpha) 68 | return loss.sum() 69 | 70 | def __repr__(self): 71 | tmpstr = self.__class__.__name__ + "(" 72 | tmpstr += "gamma=" + str(self.gamma) 73 | tmpstr += ", alpha=" + str(self.alpha) 74 | tmpstr += ")" 75 | return tmpstr 76 | -------------------------------------------------------------------------------- /hetsgg/structures/image_list.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import torch 4 | 5 | 6 | class ImageList(object): 7 | """ 8 | Structure that holds a list of images (of possibly 9 | varying sizes) as a single tensor. 10 | This works by padding the images to the same size, 11 | and storing in a field the original sizes of each image 12 | """ 13 | 14 | def __init__(self, tensors, image_sizes): 15 | """ 16 | Arguments: 17 | tensors (tensor) 18 | image_sizes (list[tuple[int, int]]) 19 | """ 20 | self.tensors = tensors 21 | self.image_sizes = image_sizes 22 | 23 | def to(self, *args, **kwargs): 24 | cast_tensor = self.tensors.to(*args, **kwargs) 25 | return ImageList(cast_tensor, self.image_sizes) 26 | 27 | 28 | def to_image_list(tensors, size_divisible=0): 29 | """ 30 | tensors can be an ImageList, a torch.Tensor or 31 | an iterable of Tensors. It can't be a numpy array. 32 | When tensors is an iterable of Tensors, it pads 33 | the Tensors with zeros so that they have the same 34 | shape 35 | """ 36 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 37 | tensors = [tensors] 38 | 39 | if isinstance(tensors, ImageList): 40 | return tensors 41 | elif isinstance(tensors, torch.Tensor): 42 | # single tensor shape can be inferred 43 | if tensors.dim() == 3: 44 | tensors = tensors[None] 45 | assert tensors.dim() == 4 46 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 47 | return ImageList(tensors, image_sizes) 48 | elif isinstance(tensors, (tuple, list)): 49 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 50 | 51 | if size_divisible > 0: 52 | import math 53 | 54 | stride = size_divisible 55 | max_size = list(max_size) 56 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 57 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 58 | max_size = tuple(max_size) 59 | 60 | batch_shape = (len(tensors),) + max_size 61 | batched_imgs = tensors[0].new(*batch_shape).zero_() 62 | for img, pad_img in zip(tensors, batched_imgs): 63 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 64 | 65 | image_sizes = [im.shape[-2:] for im in tensors] 66 | 67 | return ImageList(batched_imgs, image_sizes) 68 | else: 69 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 70 | -------------------------------------------------------------------------------- /hetsgg/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | 2 | import math 3 | import torch 4 | import torch.distributed as dist 5 | from torch.utils.data.sampler import Sampler 6 | 7 | 8 | class DistributedSampler(Sampler): 9 | """Sampler that restricts data loading to a subset of the 
dataset. 10 | It is especially useful in conjunction with 11 | :class:`torch.nn.parallel.DistributedDataParallel`. In such a case, each 12 | process can pass a DistributedSampler instance as a DataLoader sampler, 13 | and load a subset of the original dataset that is exclusive to it. 14 | .. note:: 15 | Dataset is assumed to be of constant size. 16 | Arguments: 17 | dataset: Dataset used for sampling. 18 | num_replicas (optional): Number of processes participating in 19 | distributed training. 20 | rank (optional): Rank of the current process within num_replicas. 21 | """ 22 | 23 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 24 | if num_replicas is None: 25 | if not dist.is_available(): 26 | raise RuntimeError("Requires distributed package to be available") 27 | num_replicas = dist.get_world_size() 28 | if rank is None: 29 | if not dist.is_available(): 30 | raise RuntimeError("Requires distributed package to be available") 31 | rank = dist.get_rank() 32 | self.dataset = dataset 33 | self.num_replicas = num_replicas 34 | self.rank = rank 35 | self.epoch = 0 36 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 37 | self.total_size = self.num_samples * self.num_replicas 38 | self.shuffle = shuffle 39 | 40 | def __iter__(self): 41 | if self.shuffle: 42 | # deterministically shuffle based on epoch 43 | g = torch.Generator() 44 | g.manual_seed(self.epoch) 45 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 46 | else: 47 | indices = torch.arange(len(self.dataset)).tolist() 48 | 49 | # add extra samples to make it evenly divisible 50 | indices += indices[: (self.total_size - len(indices))] 51 | assert len(indices) == self.total_size 52 | 53 | # subsample 54 | offset = self.num_samples * self.rank 55 | indices = indices[offset : offset + self.num_samples] 56 | assert len(indices) == self.num_samples 57 | 58 | return iter(indices) 59 | 60 | def __len__(self): 61 | return self.num_samples 62 | 63 | def set_epoch(self, epoch): 64 | self.epoch = epoch 65 |
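# --- Editor's usage sketch (not a file from this repo): how the DistributedSampler
# --- above is typically driven. Assumes the class is importable (e.g. from
# --- hetsgg.data.samplers); the toy dataset and epoch count are made up.
import torch
import torch.utils.data

dataset = torch.utils.data.TensorDataset(torch.arange(10).float())
sampler = DistributedSampler(dataset, num_replicas=4, rank=0, shuffle=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=2, sampler=sampler)
for epoch in range(2):
    sampler.set_epoch(epoch)  # re-seeds the shuffle so every rank permutes identically
    for batch in loader:
        pass  # rank 0 consumes its own ceil(10/4) = 3-sample slice of the padded index list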
-------------------------------------------------------------------------------- /hetsgg/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | #include "cpu/vision.h" 2 | 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 6 | const at::Tensor& scores, 7 | const float threshold) { 8 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 9 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 10 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 11 | 12 | if (dets.numel() == 0) { 13 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 14 | } 15 | 16 | auto x1_t = dets.select(1, 0).contiguous(); 17 | auto y1_t = dets.select(1, 1).contiguous(); 18 | auto x2_t = dets.select(1, 2).contiguous(); 19 | auto y2_t = dets.select(1, 3).contiguous(); 20 | 21 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 22 | 23 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 24 | 25 | auto ndets = dets.size(0); 26 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 27 | 28 | auto suppressed = suppressed_t.data<uint8_t>(); 29 | auto order = order_t.data<int64_t>(); 30 | auto x1 = x1_t.data<scalar_t>(); 31 | auto y1 = y1_t.data<scalar_t>(); 32 | auto x2 = x2_t.data<scalar_t>(); 33 | auto y2 = y2_t.data<scalar_t>(); 34 | auto areas = areas_t.data<scalar_t>(); 35 | 36 | for (int64_t _i = 0; _i < ndets; _i++) { 37 | auto i = order[_i]; 38 | if (suppressed[i] == 1) 39 | continue; 40 | auto ix1 = x1[i]; 41 | auto iy1 = y1[i]; 42 | auto ix2 = x2[i]; 43 | auto iy2 = y2[i]; 44 | auto iarea = areas[i]; 45 | 46 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 47 | auto j = order[_j]; 48 | if (suppressed[j] == 1) 49 | continue; 50 | auto xx1 = std::max(ix1, x1[j]); 51 | auto yy1 = std::max(iy1, y1[j]); 52 | auto xx2 = std::min(ix2, x2[j]); 53 | auto yy2 = std::min(iy2, y2[j]); 54 | 55 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 56 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 57 | auto inter = w * h; 58 | auto ovr = inter / (iarea + areas[j] - inter); 59 | if (ovr >= threshold) 60 | suppressed[j] = 1; 61 | } 62 | } 63 | return at::nonzero(suppressed_t == 0).squeeze(1); 64 | } 65 | 66 | at::Tensor nms_cpu(const at::Tensor& dets, 67 | const at::Tensor& scores, 68 | const float threshold) { 69 | at::Tensor result; 70 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 71 | result = nms_cpu_kernel<scalar_t>(dets, scores, threshold); 72 | }); 73 | return result; 74 | } 75 | -------------------------------------------------------------------------------- /hetsgg/utils/visualize_graph.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | 4 | import torch 5 | from graphviz import Digraph 6 | 7 | 8 | def visual_computation_graph(var, params, output_dir, graph_name='network'): 9 | """ Produces Graphviz representation of PyTorch autograd graph. 10 | 11 | Blue nodes are trainable Variables (weights, bias). 12 | Orange nodes are saved tensors for the backward pass. 13 | 14 | Args: 15 | var: output Variable 16 | params: list of (name, Parameters) 17 | """ 18 | 19 | param_map = {id(v): k for k, v in params} 20 | 21 | node_attr = dict(style='filled', 22 | shape='box', 23 | align='left', 24 | fontsize='12', 25 | ranksep='0.1', 26 | height='0.2') 27 | 28 | comp_graph = Digraph(filename=os.path.join(output_dir, graph_name), 29 | format='pdf', 30 | node_attr=node_attr, 31 | graph_attr=dict(size="256,512")) 32 | seen = set() 33 | 34 | 35 | 36 | def get_color(): 37 | pallet = ['#8B0000', "#FF8C00", "#556B2F", "#8FBC8F", "#2F4F4F", "#4682B4", 38 | "#191970", "#8A2BE2", "#C71585", "#000000", "#808080"] 39 | 40 | idx = random.randint(0, len(pallet)-1) 41 | return pallet[idx] 42 | 43 | 44 | def add_nodes(var): 45 | if var not in seen: 46 | 47 | node_id = str(id(var)) 48 | 49 | if torch.is_tensor(var): 50 | node_label = "saved tensor\n{}".format(tuple(var.size())) 51 | comp_graph.node(node_id, node_label, fillcolor='orange') 52 | 53 | elif hasattr(var, 'variable'): 54 | # weights 55 | variable_name = param_map.get(id(var.variable)) 56 | variable_size = tuple(var.variable.size()) 57 | node_name = "{}\n{}".format(variable_name, variable_size) 58 | comp_graph.node(node_id, node_name, fillcolor='lightblue') 59 | 60 | else: 61 | # operation 62 | node_label = type(var).__name__.replace('Backward', '') 63 | comp_graph.node(node_id, node_label) 64 | 65 | seen.add(var) 66 | if hasattr(var, 'next_functions'): 67 | for u in var.next_functions: 68 | if u[0] is not None: 69 | comp_graph.edge(str(id(u[0])), str(id(var)), color=get_color()) 70 | add_nodes(u[0]) 71 | 72 | if hasattr(var, 'saved_tensors'): 73 | for t in var.saved_tensors: 74 | comp_graph.edge(str(id(t)), str(id(var)), color=get_color()) 75 | add_nodes(t) 76 | 77 | add_nodes(var.grad_fn) 78 | 79 | return comp_graph 80 |
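# --- Editor's usage sketch (not a repo file): rendering the autograd graph of a toy
# --- module with visual_computation_graph above. The module and file names are
# --- illustrative; rendering needs the graphviz system binary.
import torch
from torch import nn

model = nn.Linear(4, 2)
out = model(torch.randn(3, 4)).sum()  # scalar output, so out.grad_fn exists
graph = visual_computation_graph(out, list(model.named_parameters()),
                                 output_dir=".", graph_name="toy_graph")
graph.render()  # writes ./toy_graph.pdf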
-------------------------------------------------------------------------------- /hetsgg/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import json 3 | import logging 4 | import os 5 | from .comm import is_main_process 6 | import numpy as np 7 | 8 | from hetsgg.structures.bounding_box import BoxList 9 | from hetsgg.structures.boxlist_ops import boxlist_iou 10 | 11 | def mkdir(path): 12 | try: 13 | os.makedirs(path) 14 | except OSError as e: 15 | if e.errno != errno.EEXIST: 16 | raise 17 | 18 | 19 | def save_labels(dataset_list, output_dir): 20 | if is_main_process(): 21 | logger = logging.getLogger(__name__) 22 | 23 | ids_to_labels = {} 24 | for dataset in dataset_list: 25 | if hasattr(dataset, 'categories'): 26 | ids_to_labels.update(dataset.categories) 27 | else: 28 | logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format( 29 | dataset.__class__.__name__)) 30 | 31 | if ids_to_labels: 32 | labels_file = os.path.join(output_dir, 'labels.json') 33 | logger.info("Saving labels mapping into {}".format(labels_file)) 34 | with open(labels_file, 'w') as f: 35 | json.dump(ids_to_labels, f, indent=2) 36 | 37 | 38 | def save_config(cfg, path): 39 | if is_main_process(): 40 | with open(path, 'w') as f: 41 | f.write(cfg.dump()) 42 | 43 | 44 | def intersect_2d(x1, x2): 45 | """ 46 | Given two arrays [m1, n], [m2,n], returns a [m1, m2] array where each entry is True if those 47 | rows match. 48 | :param x1: [m1, n] numpy array 49 | :param x2: [m2, n] numpy array 50 | :return: [m1, m2] bool array of the intersections 51 | """ 52 | if x1.shape[1] != x2.shape[1]: 53 | raise ValueError("Input arrays must have same #columns") 54 | 55 | # This performs a matrix multiplication-esque thing between the two arrays 56 | # Instead of summing, we want the equality, so we reduce in that way 57 | res = (x1[..., None] == x2.T[None, ...]).all(1) 58 | return res 59 | 60 | def argsort_desc(scores): 61 | """ 62 | Returns the indices that sort scores in descending order 63 | :param scores: Numpy array of arbitrary size 64 | :return: an array of size [numel(scores), dim(scores)] where each row is the index you'd 65 | need to get the score.
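Example (worked case, added for clarity): for scores = np.array([[1, 3], [2, 0]]) this returns [[0, 1], [1, 0], [0, 0], [1, 1]], i.e. the (row, col) positions of the values 3, 2, 1, 0 in descending order.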
66 | """ 67 | return np.column_stack(np.unravel_index(np.argsort(-scores.ravel()), scores.shape)) 68 | 69 | def bbox_overlaps(boxes1, boxes2): 70 | """ 71 | Parameters: 72 | boxes1 (m, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 73 | boxes2 (n, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 74 | Return: 75 | iou (m, n) [np.array] 76 | """ 77 | boxes1 = BoxList(boxes1, (0, 0), 'xyxy') 78 | boxes2 = BoxList(boxes2, (0, 0), 'xyxy') 79 | iou = boxlist_iou(boxes1, boxes2).cpu().numpy() 80 | return iou 81 | -------------------------------------------------------------------------------- /hetsgg/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class BalancedPositiveNegativeSampler(object): 5 | """ 6 | This class samples batches, ensuring that they contain a fixed proportion of positives 7 | """ 8 | 9 | def __init__(self, batch_size_per_image, positive_fraction): 10 | """ 11 | Arguments: 12 | batch_size_per_image (int): number of elements to be selected per image 13 | positive_fraction (float): percentace of positive elements per batch 14 | """ 15 | self.batch_size_per_image = batch_size_per_image 16 | self.positive_fraction = positive_fraction 17 | 18 | def __call__(self, matched_idxs): 19 | """ 20 | Arguments: 21 | matched idxs: list of tensors containing -1, 0 or positive values. 22 | Each tensor corresponds to a specific image. 23 | -1 values are ignored, 0 are considered as negatives and > 0 as 24 | positives. 25 | 26 | Returns: 27 | pos_idx (list[tensor]) 28 | neg_idx (list[tensor]) 29 | 30 | Returns two lists of binary masks for each image. 31 | The first list contains the positive elements that were selected, 32 | and the second list the negative example. 
33 | """ 34 | pos_idx = [] 35 | neg_idx = [] 36 | for matched_idxs_per_image in matched_idxs: 37 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 38 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 39 | 40 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 41 | # protect against not enough positive examples 42 | num_pos = min(positive.numel(), num_pos) 43 | num_neg = self.batch_size_per_image - num_pos 44 | # protect against not enough negative examples 45 | num_neg = min(negative.numel(), num_neg) 46 | 47 | # randomly select positive and negative examples 48 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 49 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 50 | 51 | pos_idx_per_image = positive[perm1] 52 | neg_idx_per_image = negative[perm2] 53 | 54 | # create binary mask from indices 55 | pos_idx_per_image_mask = torch.zeros_like( 56 | matched_idxs_per_image, dtype=torch.uint8 57 | ) 58 | neg_idx_per_image_mask = torch.zeros_like( 59 | matched_idxs_per_image, dtype=torch.uint8 60 | ) 61 | pos_idx_per_image_mask[pos_idx_per_image] = 1 62 | neg_idx_per_image_mask[neg_idx_per_image] = 1 63 | 64 | pos_idx.append(pos_idx_per_image_mask) 65 | neg_idx.append(neg_idx_per_image_mask) 66 | 67 | return pos_idx, neg_idx 68 | -------------------------------------------------------------------------------- /hetsgg/layers/dcn/deform_pool_func.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from hetsgg import _C 6 | 7 | 8 | class DeformRoIPoolingFunction(Function): 9 | 10 | @staticmethod 11 | def forward( 12 | ctx, 13 | data, 14 | rois, 15 | offset, 16 | spatial_scale, 17 | out_size, 18 | out_channels, 19 | no_trans, 20 | group_size=1, 21 | part_size=None, 22 | sample_per_part=4, 23 | trans_std=.0 24 | ): 25 | ctx.spatial_scale = spatial_scale 26 | ctx.out_size = out_size 27 | ctx.out_channels = out_channels 28 | ctx.no_trans = no_trans 29 | ctx.group_size = group_size 30 | ctx.part_size = out_size if part_size is None else part_size 31 | ctx.sample_per_part = sample_per_part 32 | ctx.trans_std = trans_std 33 | 34 | assert 0.0 <= ctx.trans_std <= 1.0 35 | if not data.is_cuda: 36 | raise NotImplementedError 37 | 38 | n = rois.shape[0] 39 | output = data.new_empty(n, out_channels, out_size, out_size) 40 | output_count = data.new_empty(n, out_channels, out_size, out_size) 41 | _C.deform_psroi_pooling_forward( 42 | data, 43 | rois, 44 | offset, 45 | output, 46 | output_count, 47 | ctx.no_trans, 48 | ctx.spatial_scale, 49 | ctx.out_channels, 50 | ctx.group_size, 51 | ctx.out_size, 52 | ctx.part_size, 53 | ctx.sample_per_part, 54 | ctx.trans_std 55 | ) 56 | 57 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 58 | ctx.save_for_backward(data, rois, offset) 59 | ctx.output_count = output_count 60 | 61 | return output 62 | 63 | @staticmethod 64 | @once_differentiable 65 | def backward(ctx, grad_output): 66 | if not grad_output.is_cuda: 67 | raise NotImplementedError 68 | 69 | data, rois, offset = ctx.saved_tensors 70 | output_count = ctx.output_count 71 | grad_input = torch.zeros_like(data) 72 | grad_rois = None 73 | grad_offset = torch.zeros_like(offset) 74 | 75 | _C.deform_psroi_pooling_backward( 76 | grad_output, 77 | data, 78 | rois, 79 | offset, 80 | output_count, 81 | grad_input, 82 | grad_offset, 83 | ctx.no_trans, 84 
| ctx.spatial_scale, 85 | ctx.out_channels, 86 | ctx.group_size, 87 | ctx.out_size, 88 | ctx.part_size, 89 | ctx.sample_per_part, 90 | ctx.trans_std 91 | ) 92 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 93 | 94 | 95 | deform_roi_pooling = DeformRoIPoolingFunction.apply 96 | -------------------------------------------------------------------------------- /hetsgg/layers/label_smoothing_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Label_Smoothing_Regression(nn.Module): 6 | 7 | def __init__(self, e=0.01, reduction='mean'): 8 | super().__init__() 9 | 10 | self.log_softmax = nn.LogSoftmax(dim=1) 11 | self.e = e 12 | self.reduction = reduction 13 | 14 | def _one_hot(self, labels, classes, value=1): 15 | """ 16 | Convert labels to one-hot vectors 17 | 18 | Args: 19 | labels: torch tensor in format [label1, label2, label3, ...] 20 | classes: int, number of classes 21 | value: label value in one hot vector, default to 1 22 | 23 | Returns: 24 | return one hot format labels in shape [batchsize, classes] 25 | """ 26 | 27 | one_hot = torch.zeros(labels.size(0), classes) 28 | 29 | # labels and value_added sizes must match 30 | labels = labels.view(labels.size(0), -1) 31 | value_added = torch.Tensor(labels.size(0), 1).fill_(value) 32 | 33 | value_added = value_added.to(labels.device) 34 | one_hot = one_hot.to(labels.device) 35 | 36 | one_hot.scatter_add_(1, labels, value_added) 37 | 38 | return one_hot 39 | 40 | def _smooth_label(self, target, length, smooth_factor): 41 | """Convert targets to one-hot format and smooth 42 | them. 43 | Args: 44 | target: target in the form [label1, label2, ..., label_batchsize] 45 | length: length of one-hot format (number of classes) 46 | smooth_factor: smooth factor for label smoothing 47 | 48 | Returns: 49 | smoothed labels in one hot format 50 | """ 51 | one_hot = self._one_hot(target, length, value=1 - smooth_factor) 52 | one_hot += smooth_factor / length 53 | 54 | return one_hot.to(target.device) 55 | 56 | def forward(self, x, target): 57 | 58 | if x.size(0) != target.size(0): 59 | raise ValueError('Expected input batchsize ({}) to match target batch_size ({})' 60 | .format(x.size(0), target.size(0))) 61 | 62 | if x.dim() < 2: 63 | raise ValueError('Expected input tensor to have at least 2 dimensions (got {})' 64 | .format(x.size(0))) 65 | 66 | if x.dim() != 2: 67 | raise ValueError('Only 2-dimensional tensors are implemented (got {})' 68 | .format(x.size())) 69 | 70 | 71 | smoothed_target = self._smooth_label(target, x.size(1), self.e) 72 | x = self.log_softmax(x) 73 | loss = torch.sum(- x * smoothed_target, dim=1) 74 | 75 | if self.reduction == 'none': 76 | return loss 77 | 78 | elif self.reduction == 'sum': 79 | return torch.sum(loss) 80 | 81 | elif self.reduction == 'mean': 82 | return torch.mean(loss) 83 | 84 | else: 85 | raise ValueError('unrecognized option; expected reduction to be one of none, mean, sum') 86 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/attribute_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from .roi_attribute_feature_extractors import make_roi_attribute_feature_extractor 5 | from .roi_attribute_predictors import make_roi_attribute_predictor 6 | from .loss import make_roi_attribute_loss_evaluator 7 | 8 |
def add_attribute_logits(proposals, attri_logits): 9 | slice_idxs = [0] 10 | for i in range(len(proposals)): 11 | slice_idxs.append(len(proposals[i])+slice_idxs[-1]) 12 | proposals[i].add_field("attribute_logits", attri_logits[slice_idxs[i]:slice_idxs[i+1]]) 13 | return proposals 14 | 15 | class ROIAttributeHead(torch.nn.Module): 16 | """ 17 | Generic ATTRIBUTE Head class. 18 | """ 19 | 20 | def __init__(self, cfg, in_channels): 21 | super(ROIAttributeHead, self).__init__() 22 | self.cfg = cfg.clone() 23 | self.feature_extractor = make_roi_attribute_feature_extractor(cfg, in_channels, half_out=self.cfg.MODEL.ATTRIBUTE_ON) 24 | self.predictor = make_roi_attribute_predictor(cfg, self.feature_extractor.out_channels) 25 | self.loss_evaluator = make_roi_attribute_loss_evaluator(cfg) 26 | 27 | def forward(self, features, proposals, targets=None): 28 | """ 29 | features: extracted from box_head 30 | """ 31 | # Attribute head is fixed when we train the relation head 32 | if self.cfg.MODEL.RELATION_ON: 33 | if self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_BOX and self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL: 34 | # mode==predcls 35 | # no need to predict attributes, use the ground truth 36 | x = self.feature_extractor(features, proposals) 37 | return x, proposals, {} 38 | # mode==sgcls or sgdet 39 | else: 40 | x = self.feature_extractor(features, proposals) 41 | attri_logits = self.predictor(x) 42 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 43 | proposals = add_attribute_logits(proposals, attri_logits) 44 | return x, proposals, {} 45 | 46 | # Train/Test the attribute head 47 | x = self.feature_extractor(features, proposals) 48 | attri_logits = self.predictor(x) 49 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 50 | proposals = add_attribute_logits(proposals, attri_logits) 51 | 52 | if not self.training: 53 | return x, proposals, {} 54 | 55 | # proposals need to contain the attribute fields 56 | loss_attribute = self.loss_evaluator(proposals, attri_logits) 57 | return x, proposals, dict(loss_attribute=loss_attribute) 58 | 59 | def build_roi_attribute_head(cfg, in_channels): 60 | """ 61 | Constructs a new attribute head. 62 | By default, uses ROIAttributeHead, but if it turns out not to be enough, just register a new class 63 | and make it a parameter in the config 64 | """ 65 | return ROIAttributeHead(cfg, in_channels) 66 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/box_head/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.layers import smooth_l1_loss 5 | from hetsgg.modeling.box_coder import BoxCoder 6 | from hetsgg.modeling.matcher import Matcher 7 | from hetsgg.structures.boxlist_ops import boxlist_iou 8 | from hetsgg.modeling.balanced_positive_negative_sampler import ( 9 | BalancedPositiveNegativeSampler 10 | ) 11 | from hetsgg.modeling.utils import cat 12 | 13 | 14 | class FastRCNNLossComputation(object): 15 | """ 16 | Computes the loss for Faster R-CNN. 17 | Also supports FPN.
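Example (hedged sketch of the calling convention, inferred from __call__ below): each proposal BoxList must already carry the "labels" and "regression_targets" fields; then `cls_loss, box_loss = loss_evaluator([class_logits], [box_regression], proposals)` returns the two scalar loss terms.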
18 | """ 19 | 20 | def __init__(self, proposal_matcher, cls_agnostic_bbox_reg=False): 21 | self.proposal_matcher = proposal_matcher  # used by assign_label_to_proposals below 22 | self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg 23 | def assign_label_to_proposals(self, proposals, targets): 24 | for img_idx, (target, proposal) in enumerate(zip(targets, proposals)): 25 | match_quality_matrix = boxlist_iou(target, proposal) 26 | matched_idxs = self.proposal_matcher(match_quality_matrix) 27 | # Fast RCNN only needs the "labels" field for selecting the targets 28 | target = target.copy_with_fields(["labels", "attributes"]) 29 | matched_targets = target[matched_idxs.clamp(min=0)] 30 | 31 | labels_per_image = matched_targets.get_field("labels").to(dtype=torch.int64) 32 | attris_per_image = matched_targets.get_field("attributes").to(dtype=torch.int64) 33 | 34 | labels_per_image[matched_idxs < 0] = 0 35 | attris_per_image[matched_idxs < 0, :] = 0 36 | proposals[img_idx].add_field("labels", labels_per_image) 37 | proposals[img_idx].add_field("attributes", attris_per_image) 38 | return proposals 39 | 40 | 41 | def __call__(self, class_logits, box_regression, proposals): 42 | 43 | class_logits = cat(class_logits, dim=0) 44 | box_regression = cat(box_regression, dim=0) 45 | device = class_logits.device 46 | 47 | labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0) 48 | regression_targets = cat([proposal.get_field("regression_targets") for proposal in proposals], dim=0) 49 | 50 | classification_loss = F.cross_entropy(class_logits, labels.long()) 51 | 52 | 53 | sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) 54 | labels_pos = labels[sampled_pos_inds_subset] 55 | if self.cls_agnostic_bbox_reg: 56 | map_inds = torch.tensor([4, 5, 6, 7], device=device) 57 | else: 58 | map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device) 59 | 60 | box_loss = smooth_l1_loss( 61 | box_regression[sampled_pos_inds_subset[:, None], map_inds], 62 | regression_targets[sampled_pos_inds_subset], 63 | size_average=False, 64 | beta=1, 65 | ) 66 | box_loss = box_loss / labels.numel() 67 | 68 | return classification_loss, box_loss 69 | 70 | 71 | def make_roi_box_loss_evaluator(cfg): 72 | matcher = Matcher(cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD, cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD, allow_low_quality_matches=False) 73 | cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG 74 | loss_evaluator = FastRCNNLossComputation(matcher, cls_agnostic_bbox_reg) 75 | 76 | return loss_evaluator 77 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | from torch import nn 4 | 5 | from hetsgg.modeling import registry 6 | from hetsgg.modeling.make_layers import conv_with_kaiming_uniform 7 | from . import fpn as fpn_module 8 | from . import resnet 9 | from .
import vgg 10 | 11 | 12 | @registry.BACKBONES.register("VGG-16") 13 | def build_vgg_fpn_backbone(cfg): 14 | body = vgg.VGG16(cfg) 15 | out_channels = cfg.MODEL.VGG.VGG16_OUT_CHANNELS 16 | model = nn.Sequential(OrderedDict([("body", body)])) 17 | model.out_channels = out_channels 18 | return model 19 | 20 | 21 | @registry.BACKBONES.register("R-50-C4") 22 | @registry.BACKBONES.register("R-50-C5") 23 | @registry.BACKBONES.register("R-101-C4") 24 | @registry.BACKBONES.register("R-101-C5") 25 | def build_resnet_backbone(cfg): 26 | body = resnet.ResNet(cfg) 27 | model = nn.Sequential(OrderedDict([("body", body)])) 28 | model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 29 | return model 30 | 31 | 32 | @registry.BACKBONES.register("R-50-FPN") 33 | @registry.BACKBONES.register("R-101-FPN") 34 | @registry.BACKBONES.register("R-152-FPN") 35 | def build_resnet_fpn_backbone(cfg): 36 | body = resnet.ResNet(cfg) 37 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 38 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 39 | fpn = fpn_module.FPN( 40 | in_channels_list=[ 41 | in_channels_stage2, 42 | in_channels_stage2 * 2, 43 | in_channels_stage2 * 4, 44 | in_channels_stage2 * 8, 45 | ], 46 | out_channels=out_channels, 47 | conv_block=conv_with_kaiming_uniform( 48 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 49 | ), 50 | top_blocks=fpn_module.LastLevelMaxPool(), 51 | ) 52 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 53 | model.out_channels = out_channels 54 | return model 55 | 56 | 57 | @registry.BACKBONES.register("R-50-FPN-RETINANET") 58 | @registry.BACKBONES.register("R-101-FPN-RETINANET") 59 | def build_resnet_fpn_p3p7_backbone(cfg): 60 | body = resnet.ResNet(cfg) 61 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 62 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 63 | in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 64 | else out_channels 65 | fpn = fpn_module.FPN( 66 | in_channels_list=[ 67 | 0, 68 | in_channels_stage2 * 2, 69 | in_channels_stage2 * 4, 70 | in_channels_stage2 * 8, 71 | ], 72 | out_channels=out_channels, 73 | conv_block=conv_with_kaiming_uniform( 74 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 75 | ), 76 | top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels), 77 | ) 78 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 79 | model.out_channels = out_channels 80 | return model 81 | 82 | 83 | def build_backbone(cfg): 84 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 85 | "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in the backbone registry".format( 86 | cfg.MODEL.BACKBONE.CONV_BODY 87 | ) 88 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 89 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from hetsgg.structures.bounding_box import BoxList 5 | 6 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 7 | from .roi_mask_predictors import make_roi_mask_predictor 8 | from .inference import make_roi_mask_post_processor 9 | from .loss import make_roi_mask_loss_evaluator 10 | 11 | 12 | def keep_only_positive_boxes(boxes): 13 | """ 14 | Given a set of BoxList containing the `labels` field, 15 | return a set of BoxList for which `labels > 0`. 
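This is done at training time so that the mask branch only computes a loss on proposals that were actually matched to a ground-truth object.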
16 | 17 | Arguments: 18 | boxes (list of BoxList) 19 | """ 20 | assert isinstance(boxes, (list, tuple)) 21 | assert isinstance(boxes[0], BoxList) 22 | assert boxes[0].has_field("labels") 23 | positive_boxes = [] 24 | positive_inds = [] 25 | num_boxes = 0 26 | for boxes_per_image in boxes: 27 | labels = boxes_per_image.get_field("labels") 28 | inds_mask = labels > 0 29 | inds = inds_mask.nonzero().squeeze(1) 30 | positive_boxes.append(boxes_per_image[inds]) 31 | positive_inds.append(inds_mask) 32 | return positive_boxes, positive_inds 33 | 34 | 35 | class ROIMaskHead(torch.nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(ROIMaskHead, self).__init__() 38 | self.cfg = cfg.clone() 39 | self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels) 40 | self.predictor = make_roi_mask_predictor( 41 | cfg, self.feature_extractor.out_channels) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None): 46 | """ 47 | Arguments: 48 | features (list[Tensor]): feature-maps from possibly several levels 49 | proposals (list[BoxList]): proposal boxes 50 | targets (list[BoxList], optional): the ground-truth targets. 51 | 52 | Returns: 53 | x (Tensor): the result of the feature extractor 54 | proposals (list[BoxList]): during training, the original proposals 55 | are returned. During testing, the predicted boxlists are returned 56 | with the `mask` field set 57 | losses (dict[Tensor]): During training, returns the losses for the 58 | head. During testing, returns an empty dict. 59 | """ 60 | 61 | if self.training: 62 | all_proposals = proposals 63 | proposals, positive_inds = keep_only_positive_boxes(proposals) 64 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 65 | x = features 66 | x = x[torch.cat(positive_inds, dim=0)] 67 | else: 68 | x = self.feature_extractor(features, proposals) 69 | mask_logits = self.predictor(x) 70 | 71 | if not self.training: 72 | result = self.post_processor(mask_logits, proposals) 73 | return x, result, {} 74 | 75 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 76 | 77 | return x, all_proposals, dict(loss_mask=loss_mask) 78 | 79 | 80 | def build_roi_mask_head(cfg, in_channels): 81 | return ROIMaskHead(cfg, in_channels) 82 | -------------------------------------------------------------------------------- /hetsgg/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping): 8 | logger = logging.getLogger(__name__) 9 | current_keys = sorted(list(model_state_dict.keys())) 10 | loaded_keys = sorted(list(loaded_state_dict.keys())) 11 | 12 | mapped_current_keys = current_keys.copy() 13 | for i, key in enumerate(mapped_current_keys): 14 | for source_key, target_key in load_mapping.items(): 15 | if source_key in key: 16 | mapped_current_keys[i] = key.replace(source_key, target_key) 17 | logger.info("MAPPING {} in current model to {} in loaded model.".format(key, mapped_current_keys[i])) 18 | 19 | match_matrix = [ 20 | len(j) if i.endswith(j) else 0 for i in mapped_current_keys for j in loaded_keys 21 | ] 22 | match_matrix = torch.as_tensor(match_matrix).view( 23 | len(current_keys), len(loaded_keys) 24 | ) 25 | max_match_size, idxs = match_matrix.max(1) 26 | # 
remove indices that correspond to no-match 27 | idxs[max_match_size == 0] = -1 28 | 29 | # used for logging 30 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 31 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 32 | log_str_template = "REMATCHING! {: <{}} loaded from {: <{}} of shape {}" 33 | for idx_new, idx_old in enumerate(idxs.tolist()): 34 | if idx_old == -1: 35 | key = current_keys[idx_new] 36 | logger.info("NO-MATCHING of current module: {} of shape {}".format(key, 37 | tuple(model_state_dict[key].shape))) 38 | continue 39 | key = current_keys[idx_new] 40 | key_old = loaded_keys[idx_old] 41 | model_state_dict[key] = loaded_state_dict[key_old] 42 | if ((not key.startswith('module.')) and key != key_old) or (key.startswith('module.') and key[7:] != key_old): 43 | logger.info( 44 | log_str_template.format( 45 | key, 46 | max_size, 47 | key_old, 48 | max_size_loaded, 49 | tuple(loaded_state_dict[key_old].shape), 50 | ) 51 | ) 52 | logger.info('Mapping All') 53 | 54 | 55 | def strip_prefix_if_present(state_dict, prefix): 56 | keys = sorted(state_dict.keys()) 57 | if not all(key.startswith(prefix) for key in keys): 58 | return state_dict 59 | stripped_state_dict = OrderedDict() 60 | for key, value in state_dict.items(): 61 | stripped_state_dict[key.replace(prefix, "")] = value 62 | return stripped_state_dict 63 | 64 | 65 | def load_state_dict(model, loaded_state_dict, load_mapping): 66 | model_state_dict = model.state_dict() 67 | # if the state_dict comes from a model that was wrapped in a 68 | # DataParallel or DistributedDataParallel during serialization, 69 | # remove the "module" prefix before performing the matching 70 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 71 | align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping) 72 | 73 | # non-strict loading: model_state_dict was already aligned above, and unmatched keys keep their initialized values 74 | model.load_state_dict(model_state_dict, strict=False) 75 | -------------------------------------------------------------------------------- /hetsgg/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | 5 | 6 | class BoxCoder(object): 7 | """ 8 | This class encodes and decodes a set of bounding boxes into 9 | the representation used for training the regressors. 10 | """ 11 | 12 | def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)): 13 | """ 14 | Arguments: 15 | weights (4-element tuple) 16 | bbox_xform_clip (float) 17 | """ 18 | self.weights = weights 19 | self.bbox_xform_clip = bbox_xform_clip 20 | 21 | def encode(self, reference_boxes, proposals): 22 | """ 23 | Encode a set of proposals with respect to some 24 | reference boxes 25 | 26 | Arguments: 27 | reference_boxes (Tensor): reference boxes 28 | proposals (Tensor): boxes to be encoded 29 | """ 30 | 31 | TO_REMOVE = 1 # TODO remove 32 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 33 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 34 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 35 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 36 | 37 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 38 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 39 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 40 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 41 | 42 | wx, wy, ww, wh = self.weights 43 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 44 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 45 | targets_dw = ww * torch.log(gt_widths / ex_widths) 46 | targets_dh = wh * torch.log(gt_heights / ex_heights) 47 | 48 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 49 | return targets 50 | 51 | def decode(self, rel_codes, boxes): 52 | """ 53 | From a set of original boxes and encoded relative box offsets, 54 | get the decoded boxes. 55 | 56 | Arguments: 57 | rel_codes (Tensor): encoded boxes 58 | boxes (Tensor): reference boxes. 59 | """ 60 | 61 | boxes = boxes.to(rel_codes.dtype) 62 | 63 | TO_REMOVE = 1 # TODO remove 64 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 65 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 66 | ctr_x = boxes[:, 0] + 0.5 * widths 67 | ctr_y = boxes[:, 1] + 0.5 * heights 68 | 69 | wx, wy, ww, wh = self.weights 70 | dx = rel_codes[:, 0::4] / wx 71 | dy = rel_codes[:, 1::4] / wy 72 | dw = rel_codes[:, 2::4] / ww 73 | dh = rel_codes[:, 3::4] / wh 74 | 75 | # Prevent sending too large values into torch.exp() 76 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 77 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 78 | 79 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 80 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 81 | pred_w = torch.exp(dw) * widths[:, None] 82 | pred_h = torch.exp(dh) * heights[:, None] 83 | 84 | pred_boxes = torch.zeros_like(rel_codes) 85 | # x1 86 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 87 | # y1 88 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 89 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 90 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 91 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 92 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 93 | 94 | return pred_boxes 95 | -------------------------------------------------------------------------------- /hetsgg/utils/comm.py: -------------------------------------------------------------------------------- 1 | 2 | import pickle 3 | import time 4 | 5 | import torch 6 | import torch.distributed as dist 7 | 8 | 9 | def get_world_size(): 10 | if not dist.is_available(): 11 | return 1 12 | if not dist.is_initialized(): 13 | return 1 14 | return dist.get_world_size() 15 | 16 | 17 | def get_rank(): 18 | if not dist.is_available(): 19 | return 0 20 | if not dist.is_initialized(): 21 | return 0 22 | return dist.get_rank() 23 | 24 | 25 | def is_main_process(): 26 | return get_rank() == 0 
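# A typical pattern is to guard filesystem and logging side effects with
# this helper so they run once per job rather than once per process, e.g.:
#
#     if is_main_process():
#         torch.save(model.state_dict(), "model_final.pth")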
27 | 28 | 29 | def synchronize(): 30 | """ 31 | Helper function to synchronize (barrier) among all processes when 32 | using distributed training 33 | """ 34 | if not dist.is_available(): 35 | return 36 | if not dist.is_initialized(): 37 | return 38 | world_size = dist.get_world_size() 39 | if world_size == 1: 40 | return 41 | dist.barrier() 42 | 43 | 44 | def all_gather(data): 45 | """ 46 | Run all_gather on arbitrary picklable data (not necessarily tensors) 47 | Args: 48 | data: any picklable object 49 | Returns: 50 | list[data]: list of data gathered from each rank 51 | """ 52 | to_device = "cuda" 53 | 54 | world_size = get_world_size() 55 | if world_size == 1: 56 | return [data] 57 | 58 | # serialized to a Tensor 59 | buffer = pickle.dumps(data) 60 | storage = torch.ByteStorage.from_buffer(buffer) 61 | tensor = torch.ByteTensor(storage).to(to_device) 62 | 63 | # obtain the serialized Tensor size of each rank; dist.all_gather 64 | # requires every participating tensor to have the same shape, so each 65 | # rank pads its byte tensor up to the maximum size across ranks before 66 | # the gather, and the padding is trimmed again after deserialization 67 | # below 68 | 69 | local_size = torch.LongTensor([tensor.view(-1).shape[0]]).to(to_device) 70 | size_list = [torch.LongTensor([0]).to(to_device) for _ in range(world_size)] 71 | dist.all_gather(size_list, local_size) 72 | size_list = [int(size.item()) for size in size_list] 73 | max_size = max(size_list) 74 | 75 | tensor_list = [] 76 | for _ in size_list: 77 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to(to_device)) 78 | if local_size != max_size: 79 | padding = torch.ByteTensor(size=(max_size - local_size,)).to(to_device) 80 | tensor = torch.cat((tensor, padding), dim=0) 81 | dist.all_gather(tensor_list, tensor) 82 | 83 | data_list = [] 84 | for size, tensor in zip(size_list, tensor_list): 85 | buffer = tensor.cpu().numpy().tobytes()[:size] 86 | data_list.append(pickle.loads(buffer)) 87 | 88 | return data_list 89 | 90 | 91 | def reduce_dict(input_dict, average=True): 92 | """ 93 | Args: 94 | input_dict (dict): all the values will be reduced 95 | average (bool): whether to do average or sum 96 | Reduce the values in the dictionary from all processes so that process with rank 97 | 0 has the averaged results. Returns a dict with the same fields as 98 | input_dict, after reduction. 
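Note that only the process with rank 0 is guaranteed to hold the reduced values after this call; other processes should not rely on the contents of the returned dict.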
99 | """ 100 | world_size = get_world_size() 101 | if world_size < 2: 102 | return input_dict 103 | with torch.no_grad(): 104 | names = [] 105 | values = [] 106 | for k in sorted(input_dict.keys()): 107 | names.append(k) 108 | values.append(input_dict[k]) 109 | values = torch.stack(values, dim=0) 110 | dist.reduce(values, dst=0) 111 | if dist.get_rank() == 0 and average: 112 | values /= world_size 113 | reduced_dict = {k: v for k, v in zip(names, values)} 114 | return reduced_dict 115 | -------------------------------------------------------------------------------- /hetsgg/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include <ATen/ATen.h> 4 | #include <ATen/cuda/CUDAContext.h> 5 | 6 | #include <THC/THC.h> 7 | #include <THC/THCDeviceUtils.cuh> 8 | 9 | #include <vector> 10 | #include <iostream> 11 | #include <cmath> 12 | 13 | 14 | void DeformablePSROIPoolForward( 15 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 16 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 17 | const int height, const int width, const int num_bbox, 18 | const int channels_trans, const int no_trans, const float spatial_scale, 19 | const int output_dim, const int group_size, const int pooled_size, 20 | const int part_size, const int sample_per_part, const float trans_std); 21 | 22 | void DeformablePSROIPoolBackwardAcc( 23 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 24 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 25 | at::Tensor trans_grad, const int batch, const int channels, 26 | const int height, const int width, const int num_bbox, 27 | const int channels_trans, const int no_trans, const float spatial_scale, 28 | const int output_dim, const int group_size, const int pooled_size, 29 | const int part_size, const int sample_per_part, const float trans_std); 30 | 31 | void deform_psroi_pooling_cuda_forward( 32 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 33 | at::Tensor top_count, const int no_trans, const float spatial_scale, 34 | const int output_dim, const int group_size, const int pooled_size, 35 | const int part_size, const int sample_per_part, const float trans_std) 36 | { 37 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 38 | 39 | const int batch = input.size(0); 40 | const int channels = input.size(1); 41 | const int height = input.size(2); 42 | const int width = input.size(3); 43 | const int channels_trans = no_trans ? 
2 : trans.size(1); 44 | 45 | const int num_bbox = bbox.size(0); 46 | if (num_bbox != out.size(0)) 47 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 48 | out.size(0), num_bbox); 49 | 50 | DeformablePSROIPoolForward( 51 | input, bbox, trans, out, top_count, batch, channels, height, width, 52 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 53 | pooled_size, part_size, sample_per_part, trans_std); 54 | } 55 | 56 | void deform_psroi_pooling_cuda_backward( 57 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 58 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 59 | const int no_trans, const float spatial_scale, const int output_dim, 60 | const int group_size, const int pooled_size, const int part_size, 61 | const int sample_per_part, const float trans_std) 62 | { 63 | TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 64 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 65 | 66 | const int batch = input.size(0); 67 | const int channels = input.size(1); 68 | const int height = input.size(2); 69 | const int width = input.size(3); 70 | const int channels_trans = no_trans ? 2 : trans.size(1); 71 | 72 | const int num_bbox = bbox.size(0); 73 | if (num_bbox != out_grad.size(0)) 74 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 75 | out_grad.size(0), num_bbox); 76 | 77 | DeformablePSROIPoolBackwardAcc( 78 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 79 | channels, height, width, num_bbox, channels_trans, no_trans, 80 | spatial_scale, output_dim, group_size, pooled_size, part_size, 81 | sample_per_part, trans_std); 82 | } 83 | -------------------------------------------------------------------------------- /hetsgg/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | """ 2 | Miscellaneous utility functions 3 | """ 4 | 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | from hetsgg.config import cfg 9 | from hetsgg.layers import Conv2d 10 | 11 | 12 | def get_group_gn(dim, dim_per_gp, num_groups): 13 | """get number of groups used by GroupNorm, based on number of channels.""" 14 | assert dim_per_gp == -1 or num_groups == -1, \ 15 | "GroupNorm: can only specify G or C/G." 
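# exactly one of the two may be set: either the number of groups (num_groups)
# is fixed and the channels per group are derived from dim, or the channels
# per group (dim_per_gp) is fixed and the number of groups is derived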
16 | 17 | if dim_per_gp > 0: 18 | assert dim % dim_per_gp == 0, \ 19 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 20 | group_gn = dim // dim_per_gp 21 | else: 22 | assert dim % num_groups == 0, \ 23 | "dim: {}, num_groups: {}".format(dim, num_groups) 24 | group_gn = num_groups 25 | 26 | return group_gn 27 | 28 | 29 | def group_norm(out_channels, affine=True, divisor=1): 30 | out_channels = out_channels // divisor 31 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 32 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 33 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 34 | return torch.nn.GroupNorm( 35 | get_group_gn(out_channels, dim_per_gp, num_groups), 36 | out_channels, 37 | eps, 38 | affine 39 | ) 40 | 41 | 42 | def make_conv3x3( 43 | in_channels, 44 | out_channels, 45 | dilation=1, 46 | stride=1, 47 | use_gn=False, 48 | use_relu=False, 49 | kaiming_init=True 50 | ): 51 | conv = Conv2d( 52 | in_channels, 53 | out_channels, 54 | kernel_size=3, 55 | stride=stride, 56 | padding=dilation, 57 | dilation=dilation, 58 | bias=False if use_gn else True 59 | ) 60 | if kaiming_init: 61 | nn.init.kaiming_normal_( 62 | conv.weight, mode="fan_out", nonlinearity="relu" 63 | ) 64 | else: 65 | torch.nn.init.normal_(conv.weight, std=0.01) 66 | if not use_gn: 67 | nn.init.constant_(conv.bias, 0) 68 | module = [conv,] 69 | if use_gn: 70 | module.append(group_norm(out_channels)) 71 | if use_relu: 72 | module.append(nn.ReLU(inplace=True)) 73 | if len(module) > 1: 74 | return nn.Sequential(*module) 75 | return conv 76 | 77 | 78 | def make_fc(dim_in, hidden_dim, use_gn=False): 79 | 80 | if use_gn: 81 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 82 | nn.init.kaiming_uniform_(fc.weight, a=1) 83 | return nn.Sequential(fc, group_norm(hidden_dim)) 84 | fc = nn.Linear(dim_in, hidden_dim) 85 | nn.init.kaiming_uniform_(fc.weight, a=1) 86 | nn.init.constant_(fc.bias, 0) 87 | return fc 88 | 89 | 90 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 91 | def make_conv( 92 | in_channels, out_channels, kernel_size, stride=1, dilation=1 93 | ): 94 | conv = Conv2d( 95 | in_channels, 96 | out_channels, 97 | kernel_size=kernel_size, 98 | stride=stride, 99 | padding=dilation * (kernel_size - 1) // 2, 100 | dilation=dilation, 101 | bias=False if use_gn else True 102 | ) 103 | nn.init.kaiming_uniform_(conv.weight, a=1) 104 | if not use_gn: 105 | nn.init.constant_(conv.bias, 0) 106 | module = [conv,] 107 | if use_gn: 108 | module.append(group_norm(out_channels)) 109 | if use_relu: 110 | module.append(nn.ReLU(inplace=True)) 111 | if len(module) > 1: 112 | return nn.Sequential(*module) 113 | return conv 114 | 115 | return make_conv 116 | -------------------------------------------------------------------------------- /hetsgg/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import torch 4 | from torch.nn import functional as F 5 | 6 | from ..utils import concat_box_prediction_layers 7 | 8 | from hetsgg.layers import smooth_l1_loss 9 | from hetsgg.layers import SigmoidFocalLoss 10 | from hetsgg.modeling.matcher import Matcher 11 | from hetsgg.modeling.utils import cat 12 | from hetsgg.structures.boxlist_ops import boxlist_iou 13 | from hetsgg.structures.boxlist_ops import cat_boxlist 14 | from hetsgg.modeling.rpn.loss import RPNLossComputation 15 | 16 | class RetinaNetLossComputation(RPNLossComputation): 17 | """ 18 | This class computes the RetinaNet loss. 
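Classification uses a sigmoid focal loss over all anchors, while box regression uses a smooth-L1 loss over the positive anchors only.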
19 | """ 20 | 21 | def __init__(self, proposal_matcher, box_coder, 22 | generate_labels_func, 23 | sigmoid_focal_loss, 24 | bbox_reg_beta=0.11, 25 | regress_norm=1.0): 26 | """ 27 | Arguments: 28 | proposal_matcher (Matcher) 29 | box_coder (BoxCoder) 30 | """ 31 | self.proposal_matcher = proposal_matcher 32 | self.box_coder = box_coder 33 | self.box_cls_loss_func = sigmoid_focal_loss 34 | self.bbox_reg_beta = bbox_reg_beta 35 | self.copied_fields = ['labels'] 36 | self.generate_labels_func = generate_labels_func 37 | self.discard_cases = ['between_thresholds'] 38 | self.regress_norm = regress_norm 39 | 40 | def __call__(self, anchors, box_cls, box_regression, targets): 41 | """ 42 | Arguments: 43 | anchors (list[BoxList]) 44 | box_cls (list[Tensor]) 45 | box_regression (list[Tensor]) 46 | targets (list[BoxList]) 47 | 48 | Returns: 49 | retinanet_cls_loss (Tensor) 50 | retinanet_regression_loss (Tensor) 51 | """ 52 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 53 | labels, regression_targets = self.prepare_targets(anchors, targets) 54 | 55 | N = len(labels) 56 | box_cls, box_regression = \ 57 | concat_box_prediction_layers(box_cls, box_regression) 58 | 59 | labels = torch.cat(labels, dim=0) 60 | regression_targets = torch.cat(regression_targets, dim=0) 61 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 62 | 63 | retinanet_regression_loss = smooth_l1_loss( 64 | box_regression[pos_inds], 65 | regression_targets[pos_inds], 66 | beta=self.bbox_reg_beta, 67 | size_average=False, 68 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 69 | 70 | labels = labels.int() 71 | 72 | retinanet_cls_loss = self.box_cls_loss_func( 73 | box_cls, 74 | labels 75 | ) / (pos_inds.numel() + N) # normalized by num positive anchors + batch size (N images) 76 | 77 | return retinanet_cls_loss, retinanet_regression_loss 78 | 79 | 80 | def generate_retinanet_labels(matched_targets): 81 | labels_per_image = matched_targets.get_field("labels") 82 | return labels_per_image 83 | 84 | 85 | def make_retinanet_loss_evaluator(cfg, box_coder): 86 | matcher = Matcher( 87 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 88 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 89 | allow_low_quality_matches=True, 90 | ) 91 | sigmoid_focal_loss = SigmoidFocalLoss( 92 | cfg.MODEL.RETINANET.LOSS_GAMMA, 93 | cfg.MODEL.RETINANET.LOSS_ALPHA 94 | ) 95 | 96 | loss_evaluator = RetinaNetLossComputation( 97 | matcher, 98 | box_coder, 99 | generate_retinanet_labels, 100 | sigmoid_focal_loss, 101 | bbox_reg_beta = cfg.MODEL.RETINANET.BBOX_REG_BETA, 102 | regress_norm = cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 103 | ) 104 | return loss_evaluator 105 | -------------------------------------------------------------------------------- /tools/detector_pretest_net.py: -------------------------------------------------------------------------------- 1 | 2 | import argparse 3 | import os 4 | 5 | import torch 6 | 7 | from hetsgg.config import cfg 8 | from hetsgg.data import make_data_loader 9 | from hetsgg.engine.inference import inference 10 | from hetsgg.modeling.detector import build_detection_model 11 | from hetsgg.utils.checkpoint import DetectronCheckpointer 12 | from hetsgg.utils.collect_env import collect_env_info 13 | from hetsgg.utils.comm import synchronize, get_rank 14 | from hetsgg.utils.logger import setup_logger 15 | from hetsgg.utils.miscellaneous import mkdir 16 | 17 | try: 18 | from apex import amp 19 | except ImportError: 20 | raise ImportError('Use APEX for mixed precision via apex.amp') 21 | 22 | 23 | def main(): 24 | parser = 
argparse.ArgumentParser(description="PyTorch Object Detection Inference") 25 | parser.add_argument( 26 | "--config-file", 27 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 28 | metavar="FILE", 29 | help="path to config file", 30 | ) 31 | parser.add_argument("--local_rank", type=int, default=0) 32 | parser.add_argument( 33 | "opts", 34 | help="Modify config options using the command-line", 35 | default=None, 36 | nargs=argparse.REMAINDER, 37 | ) 38 | 39 | args = parser.parse_args() 40 | 41 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 42 | distributed = num_gpus > 1 43 | 44 | if distributed: 45 | torch.cuda.set_device(args.local_rank) 46 | torch.distributed.init_process_group( 47 | backend="nccl", init_method="env://" 48 | ) 49 | synchronize() 50 | 51 | cfg.merge_from_file(args.config_file) 52 | cfg.merge_from_list(args.opts) 53 | cfg.freeze() 54 | 55 | save_dir = "" 56 | logger = setup_logger("hetsgg", save_dir, get_rank()) 57 | logger.info("Using {} GPUs".format(num_gpus)) 58 | logger.info(cfg) 59 | 60 | 61 | model = build_detection_model(cfg) 62 | model.to(cfg.MODEL.DEVICE) 63 | 64 | output_dir = cfg.OUTPUT_DIR 65 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 66 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 67 | 68 | use_mixed_precision = cfg.DTYPE == 'float16' 69 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 70 | 71 | 72 | iou_types = ("bbox",) 73 | if cfg.MODEL.MASK_ON: 74 | iou_types = iou_types + ("segm",) 75 | if cfg.MODEL.KEYPOINT_ON: 76 | iou_types = iou_types + ("keypoints",) 77 | if cfg.MODEL.RELATION_ON: 78 | iou_types = iou_types + ("relations", ) 79 | if cfg.MODEL.ATTRIBUTE_ON: 80 | iou_types = iou_types + ("attributes", ) 81 | 82 | output_folders = [None] * len(cfg.DATASETS.TEST) 83 | dataset_names = cfg.DATASETS.TEST 84 | if cfg.OUTPUT_DIR: 85 | for idx, dataset_name in enumerate(dataset_names): 86 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 87 | mkdir(output_folder) 88 | output_folders[idx] = output_folder 89 | data_loaders_val = make_data_loader(cfg, mode='test', is_distributed=distributed) 90 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 91 | inference( 92 | cfg, 93 | model, 94 | data_loader_val, 95 | dataset_name=dataset_name, 96 | iou_types=iou_types, 97 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 98 | device=cfg.MODEL.DEVICE, 99 | expected_results=cfg.TEST.EXPECTED_RESULTS, 100 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 101 | output_folder=output_folder, 102 | ) 103 | synchronize() 104 | 105 | 106 | if __name__ == "__main__": 107 | main() 108 | -------------------------------------------------------------------------------- /hetsgg/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import torch 4 | import torchvision 5 | from torchvision.transforms import functional as F 6 | 7 | 8 | class Compose(object): 9 | def __init__(self, transforms): 10 | self.transforms = transforms 11 | 12 | def __call__(self, image, target): 13 | for t in self.transforms: 14 | image, target = t(image, target) 15 | return image, target 16 | 17 | def __repr__(self): 18 | format_string = self.__class__.__name__ + "(" 19 | for t in self.transforms: 20 | format_string += "\n" 21 | format_string += " {0}".format(t) 22 | 
format_string += "\n)" 23 | return format_string 24 | 25 | 26 | class Resize(object): 27 | def __init__(self, min_size, max_size): 28 | if not isinstance(min_size, (list, tuple)): 29 | min_size = (min_size,) 30 | self.min_size = min_size 31 | self.max_size = max_size 32 | 33 | # modified from torchvision to add support for max size 34 | def get_size(self, image_size): 35 | w, h = image_size 36 | size = random.choice(self.min_size) 37 | max_size = self.max_size 38 | if max_size is not None: 39 | min_original_size = float(min((w, h))) 40 | max_original_size = float(max((w, h))) 41 | if max_original_size / min_original_size * size > max_size: 42 | size = int(round(max_size * min_original_size / max_original_size)) 43 | 44 | if (w <= h and w == size) or (h <= w and h == size): 45 | return (h, w) 46 | 47 | if w < h: 48 | ow = size 49 | oh = int(size * h / w) 50 | else: 51 | oh = size 52 | ow = int(size * w / h) 53 | 54 | return (oh, ow) 55 | 56 | def __call__(self, image, target=None): 57 | size = self.get_size(image.size) 58 | image = F.resize(image, size) 59 | if target is None: 60 | return image 61 | target = target.resize(image.size) 62 | return image, target 63 | 64 | 65 | class RandomHorizontalFlip(object): 66 | def __init__(self, prob=0.5): 67 | self.prob = prob 68 | 69 | def __call__(self, image, target): 70 | if random.random() < self.prob: 71 | image = F.hflip(image) 72 | target = target.transpose(0) 73 | return image, target 74 | 75 | class RandomVerticalFlip(object): 76 | def __init__(self, prob=0.5): 77 | self.prob = prob 78 | 79 | def __call__(self, image, target): 80 | if random.random() < self.prob: 81 | image = F.vflip(image) 82 | target = target.transpose(1) 83 | return image, target 84 | 85 | class ColorJitter(object): 86 | def __init__(self, 87 | brightness=None, 88 | contrast=None, 89 | saturation=None, 90 | hue=None, 91 | ): 92 | self.color_jitter = torchvision.transforms.ColorJitter( 93 | brightness=brightness, 94 | contrast=contrast, 95 | saturation=saturation, 96 | hue=hue,) 97 | 98 | def __call__(self, image, target): 99 | image = self.color_jitter(image) 100 | return image, target 101 | 102 | 103 | class ToTensor(object): 104 | def __call__(self, image, target): 105 | return F.to_tensor(image), target 106 | 107 | 108 | class Normalize(object): 109 | def __init__(self, mean, std, to_bgr255=True): 110 | self.mean = mean 111 | self.std = std 112 | self.to_bgr255 = to_bgr255 113 | 114 | def __call__(self, image, target=None): 115 | if self.to_bgr255: 116 | image = image[[2, 1, 0]] * 255 117 | image = F.normalize(image, mean=self.mean, std=self.std) 118 | if target is None: 119 | return image 120 | return image, target 121 | -------------------------------------------------------------------------------- /hetsgg/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn 4 | 5 | 6 | class FPN(nn.Module): 7 | """ 8 | Module that adds FPN on top of a list of feature maps. 
9 | The feature maps are currently supposed to be in increasing depth 10 | order, and must be consecutive 11 | """ 12 | 13 | def __init__( 14 | self, in_channels_list, out_channels, conv_block, top_blocks=None 15 | ): 16 | """ 17 | Arguments: 18 | in_channels_list (list[int]): number of channels for each feature map that 19 | will be fed 20 | out_channels (int): number of channels of the FPN representation 21 | top_blocks (nn.Module or None): if provided, an extra operation will 22 | be performed on the output of the last (smallest resolution) 23 | FPN output, and the result will extend the result list 24 | """ 25 | super(FPN, self).__init__() 26 | self.inner_blocks = [] 27 | self.layer_blocks = [] 28 | for idx, in_channels in enumerate(in_channels_list, 1): 29 | inner_block = "fpn_inner{}".format(idx) 30 | layer_block = "fpn_layer{}".format(idx) 31 | 32 | if in_channels == 0: 33 | continue 34 | inner_block_module = conv_block(in_channels, out_channels, 1) 35 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 36 | self.add_module(inner_block, inner_block_module) 37 | self.add_module(layer_block, layer_block_module) 38 | self.inner_blocks.append(inner_block) 39 | self.layer_blocks.append(layer_block) 40 | self.top_blocks = top_blocks 41 | 42 | def forward(self, x): 43 | """ 44 | Arguments: 45 | x (list[Tensor]): feature maps for each feature level. 46 | Returns: 47 | results (tuple[Tensor]): feature maps after FPN layers. 48 | They are ordered from highest resolution first. 49 | """ 50 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 51 | results = [] 52 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 53 | for feature, inner_block, layer_block in zip( 54 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 55 | ): 56 | if not inner_block: 57 | continue 58 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 59 | inner_lateral = getattr(self, inner_block)(feature) 60 | 61 | last_inner = inner_lateral + inner_top_down 62 | results.insert(0, getattr(self, layer_block)(last_inner)) 63 | 64 | if isinstance(self.top_blocks, LastLevelP6P7): 65 | last_results = self.top_blocks(x[-1], results[-1]) 66 | results.extend(last_results) 67 | elif isinstance(self.top_blocks, LastLevelMaxPool): 68 | last_results = self.top_blocks(results[-1]) 69 | results.extend(last_results) 70 | 71 | return tuple(results) 72 | 73 | 74 | class LastLevelMaxPool(nn.Module): 75 | def forward(self, x): 76 | return [F.max_pool2d(x, 1, 2, 0)] 77 | 78 | 79 | class LastLevelP6P7(nn.Module): 80 | """ 81 | This module is used in RetinaNet to generate extra layers, P6 and P7. 
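P6 is computed from C5 (or from P5 when the input and output channel counts match) by a stride-2 3x3 convolution, and P7 by a ReLU followed by another stride-2 3x3 convolution on P6.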
82 | """ 83 | def __init__(self, in_channels, out_channels): 84 | super(LastLevelP6P7, self).__init__() 85 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 86 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 87 | for module in [self.p6, self.p7]: 88 | nn.init.kaiming_uniform_(module.weight, a=1) 89 | nn.init.constant_(module.bias, 0) 90 | self.use_P5 = in_channels == out_channels 91 | 92 | def forward(self, c5, p5): 93 | x = p5 if self.use_P5 else c5 94 | p6 = self.p6(x) 95 | p7 = self.p7(F.relu(p6)) 96 | return [p6, p7] 97 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | from hetsgg.structures.bounding_box import BoxList 5 | from hetsgg.structures.segmentation_mask import SegmentationMask 6 | from hetsgg.structures.keypoint import PersonKeypoints 7 | 8 | 9 | min_keypoints_per_image = 10 10 | 11 | 12 | def _count_visible_keypoints(anno): 13 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 14 | 15 | 16 | def _has_only_empty_bbox(anno): 17 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 18 | 19 | 20 | def has_valid_annotation(anno): 21 | # if it's empty, there is no annotation 22 | if len(anno) == 0: 23 | return False 24 | # if all boxes have close to zero area, there is no annotation 25 | if _has_only_empty_bbox(anno): 26 | return False 27 | # keypoint tasks have a slightly different criterion for deciding 28 | # whether an annotation is valid 29 | if "keypoints" not in anno[0]: 30 | return True 31 | # for keypoint detection tasks, only consider images valid if they 32 | # contain at least min_keypoints_per_image visible keypoints 33 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 34 | return True 35 | return False 36 | 37 | 38 | class COCODataset(torchvision.datasets.coco.CocoDetection): 39 | def __init__( 40 | self, ann_file, root, remove_images_without_annotations, transforms=None 41 | ): 42 | super(COCODataset, self).__init__(root, ann_file) 43 | # sort indices for reproducible results 44 | self.ids = sorted(self.ids) 45 | 46 | # filter images without detection annotations 47 | if remove_images_without_annotations: 48 | ids = [] 49 | for img_id in self.ids: 50 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 51 | anno = self.coco.loadAnns(ann_ids) 52 | if has_valid_annotation(anno): 53 | ids.append(img_id) 54 | self.ids = ids 55 | 56 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 57 | 58 | self.json_category_id_to_contiguous_id = { 59 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 60 | } 61 | self.contiguous_category_id_to_json_id = { 62 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 63 | } 64 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 65 | self._transforms = transforms 66 | 67 | def __getitem__(self, idx): 68 | img, anno = super(COCODataset, self).__getitem__(idx) 69 | 70 | # filter crowd annotations 71 | # TODO might be better to add an extra field 72 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 73 | 74 | boxes = [obj["bbox"] for obj in anno] 75 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 76 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 77 | 78 | classes = [obj["category_id"] for obj in anno] 79 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 80 | classes = 
torch.tensor(classes) 81 | target.add_field("labels", classes) 82 | 83 | if anno and "segmentation" in anno[0]: 84 | masks = [obj["segmentation"] for obj in anno] 85 | masks = SegmentationMask(masks, img.size, mode='poly') 86 | target.add_field("masks", masks) 87 | 88 | if anno and "keypoints" in anno[0]: 89 | keypoints = [obj["keypoints"] for obj in anno] 90 | keypoints = PersonKeypoints(keypoints, img.size) 91 | target.add_field("keypoints", keypoints) 92 | 93 | target = target.clip_to_image(remove_empty=True) 94 | 95 | if self._transforms is not None: 96 | img, target = self._transforms(img, target) 97 | 98 | return img, target, idx 99 | 100 | def get_img_info(self, index): 101 | img_id = self.id_to_img_map[index] 102 | img_data = self.coco.imgs[img_id] 103 | return img_data 104 | -------------------------------------------------------------------------------- /hetsgg/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import shutil 5 | 6 | import ipdb 7 | from tensorboardX import SummaryWriter 8 | from termcolor import colored 9 | 10 | from hetsgg.utils.comm import get_rank 11 | from hetsgg.utils.metric_logger import SmoothedValue 12 | 13 | DEBUG_PRINT_ON = True 14 | 15 | 16 | TFBoardHandler_LEVEL = 4 17 | 18 | 19 | 20 | class _ColorfulFormatter(logging.Formatter): 21 | def __init__(self, *args, **kwargs): 22 | self._root_name = kwargs.pop("root_name") + "." 23 | self._abbrev_name = kwargs.pop("abbrev_name", "") 24 | if len(self._abbrev_name): 25 | self._abbrev_name = self._abbrev_name + "." 26 | super(_ColorfulFormatter, self).__init__(*args, **kwargs) 27 | 28 | def formatMessage(self, record): 29 | record.name = record.name.replace(self._root_name, self._abbrev_name) 30 | log = super(_ColorfulFormatter, self).formatMessage(record) 31 | if record.levelno == logging.WARNING: 32 | prefix = colored("WARNING", "red", attrs=["blink"]) 33 | elif record.levelno == logging.ERROR or record.levelno == logging.CRITICAL: 34 | prefix = colored("ERROR", "red", attrs=["blink", "underline"]) 35 | else: 36 | return log 37 | return prefix + " " + log 38 | 39 | 40 | 41 | def debug_print(logger, info): 42 | if DEBUG_PRINT_ON: 43 | logger.info('#'*20+' '+info+' '+'#'*20) 44 | 45 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 46 | logger = logging.getLogger(name) 47 | 48 | for each in logger.handlers: 49 | logger.removeHandler(each) 50 | 51 | logger.setLevel(TFBoardHandler_LEVEL) 52 | if distributed_rank > 0: 53 | return logger 54 | 55 | ch = logging.StreamHandler(stream=sys.stdout) 56 | ch.setLevel(logging.DEBUG) 57 | formatter = _ColorfulFormatter( 58 | colored("[%(asctime)s %(name)s]: ", "green") + "%(message)s", 59 | datefmt="%m/%d %H:%M:%S", 60 | root_name=name, 61 | ) 62 | ch.setFormatter(formatter) 63 | logger.addHandler(ch) 64 | 65 | 66 | if save_dir: 67 | 68 | tf = TFBoardHandler(TFBoardWriter(save_dir)) 69 | tf.setLevel(TFBoardHandler_LEVEL) 70 | logger.addHandler(tf) 71 | 72 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 73 | fh.setLevel(logging.DEBUG) 74 | fh.setFormatter(formatter) 75 | logger.addHandler(fh) 76 | 77 | return logger 78 | 79 | 80 | 81 | class TFBoardWriter: 82 | def __init__(self, log_dir): 83 | if log_dir and get_rank() == 0: 84 | tfbd_dir = os.path.join(log_dir, 'tfboard') 85 | if os.path.exists(tfbd_dir): 86 | shutil.rmtree(tfbd_dir) 87 | os.makedirs(tfbd_dir) 88 | 89 | self.tf_writer = SummaryWriter(log_dir=tfbd_dir, flush_secs=10) 90 | 
self.enable = True 91 | else: 92 | self.enable = False 93 | self.tf_writer = None 94 | 95 | def write_data(self, meter, iter): 96 | if isinstance(iter, str): 97 | model = meter[0] 98 | input = meter[1] 99 | 100 | self.tf_writer.add_graph(model, input) 101 | else: 102 | for each in meter.keys(): 103 | val = meter[each] 104 | if isinstance(val, SmoothedValue): 105 | val = val.avg 106 | self.tf_writer.add_scalar(each, val, iter) 107 | 108 | def close(self): 109 | if self.tf_writer is not None: 110 | self.tf_writer.close() 111 | 112 | 113 | class TFBoardHandler(logging.Handler): 114 | def __init__(self, writer): 115 | logging.Handler.__init__(self, TFBoardHandler_LEVEL) 116 | self.tf_writer = writer 117 | 118 | def emit(self, record): 119 | if record.levelno <= TFBoardHandler_LEVEL: 120 | self.tf_writer.write_data(record.msg[0], record.msg[1]) 121 | return 122 | 123 | def close(self): 124 | self.tf_writer.close() -------------------------------------------------------------------------------- /tools/relation_test_net.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import torch 4 | from hetsgg.config import cfg 5 | from hetsgg.data import make_data_loader 6 | from hetsgg.engine.inference import inference 7 | from hetsgg.modeling.detector import build_detection_model 8 | from hetsgg.utils.checkpoint import DetectronCheckpointer 9 | from hetsgg.utils.collect_env import collect_env_info 10 | from hetsgg.utils.comm import synchronize, get_rank 11 | from hetsgg.utils.logger import setup_logger 12 | from hetsgg.utils.miscellaneous import mkdir 13 | 14 | try: 15 | from apex import amp 16 | except ImportError: 17 | raise ImportError('Use APEX for mixed precision via apex.amp') 18 | 19 | torch.set_num_threads(2) 20 | 21 | def main(): 22 | 23 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 24 | parser.add_argument( 25 | "--config-file", 26 | default="checkpoints/config.yml", 27 | metavar="FILE", 28 | help="path to config file", 29 | ) 30 | parser.add_argument("--local_rank", type=int, default=0) 31 | parser.add_argument( 32 | "opts", 33 | help="Modify config options using the command-line", 34 | default=None, 35 | nargs=argparse.REMAINDER, 36 | ) 37 | 38 | args = parser.parse_args() 39 | 40 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 41 | distributed = num_gpus > 1 42 | 43 | if distributed: 44 | torch.cuda.set_device(args.local_rank) 45 | torch.distributed.init_process_group( 46 | backend="nccl", init_method="env://" 47 | ) 48 | synchronize() 49 | cfg.set_new_allowed(True) 50 | cfg.merge_from_file(args.config_file) 51 | cfg.merge_from_list(args.opts) 52 | cfg.set_new_allowed(True) 53 | 54 | 55 | save_dir = "" 56 | logger = setup_logger("hetsgg", save_dir, get_rank()) 57 | logger.info("Using {} GPUs".format(num_gpus)) 58 | 59 | logger.info("Collecting env info (might take some time)") 60 | 61 | model = build_detection_model(cfg) 62 | model.to(cfg.MODEL.DEVICE) 63 | 64 | 65 | use_mixed_precision = cfg.DTYPE == 'float16' 66 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 67 | 68 | output_dir = cfg.OUTPUT_DIR 69 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 70 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 71 | 72 | 73 | # make sure checkpoint loading has finished on every rank before 74 | # running inference 75 | 76 | torch.cuda.synchronize(get_rank()) 77 | synchronize() 78 | 79 | iou_types = 
("bbox",) 80 | if cfg.MODEL.MASK_ON: 81 | iou_types = iou_types + ("segm",) 82 | if cfg.MODEL.KEYPOINT_ON: 83 | iou_types = iou_types + ("keypoints",) 84 | if cfg.MODEL.RELATION_ON: 85 | iou_types = iou_types + ("relations", ) 86 | if cfg.MODEL.ATTRIBUTE_ON: 87 | iou_types = iou_types + ("attributes", ) 88 | output_folders = [None] * len(cfg.DATASETS.TEST) 89 | dataset_names = cfg.DATASETS.TEST 90 | 91 | if cfg.OUTPUT_DIR: 92 | for idx, dataset_name in enumerate(dataset_names): 93 | output_folder = os.path.join(cfg.OUTPUT_DIR, f"inference_refine_{cfg.MODEL.ROI_RELATION_HEAD.OBJECT_CLASSIFICATION_REFINE}", dataset_name) 94 | mkdir(output_folder) 95 | output_folders[idx] = output_folder 96 | data_loaders_val = make_data_loader(cfg, mode="test", is_distributed=distributed) 97 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 98 | inference( 99 | cfg, 100 | model, 101 | data_loader_val, 102 | dataset_name=dataset_name, 103 | iou_types=iou_types, 104 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 105 | device=cfg.MODEL.DEVICE, 106 | expected_results=cfg.TEST.EXPECTED_RESULTS, 107 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 108 | output_folder=output_folder, 109 | ) 110 | synchronize() 111 | 112 | 113 | if __name__ == "__main__": 114 | main() 115 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/attribute_head/loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from hetsgg.layers import smooth_l1_loss 5 | from hetsgg.modeling.box_coder import BoxCoder 6 | from hetsgg.modeling.matcher import Matcher 7 | from hetsgg.structures.boxlist_ops import boxlist_iou 8 | from hetsgg.modeling.utils import cat 9 | 10 | 11 | class AttributeHeadLossComputation(object): 12 | """ 13 | Computes the loss for attribute head 14 | """ 15 | 16 | def __init__( 17 | self, 18 | loss_weight=0.1, 19 | num_attri_cat=201, 20 | max_num_attri=10, 21 | attribute_sampling=True, 22 | attribute_bgfg_ratio=5, 23 | use_binary_loss=True, 24 | pos_weight=1, 25 | ): 26 | self.loss_weight = loss_weight 27 | self.num_attri_cat = num_attri_cat 28 | self.max_num_attri = max_num_attri 29 | self.attribute_sampling = attribute_sampling 30 | self.attribute_bgfg_ratio = attribute_bgfg_ratio 31 | self.use_binary_loss = use_binary_loss 32 | self.pos_weight = pos_weight 33 | 34 | def __call__(self, proposals, attri_logits): 35 | """ 36 | Calculcate attribute loss 37 | """ 38 | attributes = cat([proposal.get_field("attributes") for proposal in proposals], dim=0) 39 | assert attributes.shape[0] == attri_logits.shape[0] 40 | 41 | # generate attribute targets 42 | attribute_targets, selected_idxs = self.generate_attributes_target(attributes) 43 | 44 | attri_logits = attri_logits[selected_idxs] 45 | attribute_targets = attribute_targets[selected_idxs] 46 | 47 | attribute_loss = self.attribute_loss(attri_logits, attribute_targets) 48 | 49 | return attribute_loss * self.loss_weight 50 | 51 | 52 | def generate_attributes_target(self, attributes): 53 | """ 54 | from list of attribute indexs to [1,0,1,0,0,1] form 55 | """ 56 | assert self.max_num_attri == attributes.shape[1] 57 | num_obj = attributes.shape[0] 58 | 59 | with_attri_idx = (attributes.sum(-1) > 0).long() 60 | without_attri_idx = 1 - with_attri_idx 61 | num_pos = int(with_attri_idx.sum()) 62 | num_neg = int(without_attri_idx.sum()) 63 | 
assert num_pos + num_neg == num_obj 64 | 65 | if self.attribute_sampling: 66 | num_neg = min(num_neg, num_pos * self.attribute_bgfg_ratio) if num_pos > 0 else 1 67 | 68 | attribute_targets = torch.zeros((num_obj, self.num_attri_cat), device=attributes.device).float() 69 | if not self.use_binary_loss: 70 | attribute_targets[without_attri_idx > 0, 0] = 1.0 71 | 72 | pos_idxs = torch.nonzero(with_attri_idx).squeeze(1) 73 | perm = torch.randperm(num_obj - num_pos, device=attributes.device)[:num_neg] 74 | neg_idxs = torch.nonzero(without_attri_idx).squeeze(1)[perm] 75 | selected_idxs = torch.cat((pos_idxs, neg_idxs), dim=0) 76 | assert selected_idxs.shape[0] == num_neg + num_pos 77 | 78 | for idx in torch.nonzero(with_attri_idx).squeeze(1).tolist(): 79 | for k in range(self.max_num_attri): 80 | att_id = int(attributes[idx, k]) 81 | if att_id == 0: 82 | break 83 | else: 84 | attribute_targets[idx, att_id] = 1 85 | 86 | return attribute_targets, selected_idxs 87 | 88 | def attribute_loss(self, logits, labels): 89 | if self.use_binary_loss: 90 | all_loss = F.binary_cross_entropy_with_logits(logits, labels, pos_weight=torch.FloatTensor([self.pos_weight] * self.num_attri_cat).cuda()) 91 | return all_loss 92 | else: 93 | all_loss = -F.softmax(logits, dim=-1).log() 94 | all_loss = (all_loss * labels).sum(-1) / labels.sum(-1) 95 | return all_loss.mean() 96 | 97 | 98 | def make_roi_attribute_loss_evaluator(cfg): 99 | loss_evaluator = AttributeHeadLossComputation( 100 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_LOSS_WEIGHT, 101 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES, 102 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.MAX_ATTRIBUTES, 103 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_SAMPLE, 104 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_RATIO, 105 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.USE_BINARY_LOSS, 106 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.POS_WEIGHT, 107 | ) 108 | 109 | return loss_evaluator 110 | -------------------------------------------------------------------------------- /hetsgg/modeling/roi_heads/keypoint_head/inference.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class KeypointPostProcessor(nn.Module): 6 | def __init__(self, keypointer=None): 7 | super(KeypointPostProcessor, self).__init__() 8 | self.keypointer = keypointer 9 | 10 | def forward(self, x, boxes): 11 | mask_prob = x 12 | 13 | scores = None 14 | if self.keypointer: 15 | mask_prob, scores = self.keypointer(x, boxes) 16 | 17 | assert len(boxes) == 1, "Only non-batched inference supported for now" 18 | boxes_per_image = [box.bbox.size(0) for box in boxes] 19 | mask_prob = mask_prob.split(boxes_per_image, dim=0) 20 | scores = scores.split(boxes_per_image, dim=0) 21 | 22 | results = [] 23 | for prob, box, score in zip(mask_prob, boxes, scores): 24 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 25 | for field in box.fields(): 26 | bbox.add_field(field, box.get_field(field)) 27 | prob = PersonKeypoints(prob, box.size) 28 | prob.add_field("logits", score) 29 | bbox.add_field("keypoints", prob) 30 | results.append(bbox) 31 | 32 | return results 33 | 34 | 35 | import numpy as np 36 | import cv2 37 | 38 | 39 | def heatmaps_to_keypoints(maps, rois): 40 | """Extract predicted keypoint locations from heatmaps. Returns an array of 41 | shape (#rois, #keypoints, 3) holding (x, y, 1) for each keypoint, together 42 | with a (#rois, #keypoints) array of per-keypoint scores. 
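Each ROI heatmap is resized back to the ROI resolution with bicubic interpolation, the per-keypoint argmax is taken, and the resulting coordinates are shifted by the ROI offset into image space.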
43 | """ 44 | 45 | offset_x = rois[:, 0] 46 | offset_y = rois[:, 1] 47 | 48 | widths = rois[:, 2] - rois[:, 0] 49 | heights = rois[:, 3] - rois[:, 1] 50 | widths = np.maximum(widths, 1) 51 | heights = np.maximum(heights, 1) 52 | widths_ceil = np.ceil(widths) 53 | heights_ceil = np.ceil(heights) 54 | 55 | maps = np.transpose(maps, [0, 2, 3, 1]) 56 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE 57 | num_keypoints = maps.shape[3] 58 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32) 59 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32) 60 | for i in range(len(rois)): 61 | if min_size > 0: 62 | roi_map_width = int(np.maximum(widths_ceil[i], min_size)) 63 | roi_map_height = int(np.maximum(heights_ceil[i], min_size)) 64 | else: 65 | roi_map_width = widths_ceil[i] 66 | roi_map_height = heights_ceil[i] 67 | width_correction = widths[i] / roi_map_width 68 | height_correction = heights[i] / roi_map_height 69 | roi_map = cv2.resize( 70 | maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC 71 | ) 72 | # Bring back to CHW 73 | roi_map = np.transpose(roi_map, [2, 0, 1]) 74 | # roi_map_probs = scores_to_probs(roi_map.copy()) 75 | w = roi_map.shape[2] 76 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1) 77 | x_int = pos % w 78 | y_int = (pos - x_int) // w 79 | # assert (roi_map_probs[k, y_int, x_int] == 80 | # roi_map_probs[k, :, :].max()) 81 | x = (x_int + 0.5) * width_correction 82 | y = (y_int + 0.5) * height_correction 83 | xy_preds[i, 0, :] = x + offset_x[i] 84 | xy_preds[i, 1, :] = y + offset_y[i] 85 | xy_preds[i, 2, :] = 1 86 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int] 87 | 88 | return np.transpose(xy_preds, [0, 2, 1]), end_scores 89 | 90 | 91 | from hetsgg.structures.bounding_box import BoxList 92 | from hetsgg.structures.keypoint import PersonKeypoints 93 | 94 | 95 | class Keypointer(object): 96 | """ 97 | Projects a set of masks in an image on the locations 98 | specified by the bounding boxes 99 | """ 100 | 101 | def __init__(self, padding=0): 102 | self.padding = padding 103 | 104 | def __call__(self, masks, boxes): 105 | # TODO do this properly 106 | if isinstance(boxes, BoxList): 107 | boxes = [boxes] 108 | assert len(boxes) == 1 109 | 110 | result, scores = heatmaps_to_keypoints( 111 | masks.detach().cpu().numpy(), boxes[0].bbox.cpu().numpy() 112 | ) 113 | return torch.from_numpy(result).to(masks.device), torch.as_tensor(scores, device=masks.device) 114 | 115 | 116 | def make_roi_keypoint_post_processor(cfg): 117 | keypointer = Keypointer() 118 | keypoint_post_processor = KeypointPostProcessor(keypointer) 119 | return keypoint_post_processor 120 | -------------------------------------------------------------------------------- /hetsgg/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from hetsgg.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | 
"train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | self.categories = dict(zip(range(len(cls)), cls)) 61 | 62 | def __getitem__(self, index): 63 | img_id = self.ids[index] 64 | img = Image.open(self._imgpath % img_id).convert("RGB") 65 | 66 | target = self.get_groundtruth(index) 67 | target = target.clip_to_image(remove_empty=True) 68 | 69 | if self.transforms is not None: 70 | img, target = self.transforms(img, target) 71 | 72 | return img, target, index 73 | 74 | def __len__(self): 75 | return len(self.ids) 76 | 77 | def get_groundtruth(self, index): 78 | img_id = self.ids[index] 79 | anno = ET.parse(self._annopath % img_id).getroot() 80 | anno = self._preprocess_annotation(anno) 81 | 82 | height, width = anno["im_info"] 83 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 84 | target.add_field("labels", anno["labels"]) 85 | target.add_field("difficult", anno["difficult"]) 86 | return target 87 | 88 | def _preprocess_annotation(self, target): 89 | boxes = [] 90 | gt_classes = [] 91 | difficult_boxes = [] 92 | TO_REMOVE = 1 93 | 94 | for obj in target.iter("object"): 95 | difficult = int(obj.find("difficult").text) == 1 96 | if not self.keep_difficult and difficult: 97 | continue 98 | name = obj.find("name").text.lower().strip() 99 | bb = obj.find("bndbox") 100 | box = [ 101 | bb.find("xmin").text, 102 | bb.find("ymin").text, 103 | bb.find("xmax").text, 104 | bb.find("ymax").text, 105 | ] 106 | bndbox = tuple( 107 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 108 | ) 109 | 110 | boxes.append(bndbox) 111 | gt_classes.append(self.class_to_ind[name]) 112 | difficult_boxes.append(difficult) 113 | 114 | size = target.find("size") 115 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 116 | 117 | res = { 118 | "boxes": torch.tensor(boxes, dtype=torch.float32), 119 | "labels": torch.tensor(gt_classes), 120 | "difficult": torch.tensor(difficult_boxes), 121 | "im_info": im_info, 122 | } 123 | return res 124 | 125 | def get_img_info(self, index): 126 | img_id = self.ids[index] 127 | anno = ET.parse(self._annopath % img_id).getroot() 128 | size = anno.find("size") 129 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 130 | return {"height": im_info[0], "width": im_info[1]} 131 | 132 | def map_class_id_to_class_name(self, class_id): 133 | return PascalVOCDataset.CLASSES[class_id] 134 | -------------------------------------------------------------------------------- /Datasets/OI-V4/Category_Type_Info.json: -------------------------------------------------------------------------------- 1 | {"class_to_category": {"Piano": "product", "Boy": "human", "Tennis ball": "product", "Van": "product", "Football": "product", "Beer": "product", "Camera": "product", "Suitcase": "product", "Man": "human", "Bench": 
"product", "Dolphin": "animal", "Motorcycle": "product", "Mug": "product", "Tennis racket": "product", "Drum": "product", "Spoon": "product", "Horse": "animal", "Surfboard": "product", "Bicycle": "product", "Knife": "product", "Rugby ball": "product", "Woman": "human", "Handbag": "product", "Microwave oven": "product", "Flute": "product", "Girl": "human", "Taxi": "product", "Hamster": "animal", "Wine glass": "product", "Backpack": "product", "Racket": "product", "Table": "product", "Pretzel": "product", "Bed": "product", "Snowboard": "product", "Car": "product", "Chair": "product", "Microphone": "product", "Coffee cup": "product", "Table tennis racket": "product", "Bottle": "product", "Guitar": "product", "Desk": "product", "Ski": "product", "Coffee table": "product", "Dog": "animal", "Cat": "animal", "Chopsticks": "product", "Elephant": "animal", "Mobile phone": "product", "Monkey": "animal", "Snake": "animal", "Sofa bed": "product", "Violin": "product", "Fork": "product", "Oven": "product", "Briefcase": "product"}, "category_idx": {"product": 0, "animal": 1, "human": 2}, "label_to_catidx": {"Piano": 0, "Boy": 2, "Tennis ball": 0, "Van": 0, "Football": 0, "Beer": 0, "Camera": 0, "Suitcase": 0, "Man": 2, "Bench": 0, "Dolphin": 1, "Motorcycle": 0, "Mug": 0, "Tennis racket": 0, "Drum": 0, "Spoon": 0, "Horse": 1, "Surfboard": 0, "Bicycle": 0, "Knife": 0, "Rugby ball": 0, "Woman": 2, "Handbag": 0, "Microwave oven": 0, "Flute": 0, "Girl": 2, "Taxi": 0, "Hamster": 1, "Wine glass": 0, "Backpack": 0, "Racket": 0, "Table": 0, "Pretzel": 0, "Bed": 0, "Snowboard": 0, "Car": 0, "Chair": 0, "Microphone": 0, "Coffee cup": 0, "Table tennis racket": 0, "Bottle": 0, "Guitar": 0, "Desk": 0, "Ski": 0, "Coffee table": 0, "Dog": 1, "Cat": 1, "Chopsticks": 0, "Elephant": 1, "Mobile phone": 0, "Monkey": 1, "Snake": 1, "Sofa bed": 0, "Violin": 0, "Fork": 0, "Oven": 0, "Briefcase": 0, "__background__": 0}, "labelidx_to_catidx": {"1": 0, "2": 2, "3": 0, "4": 0, "5": 0, "6": 0, "7": 0, "8": 0, "9": 2, "10": 0, "11": 1, "12": 0, "13": 0, "14": 0, "15": 0, "16": 0, "17": 1, "18": 0, "19": 0, "20": 0, "21": 0, "22": 2, "23": 0, "24": 0, "25": 0, "26": 2, "27": 0, "28": 1, "29": 0, "30": 0, "31": 0, "32": 0, "33": 0, "34": 0, "35": 0, "36": 0, "37": 0, "38": 0, "39": 0, "40": 0, "41": 0, "42": 0, "43": 0, "44": 0, "45": 0, "46": 1, "47": 1, "48": 0, "49": 1, "50": 0, "51": 1, "52": 1, "53": 0, "54": 0, "55": 0, "56": 0, "57": 0, "0": 0}, "idx_to_label": {"1": "Piano", "2": "Boy", "3": "Tennis ball", "4": "Van", "5": "Football", "6": "Beer", "7": "Camera", "8": "Suitcase", "9": "Man", "10": "Bench", "11": "Dolphin", "12": "Motorcycle", "13": "Mug", "14": "Tennis racket", "15": "Drum", "16": "Spoon", "17": "Horse", "18": "Surfboard", "19": "Bicycle", "20": "Knife", "21": "Rugby ball", "22": "Woman", "23": "Handbag", "24": "Microwave oven", "25": "Flute", "26": "Girl", "27": "Taxi", "28": "Hamster", "29": "Wine glass", "30": "Backpack", "31": "Racket", "32": "Table", "33": "Pretzel", "34": "Bed", "35": "Snowboard", "36": "Car", "37": "Chair", "38": "Microphone", "39": "Coffee cup", "40": "Table tennis racket", "41": "Bottle", "42": "Guitar", "43": "Desk", "44": "Ski", "45": "Coffee table", "46": "Dog", "47": "Cat", "48": "Chopsticks", "49": "Elephant", "50": "Mobile phone", "51": "Monkey", "52": "Snake", "53": "Sofa bed", "54": "Violin", "55": "Fork", "56": "Oven", "57": "Briefcase", "0": "__background__"}, "label_to_idx": {"Piano": 1, "Boy": 2, "Tennis ball": 3, "Van": 4, "Football": 5, "Beer": 6, "Camera": 7, "Suitcase": 
8, "Man": 9, "Bench": 10, "Dolphin": 11, "Motorcycle": 12, "Mug": 13, "Tennis racket": 14, "Drum": 15, "Spoon": 16, "Horse": 17, "Surfboard": 18, "Bicycle": 19, "Knife": 20, "Rugby ball": 21, "Woman": 22, "Handbag": 23, "Microwave oven": 24, "Flute": 25, "Girl": 26, "Taxi": 27, "Hamster": 28, "Wine glass": 29, "Backpack": 30, "Racket": 31, "Table": 32, "Pretzel": 33, "Bed": 34, "Snowboard": 35, "Car": 36, "Chair": 37, "Microphone": 38, "Coffee cup": 39, "Table tennis racket": 40, "Bottle": 41, "Guitar": 42, "Desk": 43, "Ski": 44, "Coffee table": 45, "Dog": 46, "Cat": 47, "Chopsticks": 48, "Elephant": 49, "Mobile phone": 50, "Monkey": 51, "Snake": 52, "Sofa bed": 53, "Violin": 54, "Fork": 55, "Oven": 56, "Briefcase": 57, "__background__": 0}, "catidx_labelgroup": {"0": [1, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 15, 16, 18, 19, 20, 21, 23, 24, 25, 27, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 48, 50, 53, 54, 55, 56, 57, 0], "1": [11, 17, 28, 46, 47, 49, 51, 52], "2": [2, 9, 22, 26]}} -------------------------------------------------------------------------------- /hetsgg/engine/bbox_aug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as TT 3 | 4 | from hetsgg.config import cfg 5 | from hetsgg.data import transforms as T 6 | from hetsgg.structures.image_list import to_image_list 7 | from hetsgg.structures.bounding_box import BoxList 8 | from hetsgg.modeling.roi_heads.box_head.inference import make_roi_box_post_processor 9 | 10 | 11 | def im_detect_bbox_aug(model, images, device): 12 | # Collect detections computed under different transformations 13 | boxlists_ts = [] 14 | for _ in range(len(images)): 15 | boxlists_ts.append([]) 16 | 17 | def add_preds_t(boxlists_t): 18 | for i, boxlist_t in enumerate(boxlists_t): 19 | if len(boxlists_ts[i]) == 0: 20 | # The first one is identity transform, no need to resize the boxlist 21 | boxlists_ts[i].append(boxlist_t) 22 | else: 23 | # Resize the boxlist as the first one 24 | boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) 25 | 26 | # Compute detections for the original image (identity transform) 27 | boxlists_i = im_detect_bbox( 28 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 29 | ) 30 | add_preds_t(boxlists_i) 31 | 32 | # Perform detection on the horizontally flipped image 33 | if cfg.TEST.BBOX_AUG.H_FLIP: 34 | boxlists_hf = im_detect_bbox_hflip( 35 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 36 | ) 37 | add_preds_t(boxlists_hf) 38 | 39 | # Compute detections at different scales 40 | for scale in cfg.TEST.BBOX_AUG.SCALES: 41 | max_size = cfg.TEST.BBOX_AUG.MAX_SIZE 42 | boxlists_scl = im_detect_bbox_scale( 43 | model, images, scale, max_size, device 44 | ) 45 | add_preds_t(boxlists_scl) 46 | 47 | if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: 48 | boxlists_scl_hf = im_detect_bbox_scale( 49 | model, images, scale, max_size, device, hflip=True 50 | ) 51 | add_preds_t(boxlists_scl_hf) 52 | 53 | # Merge boxlists detected by different bbox aug params 54 | boxlists = [] 55 | for i, boxlist_ts in enumerate(boxlists_ts): 56 | bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) 57 | scores = torch.cat([boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) 58 | boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) 59 | boxlist.add_field('scores', scores) 60 | boxlists.append(boxlist) 61 | 62 | # Apply NMS and limit the final detections 63 | results = [] 64 | 
post_processor = make_roi_box_post_processor(cfg) 65 | for boxlist in boxlists: 66 | results.append(post_processor.filter_results(boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)) 67 | 68 | return results 69 | 70 | 71 | def im_detect_bbox(model, images, target_scale, target_max_size, device): 72 | """ 73 | Performs bbox detection on the original image. 74 | """ 75 | transform = TT.Compose([ 76 | T.Resize(target_scale, target_max_size), 77 | TT.ToTensor(), 78 | T.Normalize( 79 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 80 | ) 81 | ]) 82 | images = [transform(image) for image in images] 83 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 84 | return model(images.to(device)) 85 | 86 | 87 | def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device): 88 | """ 89 | Performs bbox detection on the horizontally flipped image. 90 | Function signature is the same as for im_detect_bbox. 91 | """ 92 | transform = TT.Compose([ 93 | T.Resize(target_scale, target_max_size), 94 | TT.RandomHorizontalFlip(1.0), 95 | TT.ToTensor(), 96 | T.Normalize( 97 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 98 | ) 99 | ]) 100 | images = [transform(image) for image in images] 101 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 102 | boxlists = model(images.to(device)) 103 | 104 | # Invert the detections computed on the flipped image 105 | boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists] 106 | return boxlists_inv 107 | 108 | 109 | def im_detect_bbox_scale(model, images, target_scale, target_max_size, device, hflip=False): 110 | """ 111 | Computes bbox detections at the given scale. 112 | Returns predictions in the scaled image space. 113 | """ 114 | if hflip: 115 | boxlists_scl = im_detect_bbox_hflip(model, images, target_scale, target_max_size, device) 116 | else: 117 | boxlists_scl = im_detect_bbox(model, images, target_scale, target_max_size, device) 118 | return boxlists_scl 119 | -------------------------------------------------------------------------------- /hetsgg/modeling/matcher.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class Matcher(object): 5 | """ 6 | This class assigns to each predicted "element" (e.g., a box) a ground-truth 7 | element. Each predicted element will have exactly zero or one matches; each 8 | ground-truth element may be assigned to zero or more predicted elements. 9 | 10 | Matching is based on the MxN match_quality_matrix, that characterizes how well 11 | each (ground-truth, predicted)-pair match. For example, if the elements are 12 | boxes, the matrix may contain box IoU overlap values. 13 | 14 | The matcher returns a tensor of size N containing the index of the ground-truth 15 | element m that matches to prediction n. If there is no match, a negative value 16 | is returned. 17 | """ 18 | 19 | BELOW_LOW_THRESHOLD = -1 20 | BETWEEN_THRESHOLDS = -2 21 | 22 | def __init__(self, high_threshold, low_threshold, allow_low_quality_matches=False): 23 | """ 24 | Args: 25 | high_threshold (float): quality values greater than or equal to 26 | this value are candidate matches. 
27 | low_threshold (float): a lower quality threshold used to stratify 28 | matches into three levels: 29 | 1) matches >= high_threshold 30 | 2) BETWEEN_THRESHOLDS matches in [low_threshold, high_threshold) 31 | 3) BELOW_LOW_THRESHOLD matches in [0, low_threshold) 32 | allow_low_quality_matches (bool): if True, produce additional matches 33 | for predictions that have only low-quality match candidates. See 34 | set_low_quality_matches_ for more details. 35 | """ 36 | assert low_threshold <= high_threshold 37 | self.high_threshold = high_threshold 38 | self.low_threshold = low_threshold 39 | self.allow_low_quality_matches = allow_low_quality_matches 40 | 41 | def __call__(self, match_quality_matrix): 42 | """ 43 | Args: 44 | match_quality_matrix (Tensor[float]): an MxN tensor, containing the 45 | pairwise quality between M ground-truth elements and N predicted elements. 46 | 47 | Returns: 48 | matches (Tensor[int64]): an N tensor where N[i] is a matched gt in 49 | [0, M - 1] or a negative value indicating that prediction i could not 50 | be matched. 51 | """ 52 | if match_quality_matrix.numel() == 0: 53 | # empty targets or proposals not supported during training 54 | if match_quality_matrix.shape[0] == 0: 55 | raise ValueError( 56 | "No ground-truth boxes available for one of the images " 57 | "during training") 58 | else: 59 | raise ValueError( 60 | "No proposal boxes available for one of the images " 61 | "during training") 62 | 63 | # match_quality_matrix is M (gt) x N (predicted) 64 | # Max over gt elements (dim 0) to find best gt candidate for each prediction 65 | matched_vals, matches = match_quality_matrix.max(dim=0) 66 | if self.allow_low_quality_matches: 67 | all_matches = matches.clone() 68 | 69 | # Assign candidate matches with low quality to negative (unassigned) values 70 | below_low_threshold = matched_vals < self.low_threshold 71 | between_thresholds = (matched_vals >= self.low_threshold) & ( 72 | matched_vals < self.high_threshold 73 | ) 74 | matches[below_low_threshold] = Matcher.BELOW_LOW_THRESHOLD 75 | matches[between_thresholds] = Matcher.BETWEEN_THRESHOLDS 76 | 77 | if self.allow_low_quality_matches: 78 | self.set_low_quality_matches_(matches, all_matches, match_quality_matrix) 79 | 80 | return matches 81 | 82 | def set_low_quality_matches_(self, matches, all_matches, match_quality_matrix): 83 | """ 84 | Produce additional matches for predictions that have only low-quality matches. 85 | Specifically, for each ground-truth find the set of predictions that have 86 | maximum overlap with it (including ties); for each prediction in that set, if 87 | it is unmatched, then match it to the ground-truth with which it has the highest 88 | quality value. 
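        Illustrative example (editorial addition, not in the original source):
        with high_threshold=0.7, low_threshold=0.3 and the 2x2 (gt x pred)
        quality matrix [[0.25, 0.10], [0.10, 0.60]], thresholding alone would
        leave both predictions unmatched (-1 and -2 respectively); this method
        restores matches = [0, 1], since each prediction is the best available
        candidate for one of the ground truths.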
89 |         """
90 |         # For each gt, find the prediction with which it has highest quality
91 |         highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
92 |         # Find highest quality match available, even if it is low, including ties
93 |         gt_pred_pairs_of_highest_quality = torch.nonzero(
94 |             match_quality_matrix == highest_quality_foreach_gt[:, None]
95 |         )
96 | 
97 | 
98 |         pred_inds_to_update = gt_pred_pairs_of_highest_quality[:, 1]
99 |         matches[pred_inds_to_update] = all_matches[pred_inds_to_update]
100 | 
--------------------------------------------------------------------------------
/hetsgg/data/datasets/evaluation/vg/vg_stage_eval_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | 
4 | from hetsgg.utils.miscellaneous import intersect_2d
5 | 
6 | 
7 | def boxlist_iou(boxlist1, boxlist2, to_cuda=True):
8 | 
9 |     if boxlist1.size != boxlist2.size:
10 |         raise RuntimeError(
11 |             "boxlists should have same image size, got {}, {}".format(boxlist1, boxlist2))
12 | 
13 |     N = len(boxlist1)
14 |     M = len(boxlist2)
15 | 
16 |     if to_cuda:
17 |         if boxlist1.bbox.device.type != 'cuda':
18 |             boxlist1.bbox = boxlist1.bbox.cuda()
19 |         if boxlist2.bbox.device.type != 'cuda':
20 |             boxlist2.bbox = boxlist2.bbox.cuda()
21 | 
22 |     box1 = boxlist1.bbox
23 |     box2 = boxlist2.bbox
24 | 
25 |     area1 = boxlist1.area()
26 |     area2 = boxlist2.area()
27 | 
28 |     lt = torch.max(box1[:, None, :2], box2[:, :2])  # [N,M,2]
29 |     rb = torch.min(box1[:, None, 2:], box2[:, 2:])  # [N,M,2]
30 | 
31 |     TO_REMOVE = 1
32 | 
33 |     wh = (rb - lt + TO_REMOVE).clamp(min=0)  # [N,M,2]
34 |     inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
35 | 
36 |     iou = inter / (area1[:, None] + area2 - inter)
37 |     return iou
38 | 
39 | 
40 | def intersect_2d_torch_tensor(x1, x2):
41 |     return torch.from_numpy(intersect_2d(x1.numpy(), x2.numpy()))
42 | 
43 | 
44 | def dump_hit_indx_dict_to_tensor(pred_pair_mat, gt_box_hit_idx_dict):
45 |     """
46 |     To compare the predictions with the ground truth easily, we expand the
47 |     N-to-M box match results into an array.
48 |     Given the relationship prediction pair matrix, we expand gt_box_hit_idx_dict
49 |     by taking the full cross product of the hit gt box indices of each prediction pair.
50 |     :param pred_pair_mat: relationship prediction pair matrix
51 |     :param gt_box_hit_idx_dict: the hit gt box indices of each prediction box
52 |     :return:
53 |         to_cmp_pair_mat: expanded relationship pair result (N, 2), storing the gt box indices.
54 |             N is larger than the initial number of prediction pairs.
55 |         initial_pred_idx_seg: segment marker for each prediction pair. A pair that
56 |             hits multiple detection gts expands to more than one row, so this marks
57 |             which rows refer to the same initial prediction.
58 |     """
59 |     to_cmp_pair_mat = []
60 |     initial_pred_idx_seg = []
61 |     # write the expanded results into the pair matrix
62 |     for pred_idx, pred_pair in enumerate(pred_pair_mat):
63 |         sub_pred_hit_idx_set = gt_box_hit_idx_dict[pred_pair[0].item()]
64 |         obj_pred_hit_idx_set = gt_box_hit_idx_dict[pred_pair[1].item()]
65 |         # expand each prediction pair by the full combination of its hit gt indices
66 |         for each_sub_hit_idx in sub_pred_hit_idx_set:
67 |             for each_obj_hit_idx in obj_pred_hit_idx_set:
68 |                 to_cmp_pair_mat.append([each_sub_hit_idx, each_obj_hit_idx])
69 |                 initial_pred_idx_seg.append(pred_idx)  # one segment id per expanded row
70 |     if len(to_cmp_pair_mat) == 0:
71 |         to_cmp_pair_mat = torch.zeros((0, 2), dtype=torch.int64)
72 |     else:
73 |         to_cmp_pair_mat = torch.from_numpy(np.array(to_cmp_pair_mat, dtype=np.int64))
74 | 
75 |     initial_pred_idx_seg = torch.from_numpy(np.array(initial_pred_idx_seg, dtype=np.int64))
76 |     return to_cmp_pair_mat, initial_pred_idx_seg
77 | 
78 | 
79 | LONGTAIL_CATE_IDS_DICT = {
80 |     'head': [31, 20, 22, 30, 48],
81 |     'body': [29, 50, 1, 21, 8, 43, 40, 49, 41, 23, 7, 6, 19, 33, 16, 38],
82 |     'tail': [11, 14, 46, 37, 13, 24, 4, 47, 5, 10, 9, 34, 3, 25, 17, 35, 42, 27, 12, 28,
83 |              39, 36, 2, 15, 44, 32, 26, 18, 45]
84 | }
85 | 
86 | LONGTAIL_CATE_IDS_QUERY = {}
87 | for long_name, cate_id in LONGTAIL_CATE_IDS_DICT.items():
88 |     for each_cate_id in cate_id:
89 |         LONGTAIL_CATE_IDS_QUERY[each_cate_id] = long_name
90 | 
91 | PREDICATE_CLUSTER = [[50, 20, 9], [22, 48, 49], [31], [31, 41, 1], [31, 30]]
92 | ENTITY_CLUSTER = [[91, 149, 53, 78, 20, 79, 90, 56, 68]]
93 | 
94 | 
95 | def get_cluster_id(cluster, cate_id):
96 |     for idx, each in enumerate(cluster):
97 |         if cate_id in each:
98 |             return each[0]
99 |     return -1
100 | 
101 | 
102 | def transform_cateid_into_cluster_id(cate_list, cluster):
103 |     for idx in range(len(cate_list)):
104 |         cluster_id = get_cluster_id(cluster, cate_list[idx].item())
105 | 
106 |         if cluster_id != -1:
107 |             cate_list[idx] = cluster_id
108 |     return cate_list
109 | 
110 | 
111 | def trans_cluster_label(pred_pred_cate_list, gt_pred_cate_list, cluster):
112 |     """
113 |     Transform category labels into cluster labels to avoid label overlapping.
114 |     :param pred_pred_cate_list: predicted predicate category labels
115 |     :param gt_pred_cate_list: ground-truth predicate category labels
116 |     :return: both lists with each category id mapped to its cluster's first id
117 |     """
118 |     cluster_ref_pred_cate = transform_cateid_into_cluster_id(pred_pred_cate_list, cluster)
119 |     cluster_ref_gt_cate = transform_cateid_into_cluster_id(gt_pred_cate_list, cluster)
120 | 
121 |     return cluster_ref_pred_cate, cluster_ref_gt_cate
122 | 
--------------------------------------------------------------------------------
/hetsgg/modeling/roi_heads/mask_head/loss.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import functional as F
3 | 
4 | from hetsgg.layers import smooth_l1_loss
5 | from hetsgg.modeling.matcher import Matcher
6 | from hetsgg.structures.boxlist_ops import boxlist_iou
7 | from hetsgg.modeling.utils import cat
8 | 
9 | 
10 | def project_masks_on_boxes(segmentation_masks, proposals, discretization_size):
11 | 
12 |     masks = []
13 |     M = discretization_size
14 |     device = proposals.bbox.device
15 |     proposals = proposals.convert("xyxy")
16 |     assert segmentation_masks.size == proposals.size, "{}, {}".format(
17 |         segmentation_masks, proposals
18 |     )
19 | 
20 |     proposals = proposals.bbox.to(torch.device("cpu"))
21 |     for segmentation_mask, proposal in zip(segmentation_masks, proposals):
22 |         # crop the masks, resize them to the desired resolution and
23 |         # then convert them to the tensor representation.
24 |         cropped_mask = segmentation_mask.crop(proposal)
25 |         scaled_mask = cropped_mask.resize((M, M))
26 |         mask = scaled_mask.get_mask_tensor()
27 |         masks.append(mask)
28 |     if len(masks) == 0:
29 |         return torch.empty(0, dtype=torch.float32, device=device)
30 |     return torch.stack(masks, dim=0).to(device, dtype=torch.float32)
31 | 
32 | 
33 | class MaskRCNNLossComputation(object):
34 |     def __init__(self, proposal_matcher, discretization_size):
35 |         """
36 |         Arguments:
37 |             proposal_matcher (Matcher)
38 |             discretization_size (int)
39 |         """
40 |         self.proposal_matcher = proposal_matcher
41 |         self.discretization_size = discretization_size
42 | 
43 |     def match_targets_to_proposals(self, proposal, target):
44 |         match_quality_matrix = boxlist_iou(target, proposal)
45 |         matched_idxs = self.proposal_matcher(match_quality_matrix)
46 |         # Mask RCNN needs "labels" and "masks" fields for creating the targets
47 |         target = target.copy_with_fields(["labels", "masks"])
48 | 
49 |         matched_targets = target[matched_idxs.clamp(min=0)]
50 |         matched_targets.add_field("matched_idxs", matched_idxs)
51 |         return matched_targets
52 | 
53 |     def prepare_targets(self, proposals, targets):
54 |         labels = []
55 |         masks = []
56 |         for proposals_per_image, targets_per_image in zip(proposals, targets):
57 |             matched_targets = self.match_targets_to_proposals(
58 |                 proposals_per_image, targets_per_image
59 |             )
60 |             matched_idxs = matched_targets.get_field("matched_idxs")
61 | 
62 |             labels_per_image = matched_targets.get_field("labels")
63 |             labels_per_image = labels_per_image.to(dtype=torch.int64)
64 | 
65 |             neg_inds = matched_idxs == Matcher.BELOW_LOW_THRESHOLD
66 |             labels_per_image[neg_inds] = 0
67 | 
68 |             # mask scores are only computed on positive samples
69 |             positive_inds = torch.nonzero(labels_per_image > 0).squeeze(1)
70 | 
71 |             segmentation_masks = matched_targets.get_field("masks")
72 |             segmentation_masks = segmentation_masks[positive_inds]
73 | 
74 |             positive_proposals = proposals_per_image[positive_inds]
75 | 
76 |             masks_per_image = project_masks_on_boxes(
77 |                 segmentation_masks, positive_proposals, self.discretization_size
78 |             )
79 | 
80 |             labels.append(labels_per_image)
81 |             masks.append(masks_per_image)
82 | 
83 |         return labels, masks
84 | 
85 |     def __call__(self, proposals, mask_logits, targets):
86 |         """
87 |         Arguments:
88 |             proposals (list[BoxList])
89 |             mask_logits (Tensor)
90 |             targets (list[BoxList])
91 | 
92 |         Return:
93 |             mask_loss (Tensor): scalar tensor containing the loss
94 |         """
95 |         labels, mask_targets = self.prepare_targets(proposals, targets)
96 | 
97 |         labels = cat(labels, dim=0)
98 |         mask_targets = cat(mask_targets, dim=0)
99 | 
100 |         positive_inds = torch.nonzero(labels > 0).squeeze(1)
101 |         labels_pos = labels[positive_inds]
102 | 
103 |         # torch.mean (in binary_cross_entropy_with_logits) doesn't
104 |         # accept empty tensors, so handle it separately
105 |         if mask_targets.numel() == 0:
106 |             return mask_logits.sum() * 0  # zero-valued loss that keeps the graph connected
107 | 
108 |         mask_loss = F.binary_cross_entropy_with_logits(
109 |             mask_logits[positive_inds, labels_pos], mask_targets
110 |         )
111 |         return mask_loss
112 | 
113 | 
114 | def make_roi_mask_loss_evaluator(cfg):
115 |     matcher = Matcher(
116 |         cfg.MODEL.ROI_HEADS.FG_IOU_THRESHOLD,
117 |         cfg.MODEL.ROI_HEADS.BG_IOU_THRESHOLD,
118 |         allow_low_quality_matches=False,
119 |     )
120 | 
121 |     loss_evaluator = MaskRCNNLossComputation(
122 |         matcher,
cfg.MODEL.ROI_MASK_HEAD.RESOLUTION 123 | ) 124 | 125 | return loss_evaluator 126 | -------------------------------------------------------------------------------- /hetsgg/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | 3 | import torch 4 | from torch.utils.data.sampler import BatchSampler 5 | from torch.utils.data.sampler import Sampler 6 | 7 | 8 | class GroupedBatchSampler(BatchSampler): 9 | """ 10 | Wraps another sampler to yield a mini-batch of indices. 11 | It enforces that elements from the same group should appear in groups of batch_size. 12 | It also tries to provide mini-batches which follows an ordering which is 13 | as close as possible to the ordering from the original sampler. 14 | 15 | Arguments: 16 | sampler (Sampler): Base sampler. 17 | batch_size (int): Size of mini-batch. 18 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 19 | size is less than ``batch_size`` 20 | 21 | """ 22 | 23 | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): 24 | if not isinstance(sampler, Sampler): 25 | raise ValueError( 26 | "sampler should be an instance of " 27 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 28 | ) 29 | self.sampler = sampler 30 | self.group_ids = torch.as_tensor(group_ids) 31 | assert self.group_ids.dim() == 1 32 | self.batch_size = batch_size 33 | self.drop_uneven = drop_uneven 34 | 35 | self.groups = torch.unique(self.group_ids).sort(0)[0] 36 | 37 | self._can_reuse_batches = False 38 | 39 | def _prepare_batches(self): 40 | dataset_size = len(self.group_ids) 41 | # get the sampled indices from the sampler 42 | sampled_ids = torch.as_tensor(list(self.sampler)) 43 | # potentially not all elements of the dataset were sampled 44 | # by the sampler (e.g., DistributedSampler). 45 | # construct a tensor which contains -1 if the element was 46 | # not sampled, and a non-negative number indicating the 47 | # order where the element was sampled. 48 | # for example. if sampled_ids = [3, 1] and dataset_size = 5, 49 | # the order is [-1, 1, -1, 0, -1] 50 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 51 | order[sampled_ids] = torch.arange(len(sampled_ids)) 52 | 53 | # get a mask with the elements that were sampled 54 | mask = order >= 0 55 | 56 | # find the elements that belong to each individual cluster 57 | clusters = [(self.group_ids == i) & mask for i in self.groups] 58 | # get relative order of the elements inside each cluster 59 | # that follows the order from the sampler 60 | relative_order = [order[cluster] for cluster in clusters] 61 | # with the relative order, find the absolute order in the 62 | # sampled space 63 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 64 | # permute each cluster so that they follow the order from 65 | # the sampler 66 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 67 | 68 | # splits each cluster in batch_size, and merge as a list of tensors 69 | splits = [c.split(self.batch_size) for c in permuted_clusters] 70 | merged = tuple(itertools.chain.from_iterable(splits)) 71 | 72 | # now each batch internally has the right order, but 73 | # they are grouped by clusters. Find the permutation between 74 | # different batches that brings them as close as possible to 75 | # the order that we have in the sampler. 
For that, we will consider the 76 | # ordering as coming from the first element of each batch, and sort 77 | # correspondingly 78 | first_element_of_batch = [t[0].item() for t in merged] 79 | # get and inverse mapping from sampled indices and the position where 80 | # they occur (as returned by the sampler) 81 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 82 | # from the first element in each batch, get a relative ordering 83 | first_index_of_batch = torch.as_tensor( 84 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 85 | ) 86 | 87 | # permute the batches so that they approximately follow the order 88 | # from the sampler 89 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 90 | # finally, permute the batches 91 | batches = [merged[i].tolist() for i in permutation_order] 92 | 93 | if self.drop_uneven: 94 | kept = [] 95 | for batch in batches: 96 | if len(batch) == self.batch_size: 97 | kept.append(batch) 98 | batches = kept 99 | return batches 100 | 101 | def __iter__(self): 102 | if self._can_reuse_batches: 103 | batches = self._batches 104 | self._can_reuse_batches = False 105 | else: 106 | batches = self._prepare_batches() 107 | self._batches = batches 108 | return iter(batches) 109 | 110 | def __len__(self): 111 | if not hasattr(self, "_batches"): 112 | self._batches = self._prepare_batches() 113 | self._can_reuse_batches = True 114 | return len(self._batches) 115 | -------------------------------------------------------------------------------- /hetsgg/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include "cpu/vision.h" 3 | 4 | #ifdef WITH_CUDA 5 | #include "cuda/vision.h" 6 | #endif 7 | 8 | 9 | // Interface for Python 10 | int deform_conv_forward( 11 | at::Tensor input, 12 | at::Tensor weight, 13 | at::Tensor offset, 14 | at::Tensor output, 15 | at::Tensor columns, 16 | at::Tensor ones, 17 | int kW, 18 | int kH, 19 | int dW, 20 | int dH, 21 | int padW, 22 | int padH, 23 | int dilationW, 24 | int dilationH, 25 | int group, 26 | int deformable_group, 27 | int im2col_step) 28 | { 29 | if (input.type().is_cuda()) { 30 | #ifdef WITH_CUDA 31 | return deform_conv_forward_cuda( 32 | input, weight, offset, output, columns, ones, 33 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 34 | group, deformable_group, im2col_step 35 | ); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | 43 | 44 | int deform_conv_backward_input( 45 | at::Tensor input, 46 | at::Tensor offset, 47 | at::Tensor gradOutput, 48 | at::Tensor gradInput, 49 | at::Tensor gradOffset, 50 | at::Tensor weight, 51 | at::Tensor columns, 52 | int kW, 53 | int kH, 54 | int dW, 55 | int dH, 56 | int padW, 57 | int padH, 58 | int dilationW, 59 | int dilationH, 60 | int group, 61 | int deformable_group, 62 | int im2col_step) 63 | { 64 | if (input.type().is_cuda()) { 65 | #ifdef WITH_CUDA 66 | return deform_conv_backward_input_cuda( 67 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 68 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 69 | group, deformable_group, im2col_step 70 | ); 71 | #else 72 | AT_ERROR("Not compiled with GPU support"); 73 | #endif 74 | } 75 | AT_ERROR("Not implemented on the CPU"); 76 | } 77 | 78 | 79 | int deform_conv_backward_parameters( 80 | at::Tensor input, 81 | at::Tensor offset, 82 | at::Tensor gradOutput, 83 | at::Tensor gradWeight, // at::Tensor gradBias, 84 | at::Tensor 
columns,
85 |     at::Tensor ones,
86 |     int kW,
87 |     int kH,
88 |     int dW,
89 |     int dH,
90 |     int padW,
91 |     int padH,
92 |     int dilationW,
93 |     int dilationH,
94 |     int group,
95 |     int deformable_group,
96 |     float scale,
97 |     int im2col_step)
98 | {
99 |     if (input.type().is_cuda()) {
100 | #ifdef WITH_CUDA
101 |         return deform_conv_backward_parameters_cuda(
102 |             input, offset, gradOutput, gradWeight, columns, ones,
103 |             kW, kH, dW, dH, padW, padH, dilationW, dilationH,
104 |             group, deformable_group, scale, im2col_step
105 |         );
106 | #else
107 |         AT_ERROR("Not compiled with GPU support");
108 | #endif
109 |     }
110 |     AT_ERROR("Not implemented on the CPU");
111 | }
112 | 
113 | 
114 | void modulated_deform_conv_forward(
115 |     at::Tensor input,
116 |     at::Tensor weight,
117 |     at::Tensor bias,
118 |     at::Tensor ones,
119 |     at::Tensor offset,
120 |     at::Tensor mask,
121 |     at::Tensor output,
122 |     at::Tensor columns,
123 |     int kernel_h,
124 |     int kernel_w,
125 |     const int stride_h,
126 |     const int stride_w,
127 |     const int pad_h,
128 |     const int pad_w,
129 |     const int dilation_h,
130 |     const int dilation_w,
131 |     const int group,
132 |     const int deformable_group,
133 |     const bool with_bias)
134 | {
135 |     if (input.type().is_cuda()) {
136 | #ifdef WITH_CUDA
137 |         return modulated_deform_conv_cuda_forward(
138 |             input, weight, bias, ones, offset, mask, output, columns,
139 |             kernel_h, kernel_w, stride_h, stride_w,
140 |             pad_h, pad_w, dilation_h, dilation_w,
141 |             group, deformable_group, with_bias
142 |         );
143 | #else
144 |         AT_ERROR("Not compiled with GPU support");
145 | #endif
146 |     }
147 |     AT_ERROR("Not implemented on the CPU");
148 | }
149 | 
150 | 
151 | void modulated_deform_conv_backward(
152 |     at::Tensor input,
153 |     at::Tensor weight,
154 |     at::Tensor bias,
155 |     at::Tensor ones,
156 |     at::Tensor offset,
157 |     at::Tensor mask,
158 |     at::Tensor columns,
159 |     at::Tensor grad_input,
160 |     at::Tensor grad_weight,
161 |     at::Tensor grad_bias,
162 |     at::Tensor grad_offset,
163 |     at::Tensor grad_mask,
164 |     at::Tensor grad_output,
165 |     int kernel_h,
166 |     int kernel_w,
167 |     int stride_h,
168 |     int stride_w,
169 |     int pad_h,
170 |     int pad_w,
171 |     int dilation_h,
172 |     int dilation_w,
173 |     int group,
174 |     int deformable_group,
175 |     const bool with_bias)
176 | {
177 |     if (input.type().is_cuda()) {
178 | #ifdef WITH_CUDA
179 |         return modulated_deform_conv_cuda_backward(
180 |             input, weight, bias, ones, offset, mask, columns,
181 |             grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output,
182 |             kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w,
183 |             group, deformable_group, with_bias
184 |         );
185 | #else
186 |         AT_ERROR("Not compiled with GPU support");
187 | #endif
188 |     }
189 |     AT_ERROR("Not implemented on the CPU");
190 | }
--------------------------------------------------------------------------------
/hetsgg/csrc/cuda/nms.cu:
--------------------------------------------------------------------------------
1 | #include <ATen/ATen.h>
2 | #include <ATen/cuda/CUDAContext.h>
3 | 
4 | #include <THC/THC.h>
5 | #include <THC/THCDeviceUtils.cuh>
6 | 
7 | #include <vector>
8 | #include <iostream>
9 | 
10 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
11 | 
12 | __device__ inline float devIoU(float const * const a, float const * const b) {
13 |   float left = max(a[0], b[0]), right = min(a[2], b[2]);
14 |   float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
15 |   float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
16 |   float interS = width * height;
17 |   float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
18 |   float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
19 |   return interS / (Sa + Sb - interS);
20 | }
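// Editorial note (added): threadsPerBlock is 64, the number of bits in an
// unsigned long long. Each thread block below compares one 64-box row chunk
// against one 64-box column chunk; each thread packs the ">threshold" overlaps
// of its row box with the column chunk into a single 64-bit word, so dev_mask
// holds n_boxes * ceil(n_boxes / 64) bitmask words.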
21 | 
22 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
23 |                            const float *dev_boxes, unsigned long long *dev_mask) {
24 |   const int row_start = blockIdx.y;
25 |   const int col_start = blockIdx.x;
26 | 
27 |   // if (row_start > col_start) return;
28 | 
29 |   const int row_size =
30 |         min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
31 |   const int col_size =
32 |         min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
33 | 
34 |   __shared__ float block_boxes[threadsPerBlock * 5];
35 |   if (threadIdx.x < col_size) {
36 |     block_boxes[threadIdx.x * 5 + 0] =
37 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
38 |     block_boxes[threadIdx.x * 5 + 1] =
39 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
40 |     block_boxes[threadIdx.x * 5 + 2] =
41 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
42 |     block_boxes[threadIdx.x * 5 + 3] =
43 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
44 |     block_boxes[threadIdx.x * 5 + 4] =
45 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
46 |   }
47 |   __syncthreads();
48 | 
49 |   if (threadIdx.x < row_size) {
50 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
51 |     const float *cur_box = dev_boxes + cur_box_idx * 5;
52 |     int i = 0;
53 |     unsigned long long t = 0;
54 |     int start = 0;
55 |     if (row_start == col_start) {
56 |       start = threadIdx.x + 1;
57 |     }
58 |     for (i = start; i < col_size; i++) {
59 |       if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
60 |         t |= 1ULL << i;
61 |       }
62 |     }
63 |     const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock);
64 |     dev_mask[cur_box_idx * col_blocks + col_start] = t;
65 |   }
66 | }
67 | 
68 | // boxes is a N x 5 tensor
69 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) {
70 |   using scalar_t = float;
71 |   AT_ASSERTM(boxes.type().is_cuda(), "boxes must be a CUDA tensor");
72 |   auto scores = boxes.select(1, 4);
73 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
74 |   auto boxes_sorted = boxes.index_select(0, order_t);
75 | 
76 |   int boxes_num = boxes.size(0);
77 | 
78 |   const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock);
79 | 
80 |   scalar_t* boxes_dev = boxes_sorted.data<scalar_t>();
81 | 
82 |   THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState
83 | 
84 |   unsigned long long* mask_dev = NULL;
85 |   //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev,
86 |   //                      boxes_num * col_blocks * sizeof(unsigned long long)));
87 | 
88 |   mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long));
89 | 
90 |   dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock),
91 |               THCCeilDiv(boxes_num, threadsPerBlock));
92 |   dim3 threads(threadsPerBlock);
93 |   nms_kernel<<<blocks, threads>>>(boxes_num,
94 |                                   nms_overlap_thresh,
95 |                                   boxes_dev,
96 |                                   mask_dev);
97 | 
98 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
99 |   THCudaCheck(cudaMemcpy(&mask_host[0],
100 |                         mask_dev,
101 |                         sizeof(unsigned long long) * boxes_num * col_blocks,
102 |                         cudaMemcpyDeviceToHost));
103 | 
104 |   std::vector<unsigned long long> remv(col_blocks);
105 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
106 | 
107 |   at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU));
108 |   int64_t* keep_out = keep.data<int64_t>();
109 | 
110 |   int num_to_keep = 0;
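// Editorial note (added): the host-side sweep below visits boxes in descending
// score order. A box is kept only if no previously kept box has flagged it in
// mask_host; each kept box then ORs its bitmask row into remv so that the
// boxes it suppresses are skipped on later iterations.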
111 |   for (int i = 0; i < boxes_num; i++) {
112 |     int nblock = i / threadsPerBlock;
113 |     int inblock = i % threadsPerBlock;
114 | 
115 |     if (!(remv[nblock] & (1ULL << inblock))) {
116 |       keep_out[num_to_keep++] = i;
117 |       unsigned long long *p = &mask_host[0] + i * col_blocks;
118 |       for (int j = nblock; j < col_blocks; j++) {
119 |         remv[j] |= p[j];
120 |       }
121 |     }
122 |   }
123 | 
124 |   THCudaFree(state, mask_dev);
125 |   // TODO improve this part
126 |   return std::get<0>(order_t.index({
127 |                        keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to(
128 |                          order_t.device(), keep.scalar_type())
129 |                      }).sort(0, false));
130 | }
131 | 
--------------------------------------------------------------------------------
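Editor's addendum: below is a minimal pure-PyTorch sketch of the greedy NMS
semantics implemented by nms_cuda above, assuming the same (N, 5) input layout
of (x1, y1, x2, y2, score) with inclusive pixel coordinates (hence the + 1 in
the area terms). The name nms_reference and its standalone form are editorial,
intended only for sanity-checking the CUDA kernel on small inputs, not as part
of the repository.

import torch


def nms_reference(boxes: torch.Tensor, iou_threshold: float) -> torch.Tensor:
    """Greedy NMS over an (N, 5) float tensor of (x1, y1, x2, y2, score).

    Returns the kept original indices in ascending order, mirroring the
    final sort in nms_cuda.
    """
    order = boxes[:, 4].sort(descending=True)[1]
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    # Inclusive coordinates, matching devIoU in the CUDA kernel.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)

    keep = []
    suppressed = torch.zeros(boxes.size(0), dtype=torch.bool)
    for i in order.tolist():
        if suppressed[i]:
            continue
        keep.append(i)
        # IoU of the newly kept box against every box; mark the overlaps.
        w = (torch.min(x2[i], x2) - torch.max(x1[i], x1) + 1).clamp(min=0)
        h = (torch.min(y2[i], y2) - torch.max(y1[i], y1) + 1).clamp(min=0)
        inter = w * h
        iou = inter / (areas[i] + areas - inter)
        suppressed |= iou > iou_threshold
    return torch.tensor(sorted(keep), dtype=torch.long)

On a GPU build, nms_reference(boxes, 0.7) should agree with the indices kept by
nms_cuda(boxes.cuda(), 0.7) up to floating-point ties in the scores.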