├── DATASET.md ├── INSTALL.md ├── README.md ├── nbdt ├── nbdt │ ├── __init__.py │ ├── analysis.py │ ├── bin │ │ ├── nbdt │ │ ├── nbdt-hierarchy │ │ └── nbdt-wnids │ ├── data │ │ ├── __init__.py │ │ ├── custom.py │ │ └── imagenet.py │ ├── graph.py │ ├── hierarchies │ │ └── VG150 │ │ │ └── graph-cogtree.json │ ├── hierarchy.py │ ├── loss.py │ ├── model.py │ ├── models │ │ ├── __init__.py │ │ ├── resnet.py │ │ ├── utils.py │ │ └── wideresnet.py │ ├── templates │ │ └── tree-template.html │ ├── utils.py │ └── wnids │ │ └── VG150.txt ├── requirements.txt └── setup.py └── sg-benchmark ├── configs ├── e2e_relation_X_101_32_8_FPN_1x.yaml └── e2e_relation_X_101_32_8_FPN_1x_transformer.yaml ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── Vrd.py │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ ├── vg │ │ │ │ ├── __init__.py │ │ │ │ ├── sgg_eval.py │ │ │ │ ├── vg_capgraphs_anno.json │ │ │ │ ├── vg_eval.py │ │ │ │ ├── vg_test_capgraph_anno.json │ │ │ │ ├── vg_test_caption_anno.json │ │ │ │ └── zeroshot_triplet.pytorch │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── list_dataset.py │ │ ├── visual_genome.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── inference.py │ └── trainer.py ├── image_retrieval │ ├── S2G-RETRIEVAL.md │ ├── __init__.py │ ├── dataloader.py │ ├── evaluation.py │ ├── model.py │ ├── modelv2.py │ └── preprocessing.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv_func.py │ │ ├── deform_conv_module.py │ │ ├── deform_pool_func.py │ │ └── deform_pool_module.py │ ├── entropy_loss.py │ ├── kl_div_loss.py │ ├── label_smoothing_loss.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── sigmoid_focal_loss.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fbnet.py │ │ ├── fbnet_builder.py │ │ ├── fbnet_modeldef.py │ │ ├── fpn.py │ │ ├── resnet.py │ │ └── vgg.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── attribute_head │ │ │ ├── __init__.py │ │ │ ├── attribute_head.py │ │ │ ├── loss.py │ │ │ ├── roi_attribute_feature_extractors.py │ │ │ └── roi_attribute_predictors.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ ├── roi_box_predictors.py │ │ │ 
└── sampling.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── keypoint_head.py │ │ │ ├── loss.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ └── roi_keypoint_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ ├── relation_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── model_motifs.py │ │ │ ├── model_motifs_with_attribute.py │ │ │ ├── model_msg_passing.py │ │ │ ├── model_transformer.py │ │ │ ├── model_vctree.py │ │ │ ├── model_vtranse.py │ │ │ ├── relation_head.py │ │ │ ├── roi_relation_feature_extractors.py │ │ │ ├── roi_relation_predictors.py │ │ │ ├── sampling.py │ │ │ ├── utils_motifs.py │ │ │ ├── utils_relation.py │ │ │ ├── utils_treelstm.py │ │ │ └── utils_vctree.py │ │ └── roi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ └── retinanet.py │ │ ├── rpn.py │ │ └── utils.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── keypoint.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── registry.py │ └── timer.py ├── requirements.txt ├── setup.py └── tools ├── __init__.py ├── detector_pretest_net.py ├── detector_pretrain_net.py ├── image_retrieval_main.py ├── relation_test_net.py └── relation_train_net.py /DATASET.md: -------------------------------------------------------------------------------- 1 | ## DATASET 2 | The following is adapted from [Danfei Xu](https://github.com/danfeiX/scene-graph-TF-release/blob/master/data_tools/README.md) and [neural-motifs](https://github.com/rowanz/neural-motifs). 3 | 4 | ### Download: 5 | 1. Download the VG images [part1](https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip) [part2](https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip). Extract these images to the directory `datasets/vg/VG_100K`. 6 | 2. Download the [scene graphs](https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21779871&authkey=AA33n7BRpB1xa3I) and extract them to `datasets/vg/VG-SGG-with-attri.h5`. 7 | 8 | Please check [Scene Graph Benchmark/DATASET.md](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch/blob/master/DATASET.md) for more details. -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | Most of the requirements of this project are exactly the same as [Scene Graph Benchmark](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch) and [Neural-Backed Decision Trees](https://github.com/alvinwan/neural-backed-decision-trees). If you have any problems with your environment, you should check the [issues page of SG Benchmark](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch/issues) and the [issues page of NBDT](https://github.com/alvinwan/neural-backed-decision-trees/issues) first.
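Before building, it can help to confirm that your Python environment actually matches the requirements listed below. A minimal sanity-check sketch (it assumes PyTorch and torchvision are already installed; the version numbers refer to the Requirements list and the CUDA 10.1 instructions below):

```python
# Quick environment check (sketch): prints the versions the build steps below rely on.
import torch
import torchvision

print("PyTorch:", torch.__version__)            # expected >= 1.2 (1.4.0 in the steps below)
print("torchvision:", torchvision.__version__)  # expected >= 0.4 (0.5.0 in the steps below)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version PyTorch was built with:", torch.version.cuda)  # should match the cudatoolkit you install
```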
4 | 5 | ### Requirements: 6 | - PyTorch >= 1.2 7 | - torchvision >= 0.4 8 | - cocoapi 9 | - yacs 10 | - matplotlib 11 | - GCC >= 4.9 12 | - OpenCV 13 | 14 | 15 | ### Step-by-step installation 16 | 17 | ```bash 18 | 19 | conda create --name scene_graph_benchmark 20 | conda activate scene_graph_benchmark 21 | 22 | # this installs the right pip and dependencies for the fresh python 23 | conda install ipython 24 | conda install scipy 25 | conda install h5py 26 | 27 | # scene_graph_benchmark and coco api dependencies 28 | pip install ninja yacs cython matplotlib tqdm opencv-python overrides 29 | 30 | # follow PyTorch installation in https://pytorch.org/get-started/locally/ 31 | # we give the instructions for CUDA 10.1 32 | conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.1 -c pytorch 33 | 34 | export INSTALL_DIR=$PWD 35 | 36 | # install pycocotools 37 | cd $INSTALL_DIR 38 | git clone https://github.com/cocodataset/cocoapi.git 39 | cd cocoapi/PythonAPI 40 | python setup.py build_ext install 41 | 42 | # install apex 43 | cd $INSTALL_DIR 44 | git clone https://github.com/NVIDIA/apex.git 45 | cd apex 46 | python setup.py install --cuda_ext --cpp_ext 47 | 48 | # install PyTorch Detection 49 | cd $INSTALL_DIR 50 | git clone https://github.com/CYVincent/Scene-Graph-Transformer-CogTree.git 51 | cd Scene-Graph-Transformer-CogTree/sg-benchmark 52 | 53 | # the following will install the lib with 54 | # symbolic links, so that you can modify 55 | # the files if you want and won't need to 56 | # re-build it 57 | python setup.py build develop 58 | 59 | cd $INSTALL_DIR 60 | cd Scene-Graph-Transformer-CogTree/nbdt 61 | python setup.py develop 62 | 63 | unset INSTALL_DIR 64 | 65 | ``` -------------------------------------------------------------------------------- /nbdt/nbdt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/nbdt/nbdt/__init__.py -------------------------------------------------------------------------------- /nbdt/nbdt/bin/nbdt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Run evaluation on a single image, using an NBDT""" 3 | 4 | from nbdt.model import SoftNBDT, HardNBDT 5 | from pytorchcv.models.wrn_cifar import wrn28_10_cifar10 6 | from torchvision import transforms 7 | from nbdt.utils import DATASET_TO_CLASSES, load_image_from_path, maybe_install_wordnet 8 | import sys 9 | 10 | maybe_install_wordnet() 11 | 12 | assert len(sys.argv) > 1, "Need to pass image URL or image path as argument" 13 | 14 | # load pretrained NBDT 15 | model = wrn28_10_cifar10() 16 | model = HardNBDT( 17 | pretrained=True, 18 | dataset='CIFAR10', 19 | arch='wrn28_10_cifar10', 20 | model=model) 21 | 22 | # load + transform image 23 | im = load_image_from_path(sys.argv[1]) 24 | transform = transforms.Compose([ 25 | transforms.Resize(32), 26 | transforms.CenterCrop(32), 27 | transforms.ToTensor(), 28 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 29 | ]) 30 | x = transform(im)[None] 31 | 32 | # run inference 33 | outputs, decisions = model.forward_with_decisions(x) # use `model(x)` to obtain just logits 34 | _, predicted = outputs.max(1) 35 | cls = DATASET_TO_CLASSES['CIFAR10'][predicted[0]] 36 | print('Prediction:', cls, '// Decisions:', ', '.join([ 37 | '{} ({:.2f}%)'.format(info['name'], info['prob'] * 100) for info in decisions[0] 38 | ][1:])) # [1:] to 
skip the root 39 | -------------------------------------------------------------------------------- /nbdt/nbdt/bin/nbdt-hierarchy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from nbdt.hierarchy import generate_hierarchy, test_hierarchy, generate_hierarchy_vis 4 | from nbdt.graph import get_parser 5 | from nbdt.utils import maybe_install_wordnet 6 | 7 | 8 | def main(): 9 | maybe_install_wordnet() 10 | 11 | parser = get_parser() 12 | args = parser.parse_args() 13 | 14 | generate_hierarchy(**vars(args)) 15 | test_hierarchy(args) 16 | generate_hierarchy_vis(args) 17 | 18 | 19 | if __name__ == '__main__': 20 | main() 21 | -------------------------------------------------------------------------------- /nbdt/nbdt/bin/nbdt-wnids: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Generates wnids using class names for torchvision dataset""" 3 | 4 | import argparse 5 | import torchvision 6 | from nbdt import data 7 | from nltk.corpus import wordnet as wn 8 | from nbdt.graph import synset_to_wnid, write_wnids, FakeSynset 9 | from pathlib import Path 10 | from nbdt.utils import Colors, generate_kwargs, maybe_install_wordnet 11 | import os 12 | 13 | maybe_install_wordnet() 14 | 15 | datasets = ('CIFAR10', 'CIFAR100', 'VG150', 'VG150_head', 'VG150_head1', 'VG150_head2', 'VG150_head3') + data.imagenet.names + data.custom.names 16 | 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--dataset', choices=datasets, default='CIFAR10') 20 | parser.add_argument('--root', default='./nbdt/wnids') 21 | data.custom.add_arguments(parser) 22 | args = parser.parse_args() 23 | 24 | # dataset = getattr(data, args.dataset) 25 | # dataset_kwargs = generate_kwargs(args, dataset, 26 | # name=f'Dataset {args.dataset}', 27 | # keys=data.custom.keys, 28 | # globals=globals()) 29 | # dataset = dataset(**dataset_kwargs, root='./data', download=True) 30 | 31 | # classes = ['above', 'across', 'against', 'along', 'and', 'at', 'attached to', 'behind', 'belonging to', 'between', 'carrying', 'covered in', 'covering', 'eating', 'flying in', 'for', 'from', 'growing on', 'hanging from', 'has', 'holding', 'in', 'in front of', 'laying on', 'looking at', 'lying on', 'made of', 'mounted on', 'near', 'of', 'on', 'on back of', 'over', 'painted on', 'parked on', 'part of', 'playing', 'riding', 'says', 'sitting on', 'standing on', 'to', 'under', 'using', 'walking in', 'walking on', 'watching', 'wearing', 'wears', 'with'] 32 | # classes = ['on','has','wearing','of','in','near','with','holding','behind','above','sitting on','wears','riding','under','in front of'] 33 | classes = ['behind', 'has', 'holding', 'in', 'near','of','on', 'wearing', 'with'] 34 | 35 | 36 | path = Path(os.path.join(args.root, f'{args.dataset}.txt')) 37 | os.makedirs(path.parent, exist_ok=True) 38 | failures = [] 39 | 40 | hardcoded_mapping = { 41 | 'aquarium_fish': wn.synsets('fingerling', pos=wn.NOUN)[0], 42 | 'beaver': wn.synsets('beaver', pos=wn.NOUN)[-1], 43 | 'castle': wn.synsets('castle', pos=wn.NOUN)[1], 44 | 'flatfish': wn.synsets('flatfish', pos=wn.NOUN)[1], 45 | 'leopard': wn.synsets('leopard', pos=wn.NOUN)[1], 46 | 'lobster': wn.synsets('lobster', pos=wn.NOUN)[1], 47 | 'maple_tree': wn.synsets('maple', pos=wn.NOUN)[1], 48 | 'otter': wn.synsets('otter', pos=wn.NOUN)[1], 49 | 'plate': wn.synsets('plate', pos=wn.NOUN)[3], 50 | 'raccoon': wn.synsets('raccoon', pos=wn.NOUN)[1], 51 | 'ray': wn.synsets('ray', 
pos=wn.NOUN)[-1], 52 | 'seal': wn.synsets('seal', pos=wn.NOUN)[-1], 53 | 'shrew': wn.synsets('shrew', pos=wn.NOUN)[1], 54 | 'skunk': wn.synsets('skunk', pos=wn.NOUN)[1], 55 | 'tiger': wn.synsets('tiger', pos=wn.NOUN)[1], 56 | 'table': wn.synsets('table', pos=wn.NOUN)[1], 57 | 'turtle': wn.synsets('turtle', pos=wn.NOUN)[1], 58 | 'whale': wn.synsets('whale', pos=wn.NOUN)[1], 59 | } 60 | 61 | wnids = [] 62 | for i, cls in enumerate(classes): 63 | if cls in hardcoded_mapping: 64 | synset = hardcoded_mapping[cls] 65 | else: 66 | synsets = wn.synsets(cls, pos=wn.NOUN) 67 | if not synsets: 68 | Colors.red(f'==> Failed to find synset for {cls}. Using fake synset...') 69 | failures.append(cls) 70 | synsets = [FakeSynset.create_from_offset(i)] 71 | synset = synsets[0] 72 | wnid = synset_to_wnid(synset) 73 | print(f'{wnid}: ({cls}) {synset.definition()}') 74 | wnids.append(wnid) 75 | 76 | write_wnids(wnids, path) 77 | 78 | if failures: 79 | Colors.red(f'==> Warning: failed to find wordnet IDs for {failures}') 80 | Colors.green(f'==> Wrote to {path}') 81 | -------------------------------------------------------------------------------- /nbdt/nbdt/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import * 2 | from .imagenet import * 3 | from torchvision.datasets import * 4 | -------------------------------------------------------------------------------- /nbdt/nbdt/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .wideresnet import * 3 | from pytorchcv.models.efficientnet import * 4 | from torchvision.models import * 5 | 6 | 7 | def get_model_choices(): 8 | from types import ModuleType 9 | 10 | for key, value in globals().items(): 11 | if not key.startswith('__') and not isinstance(value, ModuleType) and callable(value): 12 | yield key 13 | -------------------------------------------------------------------------------- /nbdt/nbdt/models/utils.py: -------------------------------------------------------------------------------- 1 | from torch.hub import load_state_dict_from_url 2 | from pathlib import Path 3 | import torch 4 | 5 | 6 | def get_pretrained_model( 7 | arch, dataset, model, model_urls, 8 | pretrained=False, 9 | progress=True, 10 | root='.cache/torch/checkpoints'): 11 | if pretrained: 12 | state_dict = load_state_dict_from_key( 13 | [(arch, dataset)], model_urls, pretrained, progress, root, 14 | device=get_model_device(model)) 15 | state_dict = coerce_state_dict(state_dict, model.state_dict()) 16 | model.load_state_dict(state_dict) 17 | return model 18 | 19 | def coerce_state_dict(state_dict, reference_state_dict): 20 | if 'net' in state_dict: 21 | state_dict = state_dict['net'] 22 | has_reference_module = list(reference_state_dict)[0].startswith('module.') 23 | has_module = list(state_dict)[0].startswith('module.') 24 | if not has_reference_module and has_module: 25 | state_dict = { 26 | key.replace('module.', '', 1): value 27 | for key, value in state_dict.items() 28 | } 29 | elif has_reference_module and not has_module: 30 | state_dict = { 31 | 'module.' 
+ key: value 32 | for key, value in state_dict.items() 33 | } 34 | return state_dict 35 | 36 | def get_model_device(model): 37 | return next(model.parameters()).device 38 | 39 | def load_state_dict_from_key( 40 | keys, model_urls, 41 | pretrained=False, 42 | progress=True, 43 | root='.cache/torch/checkpoints', 44 | device='cpu'): 45 | valid_keys = [key for key in keys if key in model_urls] 46 | if not valid_keys: 47 | raise UserWarning( 48 | f'None of the keys {keys} correspond to a pretrained model.' 49 | ) 50 | return load_state_dict_from_url( 51 | model_urls[valid_keys[-1]], 52 | Path.home() / root, 53 | progress=progress, 54 | check_hash=False, 55 | map_location=torch.device(device)) 56 | -------------------------------------------------------------------------------- /nbdt/nbdt/models/wideresnet.py: -------------------------------------------------------------------------------- 1 | from pytorchcv.models.wrn_cifar import wrn28_10_cifar10, wrn28_10_cifar100, get_wrn_cifar 2 | from nbdt.models.utils import get_pretrained_model 3 | import torch.nn as nn 4 | 5 | 6 | __all__ = ('wrn28_10', 'wrn28_10_cifar10', 'wrn28_10_cifar100') 7 | 8 | 9 | model_urls = { 10 | ('wrn28_10', 'TinyImagenet200'): 'https://github.com/alvinwan/neural-backed-decision-trees/releases/download/0.0.1/ckpt-TinyImagenet200-wrn28_10.pth' 11 | } 12 | 13 | 14 | def _wrn(arch, model, pretrained=False, progress=True, dataset='CIFAR10'): 15 | model = get_pretrained_model(arch, dataset, model, model_urls, 16 | pretrained=pretrained, progress=progress) 17 | return model 18 | 19 | 20 | def wrn28_10(pretrained=False, progress=True, dataset='CIFAR10', **kwargs): 21 | """Replace `final_pool` (8x8 average pooling) with a global average pooling. 22 | 23 | If this gets crappy accuracy for TinyImagenet200, it's probably because the 24 | final pooled feature map is 16x16 instead of 8x8. So needs another stride 2 25 | stage, technically. 
26 | """ 27 | model = get_wrn_cifar(blocks=28, width_factor=10, model_name="wrn28_10", **kwargs) 28 | model.features.final_pool = nn.AdaptiveAvgPool2d((1, 1)) 29 | model = _wrn('wrn28_10', model, pretrained=pretrained, progress=progress, dataset=dataset) 30 | return model 31 | -------------------------------------------------------------------------------- /nbdt/nbdt/wnids/VG150.txt: -------------------------------------------------------------------------------- 1 | n06392660 2 | f00000001 3 | f00000002 4 | f00000003 5 | f00000004 6 | n14629561 7 | f00000006 8 | n05559256 9 | f00000008 10 | f00000009 11 | f00000010 12 | f00000011 13 | n09257949 14 | n00838367 15 | f00000014 16 | f00000015 17 | f00000016 18 | f00000017 19 | f00000018 20 | n13888783 21 | n00810598 22 | n13649791 23 | f00000022 24 | f00000023 25 | f00000024 26 | f00000025 27 | f00000026 28 | f00000027 29 | f00000028 30 | f00000029 31 | f00000030 32 | f00000031 33 | n15258694 34 | f00000033 35 | f00000034 36 | f00000035 37 | n00101191 38 | n00450335 39 | n14485526 40 | f00000039 41 | f00000040 42 | f00000041 43 | f00000042 44 | n00418903 45 | f00000044 46 | f00000045 47 | n00879759 48 | n13475538 49 | n14562683 50 | f00000049 -------------------------------------------------------------------------------- /nbdt/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorchcv 2 | torch 3 | torchvision 4 | nltk 5 | scikit-learn 6 | networkx 7 | pytest 8 | -------------------------------------------------------------------------------- /nbdt/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | VERSION = '0.0.4' 4 | 5 | with open("requirements.txt", "r") as f: 6 | install_requires = f.readlines() 7 | 8 | 9 | # with open("README.md", "r") as fh: 10 | # long_description = fh.read() 11 | 12 | 13 | setuptools.setup( 14 | name="nbdt", 15 | version=VERSION, 16 | author="Alvin Wan", # TODO: proper way to list all paper authors? 17 | author_email="hi@alvinwan.com", 18 | description="", 19 | long_description_content_type="text/markdown", 20 | url="https://github.com/alvinwan/neural-backed-decision-trees", 21 | packages=setuptools.find_packages(), 22 | install_requires=install_requires, 23 | download_url='https://github.com/alvinwan/neural-backed-decision-trees/archive/%s.zip' % VERSION, 24 | scripts=['nbdt/bin/nbdt-hierarchy', 'nbdt/bin/nbdt-wnids', 'nbdt/bin/nbdt'], 25 | classifiers=[ 26 | "Intended Audience :: Developers", 27 | "Programming Language :: Python :: 3", 28 | "License :: OSI Approved :: MIT License", 29 | "Operating System :: OS Independent", 30 | ], 31 | python_requires='>=3.5', 32 | include_package_data=True 33 | ) 34 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 
2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("nms", &nms, "non-maximum suppression"); 11 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 12 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 13 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 14 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 15 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 16 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 17 | // dcn-v2 18 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 19 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 20 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 21 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 22 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 23 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 24 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 25 | } -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/README.md: -------------------------------------------------------------------------------- 1 | # Setting Up Datasets 2 | This file describes how to perform training on other datasets. 3 | 4 | Only Pascal VOC dataset can be loaded from its original format and be outputted to Pascal style results currently. 5 | 6 | We expect the annotations from other datasets be converted to COCO json format, and 7 | the output will be in COCO-style. (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm) 8 | 9 | ## Creating Symlinks for PASCAL VOC 10 | 11 | We assume that your symlinked `datasets/voc/VOC` directory has the following structure: 12 | 13 | ``` 14 | VOC 15 | |_ JPEGImages 16 | | |_ .jpg 17 | | |_ ... 18 | | |_ .jpg 19 | |_ Annotations 20 | | |_ pascal_train.json (optional) 21 | | |_ pascal_val.json (optional) 22 | | |_ pascal_test.json (optional) 23 | | |_ .xml 24 | | |_ ... 25 | | |_ .xml 26 | |_ VOCdevkit 27 | ``` 28 | 29 | Create symlinks for `voc/VOC`: 30 | 31 | ``` 32 | cd ~/github/maskrcnn-benchmark 33 | mkdir -p datasets/voc/VOC 34 | ln -s /path/to/VOC /datasets/voc/VOC 35 | ``` 36 | Example configuration files for PASCAL VOC could be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/). 37 | 38 | ### PASCAL VOC Annotations in COCO Format 39 | To output COCO-style evaluation result, PASCAL VOC annotations in COCO json format is required and could be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) 40 | via http://cocodataset.org/#external. 41 | 42 | ## Creating Symlinks for Cityscapes: 43 | 44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure: 45 | 46 | ``` 47 | cityscapes 48 | |_ images 49 | | |_ .jpg 50 | | |_ ... 51 | | |_ .jpg 52 | |_ annotations 53 | | |_ instanceonly_gtFile_train.json 54 | | |_ ... 55 | |_ raw 56 | |_ gtFine 57 | |_ ... 
58 | |_ README.md 59 | ``` 60 | 61 | Create symlinks for `cityscapes`: 62 | 63 | ``` 64 | cd ~/github/maskrcnn-benchmark 65 | mkdir -p datasets/cityscapes 66 | ln -s /path/to/cityscapes datasets/cityscapes 67 | ``` 68 | 69 | ### Steps to convert Cityscapes Annotations to COCO Format 70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required) 71 | 2. Extract it to /path/to/gtFine_trainvaltest 72 | ``` 73 | cityscapes 74 | |_ gtFine_trainvaltest.zip 75 | |_ gtFine_trainvaltest 76 | |_ gtFine 77 | ``` 78 | 3. Run the commands below to convert the annotations 79 | 80 | ``` 81 | cd ~/github 82 | git clone https://github.com/mcordts/cityscapesScripts.git 83 | cd cityscapesScripts 84 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation 85 | python setup.py install 86 | cd ~/github/maskrcnn-benchmark 87 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/cityscapes --outdir /path/to/cityscapes/annotations 88 | ``` 89 | 90 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/). 91 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader, get_dataset_statistics 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | 22 | 23 | class BBoxAugCollator(object): 24 | """ 25 | From a list of samples from the dataset, 26 | returns the images and targets. 27 | Images should be converted to batched images in `im_detect_bbox_aug` 28 | """ 29 | 30 | def __call__(self, batch): 31 | return list(zip(*batch)) 32 | 33 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/Vrd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | from maskrcnn_benchmark.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class VrdDataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, transforms=None 42 | ): 43 | super(VrdDataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | 68 | def __getitem__(self, idx): 69 | img, anno = super(VrdDataset, self).__getitem__(idx) 70 | 71 | # filter crowd annotations 72 | # TODO might be better to add an extra field 73 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 74 | 75 | boxes = [obj["bbox"] for obj in anno] 76 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 77 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 78 | 79 | classes = [obj["category_id"] for obj in anno] 80 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 81 | classes = torch.tensor(classes) 82 | target.add_field("labels", classes) 83 | 84 | if anno and "segmentation" in anno[0]: 85 | masks = [obj["segmentation"] for obj in anno] 86 | masks = SegmentationMask(masks, img.size, mode='poly') 87 | target.add_field("masks", masks) 88 | 89 | if anno and "keypoints" in anno[0]: 90 | keypoints = [obj["keypoints"] for obj in anno] 91 | keypoints = PersonKeypoints(keypoints, img.size) 92 | target.add_field("keypoints", keypoints) 93 | 94 | target = target.clip_to_image(remove_empty=True) 95 | 96 | if self._transforms is not None: 97 | img, target = self._transforms(img, target) 98 | 99 | return img, target, idx 100 | 101 | def get_img_info(self, index): 102 | img_id = 
self.id_to_img_map[index] 103 | img_data = self.coco.imgs[img_id] 104 | return img_data 105 | 106 | if __name__ == "__main__": 107 | image_dir = '/home/data1/yjgroup/cy/SGBenchmark/sg_benchmark/datasets/vrd/images/train_images' 108 | ann_dir = '/home/data1/yjgroup/cy/SGBenchmark/sg_benchmark/datasets/vrd/detections_train.json' 109 | dataset = VrdDataset(ann_dir,image_dir, True) 110 | temp = dataset[0] 111 | print(temp) 112 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | from .visual_genome import VGDataset 6 | 7 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "VGDataset"] 8 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | from maskrcnn_benchmark.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class COCODataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, transforms=None 42 | ): 43 | super(COCODataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | 68 | def 
__getitem__(self, idx): 69 | img, anno = super(COCODataset, self).__getitem__(idx) 70 | 71 | # filter crowd annotations 72 | # TODO might be better to add an extra field 73 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 74 | 75 | boxes = [obj["bbox"] for obj in anno] 76 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 77 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 78 | 79 | classes = [obj["category_id"] for obj in anno] 80 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 81 | classes = torch.tensor(classes) 82 | target.add_field("labels", classes) 83 | 84 | if anno and "segmentation" in anno[0]: 85 | masks = [obj["segmentation"] for obj in anno] 86 | masks = SegmentationMask(masks, img.size, mode='poly') 87 | target.add_field("masks", masks) 88 | 89 | if anno and "keypoints" in anno[0]: 90 | keypoints = [obj["keypoints"] for obj in anno] 91 | keypoints = PersonKeypoints(keypoints, img.size) 92 | target.add_field("keypoints", keypoints) 93 | 94 | target = target.clip_to_image(remove_empty=True) 95 | 96 | if self._transforms is not None: 97 | img, target = self._transforms(img, target) 98 | 99 | return img, target, idx 100 | 101 | def get_img_info(self, index): 102 | img_id = self.id_to_img_map[index] 103 | img_data = self.coco.imgs[img_id] 104 | return img_data 105 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | from .vg import vg_evaluation 6 | 7 | 8 | def evaluate(cfg, dataset, predictions, output_folder, logger, **kwargs): 9 | """evaluate dataset using different methods based on dataset type. 10 | Args: 11 | dataset: Dataset object 12 | predictions(list[BoxList]): each item in the list represents the 13 | prediction results for one image. 14 | output_folder: output folder, to save evaluation files or results. 15 | **kwargs: other args. 
16 | Returns: 17 | evaluation result 18 | """ 19 | args = dict( 20 | cfg=cfg, dataset=dataset, predictions=predictions, output_folder=output_folder, logger=logger, **kwargs 21 | ) 22 | if isinstance(dataset, datasets.COCODataset): 23 | return coco_evaluation(**args) 24 | elif isinstance(dataset, datasets.PascalVOCDataset): 25 | return voc_evaluation(**args) 26 | elif isinstance(dataset, datasets.VGDataset): 27 | return vg_evaluation(**args) 28 | else: 29 | dataset_name = dataset.__class__.__name__ 30 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 31 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | return do_coco_evaluation( 16 | dataset=dataset, 17 | predictions=predictions, 18 | box_only=box_only, 19 | output_folder=output_folder, 20 | logger=logger, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/vg/__init__.py: -------------------------------------------------------------------------------- 1 | from .vg_eval import do_vg_evaluation 2 | 3 | 4 | def vg_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | iou_types, 11 | **_ 12 | ): 13 | return do_vg_evaluation( 14 | cfg=cfg, 15 | dataset=dataset, 16 | predictions=predictions, 17 | output_folder=output_folder, 18 | logger=logger, 19 | iou_types=iou_types, 20 | ) 21 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/vg/zeroshot_triplet.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/vg/zeroshot_triplet.pytorch -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(cfg, dataset, predictions, output_folder, logger, box_only, **_): 7 | if box_only: 8 | logger.warning("voc evaluation doesn't support box_only, ignored.") 9 | logger.info("performing voc evaluation, ignored iou_types.") 10 | return do_voc_evaluation( 11 | dataset=dataset, 12 | predictions=predictions, 13 | output_folder=output_folder, 14 | logger=logger, 15 | ) 16 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from maskrcnn_benchmark.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | "train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | self.categories = dict(zip(range(len(cls)), cls)) 61 | 62 | def __getitem__(self, index): 63 | img_id = self.ids[index] 64 | img = Image.open(self._imgpath % img_id).convert("RGB") 65 | 66 | target = self.get_groundtruth(index) 67 | target = target.clip_to_image(remove_empty=True) 68 | 69 | if self.transforms is not None: 70 | img, target = self.transforms(img, target) 71 | 72 | return img, target, index 73 | 74 | def __len__(self): 75 | return len(self.ids) 76 | 77 | def get_groundtruth(self, index): 78 | img_id = self.ids[index] 79 | anno = ET.parse(self._annopath % img_id).getroot() 80 | anno = self._preprocess_annotation(anno) 81 | 82 | height, width = anno["im_info"] 83 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 84 | target.add_field("labels", anno["labels"]) 85 | target.add_field("difficult", anno["difficult"]) 86 | return target 87 | 88 | def _preprocess_annotation(self, target): 89 | boxes = [] 90 | gt_classes = [] 91 | 
difficult_boxes = [] 92 | TO_REMOVE = 1 93 | 94 | for obj in target.iter("object"): 95 | difficult = int(obj.find("difficult").text) == 1 96 | if not self.keep_difficult and difficult: 97 | continue 98 | name = obj.find("name").text.lower().strip() 99 | bb = obj.find("bndbox") 100 | # Make pixel indexes 0-based 101 | # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211" 102 | box = [ 103 | bb.find("xmin").text, 104 | bb.find("ymin").text, 105 | bb.find("xmax").text, 106 | bb.find("ymax").text, 107 | ] 108 | bndbox = tuple( 109 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 110 | ) 111 | 112 | boxes.append(bndbox) 113 | gt_classes.append(self.class_to_ind[name]) 114 | difficult_boxes.append(difficult) 115 | 116 | size = target.find("size") 117 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 118 | 119 | res = { 120 | "boxes": torch.tensor(boxes, dtype=torch.float32), 121 | "labels": torch.tensor(gt_classes), 122 | "difficult": torch.tensor(difficult_boxes), 123 | "im_info": im_info, 124 | } 125 | return res 126 | 127 | def get_img_info(self, index): 128 | img_id = self.ids[index] 129 | anno = ET.parse(self._annopath % img_id).getroot() 130 | size = anno.find("size") 131 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 132 | return {"height": im_info[0], "width": im_info[1]} 133 | 134 | def map_class_id_to_class_name(self, class_id): 135 | return PascalVOCDataset.CLASSES[class_id] 136 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 
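        A minimal usage sketch (assuming distributed training is already
        initialized; names such as ``dataset`` and ``num_epochs`` are placeholders)::

            sampler = DistributedSampler(dataset)
            loader = torch.utils.data.DataLoader(dataset, sampler=sampler, batch_size=2)
            for epoch in range(num_epochs):
                sampler.set_epoch(epoch)  # reshuffle deterministically per epoch
                for batch in loader:
                    ...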
23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 11 | brightness = cfg.INPUT.BRIGHTNESS 12 | contrast = cfg.INPUT.CONTRAST 13 | saturation = cfg.INPUT.SATURATION 14 | hue = cfg.INPUT.HUE 15 | else: 16 | min_size = cfg.INPUT.MIN_SIZE_TEST 17 | max_size = cfg.INPUT.MAX_SIZE_TEST 18 | flip_horizontal_prob = 0.0 19 | flip_vertical_prob = 0.0 20 | brightness = 0.0 21 | contrast = 0.0 22 | saturation = 0.0 23 | hue = 0.0 24 | 25 | to_bgr255 = cfg.INPUT.TO_BGR255 26 | normalize_transform = T.Normalize( 27 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 28 | ) 29 | color_jitter = T.ColorJitter( 30 | brightness=brightness, 31 | contrast=contrast, 32 | saturation=saturation, 33 | hue=hue, 34 | ) 35 | 36 | transform = T.Compose( 37 | [ 38 | color_jitter, 39 | T.Resize(min_size, max_size), 40 | T.RandomHorizontalFlip(flip_horizontal_prob), 41 | T.RandomVerticalFlip(flip_vertical_prob), 42 | T.ToTensor(), 43 | normalize_transform, 44 | ] 45 | ) 46 | return transform 47 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
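# Editorial worked example for Resize.get_size() defined below, assuming min_size=800
# and max_size=1333:
#   - a 1024 x 768 (w x h) image keeps size = 800, since 1024 / 768 * 800 ~ 1067 <= 1333;
#     here w > h, so the output is (oh, ow) = (800, int(800 * 1024 / 768)) = (800, 1066).
#   - a 2000 x 500 image hits the cap, since 2000 / 500 * 800 = 3200 > 1333; the size is
#     recomputed as round(1333 * 500 / 2000) = 333, giving (oh, ow) = (333, 1332).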
2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | if not isinstance(min_size, (list, tuple)): 30 | min_size = (min_size,) 31 | self.min_size = min_size 32 | self.max_size = max_size 33 | 34 | # modified from torchvision to add support for max size 35 | def get_size(self, image_size): 36 | w, h = image_size 37 | size = random.choice(self.min_size) 38 | max_size = self.max_size 39 | if max_size is not None: 40 | min_original_size = float(min((w, h))) 41 | max_original_size = float(max((w, h))) 42 | if max_original_size / min_original_size * size > max_size: 43 | size = int(round(max_size * min_original_size / max_original_size)) 44 | 45 | if (w <= h and w == size) or (h <= w and h == size): 46 | return (h, w) 47 | 48 | if w < h: 49 | ow = size 50 | oh = int(size * h / w) 51 | else: 52 | oh = size 53 | ow = int(size * w / h) 54 | 55 | return (oh, ow) 56 | 57 | def __call__(self, image, target=None): 58 | size = self.get_size(image.size) 59 | image = F.resize(image, size) 60 | if target is None: 61 | return image 62 | target = target.resize(image.size) 63 | return image, target 64 | 65 | 66 | class RandomHorizontalFlip(object): 67 | def __init__(self, prob=0.5): 68 | self.prob = prob 69 | 70 | def __call__(self, image, target): 71 | if random.random() < self.prob: 72 | image = F.hflip(image) 73 | target = target.transpose(0) 74 | return image, target 75 | 76 | class RandomVerticalFlip(object): 77 | def __init__(self, prob=0.5): 78 | self.prob = prob 79 | 80 | def __call__(self, image, target): 81 | if random.random() < self.prob: 82 | image = F.vflip(image) 83 | target = target.transpose(1) 84 | return image, target 85 | 86 | class ColorJitter(object): 87 | def __init__(self, 88 | brightness=None, 89 | contrast=None, 90 | saturation=None, 91 | hue=None, 92 | ): 93 | self.color_jitter = torchvision.transforms.ColorJitter( 94 | brightness=brightness, 95 | contrast=contrast, 96 | saturation=saturation, 97 | hue=hue,) 98 | 99 | def __call__(self, image, target): 100 | image = self.color_jitter(image) 101 | return image, target 102 | 103 | 104 | class ToTensor(object): 105 | def __call__(self, image, target): 106 | return F.to_tensor(image), target 107 | 108 | 109 | class Normalize(object): 110 | def __init__(self, mean, std, to_bgr255=True): 111 | self.mean = mean 112 | self.std = std 113 | self.to_bgr255 = to_bgr255 114 | 115 | def __call__(self, image, target=None): 116 | if self.to_bgr255: 117 | image = image[[2, 1, 0]] * 255 118 | image = F.normalize(image, mean=self.mean, std=self.std) 119 | if target is None: 120 | return image 121 | return image, target 122 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/engine/bbox_aug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as TT 3 | 4 | from maskrcnn_benchmark.config import cfg 5 | from maskrcnn_benchmark.data import transforms as T 6 | from maskrcnn_benchmark.structures.image_list import to_image_list 7 | from maskrcnn_benchmark.structures.bounding_box import BoxList 8 | from maskrcnn_benchmark.modeling.roi_heads.box_head.inference import make_roi_box_post_processor 9 | 10 | 11 | def im_detect_bbox_aug(model, images, device): 12 | # Collect detections computed under different transformations 13 | boxlists_ts = [] 14 | for _ in range(len(images)): 15 | boxlists_ts.append([]) 16 | 17 | def add_preds_t(boxlists_t): 18 | for i, boxlist_t in enumerate(boxlists_t): 19 | if len(boxlists_ts[i]) == 0: 20 | # The first one is identity transform, no need to resize the boxlist 21 | boxlists_ts[i].append(boxlist_t) 22 | else: 23 | # Resize the boxlist as the first one 24 | boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) 25 | 26 | # Compute detections for the original image (identity transform) 27 | boxlists_i = im_detect_bbox( 28 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 29 | ) 30 | add_preds_t(boxlists_i) 31 | 32 | # Perform detection on the horizontally flipped image 33 | if cfg.TEST.BBOX_AUG.H_FLIP: 34 | boxlists_hf = im_detect_bbox_hflip( 35 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 36 | ) 37 | add_preds_t(boxlists_hf) 38 | 39 | # Compute detections at different scales 40 | for scale in cfg.TEST.BBOX_AUG.SCALES: 41 | max_size = cfg.TEST.BBOX_AUG.MAX_SIZE 42 | boxlists_scl = im_detect_bbox_scale( 43 | model, images, scale, max_size, device 44 | ) 45 | add_preds_t(boxlists_scl) 46 | 47 | if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: 48 | boxlists_scl_hf = im_detect_bbox_scale( 49 | model, images, scale, max_size, device, hflip=True 50 | ) 51 | add_preds_t(boxlists_scl_hf) 52 | 53 | # Merge boxlists detected by different bbox aug params 54 | boxlists = [] 55 | for i, boxlist_ts in enumerate(boxlists_ts): 56 | bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) 57 | scores = torch.cat([boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) 58 | boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) 59 | boxlist.add_field('scores', scores) 60 | boxlists.append(boxlist) 61 | 62 | # Apply NMS and limit the final detections 63 | results = [] 64 | post_processor = make_roi_box_post_processor(cfg) 65 | for boxlist in boxlists: 66 | results.append(post_processor.filter_results(boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)) 67 | 68 | return results 69 | 70 | 71 | def im_detect_bbox(model, images, target_scale, target_max_size, device): 72 | """ 73 | Performs bbox detection on the original image. 74 | """ 75 | transform = TT.Compose([ 76 | T.Resize(target_scale, target_max_size), 77 | TT.ToTensor(), 78 | T.Normalize( 79 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 80 | ) 81 | ]) 82 | images = [transform(image) for image in images] 83 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 84 | return model(images.to(device)) 85 | 86 | 87 | def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device): 88 | """ 89 | Performs bbox detection on the horizontally flipped image. 
90 | Function signature is the same as for im_detect_bbox. 91 | """ 92 | transform = TT.Compose([ 93 | T.Resize(target_scale, target_max_size), 94 | TT.RandomHorizontalFlip(1.0), 95 | TT.ToTensor(), 96 | T.Normalize( 97 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 98 | ) 99 | ]) 100 | images = [transform(image) for image in images] 101 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 102 | boxlists = model(images.to(device)) 103 | 104 | # Invert the detections computed on the flipped image 105 | boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists] 106 | return boxlists_inv 107 | 108 | 109 | def im_detect_bbox_scale(model, images, target_scale, target_max_size, device, hflip=False): 110 | """ 111 | Computes bbox detections at the given scale. 112 | Returns predictions in the scaled image space. 113 | """ 114 | if hflip: 115 | boxlists_scl = im_detect_bbox_hflip(model, images, target_scale, target_max_size, device) 116 | else: 117 | boxlists_scl = im_detect_bbox(model, images, target_scale, target_max_size, device) 118 | return boxlists_scl 119 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/engine/trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import datetime 3 | import logging 4 | import time 5 | 6 | import torch 7 | import torch.distributed as dist 8 | 9 | from maskrcnn_benchmark.utils.comm import get_world_size 10 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 11 | 12 | from apex import amp 13 | 14 | def reduce_loss_dict(loss_dict): 15 | """ 16 | Reduce the loss dictionary from all processes so that process with rank 17 | 0 has the averaged results. Returns a dict with the same fields as 18 | loss_dict, after reduction. 19 | """ 20 | world_size = get_world_size() 21 | if world_size < 2: 22 | return loss_dict 23 | with torch.no_grad(): 24 | loss_names = [] 25 | all_losses = [] 26 | for k in sorted(loss_dict.keys()): 27 | loss_names.append(k) 28 | all_losses.append(loss_dict[k]) 29 | all_losses = torch.stack(all_losses, dim=0) 30 | dist.reduce(all_losses, dst=0) 31 | if dist.get_rank() == 0: 32 | # only main process gets accumulated, so only divide by 33 | # world_size in this case 34 | all_losses /= world_size 35 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 36 | return reduced_losses 37 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/S2G-RETRIEVAL.md: -------------------------------------------------------------------------------- 1 | # Sentence-to-Graph Retrieval (S2G) 2 | 3 | Apologies in advance: this part of the code is less polished and less organized than the rest. 4 | 5 | ## Preprocessing 6 | 7 | Run ```maskrcnn_benchmark/image_retrieval/preprocessing.py``` to process the annotations and checkpoints: ```detected_path``` should point to the checkpoint you want to use; ```vg_data, vg_dict, vg_info``` should already have been downloaded if you followed DATASET.md; ```cap_graph``` contains the ground-truth captions and the generated sentence graphs (you can download it from [here](https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21779999&authkey=AGW0Wxjb1JSDFnc)). We use [SceneGraphParser](https://github.com/vacancy/SceneGraphParser) to generate these sentence graphs.
8 | 9 | You also need to set the ```cap_graph``` and ```vg_dict``` paths in ```maskrcnn_benchmark/image_retrieval/dataloader.py``` manually. 10 | 11 | ## Training and Evaluation 12 | 13 | Run ```tools/image_retrieval_main.py``` for both training and evaluation. 14 | 15 | To load the scene graphs generated by a given SGG checkpoint, you need to manually set ```sg_train_path``` and ```sg_test_path``` in ```tools/image_retrieval_main.py```, which means you need to evaluate your model on **both the training and testing sets** to obtain the crude scene graphs. Our evaluation code automatically saves the crude SGGs into ```checkpoints/MODEL_NAME/inference/VG_stanford_filtered_wth_attribute_test/``` or ```checkpoints/MODEL_NAME/inference/VG_stanford_filtered_wth_attribute_train/```; the preprocessing code ```maskrcnn_benchmark/image_retrieval/preprocessing.py``` then turns these into the inputs expected at ```sg_train_path``` and ```sg_test_path```. 16 | 17 | ## Results 18 | 19 | Sentence-to-Graph Retrieval (S2G) results are given in the paper [Unbiased Scene Graph Generation from Biased Training](https://arxiv.org/abs/2002.11949): 20 | 21 | ![alt text](../../demo/TDE_Results3.png "from 'Unbiased Scene Graph Generation from Biased Training'") 22 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/dataloader.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 2 | 3 | import argparse 4 | import os 5 | import time 6 | import datetime 7 | import json 8 | import random 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.nn.utils import clip_grad_norm_ 14 | import torch.utils.data as data 15 | from torch.nn.utils import weight_norm 16 | from tqdm import tqdm 17 | 18 | from maskrcnn_benchmark.config import cfg 19 | from maskrcnn_benchmark.data import make_data_loader 20 | from maskrcnn_benchmark.solver import make_lr_scheduler 21 | from maskrcnn_benchmark.solver import make_optimizer 22 | from maskrcnn_benchmark.engine.trainer import reduce_loss_dict 23 | from maskrcnn_benchmark.engine.inference import inference 24 | from maskrcnn_benchmark.modeling.detector import build_detection_model 25 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 26 | from maskrcnn_benchmark.utils.checkpoint import clip_grad_norm 27 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 28 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank, all_gather 29 | from maskrcnn_benchmark.utils.imports import import_file 30 | from maskrcnn_benchmark.utils.logger import setup_logger, debug_print 31 | from maskrcnn_benchmark.utils.miscellaneous import mkdir, save_config 32 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 33 | 34 | class SGEncoding(data.Dataset): 35 | """ SGEncoding dataset """ 36 | def __init__(self, train_ids, test_ids, sg_data, test_on=False, val_on=False, num_test=5000, num_val=5000): 37 | super(SGEncoding, self).__init__() 38 | cap_graph =
json.load(open('/data1/vg_capgraphs_anno.json')) 39 | vg_dict = json.load(open('/home/kaihua/projects/maskrcnn-benchmark/datasets/vg/VG-SGG-dicts-with-attri.json')) 40 | self.img_txt_sg = sg_data 41 | self.key_list = list(self.img_txt_sg.keys()) 42 | self.key_list.sort() 43 | self.train_ids = train_ids 44 | self.test_ids = test_ids 45 | if test_on: 46 | self.key_list = self.test_ids[:num_test] 47 | elif val_on: 48 | self.key_list = self.test_ids[num_test:num_test+num_val] 49 | else: 50 | self.key_list = self.test_ids[num_test+num_val:] + self.train_ids 51 | 52 | # generate union predicate vocabulary 53 | self.sgg_rel_vocab = list(set(cap_graph['idx_to_meta_predicate'].values())) 54 | self.txt_rel_vocab = list(set(cap_graph['cap_predicate'].keys())) 55 | 56 | # generate union object vocabulary 57 | self.sgg_obj_vocab = list(set(vg_dict['idx_to_label'].values())) 58 | self.txt_obj_vocab = list(set(cap_graph['cap_category'].keys())) 59 | 60 | # vocabulary length 61 | self.num_sgg_rel = len(self.sgg_rel_vocab) 62 | self.num_txt_rel = len(self.txt_rel_vocab) 63 | self.num_sgg_obj = len(self.sgg_obj_vocab) 64 | self.num_txt_obj = len(self.txt_obj_vocab) 65 | 66 | def _to_tensor(self, inp_dict): 67 | return {'entities': torch.LongTensor(inp_dict['entities']), 68 | 'relations': torch.LongTensor(inp_dict['relations'])} 69 | 70 | def _generate_tensor_by_idx(self, idx): 71 | img = self._to_tensor(self.img_txt_sg[self.key_list[idx]]['img']) 72 | img_graph = torch.FloatTensor(self.img_txt_sg[self.key_list[idx]]['image_graph']) 73 | txt = self._to_tensor(self.img_txt_sg[self.key_list[idx]]['txt']) 74 | txt_graph = torch.FloatTensor(self.img_txt_sg[self.key_list[idx]]['text_graph']) 75 | img['graph'] = img_graph 76 | txt['graph'] = txt_graph 77 | return img, txt 78 | 79 | def __getitem__(self, item): 80 | fg_img, fg_txt = self._generate_tensor_by_idx(item) 81 | # generate negative sample 82 | bg_idx = item 83 | while(bg_idx == item): 84 | bg_idx = int(random.random() * len(self.key_list)) 85 | bg_img, bg_txt = self._generate_tensor_by_idx(bg_idx) 86 | return fg_img, fg_txt, bg_img, bg_txt 87 | 88 | def __len__(self): 89 | return len(self.key_list) 90 | 91 | class SimpleCollator(object): 92 | def __call__(self, batch): 93 | return list(zip(*batch)) 94 | 95 | def get_loader(cfg, train_ids, test_ids, sg_data, test_on=False, val_on=False, num_test=5000, num_val=1000): 96 | """ Returns a data loader for the desired split """ 97 | split = SGEncoding(train_ids, test_ids, sg_data=sg_data, test_on=test_on, val_on=val_on, num_test=num_test, num_val=num_val) 98 | 99 | loader = torch.utils.data.DataLoader(split, 100 | batch_size=cfg.SOLVER.IMS_PER_BATCH, 101 | shuffle=not (test_on or val_on), # only shuffle the data in training 102 | pin_memory=True, 103 | num_workers=4, 104 | collate_fn=SimpleCollator(), 105 | ) 106 | return loader 107 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/evaluation.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 2 | 3 | import argparse 4 | import os 5 | import time 6 | import datetime 7 | import json 8 | import random 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.nn.utils import clip_grad_norm_ 14 | import torch.utils.data as data 15 | from torch.nn.utils import weight_norm 16 | from tqdm import tqdm 17 | 18 | from 
maskrcnn_benchmark.config import cfg 19 | from maskrcnn_benchmark.data import make_data_loader 20 | from maskrcnn_benchmark.solver import make_lr_scheduler 21 | from maskrcnn_benchmark.solver import make_optimizer 22 | from maskrcnn_benchmark.engine.trainer import reduce_loss_dict 23 | from maskrcnn_benchmark.engine.inference import inference 24 | from maskrcnn_benchmark.modeling.detector import build_detection_model 25 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 26 | from maskrcnn_benchmark.utils.checkpoint import clip_grad_norm 27 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 28 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank, all_gather 29 | from maskrcnn_benchmark.utils.imports import import_file 30 | from maskrcnn_benchmark.utils.logger import setup_logger, debug_print 31 | from maskrcnn_benchmark.utils.miscellaneous import mkdir, save_config 32 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 33 | 34 | def evaluator(logger, input_lists): 35 | cat_data = [] 36 | for item in input_lists: 37 | cat_data.append(item[0]) 38 | # shape [num_image, 2, hidden_dim] 39 | cat_data = torch.cat(cat_data, dim=0).squeeze(2) 40 | 41 | similarity = cat_data[:, 0, :] @ (cat_data[:, 1, :].transpose(0,1)) # img to txt 42 | similarity = similarity.transpose(0,1) # txt to img 43 | 44 | pred_rank = (similarity > similarity.diag().view(-1, 1)).sum(-1) 45 | 46 | num_sample = pred_rank.shape[0] 47 | thres = [1, 5, 10, 20, 50, 100] 48 | for k in thres: 49 | logger.info('Recall @ %d: %.4f; ' % (k, float((pred_rank= 0)).float() * term2 * (1 - alpha) 53 | 54 | 55 | class SigmoidFocalLoss(nn.Module): 56 | def __init__(self, gamma, alpha): 57 | super(SigmoidFocalLoss, self).__init__() 58 | self.gamma = gamma 59 | self.alpha = alpha 60 | 61 | def forward(self, logits, targets): 62 | device = logits.device 63 | if logits.is_cuda: 64 | loss_func = sigmoid_focal_loss_cuda 65 | else: 66 | loss_func = sigmoid_focal_loss_cpu 67 | 68 | loss = loss_func(logits, targets, self.gamma, self.alpha) 69 | return loss.sum() 70 | 71 | def __repr__(self): 72 | tmpstr = self.__class__.__name__ + "(" 73 | tmpstr += "gamma=" + str(self.gamma) 74 | tmpstr += ", alpha=" + str(self.alpha) 75 | tmpstr += ")" 76 | return tmpstr 77 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. 
/ 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | 18 | 19 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | from . import fbnet 4 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | from . import vgg 11 | 12 | 13 | @registry.BACKBONES.register("VGG-16") 14 | def build_vgg_fpn_backbone(cfg): 15 | body = vgg.VGG16(cfg) 16 | out_channels = cfg.MODEL.VGG.VGG16_OUT_CHANNELS 17 | model = nn.Sequential(OrderedDict([("body", body)])) 18 | model.out_channels = out_channels 19 | return model 20 | 21 | 22 | @registry.BACKBONES.register("R-50-C4") 23 | @registry.BACKBONES.register("R-50-C5") 24 | @registry.BACKBONES.register("R-101-C4") 25 | @registry.BACKBONES.register("R-101-C5") 26 | def build_resnet_backbone(cfg): 27 | body = resnet.ResNet(cfg) 28 | model = nn.Sequential(OrderedDict([("body", body)])) 29 | model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 30 | return model 31 | 32 | 33 | @registry.BACKBONES.register("R-50-FPN") 34 | @registry.BACKBONES.register("R-101-FPN") 35 | @registry.BACKBONES.register("R-152-FPN") 36 | def build_resnet_fpn_backbone(cfg): 37 | body = resnet.ResNet(cfg) 38 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 39 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 40 | fpn = fpn_module.FPN( 41 | in_channels_list=[ 42 | in_channels_stage2, 43 | in_channels_stage2 * 2, 44 | in_channels_stage2 * 4, 45 | in_channels_stage2 * 8, 46 | ], 47 | out_channels=out_channels, 48 | conv_block=conv_with_kaiming_uniform( 49 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 50 | ), 51 | top_blocks=fpn_module.LastLevelMaxPool(), 52 | ) 53 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 54 | model.out_channels = out_channels 55 | return model 56 | 57 | 58 | @registry.BACKBONES.register("R-50-FPN-RETINANET") 59 | @registry.BACKBONES.register("R-101-FPN-RETINANET") 60 | def build_resnet_fpn_p3p7_backbone(cfg): 61 | body = resnet.ResNet(cfg) 62 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 63 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 64 | 
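    # P6/P7 input channels: C5 (the raw stage-5 backbone output, in_channels_stage2 * 8
    # channels) when RETINANET.USE_C5 is set, otherwise P5 (already reduced to
    # out_channels by the FPN laterals); LastLevelP6P7 picks C5 vs P5 accordingly via use_P5.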
in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 65 | else out_channels 66 | fpn = fpn_module.FPN( 67 | in_channels_list=[ 68 | 0, 69 | in_channels_stage2 * 2, 70 | in_channels_stage2 * 4, 71 | in_channels_stage2 * 8, 72 | ], 73 | out_channels=out_channels, 74 | conv_block=conv_with_kaiming_uniform( 75 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 76 | ), 77 | top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels), 78 | ) 79 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 80 | model.out_channels = out_channels 81 | return model 82 | 83 | 84 | def build_backbone(cfg): 85 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 86 | "cfg.MODEL.BACKBONE.CONV_BODY: {} are not registered in registry".format( 87 | cfg.MODEL.BACKBONE.CONV_BODY 88 | ) 89 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 90 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | class FPN(nn.Module): 8 | """ 9 | Module that adds FPN on top of a list of feature maps. 10 | The feature maps are currently supposed to be in increasing depth 11 | order, and must be consecutive 12 | """ 13 | 14 | def __init__( 15 | self, in_channels_list, out_channels, conv_block, top_blocks=None 16 | ): 17 | """ 18 | Arguments: 19 | in_channels_list (list[int]): number of channels for each feature map that 20 | will be fed 21 | out_channels (int): number of channels of the FPN representation 22 | top_blocks (nn.Module or None): if provided, an extra operation will 23 | be performed on the output of the last (smallest resolution) 24 | FPN output, and the result will extend the result list 25 | """ 26 | super(FPN, self).__init__() 27 | self.inner_blocks = [] 28 | self.layer_blocks = [] 29 | for idx, in_channels in enumerate(in_channels_list, 1): 30 | inner_block = "fpn_inner{}".format(idx) 31 | layer_block = "fpn_layer{}".format(idx) 32 | 33 | if in_channels == 0: 34 | continue 35 | inner_block_module = conv_block(in_channels, out_channels, 1) 36 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 37 | self.add_module(inner_block, inner_block_module) 38 | self.add_module(layer_block, layer_block_module) 39 | self.inner_blocks.append(inner_block) 40 | self.layer_blocks.append(layer_block) 41 | self.top_blocks = top_blocks 42 | 43 | def forward(self, x): 44 | """ 45 | Arguments: 46 | x (list[Tensor]): feature maps for each feature level. 47 | Returns: 48 | results (tuple[Tensor]): feature maps after FPN layers. 49 | They are ordered from highest resolution first. 
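            As a rough worked example (assuming a ResNet-FPN with out_channels=256,
            an 800x800 input and LastLevelMaxPool as the top block): the returned
            maps have strides 4, 8, 16 and 32, i.e. spatial sizes 200, 100, 50 and
            25, plus a 13x13 map appended by the max-pool top block.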
50 | """ 51 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 52 | results = [] 53 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 54 | for feature, inner_block, layer_block in zip( 55 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 56 | ): 57 | if not inner_block: 58 | continue 59 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 60 | inner_lateral = getattr(self, inner_block)(feature) 61 | # TODO use size instead of scale to make it robust to different sizes 62 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 63 | # mode='bilinear', align_corners=False) 64 | last_inner = inner_lateral + inner_top_down 65 | results.insert(0, getattr(self, layer_block)(last_inner)) 66 | 67 | if isinstance(self.top_blocks, LastLevelP6P7): 68 | last_results = self.top_blocks(x[-1], results[-1]) 69 | results.extend(last_results) 70 | elif isinstance(self.top_blocks, LastLevelMaxPool): 71 | last_results = self.top_blocks(results[-1]) 72 | results.extend(last_results) 73 | 74 | return tuple(results) 75 | 76 | 77 | class LastLevelMaxPool(nn.Module): 78 | def forward(self, x): 79 | return [F.max_pool2d(x, 1, 2, 0)] 80 | 81 | 82 | class LastLevelP6P7(nn.Module): 83 | """ 84 | This module is used in RetinaNet to generate extra layers, P6 and P7. 85 | """ 86 | def __init__(self, in_channels, out_channels): 87 | super(LastLevelP6P7, self).__init__() 88 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 89 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 90 | for module in [self.p6, self.p7]: 91 | nn.init.kaiming_uniform_(module.weight, a=1) 92 | nn.init.constant_(module.bias, 0) 93 | self.use_P5 = in_channels == out_channels 94 | 95 | def forward(self, c5, p5): 96 | x = p5 if self.use_P5 else c5 97 | p6 = self.p6(x) 98 | p7 = self.p7(F.relu(p6)) 99 | return [p6, p7] 100 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Variant of the resnet module that takes cfg as an argument. 4 | Example usage. Strings may be specified in the config file. 5 | model = ResNet( 6 | "StemWithFixedBatchNorm", 7 | "BottleneckWithFixedBatchNorm", 8 | "ResNet50StagesTo4", 9 | ) 10 | OR: 11 | model = ResNet( 12 | "StemWithGN", 13 | "BottleneckWithGN", 14 | "ResNet50StagesTo4", 15 | ) 16 | Custom implementations may be written in user code and hooked in via the 17 | `register_*` functions. 
18 | """ 19 | from collections import namedtuple 20 | 21 | import torch 22 | import torch.nn.functional as F 23 | from torch import nn 24 | 25 | import torchvision.models as models 26 | from maskrcnn_benchmark.layers import FrozenBatchNorm2d 27 | from maskrcnn_benchmark.layers import Conv2d 28 | from maskrcnn_benchmark.layers import DFConv2d 29 | from maskrcnn_benchmark.modeling.make_layers import group_norm 30 | from maskrcnn_benchmark.utils.registry import Registry 31 | 32 | 33 | class VGG16(nn.Module): 34 | def __init__(self, cfg): 35 | super(VGG16, self).__init__() 36 | vgg = models.vgg16(pretrained=True) 37 | self.conv_body = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 38 | 39 | def forward(self, x): 40 | output = [] 41 | output.append(self.conv_body(x)) 42 | return output 43 | 44 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentace of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative example. 
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 
60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 6 | 7 | 8 | def build_detection_model(cfg): 9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 10 | return meta_arch(cfg) 11 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | self.cfg = cfg.clone() 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg, self.backbone.out_channels) 31 | self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels) 32 | 33 | def forward(self, images, targets=None, logger=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] contains additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | images = to_image_list(images) 49 | features = self.backbone(images.tensors) 50 | proposals, proposal_losses = self.rpn(images, features, targets) 51 | if self.roi_heads: 52 | x, result, detector_losses = self.roi_heads(features, proposals, targets, logger) 53 | else: 54 | # RPN-only models don't have roi_heads 55 | x = features 56 | result = proposals 57 | detector_losses = {} 58 | 59 | if self.training: 60 | losses = {} 61 | losses.update(detector_losses) 62 | if not self.cfg.MODEL.RELATION_ON: 63 | # During the relationship training stage, the rpn_head should be fixed, and no loss. 64 | losses.update(proposal_losses) 65 | return losses 66 | 67 | return result 68 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | from maskrcnn_benchmark.config import cfg 10 | from maskrcnn_benchmark.layers import Conv2d 11 | 12 | 13 | def get_group_gn(dim, dim_per_gp, num_groups): 14 | """get number of groups used by GroupNorm, based on number of channels.""" 15 | assert dim_per_gp == -1 or num_groups == -1, \ 16 | "GroupNorm: can only specify G or C/G." 
17 | 18 | if dim_per_gp > 0: 19 | assert dim % dim_per_gp == 0, \ 20 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 21 | group_gn = dim // dim_per_gp 22 | else: 23 | assert dim % num_groups == 0, \ 24 | "dim: {}, num_groups: {}".format(dim, num_groups) 25 | group_gn = num_groups 26 | 27 | return group_gn 28 | 29 | 30 | def group_norm(out_channels, affine=True, divisor=1): 31 | out_channels = out_channels // divisor 32 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 33 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 34 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 35 | return torch.nn.GroupNorm( 36 | get_group_gn(out_channels, dim_per_gp, num_groups), 37 | out_channels, 38 | eps, 39 | affine 40 | ) 41 | 42 | 43 | def make_conv3x3( 44 | in_channels, 45 | out_channels, 46 | dilation=1, 47 | stride=1, 48 | use_gn=False, 49 | use_relu=False, 50 | kaiming_init=True 51 | ): 52 | conv = Conv2d( 53 | in_channels, 54 | out_channels, 55 | kernel_size=3, 56 | stride=stride, 57 | padding=dilation, 58 | dilation=dilation, 59 | bias=False if use_gn else True 60 | ) 61 | if kaiming_init: 62 | nn.init.kaiming_normal_( 63 | conv.weight, mode="fan_out", nonlinearity="relu" 64 | ) 65 | else: 66 | torch.nn.init.normal_(conv.weight, std=0.01) 67 | if not use_gn: 68 | nn.init.constant_(conv.bias, 0) 69 | module = [conv,] 70 | if use_gn: 71 | module.append(group_norm(out_channels)) 72 | if use_relu: 73 | module.append(nn.ReLU(inplace=True)) 74 | if len(module) > 1: 75 | return nn.Sequential(*module) 76 | return conv 77 | 78 | 79 | def make_fc(dim_in, hidden_dim, use_gn=False): 80 | ''' 81 | Caffe2 implementation uses XavierFill, which in fact 82 | corresponds to kaiming_uniform_ in PyTorch 83 | ''' 84 | if use_gn: 85 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 86 | nn.init.kaiming_uniform_(fc.weight, a=1) 87 | return nn.Sequential(fc, group_norm(hidden_dim)) 88 | fc = nn.Linear(dim_in, hidden_dim) 89 | nn.init.kaiming_uniform_(fc.weight, a=1) 90 | nn.init.constant_(fc.bias, 0) 91 | return fc 92 | 93 | 94 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 95 | def make_conv( 96 | in_channels, out_channels, kernel_size, stride=1, dilation=1 97 | ): 98 | conv = Conv2d( 99 | in_channels, 100 | out_channels, 101 | kernel_size=kernel_size, 102 | stride=stride, 103 | padding=dilation * (kernel_size - 1) // 2, 104 | dilation=dilation, 105 | bias=False if use_gn else True 106 | ) 107 | # Caffe2 implementation uses XavierFill, which in fact 108 | # corresponds to kaiming_uniform_ in PyTorch 109 | nn.init.kaiming_uniform_(conv.weight, a=1) 110 | if not use_gn: 111 | nn.init.constant_(conv.bias, 0) 112 | module = [conv,] 113 | if use_gn: 114 | module.append(group_norm(out_channels)) 115 | if use_relu: 116 | module.append(nn.ReLU(inplace=True)) 117 | if len(module) > 1: 118 | return nn.Sequential(*module) 119 | return conv 120 | 121 | return make_conv 122 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
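# Editorial usage sketch (mirroring how backbone.py uses these registries): builders
# register themselves under a string key and are later looked up via the config, e.g.
#
#   @registry.BACKBONES.register("R-50-FPN")
#   def build_resnet_fpn_backbone(cfg):
#       ...
#
#   build_fn = registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY]
#   model = build_fn(cfg)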
2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | RPN_HEADS = Registry() 7 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 8 | ROI_BOX_PREDICTOR = Registry() 9 | ROI_ATTRIBUTE_FEATURE_EXTRACTORS = Registry() 10 | ROI_ATTRIBUTE_PREDICTOR = Registry() 11 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 12 | ROI_KEYPOINT_PREDICTOR = Registry() 13 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 14 | ROI_MASK_PREDICTOR = Registry() 15 | ROI_RELATION_FEATURE_EXTRACTORS = Registry() 16 | ROI_RELATION_PREDICTOR = Registry() 17 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/attribute_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_attribute_feature_extractors import make_roi_attribute_feature_extractor 6 | from .roi_attribute_predictors import make_roi_attribute_predictor 7 | from .loss import make_roi_attribute_loss_evaluator 8 | 9 | def add_attribute_logits(proposals, attri_logits): 10 | slice_idxs = [0] 11 | for i in range(len(proposals)): 12 | slice_idxs.append(len(proposals[i])+slice_idxs[-1]) 13 | proposals[i].add_field("attribute_logits", attri_logits[slice_idxs[i]:slice_idxs[i+1]]) 14 | return proposals 15 | 16 | class ROIAttributeHead(torch.nn.Module): 17 | """ 18 | Generic ATTRIBUTE Head class. 
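    Extracts per-proposal attribute features and, except in predcls mode, predicts
    attribute logits that are attached to each proposal as an "attribute_logits" field.
    When MODEL.RELATION_ON is set the head is kept fixed and contributes no loss;
    otherwise the loss evaluator is applied during training.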
19 | """ 20 | 21 | def __init__(self, cfg, in_channels): 22 | super(ROIAttributeHead, self).__init__() 23 | self.cfg = cfg.clone() 24 | self.feature_extractor = make_roi_attribute_feature_extractor(cfg, in_channels, half_out=self.cfg.MODEL.ATTRIBUTE_ON) 25 | self.predictor = make_roi_attribute_predictor(cfg, self.feature_extractor.out_channels) 26 | self.loss_evaluator = make_roi_attribute_loss_evaluator(cfg) 27 | 28 | def forward(self, features, proposals, targets=None): 29 | """ 30 | features: extracted from box_head 31 | """ 32 | # Attribute head is fixed when we train the relation head 33 | if self.cfg.MODEL.RELATION_ON: 34 | if self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_BOX and self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL: 35 | # mode==predcls 36 | # no need to predict attribute, get grond truth 37 | x = self.feature_extractor(features, proposals) 38 | return x, proposals, {} 39 | # mode==sgcls or sgdet 40 | else: 41 | x = self.feature_extractor(features, proposals) 42 | attri_logits = self.predictor(x) 43 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 44 | proposals = add_attribute_logits(proposals, attri_logits) 45 | return x, proposals, {} 46 | 47 | # Train/Test the attribute head 48 | x = self.feature_extractor(features, proposals) 49 | attri_logits = self.predictor(x) 50 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 51 | proposals = add_attribute_logits(proposals, attri_logits) 52 | 53 | if not self.training: 54 | return x, proposals, {} 55 | 56 | # proposals need to contain the attributes fields 57 | loss_attribute = self.loss_evaluator(proposals, attri_logits) 58 | return x, proposals, dict(loss_attribute=loss_attribute) 59 | 60 | def build_roi_attribute_head(cfg, in_channels): 61 | """ 62 | Constructs a new attribute head. 63 | By default, uses ROIAttributeHead, but if it turns out not to be enough, just register a new class 64 | and make it a parameter in the config 65 | """ 66 | return ROIAttributeHead(cfg, in_channels) 67 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import smooth_l1_loss 6 | from maskrcnn_benchmark.modeling.box_coder import BoxCoder 7 | from maskrcnn_benchmark.modeling.matcher import Matcher 8 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 9 | from maskrcnn_benchmark.modeling.utils import cat 10 | 11 | 12 | class AttributeHeadLossComputation(object): 13 | """ 14 | Computes the loss for attribute head 15 | """ 16 | 17 | def __init__( 18 | self, 19 | loss_weight=0.1, 20 | num_attri_cat=201, 21 | max_num_attri=10, 22 | attribute_sampling=True, 23 | attribute_bgfg_ratio=5, 24 | use_binary_loss=True, 25 | pos_weight=1, 26 | ): 27 | self.loss_weight = loss_weight 28 | self.num_attri_cat = num_attri_cat 29 | self.max_num_attri = max_num_attri 30 | self.attribute_sampling = attribute_sampling 31 | self.attribute_bgfg_ratio = attribute_bgfg_ratio 32 | self.use_binary_loss = use_binary_loss 33 | self.pos_weight = pos_weight 34 | 35 | def __call__(self, proposals, attri_logits): 36 | """ 37 | Calculcate attribute loss 38 | """ 39 | attributes = cat([proposal.get_field("attributes") for proposal in proposals], dim=0) 40 | assert attributes.shape[0] == attri_logits.shape[0] 41 | 42 | # generate attribute targets 43 | attribute_targets, selected_idxs = self.generate_attributes_target(attributes) 44 | 45 | attri_logits = attri_logits[selected_idxs] 46 | attribute_targets = attribute_targets[selected_idxs] 47 | 48 | attribute_loss = self.attribute_loss(attri_logits, attribute_targets) 49 | 50 | return attribute_loss * self.loss_weight 51 | 52 | 53 | def generate_attributes_target(self, attributes): 54 | """ 55 | from list of attribute indexs to [1,0,1,0,0,1] form 56 | """ 57 | assert self.max_num_attri == attributes.shape[1] 58 | num_obj = attributes.shape[0] 59 | 60 | with_attri_idx = (attributes.sum(-1) > 0).long() 61 | without_attri_idx = 1 - with_attri_idx 62 | num_pos = int(with_attri_idx.sum()) 63 | num_neg = int(without_attri_idx.sum()) 64 | assert num_pos + num_neg == num_obj 65 | 66 | if self.attribute_sampling: 67 | num_neg = min(num_neg, num_pos * self.attribute_bgfg_ratio) if num_pos > 0 else 1 68 | 69 | attribute_targets = torch.zeros((num_obj, self.num_attri_cat), device=attributes.device).float() 70 | if not self.use_binary_loss: 71 | attribute_targets[without_attri_idx > 0, 0] = 1.0 72 | 73 | pos_idxs = torch.nonzero(with_attri_idx).squeeze(1) 74 | perm = torch.randperm(num_obj - num_pos, device=attributes.device)[:num_neg] 75 | neg_idxs = torch.nonzero(without_attri_idx).squeeze(1)[perm] 76 | selected_idxs = torch.cat((pos_idxs, neg_idxs), dim=0) 77 | assert selected_idxs.shape[0] == num_neg + num_pos 78 | 79 | for idx in torch.nonzero(with_attri_idx).squeeze(1).tolist(): 80 | for k in range(self.max_num_attri): 81 | att_id = int(attributes[idx, k]) 82 | if att_id == 0: 83 | break 84 | else: 85 | attribute_targets[idx, att_id] = 1 86 | 87 | return attribute_targets, selected_idxs 88 | 89 | def attribute_loss(self, logits, labels): 90 | if self.use_binary_loss: 91 | all_loss = F.binary_cross_entropy_with_logits(logits, labels, pos_weight=torch.FloatTensor([self.pos_weight] * self.num_attri_cat).cuda()) 92 | return all_loss 93 | else: 94 | # soft cross entropy 95 | # cross entropy attribute deteriorate the box head, even with 0.1 weight (although buttom-up top-down use cross entropy attribute) 96 | all_loss = -F.softmax(logits, dim=-1).log() 97 | all_loss = (all_loss * labels).sum(-1) / labels.sum(-1) 98 | return 
all_loss.mean() 99 | 100 | 101 | def make_roi_attribute_loss_evaluator(cfg): 102 | loss_evaluator = AttributeHeadLossComputation( 103 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_LOSS_WEIGHT, 104 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES, 105 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.MAX_ATTRIBUTES, 106 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_SAMPLE, 107 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_RATIO, 108 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.USE_BINARY_LOSS, 109 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.POS_WEIGHT, 110 | ) 111 | 112 | return loss_evaluator 113 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/roi_attribute_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.modeling import registry 3 | from torch import nn 4 | 5 | 6 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_attributes = config.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES  # use the `config` argument; no module-level cfg is imported here 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.att_score = nn.Linear(num_inputs, num_attributes) 16 | 17 | nn.init.normal_(self.att_score.weight, mean=0, std=0.01) 18 | nn.init.constant_(self.att_score.bias, 0) 19 | 20 | def forward(self, x): 21 | x = self.avgpool(x) 22 | x = x.view(x.size(0), -1) 23 | att_logit = self.att_score(x) 24 | 25 | return att_logit 26 | 27 | 28 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FPNPredictor") 29 | class FPNPredictor(nn.Module): 30 | def __init__(self, cfg, in_channels): 31 | super(FPNPredictor, self).__init__() 32 | num_attributes = cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES 33 | representation_size = in_channels 34 | 35 | self.att_score = nn.Linear(representation_size, num_attributes) 36 | 37 | nn.init.normal_(self.att_score.weight, std=0.01) 38 | nn.init.constant_(self.att_score.bias, 0) 39 | 40 | def forward(self, x): 41 | if x.ndimension() == 4: 42 | assert list(x.shape[2:]) == [1, 1] 43 | x = x.view(x.size(0), -1) 44 | 45 | att_logit = self.att_score(x) 46 | 47 | return att_logit 48 | 49 | 50 | def make_roi_attribute_predictor(cfg, in_channels): 51 | func = registry.ROI_ATTRIBUTE_PREDICTOR[cfg.MODEL.ROI_ATTRIBUTE_HEAD.PREDICTOR] 52 | return func(cfg, in_channels) 53 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
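For the AttributeHeadLossComputation listed above, generate_attributes_target turns each row of zero-padded attribute indices into a multi-hot target. A small self-contained illustration with invented ids (num_attri_cat=201, max_num_attri=10):

import torch
attributes = torch.tensor([[3, 7, 0, 0, 0, 0, 0, 0, 0, 0],   # object annotated with attributes 3 and 7
                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])  # object with no attributes
targets = torch.zeros(2, 201)
for i in range(attributes.shape[0]):
    for k in range(attributes.shape[1]):
        att_id = int(attributes[i, k])
        if att_id == 0:
            break                                             # indices are zero-padded, so stop at the first 0
        targets[i, att_id] = 1
assert targets[0, 3] == 1 and targets[0, 7] == 1 and targets.sum() == 2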
2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import smooth_l1_loss 6 | from maskrcnn_benchmark.modeling.box_coder import BoxCoder 7 | from maskrcnn_benchmark.modeling.matcher import Matcher 8 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 9 | from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import ( 10 | BalancedPositiveNegativeSampler 11 | ) 12 | from maskrcnn_benchmark.modeling.utils import cat 13 | 14 | 15 | class FastRCNNLossComputation(object): 16 | """ 17 | Computes the loss for Faster R-CNN. 18 | Also supports FPN 19 | """ 20 | 21 | def __init__(self, cls_agnostic_bbox_reg=False): 22 | self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg 23 | 24 | def assign_label_to_proposals(self, proposals, targets): 25 | for img_idx, (target, proposal) in enumerate(zip(targets, proposals)): 26 | match_quality_matrix = boxlist_iou(target, proposal) 27 | matched_idxs = self.proposal_matcher(match_quality_matrix) 28 | # Fast RCNN only need "labels" field for selecting the targets 29 | target = target.copy_with_fields(["labels", "attributes"]) 30 | matched_targets = target[matched_idxs.clamp(min=0)] 31 | 32 | labels_per_image = matched_targets.get_field("labels").to(dtype=torch.int64) 33 | attris_per_image = matched_targets.get_field("attributes").to(dtype=torch.int64) 34 | 35 | labels_per_image[matched_idxs < 0] = 0 36 | attris_per_image[matched_idxs < 0, :] = 0 37 | proposals[img_idx].add_field("labels", labels_per_image) 38 | proposals[img_idx].add_field("attributes", attris_per_image) 39 | return proposals 40 | 41 | 42 | def __call__(self, class_logits, box_regression, proposals): 43 | """ 44 | Computes the loss for Faster R-CNN. 45 | This requires that the subsample method has been called beforehand. 
46 | 47 | Arguments: 48 | class_logits (list[Tensor]) 49 | box_regression (list[Tensor]) 50 | proposals (list[BoxList]) 51 | 52 | Returns: 53 | classification_loss (Tensor) 54 | box_loss (Tensor) 55 | """ 56 | 57 | class_logits = cat(class_logits, dim=0) 58 | box_regression = cat(box_regression, dim=0) 59 | device = class_logits.device 60 | 61 | labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0) 62 | regression_targets = cat([proposal.get_field("regression_targets") for proposal in proposals], dim=0) 63 | 64 | classification_loss = F.cross_entropy(class_logits, labels.long()) 65 | 66 | # get indices that correspond to the regression targets for 67 | # the corresponding ground truth labels, to be used with 68 | # advanced indexing 69 | sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) 70 | labels_pos = labels[sampled_pos_inds_subset] 71 | if self.cls_agnostic_bbox_reg: 72 | map_inds = torch.tensor([4, 5, 6, 7], device=device) 73 | else: 74 | map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device) 75 | 76 | box_loss = smooth_l1_loss( 77 | box_regression[sampled_pos_inds_subset[:, None], map_inds], 78 | regression_targets[sampled_pos_inds_subset], 79 | size_average=False, 80 | beta=1, 81 | ) 82 | box_loss = box_loss / labels.numel() 83 | 84 | return classification_loss, box_loss 85 | 86 | 87 | def make_roi_box_loss_evaluator(cfg): 88 | cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG 89 | 90 | loss_evaluator = FastRCNNLossComputation(cls_agnostic_bbox_reg) 91 | 92 | return loss_evaluator 93 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
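The advanced indexing with map_inds in the box loss above picks out the 4 regression channels that belong to each positive proposal's ground-truth class. A worked example with invented class ids:

import torch
labels_pos = torch.tensor([2, 5])                               # gt classes of two positive proposals
map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3])
# map_inds == tensor([[ 8,  9, 10, 11],
#                     [20, 21, 22, 23]])
# box_regression[sampled_pos_inds_subset[:, None], map_inds] therefore selects exactly
# the 4 box deltas predicted for class 2 (first proposal) and class 5 (second proposal).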
2 | from maskrcnn_benchmark.modeling import registry 3 | from torch import nn 4 | 5 | 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.cls_score = nn.Linear(num_inputs, num_classes) 16 | num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 17 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 34 | class FPNPredictor(nn.Module): 35 | def __init__(self, cfg, in_channels): 36 | super(FPNPredictor, self).__init__() 37 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 38 | representation_size = in_channels 39 | 40 | self.cls_score = nn.Linear(representation_size, num_classes) 41 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 42 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 43 | 44 | nn.init.normal_(self.cls_score.weight, std=0.01) 45 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 46 | for l in [self.cls_score, self.bbox_pred]: 47 | nn.init.constant_(l.bias, 0) 48 | 49 | def forward(self, x): 50 | if x.ndimension() == 4: 51 | assert list(x.shape[2:]) == [1, 1] 52 | x = x.view(x.size(0), -1) 53 | cls_logit = self.cls_score(x) 54 | bbox_pred = self.bbox_pred(x) 55 | 56 | return cls_logit, bbox_pred 57 | 58 | 59 | def make_roi_box_predictor(cfg, in_channels): 60 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 61 | return func(cfg, in_channels) 62 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg, in_channels): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 14 | self.predictor = make_roi_keypoint_predictor( 15 | cfg, self.feature_extractor.out_channels) 16 | self.post_processor = 
make_roi_keypoint_post_processor(cfg) 17 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 18 | 19 | def forward(self, features, proposals, targets=None): 20 | """ 21 | Arguments: 22 | features (list[Tensor]): feature-maps from possibly several levels 23 | proposals (list[BoxList]): proposal boxes 24 | targets (list[BoxList], optional): the ground-truth targets. 25 | 26 | Returns: 27 | x (Tensor): the result of the feature extractor 28 | proposals (list[BoxList]): during training, the original proposals 29 | are returned. During testing, the predicted boxlists are returned 30 | with the `mask` field set 31 | losses (dict[Tensor]): During training, returns the losses for the 32 | head. During testing, returns an empty dict. 33 | """ 34 | if self.training: 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.subsample(proposals, targets) 37 | 38 | x = self.feature_extractor(features, proposals) 39 | kp_logits = self.predictor(x) 40 | 41 | if not self.training: 42 | result = self.post_processor(kp_logits, proposals) 43 | return x, result, {} 44 | 45 | loss_kp = self.loss_evaluator(proposals, kp_logits) 46 | 47 | return x, proposals, dict(loss_kp=loss_kp) 48 | 49 | 50 | def build_roi_keypoint_head(cfg, in_channels): 51 | return ROIKeypointHead(cfg, in_channels) 52 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling import registry 5 | from maskrcnn_benchmark.modeling.poolers import Pooler 6 | 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- 
/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from maskrcnn_benchmark import layers 4 | from maskrcnn_benchmark.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel = 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 
17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg, in_channels): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels) 41 | self.predictor = make_roi_mask_predictor( 42 | cfg, self.feature_extractor.out_channels) 43 | self.post_processor = make_roi_mask_post_processor(cfg) 44 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 45 | 46 | def forward(self, features, proposals, targets=None): 47 | """ 48 | Arguments: 49 | features (list[Tensor]): feature-maps from possibly several levels 50 | proposals (list[BoxList]): proposal boxes 51 | targets (list[BoxList], optional): the ground-truth targets. 52 | 53 | Returns: 54 | x (Tensor): the result of the feature extractor 55 | proposals (list[BoxList]): during training, the original proposals 56 | are returned. During testing, the predicted boxlists are returned 57 | with the `mask` field set 58 | losses (dict[Tensor]): During training, returns the losses for the 59 | head. During testing, returns an empty dict. 60 | """ 61 | 62 | if self.training: 63 | # during training, only focus on positive boxes 64 | all_proposals = proposals 65 | proposals, positive_inds = keep_only_positive_boxes(proposals) 66 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 67 | x = features 68 | x = x[torch.cat(positive_inds, dim=0)] 69 | else: 70 | x = self.feature_extractor(features, proposals) 71 | mask_logits = self.predictor(x) 72 | 73 | if not self.training: 74 | result = self.post_processor(mask_logits, proposals) 75 | return x, result, {} 76 | 77 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 78 | 79 | return x, all_proposals, dict(loss_mask=loss_mask) 80 | 81 | 82 | def build_roi_mask_head(cfg, in_channels): 83 | return ROIMaskHead(cfg, in_channels) 84 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
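The filtering done by keep_only_positive_boxes above, reduced to its tensor core with invented labels:

import torch
labels = torch.tensor([0, 2, 0, 1])        # per-proposal labels for one image
inds_mask = labels > 0                     # tensor([False, True, False, True])
inds = inds_mask.nonzero().squeeze(1)      # tensor([1, 3]) -> proposals 1 and 3 are kept
# positive_inds stores the boolean mask so shared box features can later be indexed the same way.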
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.poolers import Pooler 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | registry.ROI_MASK_FEATURE_EXTRACTORS.register( 12 | "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor 13 | ) 14 | 15 | 16 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor") 17 | class MaskRCNNFPNFeatureExtractor(nn.Module): 18 | """ 19 | Heads for FPN for classification 20 | """ 21 | 22 | def __init__(self, cfg, in_channels): 23 | """ 24 | Arguments: 25 | num_classes (int): number of output classes 26 | input_size (int): number of channels of the input once it's flattened 27 | representation_size (int): size of the intermediate representation 28 | """ 29 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 30 | 31 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 32 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 33 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 34 | pooler = Pooler( 35 | output_size=(resolution, resolution), 36 | scales=scales, 37 | sampling_ratio=sampling_ratio, 38 | ) 39 | input_size = in_channels 40 | self.pooler = pooler 41 | 42 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 43 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 44 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 45 | 46 | next_feature = input_size 47 | self.blocks = [] 48 | for layer_idx, layer_features in enumerate(layers, 1): 49 | layer_name = "mask_fcn{}".format(layer_idx) 50 | module = make_conv3x3( 51 | next_feature, layer_features, 52 | dilation=dilation, stride=1, use_gn=use_gn 53 | ) 54 | self.add_module(layer_name, module) 55 | next_feature = layer_features 56 | self.blocks.append(layer_name) 57 | self.out_channels = layer_features 58 | 59 | def forward(self, x, proposals): 60 | x = self.pooler(x, proposals) 61 | 62 | for layer_name in self.blocks: 63 | x = F.relu(getattr(self, layer_name)(x)) 64 | 65 | return x 66 | 67 | 68 | def make_roi_mask_feature_extractor(cfg, in_channels): 69 | func = registry.ROI_MASK_FEATURE_EXTRACTORS[ 70 | cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR 71 | ] 72 | return func(cfg, in_channels) 73 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
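For reference, the layer-building loop in MaskRCNNFPNFeatureExtractor above unrolls as follows under the usual FPN mask setting (the CONV_LAYERS value is an assumption stated for illustration, not read from a config shown here):

# MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256) gives
#   mask_fcn1: make_conv3x3(in_channels, 256)
#   mask_fcn2: make_conv3x3(256, 256)
#   mask_fcn3: make_conv3x3(256, 256)
#   mask_fcn4: make_conv3x3(256, 256)
# and self.out_channels ends up as 256, which the mask predictor then consumes.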
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | from maskrcnn_benchmark.modeling import registry 8 | 9 | 10 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 11 | class MaskRCNNC4Predictor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(MaskRCNNC4Predictor, self).__init__() 14 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 16 | num_inputs = in_channels 17 | 18 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 19 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 20 | 21 | for name, param in self.named_parameters(): 22 | if "bias" in name: 23 | nn.init.constant_(param, 0) 24 | elif "weight" in name: 25 | # Caffe2 implementation uses MSRAFill, which in fact 26 | # corresponds to kaiming_normal_ in PyTorch 27 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 28 | 29 | def forward(self, x): 30 | x = F.relu(self.conv5_mask(x)) 31 | return self.mask_fcn_logits(x) 32 | 33 | 34 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 35 | class MaskRCNNConv1x1Predictor(nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(MaskRCNNConv1x1Predictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | num_inputs = in_channels 40 | 41 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 42 | 43 | for name, param in self.named_parameters(): 44 | if "bias" in name: 45 | nn.init.constant_(param, 0) 46 | elif "weight" in name: 47 | # Caffe2 implementation uses MSRAFill, which in fact 48 | # corresponds to kaiming_normal_ in PyTorch 49 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 50 | 51 | def forward(self, x): 52 | return self.mask_fcn_logits(x) 53 | 54 | 55 | def make_roi_mask_predictor(cfg, in_channels): 56 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 57 | return func(cfg, in_channels) 58 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/relation_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/relation_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | from .attribute_head.attribute_head import build_roi_attribute_head 7 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 8 | from .relation_head.relation_head import build_roi_relation_head 9 | 10 | 11 | class CombinedROIHeads(torch.nn.ModuleDict): 12 | """ 13 | Combines a set of individual heads (for box prediction or masks) into a single 14 | head. 
15 | """ 16 | 17 | def __init__(self, cfg, heads): 18 | super(CombinedROIHeads, self).__init__(heads) 19 | self.cfg = cfg.clone() 20 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 21 | self.mask.feature_extractor = self.box.feature_extractor 22 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 23 | self.keypoint.feature_extractor = self.box.feature_extractor 24 | 25 | def forward(self, features, proposals, targets=None, logger=None): 26 | losses = {} 27 | x, detections, loss_box = self.box(features, proposals, targets) 28 | if not self.cfg.MODEL.RELATION_ON: 29 | # During the relationship training stage, the bbox_proposal_network should be fixed, and no loss. 30 | losses.update(loss_box) 31 | 32 | if self.cfg.MODEL.ATTRIBUTE_ON: 33 | # Attribute head don't have a separate feature extractor 34 | z, detections, loss_attribute = self.attribute(features, detections, targets) 35 | losses.update(loss_attribute) 36 | 37 | if self.cfg.MODEL.MASK_ON: 38 | mask_features = features 39 | # optimization: during training, if we share the feature extractor between 40 | # the box and the mask heads, then we can reuse the features already computed 41 | if ( 42 | self.training 43 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 44 | ): 45 | mask_features = x 46 | # During training, self.box() will return the unaltered proposals as "detections" 47 | # this makes the API consistent during training and testing 48 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 49 | losses.update(loss_mask) 50 | 51 | if self.cfg.MODEL.KEYPOINT_ON: 52 | keypoint_features = features 53 | # optimization: during training, if we share the feature extractor between 54 | # the box and the mask heads, then we can reuse the features already computed 55 | if ( 56 | self.training 57 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 58 | ): 59 | keypoint_features = x 60 | # During training, self.box() will return the unaltered proposals as "detections" 61 | # this makes the API consistent during training and testing 62 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 63 | losses.update(loss_keypoint) 64 | 65 | if self.cfg.MODEL.RELATION_ON: 66 | # it may be not safe to share features due to post processing 67 | # During training, self.box() will return the unaltered proposals as "detections" 68 | # this makes the API consistent during training and testing 69 | x, detections, loss_relation = self.relation(features, detections, targets, logger) 70 | losses.update(loss_relation) 71 | 72 | return x, detections, losses 73 | 74 | 75 | def build_roi_heads(cfg, in_channels): 76 | # individually create the heads, that will be combined together 77 | # afterwards 78 | roi_heads = [] 79 | if cfg.MODEL.RETINANET_ON: 80 | return [] 81 | 82 | if not cfg.MODEL.RPN_ONLY: 83 | roi_heads.append(("box", build_roi_box_head(cfg, in_channels))) 84 | if cfg.MODEL.MASK_ON: 85 | roi_heads.append(("mask", build_roi_mask_head(cfg, in_channels))) 86 | if cfg.MODEL.KEYPOINT_ON: 87 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels))) 88 | if cfg.MODEL.RELATION_ON: 89 | roi_heads.append(("relation", build_roi_relation_head(cfg, in_channels))) 90 | if cfg.MODEL.ATTRIBUTE_ON: 91 | roi_heads.append(("attribute", build_roi_attribute_head(cfg, in_channels))) 92 | 93 | # combine individual heads in a single module 94 | if roi_heads: 95 | roi_heads = CombinedROIHeads(cfg, roi_heads) 96 | 97 
| return roi_heads 98 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains specific functions for computing losses on the RetinaNet 3 | file 4 | """ 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from ..utils import concat_box_prediction_layers 10 | 11 | from maskrcnn_benchmark.layers import smooth_l1_loss 12 | from maskrcnn_benchmark.layers import SigmoidFocalLoss 13 | from maskrcnn_benchmark.modeling.matcher import Matcher 14 | from maskrcnn_benchmark.modeling.utils import cat 15 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 16 | from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist 17 | from maskrcnn_benchmark.modeling.rpn.loss import RPNLossComputation 18 | 19 | class RetinaNetLossComputation(RPNLossComputation): 20 | """ 21 | This class computes the RetinaNet loss. 
22 | """ 23 | 24 | def __init__(self, proposal_matcher, box_coder, 25 | generate_labels_func, 26 | sigmoid_focal_loss, 27 | bbox_reg_beta=0.11, 28 | regress_norm=1.0): 29 | """ 30 | Arguments: 31 | proposal_matcher (Matcher) 32 | box_coder (BoxCoder) 33 | """ 34 | self.proposal_matcher = proposal_matcher 35 | self.box_coder = box_coder 36 | self.box_cls_loss_func = sigmoid_focal_loss 37 | self.bbox_reg_beta = bbox_reg_beta 38 | self.copied_fields = ['labels'] 39 | self.generate_labels_func = generate_labels_func 40 | self.discard_cases = ['between_thresholds'] 41 | self.regress_norm = regress_norm 42 | 43 | def __call__(self, anchors, box_cls, box_regression, targets): 44 | """ 45 | Arguments: 46 | anchors (list[BoxList]) 47 | box_cls (list[Tensor]) 48 | box_regression (list[Tensor]) 49 | targets (list[BoxList]) 50 | 51 | Returns: 52 | retinanet_cls_loss (Tensor) 53 | retinanet_regression_loss (Tensor 54 | """ 55 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 56 | labels, regression_targets = self.prepare_targets(anchors, targets) 57 | 58 | N = len(labels) 59 | box_cls, box_regression = \ 60 | concat_box_prediction_layers(box_cls, box_regression) 61 | 62 | labels = torch.cat(labels, dim=0) 63 | regression_targets = torch.cat(regression_targets, dim=0) 64 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 65 | 66 | retinanet_regression_loss = smooth_l1_loss( 67 | box_regression[pos_inds], 68 | regression_targets[pos_inds], 69 | beta=self.bbox_reg_beta, 70 | size_average=False, 71 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 72 | 73 | labels = labels.int() 74 | 75 | retinanet_cls_loss = self.box_cls_loss_func( 76 | box_cls, 77 | labels 78 | ) / (pos_inds.numel() + N) 79 | 80 | return retinanet_cls_loss, retinanet_regression_loss 81 | 82 | 83 | def generate_retinanet_labels(matched_targets): 84 | labels_per_image = matched_targets.get_field("labels") 85 | return labels_per_image 86 | 87 | 88 | def make_retinanet_loss_evaluator(cfg, box_coder): 89 | matcher = Matcher( 90 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 91 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 92 | allow_low_quality_matches=True, 93 | ) 94 | sigmoid_focal_loss = SigmoidFocalLoss( 95 | cfg.MODEL.RETINANET.LOSS_GAMMA, 96 | cfg.MODEL.RETINANET.LOSS_ALPHA 97 | ) 98 | 99 | loss_evaluator = RetinaNetLossComputation( 100 | matcher, 101 | box_coder, 102 | generate_retinanet_labels, 103 | sigmoid_focal_loss, 104 | bbox_reg_beta = cfg.MODEL.RETINANET.BBOX_REG_BETA, 105 | regress_norm = cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 106 | ) 107 | return loss_evaluator 108 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Utility functions minipulating the prediction layers 4 | """ 5 | 6 | from ..utils import cat 7 | 8 | import torch 9 | 10 | def permute_and_flatten(layer, N, A, C, H, W): 11 | layer = layer.view(N, -1, C, H, W) 12 | layer = layer.permute(0, 3, 4, 1, 2) 13 | layer = layer.reshape(N, -1, C) 14 | return layer 15 | 16 | 17 | def concat_box_prediction_layers(box_cls, box_regression): 18 | box_cls_flattened = [] 19 | box_regression_flattened = [] 20 | # for each feature level, permute the outputs to make them be in the 21 | # same format as the labels. 
Note that the labels are computed for 22 | # all feature levels concatenated, so we keep the same representation 23 | # for the objectness and the box_regression 24 | for box_cls_per_level, box_regression_per_level in zip( 25 | box_cls, box_regression 26 | ): 27 | N, AxC, H, W = box_cls_per_level.shape 28 | Ax4 = box_regression_per_level.shape[1] 29 | A = Ax4 // 4 30 | C = AxC // A 31 | box_cls_per_level = permute_and_flatten( 32 | box_cls_per_level, N, A, C, H, W 33 | ) 34 | box_cls_flattened.append(box_cls_per_level) 35 | 36 | box_regression_per_level = permute_and_flatten( 37 | box_regression_per_level, N, A, 4, H, W 38 | ) 39 | box_regression_flattened.append(box_regression_per_level) 40 | # concatenate on the first dimension (representing the feature levels), to 41 | # take into account the way the labels were generated (with all feature maps 42 | # being concatenated as well) 43 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 44 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 45 | return box_cls, box_regression 46 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau 5 | 6 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
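A shape walk-through of permute_and_flatten / concat_box_prediction_layers above, using invented sizes for a single feature level:

import torch
N, A, C, H, W = 2, 3, 4, 5, 5                 # batch, anchors per location, classes, feature map size
box_cls_level = torch.randn(N, A * C, H, W)
layer = box_cls_level.view(N, -1, C, H, W)    # (2, 3, 4, 5, 5)
layer = layer.permute(0, 3, 4, 1, 2)          # (2, 5, 5, 3, 4): one row per (location, anchor)
layer = layer.reshape(N, -1, C)               # (2, 75, 4)
# Concatenating all levels on dim=1 and reshaping to (-1, C) then yields one
# classification row per anchor in the whole batch, matching how the labels are laid out.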
2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau, WarmupCosineLR, WarmupCosineMStepLR, WarmupConsrantCosineLR 5 | 6 | 7 | def make_optimizer(cfg, model, logger, slow_heads=None, slow_ratio=5.0, rl_factor=1.0): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | if slow_heads is not None: 18 | for item in slow_heads: 19 | if item in key: 20 | logger.info("SLOW HEADS: {} is slow down by ratio of {}.".format(key, str(slow_ratio))) 21 | lr = lr / slow_ratio 22 | break 23 | params += [{"params": [value], "lr": lr * rl_factor, "weight_decay": weight_decay}] 24 | 25 | optimizer = torch.optim.SGD(params, lr=cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM) 26 | return optimizer 27 | 28 | 29 | def make_lr_scheduler(cfg, optimizer, logger=None): 30 | if cfg.SOLVER.SCHEDULE.TYPE == "WarmupMultiStepLR": 31 | return WarmupMultiStepLR( 32 | optimizer, 33 | cfg.SOLVER.STEPS, 34 | cfg.SOLVER.GAMMA, 35 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 36 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 37 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 38 | ) 39 | 40 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupReduceLROnPlateau": 41 | return WarmupReduceLROnPlateau( 42 | optimizer, 43 | cfg.SOLVER.SCHEDULE.FACTOR, 44 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 45 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 46 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 47 | patience=cfg.SOLVER.SCHEDULE.PATIENCE, 48 | threshold=cfg.SOLVER.SCHEDULE.THRESHOLD, 49 | cooldown=cfg.SOLVER.SCHEDULE.COOLDOWN, 50 | logger=logger, 51 | ) 52 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupCosineLR": 53 | return WarmupCosineLR( 54 | optimizer, 55 | T_max=cfg.SOLVER.MAX_ITER, 56 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 57 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 58 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 59 | eta_min=cfg.SOLVER.ETA_MIN * cfg.SOLVER.IMS_PER_BATCH, 60 | ) 61 | 62 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupCosineMstepLR": 63 | return WarmupCosineMStepLR( 64 | optimizer, 65 | cfg.SOLVER.STEPS, 66 | cfg.SOLVER.GAMMA, 67 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 68 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 69 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 70 | eta_min=cfg.SOLVER.ETA_MIN * cfg.SOLVER.IMS_PER_BATCH, 71 | ) 72 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupConsrantCosineLR": 73 | return WarmupConsrantCosineLR( 74 | optimizer, 75 | cfg.SOLVER.STEPS, 76 | T_max=cfg.SOLVER.MAX_ITER, 77 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 78 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 79 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 80 | eta_min=cfg.SOLVER.ETA_MIN * cfg.SOLVER.IMS_PER_BATCH, 81 | ) 82 | else: 83 | raise ValueError("Invalid Schedule Type") 84 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 
(c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | if tensors.dim() == 3: 45 | tensors = tensors[None] 46 | assert tensors.dim() == 4 47 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 48 | return ImageList(tensors, image_sizes) 49 | elif isinstance(tensors, (tuple, list)): 50 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 51 | 52 | # TODO Ideally, just remove this and let me model handle arbitrary 53 | # input sizs 54 | if size_divisible > 0: 55 | import math 56 | 57 | stride = size_divisible 58 | max_size = list(max_size) 59 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 60 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 61 | max_size = tuple(max_size) 62 | 63 | batch_shape = (len(tensors),) + max_size 64 | batched_imgs = tensors[0].new(*batch_shape).zero_() 65 | for img, pad_img in zip(tensors, batched_imgs): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | 68 | image_sizes = [im.shape[-2:] for im in tensors] 69 | 70 | return ImageList(batched_imgs, image_sizes) 71 | else: 72 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 73 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
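A hand-worked example of the padding performed by to_image_list above (image sizes invented):

import torch
from maskrcnn_benchmark.structures.image_list import to_image_list

imgs = [torch.randn(3, 480, 640), torch.randn(3, 520, 600)]
batch = to_image_list(imgs, size_divisible=32)
# max_size starts as (3, 520, 640); 520 rounds up to 544 and 640 is already a multiple of 32,
# so batch.tensors has shape (2, 3, 544, 640) while batch.image_sizes keeps the
# original per-image sizes [(480, 640), (520, 600)] for later unpadding.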
2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | to_device = "cuda" 57 | #to_device = torch.device("cpu") 58 | 59 | world_size = get_world_size() 60 | if world_size == 1: 61 | return [data] 62 | 63 | # serialized to a Tensor 64 | buffer = pickle.dumps(data) 65 | storage = torch.ByteStorage.from_buffer(buffer) 66 | tensor = torch.ByteTensor(storage).to(to_device) 67 | 68 | # obtain Tensor size of each rank 69 | local_size = torch.LongTensor([tensor.numel()]).to(to_device) 70 | size_list = [torch.LongTensor([0]).to(to_device) for _ in range(world_size)] 71 | dist.all_gather(size_list, local_size) 72 | size_list = [int(size.item()) for size in size_list] 73 | max_size = max(size_list) 74 | 75 | # receiving Tensor from all ranks 76 | # we pad the tensor because torch all_gather does not support 77 | # gathering tensors of different shapes 78 | tensor_list = [] 79 | for _ in size_list: 80 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to(to_device)) 81 | if local_size != max_size: 82 | padding = torch.ByteTensor(size=(max_size - local_size,)).to(to_device) 83 | tensor = torch.cat((tensor, padding), dim=0) 84 | dist.all_gather(tensor_list, tensor) 85 | 86 | data_list = [] 87 | for size, tensor in zip(size_list, tensor_list): 88 | buffer = tensor.cpu().numpy().tobytes()[:size] 89 | data_list.append(pickle.loads(buffer)) 90 | 91 | return data_list 92 | 93 | 94 | def reduce_dict(input_dict, average=True): 95 | """ 96 | Args: 97 | input_dict (dict): all the values will be reduced 98 | average (bool): whether to do average or sum 99 | Reduce the values in the dictionary from all processes so that process with rank 100 | 0 has the averaged results. Returns a dict with the same fields as 101 | input_dict, after reduction. 
102 | """ 103 | world_size = get_world_size() 104 | if world_size < 2: 105 | return input_dict 106 | with torch.no_grad(): 107 | names = [] 108 | values = [] 109 | # sort the keys so that they are consistent across processes 110 | for k in sorted(input_dict.keys()): 111 | names.append(k) 112 | values.append(input_dict[k]) 113 | values = torch.stack(values, dim=0) 114 | dist.reduce(values, dst=0) 115 | if dist.get_rank() == 0 and average: 116 | # only main process gets accumulated, so only divide by 117 | # world_size in this case 118 | values /= world_size 119 | reduced_dict = {k: v for k, v in zip(names, values)} 120 | return reduced_dict 121 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | DEBUG_PRINT_ON = True 7 | 8 | def debug_print(logger, info): 9 | if DEBUG_PRINT_ON: 10 | logger.info('#'*20+' '+info+' '+'#'*20) 11 | 12 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 13 | logger = logging.getLogger(name) 14 | logger.setLevel(logging.DEBUG) 15 | # don't log results for the non-master process 16 | if distributed_rank > 0: 17 | return logger 18 | ch = logging.StreamHandler(stream=sys.stdout) 19 | ch.setLevel(logging.DEBUG) 20 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 21 | ch.setFormatter(formatter) 22 | logger.addHandler(ch) 23 | 24 | if save_dir: 25 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 26 | fh.setLevel(logging.DEBUG) 27 | fh.setFormatter(formatter) 28 | logger.addHandler(fh) 29 | 30 | return logger 31 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import json 4 | import logging 5 | import os 6 | from .comm import is_main_process 7 | import numpy as np 8 | 9 | from maskrcnn_benchmark.structures.bounding_box import BoxList 10 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 11 | 12 | def mkdir(path): 13 | try: 14 | os.makedirs(path) 15 | except OSError as e: 16 | if e.errno != errno.EEXIST: 17 | raise 18 | 19 | 20 | def save_labels(dataset_list, output_dir): 21 | if is_main_process(): 22 | logger = logging.getLogger(__name__) 23 | 24 | ids_to_labels = {} 25 | for dataset in dataset_list: 26 | if hasattr(dataset, 'categories'): 27 | ids_to_labels.update(dataset.categories) 28 | else: 29 | logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format( 30 | dataset.__class__.__name__)) 31 | 32 | if ids_to_labels: 33 | labels_file = os.path.join(output_dir, 'labels.json') 34 | logger.info("Saving labels mapping into {}".format(labels_file)) 35 | with open(labels_file, 'w') as f: 36 | json.dump(ids_to_labels, f, indent=2) 37 | 38 | 39 | def save_config(cfg, path): 40 | if is_main_process(): 41 | with open(path, 'w') as f: 42 | f.write(cfg.dump()) 43 | 44 | 45 | def intersect_2d(x1, x2): 46 | """ 47 | Given two arrays [m1, n], [m2,n], returns a [m1, m2] array where each entry is True if those 48 | rows match. 
49 | :param x1: [m1, n] numpy array 50 | :param x2: [m2, n] numpy array 51 | :return: [m1, m2] bool array of the intersections 52 | """ 53 | if x1.shape[1] != x2.shape[1]: 54 | raise ValueError("Input arrays must have same #columns") 55 | 56 | # This performs a matrix multiplication-esque thing between the two arrays 57 | # Instead of summing, we want the equality, so we reduce in that way 58 | res = (x1[..., None] == x2.T[None, ...]).all(1) 59 | return res 60 | 61 | def argsort_desc(scores): 62 | """ 63 | Returns the indices that sort scores descending in a smart way 64 | :param scores: Numpy array of arbitrary size 65 | :return: an array of size [numel(scores), dim(scores)] where each row is the index you'd 66 | need to get the score. 67 | """ 68 | return np.column_stack(np.unravel_index(np.argsort(-scores.ravel()), scores.shape)) 69 | 70 | def bbox_overlaps(boxes1, boxes2): 71 | """ 72 | Parameters: 73 | boxes1 (m, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 74 | boxes2 (n, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 75 | Return: 76 | iou (m, n) [np.array] 77 | """ 78 | boxes1 = BoxList(boxes1, (0, 0), 'xyxy') 79 | boxes2 = BoxList(boxes2, (0, 0), 'xyxy') 80 | iou = boxlist_iou(boxes1, boxes2).cpu().numpy() 81 | return iou 82 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.imports import import_file 8 | 9 | 10 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping): 11 | """ 12 | Strategy: suppose that the models that we will create will have prefixes appended 13 | to each of its keys, for example due to an extra level of nesting that the original 14 | pre-trained weights from ImageNet won't contain. For example, model.state_dict() 15 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 16 | res2.conv1.weight. We thus want to match both parameters together. 17 | For that, we look for each model weight, look among all loaded keys if there is one 18 | that is a suffix of the current weight name, and use it if that's the case. 19 | If multiple matches exist, take the one with longest size 20 | of the corresponding name. For example, for the same model as before, the pretrained 21 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 22 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 23 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
24 | """ 25 | logger = logging.getLogger(__name__) 26 | current_keys = sorted(list(model_state_dict.keys())) 27 | loaded_keys = sorted(list(loaded_state_dict.keys())) 28 | # get a matrix of string matches, where each (i, j) entry correspond to the size of the 29 | # loaded_key string, if it matches 30 | # NOTE: Kaihua Tang, since some modules of current model will be initialized from assigned layer of 31 | # loaded model, we use load_mapping to do such operation 32 | mapped_current_keys = current_keys.copy() 33 | for i, key in enumerate(mapped_current_keys): 34 | for source_key, target_key in load_mapping.items(): 35 | if source_key in key: 36 | mapped_current_keys[i] = key.replace(source_key, target_key) 37 | logger.info("MAPPING {} in current model to {} in loaded model.".format(key, mapped_current_keys[i])) 38 | 39 | match_matrix = [ 40 | len(j) if i.endswith(j) else 0 for i in mapped_current_keys for j in loaded_keys 41 | ] 42 | match_matrix = torch.as_tensor(match_matrix).view( 43 | len(current_keys), len(loaded_keys) 44 | ) 45 | max_match_size, idxs = match_matrix.max(1) 46 | # remove indices that correspond to no-match 47 | idxs[max_match_size == 0] = -1 48 | 49 | # used for logging 50 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 51 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 52 | log_str_template = "REMATCHING! {: <{}} loaded from {: <{}} of shape {}" 53 | for idx_new, idx_old in enumerate(idxs.tolist()): 54 | if idx_old == -1: 55 | key = current_keys[idx_new] 56 | logger.info("NO-MATCHING of current module: {} of shape {}".format(key, 57 | tuple(model_state_dict[key].shape))) 58 | continue 59 | key = current_keys[idx_new] 60 | key_old = loaded_keys[idx_old] 61 | model_state_dict[key] = loaded_state_dict[key_old] 62 | # add a control gate for this logger (it's too large) 63 | if ((not key.startswith('module.')) and key != key_old) or (key.startswith('module.') and key[7:] != key_old): 64 | logger.info( 65 | log_str_template.format( 66 | key, 67 | max_size, 68 | key_old, 69 | max_size_loaded, 70 | tuple(loaded_state_dict[key_old].shape), 71 | ) 72 | ) 73 | 74 | 75 | def strip_prefix_if_present(state_dict, prefix): 76 | keys = sorted(state_dict.keys()) 77 | if not all(key.startswith(prefix) for key in keys): 78 | return state_dict 79 | stripped_state_dict = OrderedDict() 80 | for key, value in state_dict.items(): 81 | stripped_state_dict[key.replace(prefix, "")] = value 82 | return stripped_state_dict 83 | 84 | 85 | def load_state_dict(model, loaded_state_dict, load_mapping): 86 | model_state_dict = model.state_dict() 87 | # if the state_dict comes from a model that was wrapped in a 88 | # DataParallel or DistributedDataParallel during serialization, 89 | # remove the "module" prefix before performing the matching 90 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 91 | align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping) 92 | 93 | # use strict loading 94 | model.load_state_dict(model_state_dict) 95 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import os 3 | import sys 4 | 5 | try: 6 | from torch.hub import _download_url_to_file 7 | from torch.hub import urlparse 8 | from torch.hub import HASH_REGEX 9 | except ImportError: 10 | from torch.utils.model_zoo import _download_url_to_file 11 | from torch.utils.model_zoo import urlparse 12 | from torch.utils.model_zoo import HASH_REGEX 13 | 14 | from maskrcnn_benchmark.utils.comm import is_main_process 15 | from maskrcnn_benchmark.utils.comm import synchronize 16 | 17 | 18 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 19 | # but with a few improvements and modifications 20 | def cache_url(url, model_dir=None, progress=True): 21 | r"""Loads the Torch serialized object at the given URL. 22 | If the object is already present in `model_dir`, it's deserialized and 23 | returned. The filename part of the URL should follow the naming convention 24 | ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more 25 | digits of the SHA256 hash of the contents of the file. The hash is used to 26 | ensure unique names and to verify the contents of the file. 27 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 28 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 29 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 30 | Args: 31 | url (string): URL of the object to download 32 | model_dir (string, optional): directory in which to save the object 33 | progress (bool, optional): whether or not to display a progress bar to stderr 34 | Example: 35 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 36 | """ 37 | if model_dir is None: 38 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 39 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 40 | if not os.path.exists(model_dir): 41 | os.makedirs(model_dir) 42 | parts = urlparse(url) 43 | filename = os.path.basename(parts.path) 44 | if filename == "model_final.pkl": 45 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 46 | # so make the full path the filename by replacing / with _ 47 | filename = parts.path.replace("/", "_") 48 | cached_file = os.path.join(model_dir, filename) 49 | if not os.path.exists(cached_file) and is_main_process(): 50 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 51 | hash_prefix = HASH_REGEX.search(filename) 52 | if hash_prefix is not None: 53 | hash_prefix = hash_prefix.group(1) 54 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 55 | # which matches the hash PyTorch uses. So we skip the hash matching 56 | # if the hash_prefix is less than 6 characters 57 | if len(hash_prefix) < 6: 58 | hash_prefix = None 59 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 60 | synchronize() 61 | return cached_file 62 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing module registration. It extends a dictionary 12 | and provides a register function.
13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Access of a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /sg-benchmark/requirements.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | yacs 3 | cython 4 | matplotlib 5 | tqdm 6 | -------------------------------------------------------------------------------- /sg-benchmark/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
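# Build note (a sketch of the usual maskrcnn-benchmark workflow, not taken from this repo's
# docs): the C++/CUDA extensions declared below are normally compiled in-place with
#
#   python setup.py build develop
#
# and, judging from the get_extensions() check below, exporting FORCE_CUDA=1 forces the CUDA
# ops to be built even when no GPU is visible at build time (e.g. inside a container build).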
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /sg-benchmark/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/tools/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/tools/detector_pretest_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
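# Hypothetical launch command for this evaluation script (GPU count and paths are
# placeholders, not taken from the repository docs):
#
#   python -m torch.distributed.launch --nproc_per_node=2 \
#       tools/detector_pretest_net.py \
#       --config-file configs/e2e_relation_X_101_32_8_FPN_1x.yaml \
#       MODEL.WEIGHT /path/to/checkpoint.pth OUTPUT_DIR /path/to/output
#
# torch.distributed.launch supplies --local_rank and WORLD_SIZE, which the code below reads;
# trailing KEY VALUE pairs end up in args.opts and are merged via cfg.merge_from_list().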
2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (no not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | # Check if we can enable mixed-precision via apex.amp 21 | try: 22 | from apex import amp 23 | except ImportError: 24 | raise ImportError('Use APEX for mixed precision via apex.amp') 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 29 | parser.add_argument( 30 | "--config-file", 31 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 32 | metavar="FILE", 33 | help="path to config file", 34 | ) 35 | parser.add_argument("--local_rank", type=int, default=0) 36 | parser.add_argument( 37 | "opts", 38 | help="Modify config options using the command-line", 39 | default=None, 40 | nargs=argparse.REMAINDER, 41 | ) 42 | 43 | args = parser.parse_args() 44 | 45 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 46 | distributed = num_gpus > 1 47 | 48 | if distributed: 49 | torch.cuda.set_device(args.local_rank) 50 | torch.distributed.init_process_group( 51 | backend="nccl", init_method="env://" 52 | ) 53 | synchronize() 54 | 55 | cfg.merge_from_file(args.config_file) 56 | cfg.merge_from_list(args.opts) 57 | cfg.freeze() 58 | 59 | save_dir = "" 60 | logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) 61 | logger.info("Using {} GPUs".format(num_gpus)) 62 | logger.info(cfg) 63 | 64 | logger.info("Collecting env info (might take some time)") 65 | logger.info("\n" + collect_env_info()) 66 | 67 | model = build_detection_model(cfg) 68 | model.to(cfg.MODEL.DEVICE) 69 | 70 | # Initialize mixed-precision if necessary 71 | use_mixed_precision = cfg.DTYPE == 'float16' 72 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 73 | 74 | output_dir = cfg.OUTPUT_DIR 75 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 76 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 77 | 78 | iou_types = ("bbox",) 79 | if cfg.MODEL.MASK_ON: 80 | iou_types = iou_types + ("segm",) 81 | if cfg.MODEL.KEYPOINT_ON: 82 | iou_types = iou_types + ("keypoints",) 83 | if cfg.MODEL.RELATION_ON: 84 | iou_types = iou_types + ("relations", ) 85 | if cfg.MODEL.ATTRIBUTE_ON: 86 | iou_types = iou_types + ("attributes", ) 87 | 88 | output_folders = [None] * len(cfg.DATASETS.TEST) 89 | dataset_names = cfg.DATASETS.TEST 90 | if cfg.OUTPUT_DIR: 91 | for idx, dataset_name in enumerate(dataset_names): 92 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 93 | mkdir(output_folder) 94 | output_folders[idx] = output_folder 95 | data_loaders_val = make_data_loader(cfg, mode='val', is_distributed=distributed) # mode=val for fast visualization 96 | for output_folder, dataset_name, 
data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 97 | inference( 98 | cfg, 99 | model, 100 | data_loader_val, 101 | dataset_name=dataset_name, 102 | iou_types=iou_types, 103 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 104 | device=cfg.MODEL.DEVICE, 105 | expected_results=cfg.TEST.EXPECTED_RESULTS, 106 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 107 | output_folder=output_folder, 108 | ) 109 | synchronize() 110 | 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /sg-benchmark/tools/relation_test_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (no not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | # Check if we can enable mixed-precision via apex.amp 21 | try: 22 | from apex import amp 23 | except ImportError: 24 | raise ImportError('Use APEX for mixed precision via apex.amp') 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 29 | parser.add_argument( 30 | "--config-file", 31 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 32 | metavar="FILE", 33 | help="path to config file", 34 | ) 35 | parser.add_argument("--local_rank", type=int, default=0) 36 | parser.add_argument( 37 | "opts", 38 | help="Modify config options using the command-line", 39 | default=None, 40 | nargs=argparse.REMAINDER, 41 | ) 42 | 43 | args = parser.parse_args() 44 | 45 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 46 | distributed = num_gpus > 1 47 | 48 | if distributed: 49 | torch.cuda.set_device(args.local_rank) 50 | torch.distributed.init_process_group( 51 | backend="nccl", init_method="env://" 52 | ) 53 | synchronize() 54 | 55 | cfg.merge_from_file(args.config_file) 56 | cfg.merge_from_list(args.opts) 57 | cfg.freeze() 58 | 59 | save_dir = "" 60 | logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) 61 | logger.info("Using {} GPUs".format(num_gpus)) 62 | logger.info(cfg) 63 | 64 | logger.info("Collecting env info (might take some time)") 65 | logger.info("\n" + collect_env_info()) 66 | 67 | model = build_detection_model(cfg) 68 | model.to(cfg.MODEL.DEVICE) 69 | 70 | # Initialize mixed-precision if necessary 71 | use_mixed_precision = cfg.DTYPE == 'float16' 72 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 73 | 74 | output_dir = cfg.OUTPUT_DIR 75 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 76 | _ = 
checkpointer.load(cfg.MODEL.WEIGHT) 77 | 78 | iou_types = ("bbox",) 79 | if cfg.MODEL.MASK_ON: 80 | iou_types = iou_types + ("segm",) 81 | if cfg.MODEL.KEYPOINT_ON: 82 | iou_types = iou_types + ("keypoints",) 83 | if cfg.MODEL.RELATION_ON: 84 | iou_types = iou_types + ("relations", ) 85 | if cfg.MODEL.ATTRIBUTE_ON: 86 | iou_types = iou_types + ("attributes", ) 87 | output_folders = [None] * len(cfg.DATASETS.TEST) 88 | dataset_names = cfg.DATASETS.TEST 89 | if cfg.OUTPUT_DIR: 90 | for idx, dataset_name in enumerate(dataset_names): 91 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 92 | mkdir(output_folder) 93 | output_folders[idx] = output_folder 94 | data_loaders_val = make_data_loader(cfg, mode="test", is_distributed=distributed) 95 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 96 | inference( 97 | cfg, 98 | model, 99 | data_loader_val, 100 | dataset_name=dataset_name, 101 | iou_types=iou_types, 102 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 103 | device=cfg.MODEL.DEVICE, 104 | expected_results=cfg.TEST.EXPECTED_RESULTS, 105 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 106 | output_folder=output_folder, 107 | ) 108 | synchronize() 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | --------------------------------------------------------------------------------
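The logging and timing helpers collected above are normally driven from a training or evaluation loop. The following is a minimal, self-contained sketch (it assumes the maskrcnn_benchmark package is installed; the loop and metric names are illustrative, not taken from the repository):

import torch

from maskrcnn_benchmark.utils.metric_logger import MetricLogger
from maskrcnn_benchmark.utils.timer import Timer, get_time_str

meters = MetricLogger(delimiter="  ")  # keeps one SmoothedValue per metric name
timer = Timer()

timer.tic()
for iteration in range(1, 101):
    loss = torch.rand(1)               # stand-in for a real forward/backward pass
    meters.update(loss=loss, lr=0.01)  # tensors are converted to floats inside update()
    if iteration % 20 == 0:
        # MetricLogger.__str__ reports "name: median (global average)" for each meter
        print("iter {:4d}  {}".format(iteration, str(meters)))
print("total time: {}".format(get_time_str(timer.toc(average=False))))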