├── DATASET.md ├── INSTALL.md ├── README.md ├── nbdt ├── nbdt │ ├── __init__.py │ ├── analysis.py │ ├── bin │ │ ├── nbdt │ │ ├── nbdt-hierarchy │ │ └── nbdt-wnids │ ├── data │ │ ├── __init__.py │ │ ├── custom.py │ │ └── imagenet.py │ ├── graph.py │ ├── hierarchies │ │ └── VG150 │ │ │ └── graph-cogtree.json │ ├── hierarchy.py │ ├── loss.py │ ├── model.py │ ├── models │ │ ├── __init__.py │ │ ├── resnet.py │ │ ├── utils.py │ │ └── wideresnet.py │ ├── templates │ │ └── tree-template.html │ ├── utils.py │ └── wnids │ │ └── VG150.txt ├── requirements.txt └── setup.py └── sg-benchmark ├── configs ├── e2e_relation_X_101_32_8_FPN_1x.yaml └── e2e_relation_X_101_32_8_FPN_1x_transformer.yaml ├── maskrcnn_benchmark ├── __init__.py ├── config │ ├── __init__.py │ ├── defaults.py │ └── paths_catalog.py ├── csrc │ ├── ROIAlign.h │ ├── ROIPool.h │ ├── SigmoidFocalLoss.h │ ├── cpu │ │ ├── ROIAlign_cpu.cpp │ │ ├── nms_cpu.cpp │ │ └── vision.h │ ├── cuda │ │ ├── ROIAlign_cuda.cu │ │ ├── ROIPool_cuda.cu │ │ ├── SigmoidFocalLoss_cuda.cu │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_kernel_cuda.cu │ │ ├── deform_pool_cuda.cu │ │ ├── deform_pool_kernel_cuda.cu │ │ ├── nms.cu │ │ └── vision.h │ ├── deform_conv.h │ ├── deform_pool.h │ ├── nms.h │ └── vision.cpp ├── data │ ├── README.md │ ├── __init__.py │ ├── build.py │ ├── collate_batch.py │ ├── datasets │ │ ├── Vrd.py │ │ ├── __init__.py │ │ ├── coco.py │ │ ├── concat_dataset.py │ │ ├── evaluation │ │ │ ├── __init__.py │ │ │ ├── coco │ │ │ │ ├── __init__.py │ │ │ │ └── coco_eval.py │ │ │ ├── vg │ │ │ │ ├── __init__.py │ │ │ │ ├── sgg_eval.py │ │ │ │ ├── vg_capgraphs_anno.json │ │ │ │ ├── vg_eval.py │ │ │ │ ├── vg_test_capgraph_anno.json │ │ │ │ ├── vg_test_caption_anno.json │ │ │ │ └── zeroshot_triplet.pytorch │ │ │ └── voc │ │ │ │ ├── __init__.py │ │ │ │ └── voc_eval.py │ │ ├── list_dataset.py │ │ ├── visual_genome.py │ │ └── voc.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── grouped_batch_sampler.py │ │ └── iteration_based_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── build.py │ │ └── transforms.py ├── engine │ ├── __init__.py │ ├── bbox_aug.py │ ├── inference.py │ └── trainer.py ├── image_retrieval │ ├── S2G-RETRIEVAL.md │ ├── __init__.py │ ├── dataloader.py │ ├── evaluation.py │ ├── model.py │ ├── modelv2.py │ └── preprocessing.py ├── layers │ ├── __init__.py │ ├── _utils.py │ ├── batch_norm.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv_func.py │ │ ├── deform_conv_module.py │ │ ├── deform_pool_func.py │ │ └── deform_pool_module.py │ ├── entropy_loss.py │ ├── kl_div_loss.py │ ├── label_smoothing_loss.py │ ├── misc.py │ ├── nms.py │ ├── roi_align.py │ ├── roi_pool.py │ ├── sigmoid_focal_loss.py │ └── smooth_l1_loss.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fbnet.py │ │ ├── fbnet_builder.py │ │ ├── fbnet_modeldef.py │ │ ├── fpn.py │ │ ├── resnet.py │ │ └── vgg.py │ ├── balanced_positive_negative_sampler.py │ ├── box_coder.py │ ├── detector │ │ ├── __init__.py │ │ ├── detectors.py │ │ └── generalized_rcnn.py │ ├── make_layers.py │ ├── matcher.py │ ├── poolers.py │ ├── registry.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── attribute_head │ │ │ ├── __init__.py │ │ │ ├── attribute_head.py │ │ │ ├── loss.py │ │ │ ├── roi_attribute_feature_extractors.py │ │ │ └── roi_attribute_predictors.py │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── box_head.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── roi_box_feature_extractors.py │ │ │ ├── roi_box_predictors.py │ │ │ 
└── sampling.py │ │ ├── keypoint_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── keypoint_head.py │ │ │ ├── loss.py │ │ │ ├── roi_keypoint_feature_extractors.py │ │ │ └── roi_keypoint_predictors.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── mask_head.py │ │ │ ├── roi_mask_feature_extractors.py │ │ │ └── roi_mask_predictors.py │ │ ├── relation_head │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ ├── model_motifs.py │ │ │ ├── model_motifs_with_attribute.py │ │ │ ├── model_msg_passing.py │ │ │ ├── model_transformer.py │ │ │ ├── model_vctree.py │ │ │ ├── model_vtranse.py │ │ │ ├── relation_head.py │ │ │ ├── roi_relation_feature_extractors.py │ │ │ ├── roi_relation_predictors.py │ │ │ ├── sampling.py │ │ │ ├── utils_motifs.py │ │ │ ├── utils_relation.py │ │ │ ├── utils_treelstm.py │ │ │ └── utils_vctree.py │ │ └── roi_heads.py │ ├── rpn │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── inference.py │ │ ├── loss.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── inference.py │ │ │ ├── loss.py │ │ │ └── retinanet.py │ │ ├── rpn.py │ │ └── utils.py │ └── utils.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── bounding_box.py │ ├── boxlist_ops.py │ ├── image_list.py │ ├── keypoint.py │ └── segmentation_mask.py └── utils │ ├── README.md │ ├── __init__.py │ ├── c2_model_loading.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── comm.py │ ├── cv2_util.py │ ├── env.py │ ├── imports.py │ ├── logger.py │ ├── metric_logger.py │ ├── miscellaneous.py │ ├── model_serialization.py │ ├── model_zoo.py │ ├── registry.py │ └── timer.py ├── requirements.txt ├── setup.py └── tools ├── __init__.py ├── detector_pretest_net.py ├── detector_pretrain_net.py ├── image_retrieval_main.py ├── relation_test_net.py └── relation_train_net.py /DATASET.md: -------------------------------------------------------------------------------- 1 | ## DATASET 2 | The following is adapted from [Danfei Xu](https://github.com/danfeiX/scene-graph-TF-release/blob/master/data_tools/README.md) and [neural-motifs](https://github.com/rowanz/neural-motifs). 3 | 4 | ### Download: 5 | 1. Download the VG images [part1](https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip) [part2](https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip). Extract these images to the directory `datasets/vg/VG_100K`. 6 | 2. Download the [scene graphs](https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21779871&authkey=AA33n7BRpB1xa3I) and extract them to `datasets/vg/VG-SGG-with-attri.h5`. 7 | 8 | Please check [Scene Graph Benchmark/DATASET.md](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch/blob/master/DATASET.md) for more details. -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Installation 2 | 3 | Most of the requirements of this project are exactly the same as [Scene Graph Benchmark](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch) and [Neural-Backed Decision Trees](https://github.com/alvinwan/neural-backed-decision-trees). If you have any problems with your environment, you should check the [issues page of SG Benchmark](https://github.com/KaihuaTang/Scene-Graph-Benchmark.pytorch/issues) and the [issues page of NBDT](https://github.com/alvinwan/neural-backed-decision-trees/issues) first.
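Before building, it can help to confirm that your Python environment actually matches the requirements listed below. A minimal sanity-check sketch (it assumes PyTorch and torchvision are already installed; the version numbers refer to the Requirements list and the CUDA 10.1 instructions below):

```python
# Quick environment check (sketch): prints the versions the build steps below rely on.
import torch
import torchvision

print("PyTorch:", torch.__version__)            # expected >= 1.2 (1.4.0 in the steps below)
print("torchvision:", torchvision.__version__)  # expected >= 0.4 (0.5.0 in the steps below)
print("CUDA available:", torch.cuda.is_available())
print("CUDA version PyTorch was built with:", torch.version.cuda)  # should match the cudatoolkit you install
```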
4 | 5 | ### Requirements: 6 | - PyTorch >= 1.2 7 | - torchvision >= 0.4 8 | - cocoapi 9 | - yacs 10 | - matplotlib 11 | - GCC >= 4.9 12 | - OpenCV 13 | 14 | 15 | ### Step-by-step installation 16 | 17 | ```bash 18 | 19 | conda create --name scene_graph_benchmark 20 | conda activate scene_graph_benchmark 21 | 22 | # this installs the right pip and dependencies for the fresh python 23 | conda install ipython 24 | conda install scipy 25 | conda install h5py 26 | 27 | # scene_graph_benchmark and coco api dependencies 28 | pip install ninja yacs cython matplotlib tqdm opencv-python overrides 29 | 30 | # follow PyTorch installation in https://pytorch.org/get-started/locally/ 31 | # we give the instructions for CUDA 10.1 32 | conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.1 -c pytorch 33 | 34 | export INSTALL_DIR=$PWD 35 | 36 | # install pycocotools 37 | cd $INSTALL_DIR 38 | git clone https://github.com/cocodataset/cocoapi.git 39 | cd cocoapi/PythonAPI 40 | python setup.py build_ext install 41 | 42 | # install apex 43 | cd $INSTALL_DIR 44 | git clone https://github.com/NVIDIA/apex.git 45 | cd apex 46 | python setup.py install --cuda_ext --cpp_ext 47 | 48 | # install PyTorch Detection 49 | cd $INSTALL_DIR 50 | git clone https://github.com/CYVincent/Scene-Graph-Transformer-CogTree.git 51 | cd Scene-Graph-Transformer-CogTree/sg-benchmark 52 | 53 | # the following will install the lib with 54 | # symbolic links, so that you can modify 55 | # the files if you want and won't need to 56 | # re-build it 57 | python setup.py build develop 58 | 59 | cd $INSTALL_DIR 60 | cd Scene-Graph-Transformer-CogTree/nbdt 61 | python setup.py develop 62 | 63 | unset INSTALL_DIR 64 | 65 | ``` -------------------------------------------------------------------------------- /nbdt/nbdt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/nbdt/nbdt/__init__.py -------------------------------------------------------------------------------- /nbdt/nbdt/bin/nbdt: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Run evaluation on a single image, using an NBDT""" 3 | 4 | from nbdt.model import SoftNBDT, HardNBDT 5 | from pytorchcv.models.wrn_cifar import wrn28_10_cifar10 6 | from torchvision import transforms 7 | from nbdt.utils import DATASET_TO_CLASSES, load_image_from_path, maybe_install_wordnet 8 | import sys 9 | 10 | maybe_install_wordnet() 11 | 12 | assert len(sys.argv) > 1, "Need to pass image URL or image path as argument" 13 | 14 | # load pretrained NBDT 15 | model = wrn28_10_cifar10() 16 | model = HardNBDT( 17 | pretrained=True, 18 | dataset='CIFAR10', 19 | arch='wrn28_10_cifar10', 20 | model=model) 21 | 22 | # load + transform image 23 | im = load_image_from_path(sys.argv[1]) 24 | transform = transforms.Compose([ 25 | transforms.Resize(32), 26 | transforms.CenterCrop(32), 27 | transforms.ToTensor(), 28 | transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), 29 | ]) 30 | x = transform(im)[None] 31 | 32 | # run inference 33 | outputs, decisions = model.forward_with_decisions(x) # use `model(x)` to obtain just logits 34 | _, predicted = outputs.max(1) 35 | cls = DATASET_TO_CLASSES['CIFAR10'][predicted[0]] 36 | print('Prediction:', cls, '// Decisions:', ', '.join([ 37 | '{} ({:.2f}%)'.format(info['name'], info['prob'] * 100) for info in decisions[0] 38 | ][1:])) # [1:] to 
skip the root 39 | -------------------------------------------------------------------------------- /nbdt/nbdt/bin/nbdt-hierarchy: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from nbdt.hierarchy import generate_hierarchy, test_hierarchy, generate_hierarchy_vis 4 | from nbdt.graph import get_parser 5 | from nbdt.utils import maybe_install_wordnet 6 | 7 | 8 | def main(): 9 | maybe_install_wordnet() 10 | 11 | parser = get_parser() 12 | args = parser.parse_args() 13 | 14 | generate_hierarchy(**vars(args)) 15 | test_hierarchy(args) 16 | generate_hierarchy_vis(args) 17 | 18 | 19 | if __name__ == '__main__': 20 | main() 21 | -------------------------------------------------------------------------------- /nbdt/nbdt/bin/nbdt-wnids: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Generates wnids using class names for torchvision dataset""" 3 | 4 | import argparse 5 | import torchvision 6 | from nbdt import data 7 | from nltk.corpus import wordnet as wn 8 | from nbdt.graph import synset_to_wnid, write_wnids, FakeSynset 9 | from pathlib import Path 10 | from nbdt.utils import Colors, generate_kwargs, maybe_install_wordnet 11 | import os 12 | 13 | maybe_install_wordnet() 14 | 15 | datasets = ('CIFAR10', 'CIFAR100', 'VG150', 'VG150_head', 'VG150_head1', 'VG150_head2', 'VG150_head3') + data.imagenet.names + data.custom.names 16 | 17 | 18 | parser = argparse.ArgumentParser() 19 | parser.add_argument('--dataset', choices=datasets, default='CIFAR10') 20 | parser.add_argument('--root', default='./nbdt/wnids') 21 | data.custom.add_arguments(parser) 22 | args = parser.parse_args() 23 | 24 | # dataset = getattr(data, args.dataset) 25 | # dataset_kwargs = generate_kwargs(args, dataset, 26 | # name=f'Dataset {args.dataset}', 27 | # keys=data.custom.keys, 28 | # globals=globals()) 29 | # dataset = dataset(**dataset_kwargs, root='./data', download=True) 30 | 31 | # classes = ['above', 'across', 'against', 'along', 'and', 'at', 'attached to', 'behind', 'belonging to', 'between', 'carrying', 'covered in', 'covering', 'eating', 'flying in', 'for', 'from', 'growing on', 'hanging from', 'has', 'holding', 'in', 'in front of', 'laying on', 'looking at', 'lying on', 'made of', 'mounted on', 'near', 'of', 'on', 'on back of', 'over', 'painted on', 'parked on', 'part of', 'playing', 'riding', 'says', 'sitting on', 'standing on', 'to', 'under', 'using', 'walking in', 'walking on', 'watching', 'wearing', 'wears', 'with'] 32 | # classes = ['on','has','wearing','of','in','near','with','holding','behind','above','sitting on','wears','riding','under','in front of'] 33 | classes = ['behind', 'has', 'holding', 'in', 'near','of','on', 'wearing', 'with'] 34 | 35 | 36 | path = Path(os.path.join(args.root, f'{args.dataset}.txt')) 37 | os.makedirs(path.parent, exist_ok=True) 38 | failures = [] 39 | 40 | hardcoded_mapping = { 41 | 'aquarium_fish': wn.synsets('fingerling', pos=wn.NOUN)[0], 42 | 'beaver': wn.synsets('beaver', pos=wn.NOUN)[-1], 43 | 'castle': wn.synsets('castle', pos=wn.NOUN)[1], 44 | 'flatfish': wn.synsets('flatfish', pos=wn.NOUN)[1], 45 | 'leopard': wn.synsets('leopard', pos=wn.NOUN)[1], 46 | 'lobster': wn.synsets('lobster', pos=wn.NOUN)[1], 47 | 'maple_tree': wn.synsets('maple', pos=wn.NOUN)[1], 48 | 'otter': wn.synsets('otter', pos=wn.NOUN)[1], 49 | 'plate': wn.synsets('plate', pos=wn.NOUN)[3], 50 | 'raccoon': wn.synsets('raccoon', pos=wn.NOUN)[1], 51 | 'ray': wn.synsets('ray', 
pos=wn.NOUN)[-1], 52 | 'seal': wn.synsets('seal', pos=wn.NOUN)[-1], 53 | 'shrew': wn.synsets('shrew', pos=wn.NOUN)[1], 54 | 'skunk': wn.synsets('skunk', pos=wn.NOUN)[1], 55 | 'tiger': wn.synsets('tiger', pos=wn.NOUN)[1], 56 | 'table': wn.synsets('table', pos=wn.NOUN)[1], 57 | 'turtle': wn.synsets('turtle', pos=wn.NOUN)[1], 58 | 'whale': wn.synsets('whale', pos=wn.NOUN)[1], 59 | } 60 | 61 | wnids = [] 62 | for i, cls in enumerate(classes): 63 | if cls in hardcoded_mapping: 64 | synset = hardcoded_mapping[cls] 65 | else: 66 | synsets = wn.synsets(cls, pos=wn.NOUN) 67 | if not synsets: 68 | Colors.red(f'==> Failed to find synset for {cls}. Using fake synset...') 69 | failures.append(cls) 70 | synsets = [FakeSynset.create_from_offset(i)] 71 | synset = synsets[0] 72 | wnid = synset_to_wnid(synset) 73 | print(f'{wnid}: ({cls}) {synset.definition()}') 74 | wnids.append(wnid) 75 | 76 | write_wnids(wnids, path) 77 | 78 | if failures: 79 | Colors.red(f'==> Warning: failed to find wordnet IDs for {failures}') 80 | Colors.green(f'==> Wrote to {path}') 81 | -------------------------------------------------------------------------------- /nbdt/nbdt/data/__init__.py: -------------------------------------------------------------------------------- 1 | from .custom import * 2 | from .imagenet import * 3 | from torchvision.datasets import * 4 | -------------------------------------------------------------------------------- /nbdt/nbdt/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .resnet import * 2 | from .wideresnet import * 3 | from pytorchcv.models.efficientnet import * 4 | from torchvision.models import * 5 | 6 | 7 | def get_model_choices(): 8 | from types import ModuleType 9 | 10 | for key, value in globals().items(): 11 | if not key.startswith('__') and not isinstance(value, ModuleType) and callable(value): 12 | yield key 13 | -------------------------------------------------------------------------------- /nbdt/nbdt/models/utils.py: -------------------------------------------------------------------------------- 1 | from torch.hub import load_state_dict_from_url 2 | from pathlib import Path 3 | import torch 4 | 5 | 6 | def get_pretrained_model( 7 | arch, dataset, model, model_urls, 8 | pretrained=False, 9 | progress=True, 10 | root='.cache/torch/checkpoints'): 11 | if pretrained: 12 | state_dict = load_state_dict_from_key( 13 | [(arch, dataset)], model_urls, pretrained, progress, root, 14 | device=get_model_device(model)) 15 | state_dict = coerce_state_dict(state_dict, model.state_dict()) 16 | model.load_state_dict(state_dict) 17 | return model 18 | 19 | def coerce_state_dict(state_dict, reference_state_dict): 20 | if 'net' in state_dict: 21 | state_dict = state_dict['net'] 22 | has_reference_module = list(reference_state_dict)[0].startswith('module.') 23 | has_module = list(state_dict)[0].startswith('module.') 24 | if not has_reference_module and has_module: 25 | state_dict = { 26 | key.replace('module.', '', 1): value 27 | for key, value in state_dict.items() 28 | } 29 | elif has_reference_module and not has_module: 30 | state_dict = { 31 | 'module.' 
+ key: value 32 | for key, value in state_dict.items() 33 | } 34 | return state_dict 35 | 36 | def get_model_device(model): 37 | return next(model.parameters()).device 38 | 39 | def load_state_dict_from_key( 40 | keys, model_urls, 41 | pretrained=False, 42 | progress=True, 43 | root='.cache/torch/checkpoints', 44 | device='cpu'): 45 | valid_keys = [key for key in keys if key in model_urls] 46 | if not valid_keys: 47 | raise UserWarning( 48 | f'None of the keys {keys} correspond to a pretrained model.' 49 | ) 50 | return load_state_dict_from_url( 51 | model_urls[valid_keys[-1]], 52 | Path.home() / root, 53 | progress=progress, 54 | check_hash=False, 55 | map_location=torch.device(device)) 56 | -------------------------------------------------------------------------------- /nbdt/nbdt/models/wideresnet.py: -------------------------------------------------------------------------------- 1 | from pytorchcv.models.wrn_cifar import wrn28_10_cifar10, wrn28_10_cifar100, get_wrn_cifar 2 | from nbdt.models.utils import get_pretrained_model 3 | import torch.nn as nn 4 | 5 | 6 | __all__ = ('wrn28_10', 'wrn28_10_cifar10', 'wrn28_10_cifar100') 7 | 8 | 9 | model_urls = { 10 | ('wrn28_10', 'TinyImagenet200'): 'https://github.com/alvinwan/neural-backed-decision-trees/releases/download/0.0.1/ckpt-TinyImagenet200-wrn28_10.pth' 11 | } 12 | 13 | 14 | def _wrn(arch, model, pretrained=False, progress=True, dataset='CIFAR10'): 15 | model = get_pretrained_model(arch, dataset, model, model_urls, 16 | pretrained=pretrained, progress=progress) 17 | return model 18 | 19 | 20 | def wrn28_10(pretrained=False, progress=True, dataset='CIFAR10', **kwargs): 21 | """Replace `final_pool` (8x8 average pooling) with a global average pooling. 22 | 23 | If this gets crappy accuracy for TinyImagenet200, it's probably because the 24 | final pooled feature map is 16x16 instead of 8x8. So needs another stride 2 25 | stage, technically. 
26 | """ 27 | model = get_wrn_cifar(blocks=28, width_factor=10, model_name="wrn28_10", **kwargs) 28 | model.features.final_pool = nn.AdaptiveAvgPool2d((1, 1)) 29 | model = _wrn('wrn28_10', model, pretrained=pretrained, progress=progress, dataset=dataset) 30 | return model 31 | -------------------------------------------------------------------------------- /nbdt/nbdt/wnids/VG150.txt: -------------------------------------------------------------------------------- 1 | n06392660 2 | f00000001 3 | f00000002 4 | f00000003 5 | f00000004 6 | n14629561 7 | f00000006 8 | n05559256 9 | f00000008 10 | f00000009 11 | f00000010 12 | f00000011 13 | n09257949 14 | n00838367 15 | f00000014 16 | f00000015 17 | f00000016 18 | f00000017 19 | f00000018 20 | n13888783 21 | n00810598 22 | n13649791 23 | f00000022 24 | f00000023 25 | f00000024 26 | f00000025 27 | f00000026 28 | f00000027 29 | f00000028 30 | f00000029 31 | f00000030 32 | f00000031 33 | n15258694 34 | f00000033 35 | f00000034 36 | f00000035 37 | n00101191 38 | n00450335 39 | n14485526 40 | f00000039 41 | f00000040 42 | f00000041 43 | f00000042 44 | n00418903 45 | f00000044 46 | f00000045 47 | n00879759 48 | n13475538 49 | n14562683 50 | f00000049 -------------------------------------------------------------------------------- /nbdt/requirements.txt: -------------------------------------------------------------------------------- 1 | pytorchcv 2 | torch 3 | torchvision 4 | nltk 5 | scikit-learn 6 | networkx 7 | pytest 8 | -------------------------------------------------------------------------------- /nbdt/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | VERSION = '0.0.4' 4 | 5 | with open("requirements.txt", "r") as f: 6 | install_requires = f.readlines() 7 | 8 | 9 | # with open("README.md", "r") as fh: 10 | # long_description = fh.read() 11 | 12 | 13 | setuptools.setup( 14 | name="nbdt", 15 | version=VERSION, 16 | author="Alvin Wan", # TODO: proper way to list all paper authors? 17 | author_email="hi@alvinwan.com", 18 | description="", 19 | long_description_content_type="text/markdown", 20 | url="https://github.com/alvinwan/neural-backed-decision-trees", 21 | packages=setuptools.find_packages(), 22 | install_requires=install_requires, 23 | download_url='https://github.com/alvinwan/neural-backed-decision-trees/archive/%s.zip' % VERSION, 24 | scripts=['nbdt/bin/nbdt-hierarchy', 'nbdt/bin/nbdt-wnids', 'nbdt/bin/nbdt'], 25 | classifiers=[ 26 | "Intended Audience :: Developers", 27 | "Programming Language :: Python :: 3", 28 | "License :: OSI Approved :: MIT License", 29 | "Operating System :: OS Independent", 30 | ], 31 | python_requires='>=3.5', 32 | include_package_data=True 33 | ) 34 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .defaults import _C as cfg 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.type().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.type().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data(); 30 | auto order = order_t.data(); 31 | auto x1 = x1_t.data(); 32 | auto y1 = y1_t.data(); 33 | auto x2 = x2_t.data(); 34 | auto y2 = y2_t.data(); 35 | auto areas = areas_t.data(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/cuda/deform_pool_cuda.cu: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | #include 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 
2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ? 2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.type().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.type().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include "nms.h" 3 | #include "ROIAlign.h" 4 | #include "ROIPool.h" 5 | #include "SigmoidFocalLoss.h" 6 | #include "deform_conv.h" 7 | #include "deform_pool.h" 8 | 9 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 10 | m.def("nms", &nms, "non-maximum suppression"); 11 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 12 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 13 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 14 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 15 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 16 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 17 | // dcn-v2 18 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 19 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 20 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 21 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 22 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 23 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 24 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 25 | } -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/README.md: -------------------------------------------------------------------------------- 1 | # Setting Up Datasets 2 | This file describes how to perform training on other datasets. 3 | 4 | Only Pascal VOC dataset can be loaded from its original format and be outputted to Pascal style results currently. 5 | 6 | We expect the annotations from other datasets be converted to COCO json format, and 7 | the output will be in COCO-style. (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm) 8 | 9 | ## Creating Symlinks for PASCAL VOC 10 | 11 | We assume that your symlinked `datasets/voc/VOC` directory has the following structure: 12 | 13 | ``` 14 | VOC 15 | |_ JPEGImages 16 | | |_ .jpg 17 | | |_ ... 18 | | |_ .jpg 19 | |_ Annotations 20 | | |_ pascal_train.json (optional) 21 | | |_ pascal_val.json (optional) 22 | | |_ pascal_test.json (optional) 23 | | |_ .xml 24 | | |_ ... 25 | | |_ .xml 26 | |_ VOCdevkit 27 | ``` 28 | 29 | Create symlinks for `voc/VOC`: 30 | 31 | ``` 32 | cd ~/github/maskrcnn-benchmark 33 | mkdir -p datasets/voc/VOC 34 | ln -s /path/to/VOC /datasets/voc/VOC 35 | ``` 36 | Example configuration files for PASCAL VOC could be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/). 37 | 38 | ### PASCAL VOC Annotations in COCO Format 39 | To output COCO-style evaluation result, PASCAL VOC annotations in COCO json format is required and could be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip) 40 | via http://cocodataset.org/#external. 41 | 42 | ## Creating Symlinks for Cityscapes: 43 | 44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure: 45 | 46 | ``` 47 | cityscapes 48 | |_ images 49 | | |_ .jpg 50 | | |_ ... 51 | | |_ .jpg 52 | |_ annotations 53 | | |_ instanceonly_gtFile_train.json 54 | | |_ ... 55 | |_ raw 56 | |_ gtFine 57 | |_ ... 
58 | |_ README.md 59 | ``` 60 | 61 | Create symlinks for `cityscapes`: 62 | 63 | ``` 64 | cd ~/github/maskrcnn-benchmark 65 | mkdir -p datasets/cityscapes 66 | ln -s /path/to/cityscapes datasets/cityscapes 67 | ``` 68 | 69 | ### Steps to convert Cityscapes Annotations to COCO Format 70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required) 71 | 2. Extract it to /path/to/gtFine_trainvaltest 72 | ``` 73 | cityscapes 74 | |_ gtFine_trainvaltest.zip 75 | |_ gtFine_trainvaltest 76 | |_ gtFine 77 | ``` 78 | 3. Run the commands below to convert the annotations 79 | 80 | ``` 81 | cd ~/github 82 | git clone https://github.com/mcordts/cityscapesScripts.git 83 | cd cityscapesScripts 84 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation 85 | python setup.py install 86 | cd ~/github/maskrcnn-benchmark 87 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/cityscapes --outdir /path/to/cityscapes/annotations 88 | ``` 89 | 90 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/). 91 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_data_loader, get_dataset_statistics 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.structures.image_list import to_image_list 3 | 4 | 5 | class BatchCollator(object): 6 | """ 7 | From a list of samples from the dataset, 8 | returns the batched images and targets. 9 | This should be passed to the DataLoader 10 | """ 11 | 12 | def __init__(self, size_divisible=0): 13 | self.size_divisible = size_divisible 14 | 15 | def __call__(self, batch): 16 | transposed_batch = list(zip(*batch)) 17 | images = to_image_list(transposed_batch[0], self.size_divisible) 18 | targets = transposed_batch[1] 19 | img_ids = transposed_batch[2] 20 | return images, targets, img_ids 21 | 22 | 23 | class BBoxAugCollator(object): 24 | """ 25 | From a list of samples from the dataset, 26 | returns the images and targets. 27 | Images should be converted to batched images in `im_detect_bbox_aug` 28 | """ 29 | 30 | def __call__(self, batch): 31 | return list(zip(*batch)) 32 | 33 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/Vrd.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | from maskrcnn_benchmark.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class VrdDataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, transforms=None 42 | ): 43 | super(VrdDataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | 68 | def __getitem__(self, idx): 69 | img, anno = super(VrdDataset, self).__getitem__(idx) 70 | 71 | # filter crowd annotations 72 | # TODO might be better to add an extra field 73 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 74 | 75 | boxes = [obj["bbox"] for obj in anno] 76 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 77 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 78 | 79 | classes = [obj["category_id"] for obj in anno] 80 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 81 | classes = torch.tensor(classes) 82 | target.add_field("labels", classes) 83 | 84 | if anno and "segmentation" in anno[0]: 85 | masks = [obj["segmentation"] for obj in anno] 86 | masks = SegmentationMask(masks, img.size, mode='poly') 87 | target.add_field("masks", masks) 88 | 89 | if anno and "keypoints" in anno[0]: 90 | keypoints = [obj["keypoints"] for obj in anno] 91 | keypoints = PersonKeypoints(keypoints, img.size) 92 | target.add_field("keypoints", keypoints) 93 | 94 | target = target.clip_to_image(remove_empty=True) 95 | 96 | if self._transforms is not None: 97 | img, target = self._transforms(img, target) 98 | 99 | return img, target, idx 100 | 101 | def get_img_info(self, index): 102 | img_id = 
self.id_to_img_map[index] 103 | img_data = self.coco.imgs[img_id] 104 | return img_data 105 | 106 | if __name__ == "__main__": 107 | image_dir = '/home/data1/yjgroup/cy/SGBenchmark/sg_benchmark/datasets/vrd/images/train_images' 108 | ann_dir = '/home/data1/yjgroup/cy/SGBenchmark/sg_benchmark/datasets/vrd/detections_train.json' 109 | dataset = VrdDataset(ann_dir,image_dir, True) 110 | temp = dataset[0] 111 | print(temp) 112 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | from .visual_genome import VGDataset 6 | 7 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "VGDataset"] 8 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torchvision 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | from maskrcnn_benchmark.structures.segmentation_mask import SegmentationMask 7 | from maskrcnn_benchmark.structures.keypoint import PersonKeypoints 8 | 9 | 10 | min_keypoints_per_image = 10 11 | 12 | 13 | def _count_visible_keypoints(anno): 14 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 15 | 16 | 17 | def _has_only_empty_bbox(anno): 18 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 19 | 20 | 21 | def has_valid_annotation(anno): 22 | # if it's empty, there is no annotation 23 | if len(anno) == 0: 24 | return False 25 | # if all boxes have close to zero area, there is no annotation 26 | if _has_only_empty_bbox(anno): 27 | return False 28 | # keypoints task have a slight different critera for considering 29 | # if an annotation is valid 30 | if "keypoints" not in anno[0]: 31 | return True 32 | # for keypoint detection tasks, only consider valid images those 33 | # containing at least min_keypoints_per_image 34 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 35 | return True 36 | return False 37 | 38 | 39 | class COCODataset(torchvision.datasets.coco.CocoDetection): 40 | def __init__( 41 | self, ann_file, root, remove_images_without_annotations, transforms=None 42 | ): 43 | super(COCODataset, self).__init__(root, ann_file) 44 | # sort indices for reproducible results 45 | self.ids = sorted(self.ids) 46 | 47 | # filter images without detection annotations 48 | if remove_images_without_annotations: 49 | ids = [] 50 | for img_id in self.ids: 51 | ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=None) 52 | anno = self.coco.loadAnns(ann_ids) 53 | if has_valid_annotation(anno): 54 | ids.append(img_id) 55 | self.ids = ids 56 | 57 | self.categories = {cat['id']: cat['name'] for cat in self.coco.cats.values()} 58 | 59 | self.json_category_id_to_contiguous_id = { 60 | v: i + 1 for i, v in enumerate(self.coco.getCatIds()) 61 | } 62 | self.contiguous_category_id_to_json_id = { 63 | v: k for k, v in self.json_category_id_to_contiguous_id.items() 64 | } 65 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 66 | self._transforms = transforms 67 | 68 | def 
__getitem__(self, idx): 69 | img, anno = super(COCODataset, self).__getitem__(idx) 70 | 71 | # filter crowd annotations 72 | # TODO might be better to add an extra field 73 | anno = [obj for obj in anno if obj["iscrowd"] == 0] 74 | 75 | boxes = [obj["bbox"] for obj in anno] 76 | boxes = torch.as_tensor(boxes).reshape(-1, 4) # guard against no boxes 77 | target = BoxList(boxes, img.size, mode="xywh").convert("xyxy") 78 | 79 | classes = [obj["category_id"] for obj in anno] 80 | classes = [self.json_category_id_to_contiguous_id[c] for c in classes] 81 | classes = torch.tensor(classes) 82 | target.add_field("labels", classes) 83 | 84 | if anno and "segmentation" in anno[0]: 85 | masks = [obj["segmentation"] for obj in anno] 86 | masks = SegmentationMask(masks, img.size, mode='poly') 87 | target.add_field("masks", masks) 88 | 89 | if anno and "keypoints" in anno[0]: 90 | keypoints = [obj["keypoints"] for obj in anno] 91 | keypoints = PersonKeypoints(keypoints, img.size) 92 | target.add_field("keypoints", keypoints) 93 | 94 | target = target.clip_to_image(remove_empty=True) 95 | 96 | if self._transforms is not None: 97 | img, target = self._transforms(img, target) 98 | 99 | return img, target, idx 100 | 101 | def get_img_info(self, index): 102 | img_id = self.id_to_img_map[index] 103 | img_data = self.coco.imgs[img_id] 104 | return img_data 105 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | from .vg import vg_evaluation 6 | 7 | 8 | def evaluate(cfg, dataset, predictions, output_folder, logger, **kwargs): 9 | """evaluate dataset using different methods based on dataset type. 10 | Args: 11 | dataset: Dataset object 12 | predictions(list[BoxList]): each item in the list represents the 13 | prediction results for one image. 14 | output_folder: output folder, to save evaluation files or results. 15 | **kwargs: other args. 
16 | Returns: 17 | evaluation result 18 | """ 19 | args = dict( 20 | cfg=cfg, dataset=dataset, predictions=predictions, output_folder=output_folder, logger=logger, **kwargs 21 | ) 22 | if isinstance(dataset, datasets.COCODataset): 23 | return coco_evaluation(**args) 24 | elif isinstance(dataset, datasets.PascalVOCDataset): 25 | return voc_evaluation(**args) 26 | elif isinstance(dataset, datasets.VGDataset): 27 | return vg_evaluation(**args) 28 | else: 29 | dataset_name = dataset.__class__.__name__ 30 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 31 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | box_only, 11 | iou_types, 12 | expected_results, 13 | expected_results_sigma_tol, 14 | ): 15 | return do_coco_evaluation( 16 | dataset=dataset, 17 | predictions=predictions, 18 | box_only=box_only, 19 | output_folder=output_folder, 20 | logger=logger, 21 | iou_types=iou_types, 22 | expected_results=expected_results, 23 | expected_results_sigma_tol=expected_results_sigma_tol, 24 | ) 25 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/vg/__init__.py: -------------------------------------------------------------------------------- 1 | from .vg_eval import do_vg_evaluation 2 | 3 | 4 | def vg_evaluation( 5 | cfg, 6 | dataset, 7 | predictions, 8 | output_folder, 9 | logger, 10 | iou_types, 11 | **_ 12 | ): 13 | return do_vg_evaluation( 14 | cfg=cfg, 15 | dataset=dataset, 16 | predictions=predictions, 17 | output_folder=output_folder, 18 | logger=logger, 19 | iou_types=iou_types, 20 | ) 21 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/vg/zeroshot_triplet.pytorch: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/vg/zeroshot_triplet.pytorch -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(cfg, dataset, predictions, output_folder, logger, box_only, **_): 7 | if box_only: 8 | logger.warning("voc evaluation doesn't support box_only, ignored.") 9 | logger.info("performing voc evaluation, ignored iou_types.") 10 | return do_voc_evaluation( 11 | dataset=dataset, 12 | predictions=predictions, 13 | output_folder=output_folder, 14 | logger=logger, 15 | ) 16 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/datasets/voc.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import torch 4 | import torch.utils.data 5 | from PIL import Image 6 | import sys 7 | 8 | if sys.version_info[0] == 2: 9 | import xml.etree.cElementTree as ET 10 | else: 11 | import xml.etree.ElementTree as ET 12 | 13 | 14 | from maskrcnn_benchmark.structures.bounding_box import BoxList 15 | 16 | 17 | class PascalVOCDataset(torch.utils.data.Dataset): 18 | 19 | CLASSES = ( 20 | "__background__ ", 21 | "aeroplane", 22 | "bicycle", 23 | "bird", 24 | "boat", 25 | "bottle", 26 | "bus", 27 | "car", 28 | "cat", 29 | "chair", 30 | "cow", 31 | "diningtable", 32 | "dog", 33 | "horse", 34 | "motorbike", 35 | "person", 36 | "pottedplant", 37 | "sheep", 38 | "sofa", 39 | "train", 40 | "tvmonitor", 41 | ) 42 | 43 | def __init__(self, data_dir, split, use_difficult=False, transforms=None): 44 | self.root = data_dir 45 | self.image_set = split 46 | self.keep_difficult = use_difficult 47 | self.transforms = transforms 48 | 49 | self._annopath = os.path.join(self.root, "Annotations", "%s.xml") 50 | self._imgpath = os.path.join(self.root, "JPEGImages", "%s.jpg") 51 | self._imgsetpath = os.path.join(self.root, "ImageSets", "Main", "%s.txt") 52 | 53 | with open(self._imgsetpath % self.image_set) as f: 54 | self.ids = f.readlines() 55 | self.ids = [x.strip("\n") for x in self.ids] 56 | self.id_to_img_map = {k: v for k, v in enumerate(self.ids)} 57 | 58 | cls = PascalVOCDataset.CLASSES 59 | self.class_to_ind = dict(zip(cls, range(len(cls)))) 60 | self.categories = dict(zip(range(len(cls)), cls)) 61 | 62 | def __getitem__(self, index): 63 | img_id = self.ids[index] 64 | img = Image.open(self._imgpath % img_id).convert("RGB") 65 | 66 | target = self.get_groundtruth(index) 67 | target = target.clip_to_image(remove_empty=True) 68 | 69 | if self.transforms is not None: 70 | img, target = self.transforms(img, target) 71 | 72 | return img, target, index 73 | 74 | def __len__(self): 75 | return len(self.ids) 76 | 77 | def get_groundtruth(self, index): 78 | img_id = self.ids[index] 79 | anno = ET.parse(self._annopath % img_id).getroot() 80 | anno = self._preprocess_annotation(anno) 81 | 82 | height, width = anno["im_info"] 83 | target = BoxList(anno["boxes"], (width, height), mode="xyxy") 84 | target.add_field("labels", anno["labels"]) 85 | target.add_field("difficult", anno["difficult"]) 86 | return target 87 | 88 | def _preprocess_annotation(self, target): 89 | boxes = [] 90 | gt_classes = [] 91 | 
difficult_boxes = [] 92 | TO_REMOVE = 1 93 | 94 | for obj in target.iter("object"): 95 | difficult = int(obj.find("difficult").text) == 1 96 | if not self.keep_difficult and difficult: 97 | continue 98 | name = obj.find("name").text.lower().strip() 99 | bb = obj.find("bndbox") 100 | # Make pixel indexes 0-based 101 | # Refer to "https://github.com/rbgirshick/py-faster-rcnn/blob/master/lib/datasets/pascal_voc.py#L208-L211" 102 | box = [ 103 | bb.find("xmin").text, 104 | bb.find("ymin").text, 105 | bb.find("xmax").text, 106 | bb.find("ymax").text, 107 | ] 108 | bndbox = tuple( 109 | map(lambda x: x - TO_REMOVE, list(map(int, box))) 110 | ) 111 | 112 | boxes.append(bndbox) 113 | gt_classes.append(self.class_to_ind[name]) 114 | difficult_boxes.append(difficult) 115 | 116 | size = target.find("size") 117 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 118 | 119 | res = { 120 | "boxes": torch.tensor(boxes, dtype=torch.float32), 121 | "labels": torch.tensor(gt_classes), 122 | "difficult": torch.tensor(difficult_boxes), 123 | "im_info": im_info, 124 | } 125 | return res 126 | 127 | def get_img_info(self, index): 128 | img_id = self.ids[index] 129 | anno = ET.parse(self._annopath % img_id).getroot() 130 | size = anno.find("size") 131 | im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) 132 | return {"height": im_info[0], "width": im_info[1]} 133 | 134 | def map_class_id_to_class_name(self, class_id): 135 | return PascalVOCDataset.CLASSES[class_id] 136 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | 10 | class DistributedSampler(Sampler): 11 | """Sampler that restricts data loading to a subset of the dataset. 12 | It is especially useful in conjunction with 13 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 14 | process can pass a DistributedSampler instance as a DataLoader sampler, 15 | and load a subset of the original dataset that is exclusive to it. 16 | .. note:: 17 | Dataset is assumed to be of constant size. 18 | Arguments: 19 | dataset: Dataset used for sampling. 20 | num_replicas (optional): Number of processes participating in 21 | distributed training. 22 | rank (optional): Rank of the current process within num_replicas. 
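        A minimal usage sketch (assuming distributed training is already
        initialized; names such as ``dataset`` and ``num_epochs`` are placeholders)::

            sampler = DistributedSampler(dataset)
            loader = torch.utils.data.DataLoader(dataset, sampler=sampler, batch_size=2)
            for epoch in range(num_epochs):
                sampler.set_epoch(epoch)  # reshuffle deterministically per epoch
                for batch in loader:
                    ...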
23 | """ 24 | 25 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): 26 | if num_replicas is None: 27 | if not dist.is_available(): 28 | raise RuntimeError("Requires distributed package to be available") 29 | num_replicas = dist.get_world_size() 30 | if rank is None: 31 | if not dist.is_available(): 32 | raise RuntimeError("Requires distributed package to be available") 33 | rank = dist.get_rank() 34 | self.dataset = dataset 35 | self.num_replicas = num_replicas 36 | self.rank = rank 37 | self.epoch = 0 38 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 39 | self.total_size = self.num_samples * self.num_replicas 40 | self.shuffle = shuffle 41 | 42 | def __iter__(self): 43 | if self.shuffle: 44 | # deterministically shuffle based on epoch 45 | g = torch.Generator() 46 | g.manual_seed(self.epoch) 47 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 48 | else: 49 | indices = torch.arange(len(self.dataset)).tolist() 50 | 51 | # add extra samples to make it evenly divisible 52 | indices += indices[: (self.total_size - len(indices))] 53 | assert len(indices) == self.total_size 54 | 55 | # subsample 56 | offset = self.num_samples * self.rank 57 | indices = indices[offset : offset + self.num_samples] 58 | assert len(indices) == self.num_samples 59 | 60 | return iter(indices) 61 | 62 | def __len__(self): 63 | return self.num_samples 64 | 65 | def set_epoch(self, epoch): 66 | self.epoch = epoch 67 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 8 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 9 | flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 10 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 11 | brightness = cfg.INPUT.BRIGHTNESS 12 | contrast = cfg.INPUT.CONTRAST 13 | saturation = cfg.INPUT.SATURATION 14 | hue = cfg.INPUT.HUE 15 | else: 16 | min_size = cfg.INPUT.MIN_SIZE_TEST 17 | max_size = cfg.INPUT.MAX_SIZE_TEST 18 | flip_horizontal_prob = 0.0 19 | flip_vertical_prob = 0.0 20 | brightness = 0.0 21 | contrast = 0.0 22 | saturation = 0.0 23 | hue = 0.0 24 | 25 | to_bgr255 = cfg.INPUT.TO_BGR255 26 | normalize_transform = T.Normalize( 27 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 28 | ) 29 | color_jitter = T.ColorJitter( 30 | brightness=brightness, 31 | contrast=contrast, 32 | saturation=saturation, 33 | hue=hue, 34 | ) 35 | 36 | transform = T.Compose( 37 | [ 38 | color_jitter, 39 | T.Resize(min_size, max_size), 40 | T.RandomHorizontalFlip(flip_horizontal_prob), 41 | T.RandomVerticalFlip(flip_vertical_prob), 42 | T.ToTensor(), 43 | normalize_transform, 44 | ] 45 | ) 46 | return transform 47 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
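# Editorial worked example for Resize.get_size() defined below, assuming min_size=800
# and max_size=1333:
#   - a 1024 x 768 (w x h) image keeps size = 800, since 1024 / 768 * 800 ~ 1067 <= 1333;
#     here w > h, so the output is (oh, ow) = (800, int(800 * 1024 / 768)) = (800, 1066).
#   - a 2000 x 500 image hits the cap, since 2000 / 500 * 800 = 3200 > 1333; the size is
#     recomputed as round(1333 * 500 / 2000) = 333, giving (oh, ow) = (333, 1332).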
2 | import random 3 | 4 | import torch 5 | import torchvision 6 | from torchvision.transforms import functional as F 7 | 8 | 9 | class Compose(object): 10 | def __init__(self, transforms): 11 | self.transforms = transforms 12 | 13 | def __call__(self, image, target): 14 | for t in self.transforms: 15 | image, target = t(image, target) 16 | return image, target 17 | 18 | def __repr__(self): 19 | format_string = self.__class__.__name__ + "(" 20 | for t in self.transforms: 21 | format_string += "\n" 22 | format_string += " {0}".format(t) 23 | format_string += "\n)" 24 | return format_string 25 | 26 | 27 | class Resize(object): 28 | def __init__(self, min_size, max_size): 29 | if not isinstance(min_size, (list, tuple)): 30 | min_size = (min_size,) 31 | self.min_size = min_size 32 | self.max_size = max_size 33 | 34 | # modified from torchvision to add support for max size 35 | def get_size(self, image_size): 36 | w, h = image_size 37 | size = random.choice(self.min_size) 38 | max_size = self.max_size 39 | if max_size is not None: 40 | min_original_size = float(min((w, h))) 41 | max_original_size = float(max((w, h))) 42 | if max_original_size / min_original_size * size > max_size: 43 | size = int(round(max_size * min_original_size / max_original_size)) 44 | 45 | if (w <= h and w == size) or (h <= w and h == size): 46 | return (h, w) 47 | 48 | if w < h: 49 | ow = size 50 | oh = int(size * h / w) 51 | else: 52 | oh = size 53 | ow = int(size * w / h) 54 | 55 | return (oh, ow) 56 | 57 | def __call__(self, image, target=None): 58 | size = self.get_size(image.size) 59 | image = F.resize(image, size) 60 | if target is None: 61 | return image 62 | target = target.resize(image.size) 63 | return image, target 64 | 65 | 66 | class RandomHorizontalFlip(object): 67 | def __init__(self, prob=0.5): 68 | self.prob = prob 69 | 70 | def __call__(self, image, target): 71 | if random.random() < self.prob: 72 | image = F.hflip(image) 73 | target = target.transpose(0) 74 | return image, target 75 | 76 | class RandomVerticalFlip(object): 77 | def __init__(self, prob=0.5): 78 | self.prob = prob 79 | 80 | def __call__(self, image, target): 81 | if random.random() < self.prob: 82 | image = F.vflip(image) 83 | target = target.transpose(1) 84 | return image, target 85 | 86 | class ColorJitter(object): 87 | def __init__(self, 88 | brightness=None, 89 | contrast=None, 90 | saturation=None, 91 | hue=None, 92 | ): 93 | self.color_jitter = torchvision.transforms.ColorJitter( 94 | brightness=brightness, 95 | contrast=contrast, 96 | saturation=saturation, 97 | hue=hue,) 98 | 99 | def __call__(self, image, target): 100 | image = self.color_jitter(image) 101 | return image, target 102 | 103 | 104 | class ToTensor(object): 105 | def __call__(self, image, target): 106 | return F.to_tensor(image), target 107 | 108 | 109 | class Normalize(object): 110 | def __init__(self, mean, std, to_bgr255=True): 111 | self.mean = mean 112 | self.std = std 113 | self.to_bgr255 = to_bgr255 114 | 115 | def __call__(self, image, target=None): 116 | if self.to_bgr255: 117 | image = image[[2, 1, 0]] * 255 118 | image = F.normalize(image, mean=self.mean, std=self.std) 119 | if target is None: 120 | return image 121 | return image, target 122 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/engine/bbox_aug.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision.transforms as TT 3 | 4 | from maskrcnn_benchmark.config import cfg 5 | from maskrcnn_benchmark.data import transforms as T 6 | from maskrcnn_benchmark.structures.image_list import to_image_list 7 | from maskrcnn_benchmark.structures.bounding_box import BoxList 8 | from maskrcnn_benchmark.modeling.roi_heads.box_head.inference import make_roi_box_post_processor 9 | 10 | 11 | def im_detect_bbox_aug(model, images, device): 12 | # Collect detections computed under different transformations 13 | boxlists_ts = [] 14 | for _ in range(len(images)): 15 | boxlists_ts.append([]) 16 | 17 | def add_preds_t(boxlists_t): 18 | for i, boxlist_t in enumerate(boxlists_t): 19 | if len(boxlists_ts[i]) == 0: 20 | # The first one is identity transform, no need to resize the boxlist 21 | boxlists_ts[i].append(boxlist_t) 22 | else: 23 | # Resize the boxlist as the first one 24 | boxlists_ts[i].append(boxlist_t.resize(boxlists_ts[i][0].size)) 25 | 26 | # Compute detections for the original image (identity transform) 27 | boxlists_i = im_detect_bbox( 28 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 29 | ) 30 | add_preds_t(boxlists_i) 31 | 32 | # Perform detection on the horizontally flipped image 33 | if cfg.TEST.BBOX_AUG.H_FLIP: 34 | boxlists_hf = im_detect_bbox_hflip( 35 | model, images, cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MAX_SIZE_TEST, device 36 | ) 37 | add_preds_t(boxlists_hf) 38 | 39 | # Compute detections at different scales 40 | for scale in cfg.TEST.BBOX_AUG.SCALES: 41 | max_size = cfg.TEST.BBOX_AUG.MAX_SIZE 42 | boxlists_scl = im_detect_bbox_scale( 43 | model, images, scale, max_size, device 44 | ) 45 | add_preds_t(boxlists_scl) 46 | 47 | if cfg.TEST.BBOX_AUG.SCALE_H_FLIP: 48 | boxlists_scl_hf = im_detect_bbox_scale( 49 | model, images, scale, max_size, device, hflip=True 50 | ) 51 | add_preds_t(boxlists_scl_hf) 52 | 53 | # Merge boxlists detected by different bbox aug params 54 | boxlists = [] 55 | for i, boxlist_ts in enumerate(boxlists_ts): 56 | bbox = torch.cat([boxlist_t.bbox for boxlist_t in boxlist_ts]) 57 | scores = torch.cat([boxlist_t.get_field('scores') for boxlist_t in boxlist_ts]) 58 | boxlist = BoxList(bbox, boxlist_ts[0].size, boxlist_ts[0].mode) 59 | boxlist.add_field('scores', scores) 60 | boxlists.append(boxlist) 61 | 62 | # Apply NMS and limit the final detections 63 | results = [] 64 | post_processor = make_roi_box_post_processor(cfg) 65 | for boxlist in boxlists: 66 | results.append(post_processor.filter_results(boxlist, cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES)) 67 | 68 | return results 69 | 70 | 71 | def im_detect_bbox(model, images, target_scale, target_max_size, device): 72 | """ 73 | Performs bbox detection on the original image. 74 | """ 75 | transform = TT.Compose([ 76 | T.Resize(target_scale, target_max_size), 77 | TT.ToTensor(), 78 | T.Normalize( 79 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 80 | ) 81 | ]) 82 | images = [transform(image) for image in images] 83 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 84 | return model(images.to(device)) 85 | 86 | 87 | def im_detect_bbox_hflip(model, images, target_scale, target_max_size, device): 88 | """ 89 | Performs bbox detection on the horizontally flipped image. 
90 | Function signature is the same as for im_detect_bbox. 91 | """ 92 | transform = TT.Compose([ 93 | T.Resize(target_scale, target_max_size), 94 | TT.RandomHorizontalFlip(1.0), 95 | TT.ToTensor(), 96 | T.Normalize( 97 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=cfg.INPUT.TO_BGR255 98 | ) 99 | ]) 100 | images = [transform(image) for image in images] 101 | images = to_image_list(images, cfg.DATALOADER.SIZE_DIVISIBILITY) 102 | boxlists = model(images.to(device)) 103 | 104 | # Invert the detections computed on the flipped image 105 | boxlists_inv = [boxlist.transpose(0) for boxlist in boxlists] 106 | return boxlists_inv 107 | 108 | 109 | def im_detect_bbox_scale(model, images, target_scale, target_max_size, device, hflip=False): 110 | """ 111 | Computes bbox detections at the given scale. 112 | Returns predictions in the scaled image space. 113 | """ 114 | if hflip: 115 | boxlists_scl = im_detect_bbox_hflip(model, images, target_scale, target_max_size, device) 116 | else: 117 | boxlists_scl = im_detect_bbox(model, images, target_scale, target_max_size, device) 118 | return boxlists_scl 119 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/engine/trainer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import datetime 3 | import logging 4 | import time 5 | 6 | import torch 7 | import torch.distributed as dist 8 | 9 | from maskrcnn_benchmark.utils.comm import get_world_size 10 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 11 | 12 | from apex import amp 13 | 14 | def reduce_loss_dict(loss_dict): 15 | """ 16 | Reduce the loss dictionary from all processes so that process with rank 17 | 0 has the averaged results. Returns a dict with the same fields as 18 | loss_dict, after reduction. 19 | """ 20 | world_size = get_world_size() 21 | if world_size < 2: 22 | return loss_dict 23 | with torch.no_grad(): 24 | loss_names = [] 25 | all_losses = [] 26 | for k in sorted(loss_dict.keys()): 27 | loss_names.append(k) 28 | all_losses.append(loss_dict[k]) 29 | all_losses = torch.stack(all_losses, dim=0) 30 | dist.reduce(all_losses, dst=0) 31 | if dist.get_rank() == 0: 32 | # only main process gets accumulated, so only divide by 33 | # world_size in this case 34 | all_losses /= world_size 35 | reduced_losses = {k: v for k, v in zip(loss_names, all_losses)} 36 | return reduced_losses 37 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/S2G-RETRIEVAL.md: -------------------------------------------------------------------------------- 1 | # Sentence-to-Graph Retrieval (S2G) 2 | 3 | Apologies in advance: this part of the code is less polished and less organized than the rest. 4 | 5 | ## Preprocessing 6 | 7 | Run ```maskrcnn_benchmark/image_retrieval/preprocessing.py``` to process the annotations and checkpoints: ```detected_path``` should point to the checkpoint you want to use; ```vg_data, vg_dict, vg_info``` should already have been downloaded if you followed DATASET.md; ```cap_graph``` contains the ground-truth captions and the generated sentence graphs (you can download it from [here](https://onedrive.live.com/embed?cid=22376FFAD72C4B64&resid=22376FFAD72C4B64%21779999&authkey=AGW0Wxjb1JSDFnc)). We use [SceneGraphParser](https://github.com/vacancy/SceneGraphParser) to generate these sentence graphs.
8 | 9 | You also need to set the ```cap_graph``` and ```vg_dict``` paths in ```maskrcnn_benchmark/image_retrieval/dataloader.py``` manually. 10 | 11 | ## Training and Evaluation 12 | 13 | Run ```tools/image_retrieval_main.py``` for both training and evaluation. 14 | 15 | To load the scene graphs generated by a given SGG checkpoint, you need to manually set ```sg_train_path``` and ```sg_test_path``` in ```tools/image_retrieval_main.py```, which means you need to evaluate your model on **both the training and testing sets** to obtain the crude scene graphs. Our evaluation code automatically saves the crude SGGs into ```checkpoints/MODEL_NAME/inference/VG_stanford_filtered_wth_attribute_test/``` or ```checkpoints/MODEL_NAME/inference/VG_stanford_filtered_wth_attribute_train/```; the preprocessing code ```maskrcnn_benchmark/image_retrieval/preprocessing.py``` then turns these into the inputs expected at ```sg_train_path``` and ```sg_test_path```. 16 | 17 | ## Results 18 | 19 | Sentence-to-Graph Retrieval (S2G) results are given in the paper [Unbiased Scene Graph Generation from Biased Training](https://arxiv.org/abs/2002.11949): 20 | 21 | ![alt text](../../demo/TDE_Results3.png "from 'Unbiased Scene Graph Generation from Biased Training'") 22 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/dataloader.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 2 | 3 | import argparse 4 | import os 5 | import time 6 | import datetime 7 | import json 8 | import random 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.nn.utils import clip_grad_norm_ 14 | import torch.utils.data as data 15 | from torch.nn.utils import weight_norm 16 | from tqdm import tqdm 17 | 18 | from maskrcnn_benchmark.config import cfg 19 | from maskrcnn_benchmark.data import make_data_loader 20 | from maskrcnn_benchmark.solver import make_lr_scheduler 21 | from maskrcnn_benchmark.solver import make_optimizer 22 | from maskrcnn_benchmark.engine.trainer import reduce_loss_dict 23 | from maskrcnn_benchmark.engine.inference import inference 24 | from maskrcnn_benchmark.modeling.detector import build_detection_model 25 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 26 | from maskrcnn_benchmark.utils.checkpoint import clip_grad_norm 27 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 28 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank, all_gather 29 | from maskrcnn_benchmark.utils.imports import import_file 30 | from maskrcnn_benchmark.utils.logger import setup_logger, debug_print 31 | from maskrcnn_benchmark.utils.miscellaneous import mkdir, save_config 32 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 33 | 34 | class SGEncoding(data.Dataset): 35 | """ SGEncoding dataset """ 36 | def __init__(self, train_ids, test_ids, sg_data, test_on=False, val_on=False, num_test=5000, num_val=5000): 37 | super(SGEncoding, self).__init__() 38 | cap_graph =
json.load(open('/data1/vg_capgraphs_anno.json')) 39 | vg_dict = json.load(open('/home/kaihua/projects/maskrcnn-benchmark/datasets/vg/VG-SGG-dicts-with-attri.json')) 40 | self.img_txt_sg = sg_data 41 | self.key_list = list(self.img_txt_sg.keys()) 42 | self.key_list.sort() 43 | self.train_ids = train_ids 44 | self.test_ids = test_ids 45 | if test_on: 46 | self.key_list = self.test_ids[:num_test] 47 | elif val_on: 48 | self.key_list = self.test_ids[num_test:num_test+num_val] 49 | else: 50 | self.key_list = self.test_ids[num_test+num_val:] + self.train_ids 51 | 52 | # generate union predicate vocabulary 53 | self.sgg_rel_vocab = list(set(cap_graph['idx_to_meta_predicate'].values())) 54 | self.txt_rel_vocab = list(set(cap_graph['cap_predicate'].keys())) 55 | 56 | # generate union object vocabulary 57 | self.sgg_obj_vocab = list(set(vg_dict['idx_to_label'].values())) 58 | self.txt_obj_vocab = list(set(cap_graph['cap_category'].keys())) 59 | 60 | # vocabulary length 61 | self.num_sgg_rel = len(self.sgg_rel_vocab) 62 | self.num_txt_rel = len(self.txt_rel_vocab) 63 | self.num_sgg_obj = len(self.sgg_obj_vocab) 64 | self.num_txt_obj = len(self.txt_obj_vocab) 65 | 66 | def _to_tensor(self, inp_dict): 67 | return {'entities': torch.LongTensor(inp_dict['entities']), 68 | 'relations': torch.LongTensor(inp_dict['relations'])} 69 | 70 | def _generate_tensor_by_idx(self, idx): 71 | img = self._to_tensor(self.img_txt_sg[self.key_list[idx]]['img']) 72 | img_graph = torch.FloatTensor(self.img_txt_sg[self.key_list[idx]]['image_graph']) 73 | txt = self._to_tensor(self.img_txt_sg[self.key_list[idx]]['txt']) 74 | txt_graph = torch.FloatTensor(self.img_txt_sg[self.key_list[idx]]['text_graph']) 75 | img['graph'] = img_graph 76 | txt['graph'] = txt_graph 77 | return img, txt 78 | 79 | def __getitem__(self, item): 80 | fg_img, fg_txt = self._generate_tensor_by_idx(item) 81 | # generate negative sample 82 | bg_idx = item 83 | while(bg_idx == item): 84 | bg_idx = int(random.random() * len(self.key_list)) 85 | bg_img, bg_txt = self._generate_tensor_by_idx(bg_idx) 86 | return fg_img, fg_txt, bg_img, bg_txt 87 | 88 | def __len__(self): 89 | return len(self.key_list) 90 | 91 | class SimpleCollator(object): 92 | def __call__(self, batch): 93 | return list(zip(*batch)) 94 | 95 | def get_loader(cfg, train_ids, test_ids, sg_data, test_on=False, val_on=False, num_test=5000, num_val=1000): 96 | """ Returns a data loader for the desired split """ 97 | split = SGEncoding(train_ids, test_ids, sg_data=sg_data, test_on=test_on, val_on=val_on, num_test=num_test, num_val=num_val) 98 | 99 | loader = torch.utils.data.DataLoader(split, 100 | batch_size=cfg.SOLVER.IMS_PER_BATCH, 101 | shuffle=not (test_on or val_on), # only shuffle the data in training 102 | pin_memory=True, 103 | num_workers=4, 104 | collate_fn=SimpleCollator(), 105 | ) 106 | return loader 107 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/image_retrieval/evaluation.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 2 | 3 | import argparse 4 | import os 5 | import time 6 | import datetime 7 | import json 8 | import random 9 | 10 | import torch 11 | import torch.nn as nn 12 | import torch.nn.functional as F 13 | from torch.nn.utils import clip_grad_norm_ 14 | import torch.utils.data as data 15 | from torch.nn.utils import weight_norm 16 | from tqdm import tqdm 17 | 18 | from 
maskrcnn_benchmark.config import cfg 19 | from maskrcnn_benchmark.data import make_data_loader 20 | from maskrcnn_benchmark.solver import make_lr_scheduler 21 | from maskrcnn_benchmark.solver import make_optimizer 22 | from maskrcnn_benchmark.engine.trainer import reduce_loss_dict 23 | from maskrcnn_benchmark.engine.inference import inference 24 | from maskrcnn_benchmark.modeling.detector import build_detection_model 25 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 26 | from maskrcnn_benchmark.utils.checkpoint import clip_grad_norm 27 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 28 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank, all_gather 29 | from maskrcnn_benchmark.utils.imports import import_file 30 | from maskrcnn_benchmark.utils.logger import setup_logger, debug_print 31 | from maskrcnn_benchmark.utils.miscellaneous import mkdir, save_config 32 | from maskrcnn_benchmark.utils.metric_logger import MetricLogger 33 | 34 | def evaluator(logger, input_lists): 35 | cat_data = [] 36 | for item in input_lists: 37 | cat_data.append(item[0]) 38 | # shape [num_image, 2, hidden_dim] 39 | cat_data = torch.cat(cat_data, dim=0).squeeze(2) 40 | 41 | similarity = cat_data[:, 0, :] @ (cat_data[:, 1, :].transpose(0,1)) # img to txt 42 | similarity = similarity.transpose(0,1) # txt to img 43 | 44 | pred_rank = (similarity > similarity.diag().view(-1, 1)).sum(-1) 45 | 46 | num_sample = pred_rank.shape[0] 47 | thres = [1, 5, 10, 20, 50, 100] 48 | for k in thres: 49 | logger.info('Recall @ %d: %.4f; ' % (k, float((pred_rank= 0)).float() * term2 * (1 - alpha) 53 | 54 | 55 | class SigmoidFocalLoss(nn.Module): 56 | def __init__(self, gamma, alpha): 57 | super(SigmoidFocalLoss, self).__init__() 58 | self.gamma = gamma 59 | self.alpha = alpha 60 | 61 | def forward(self, logits, targets): 62 | device = logits.device 63 | if logits.is_cuda: 64 | loss_func = sigmoid_focal_loss_cuda 65 | else: 66 | loss_func = sigmoid_focal_loss_cpu 67 | 68 | loss = loss_func(logits, targets, self.gamma, self.alpha) 69 | return loss.sum() 70 | 71 | def __repr__(self): 72 | tmpstr = self.__class__.__name__ + "(" 73 | tmpstr += "gamma=" + str(self.gamma) 74 | tmpstr += ", alpha=" + str(self.alpha) 75 | tmpstr += ")" 76 | return tmpstr 77 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. 
/ 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | 18 | 19 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .backbone import build_backbone 3 | from . import fbnet 4 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | 4 | from torch import nn 5 | 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.make_layers import conv_with_kaiming_uniform 8 | from . import fpn as fpn_module 9 | from . import resnet 10 | from . import vgg 11 | 12 | 13 | @registry.BACKBONES.register("VGG-16") 14 | def build_vgg_fpn_backbone(cfg): 15 | body = vgg.VGG16(cfg) 16 | out_channels = cfg.MODEL.VGG.VGG16_OUT_CHANNELS 17 | model = nn.Sequential(OrderedDict([("body", body)])) 18 | model.out_channels = out_channels 19 | return model 20 | 21 | 22 | @registry.BACKBONES.register("R-50-C4") 23 | @registry.BACKBONES.register("R-50-C5") 24 | @registry.BACKBONES.register("R-101-C4") 25 | @registry.BACKBONES.register("R-101-C5") 26 | def build_resnet_backbone(cfg): 27 | body = resnet.ResNet(cfg) 28 | model = nn.Sequential(OrderedDict([("body", body)])) 29 | model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 30 | return model 31 | 32 | 33 | @registry.BACKBONES.register("R-50-FPN") 34 | @registry.BACKBONES.register("R-101-FPN") 35 | @registry.BACKBONES.register("R-152-FPN") 36 | def build_resnet_fpn_backbone(cfg): 37 | body = resnet.ResNet(cfg) 38 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 39 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 40 | fpn = fpn_module.FPN( 41 | in_channels_list=[ 42 | in_channels_stage2, 43 | in_channels_stage2 * 2, 44 | in_channels_stage2 * 4, 45 | in_channels_stage2 * 8, 46 | ], 47 | out_channels=out_channels, 48 | conv_block=conv_with_kaiming_uniform( 49 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 50 | ), 51 | top_blocks=fpn_module.LastLevelMaxPool(), 52 | ) 53 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 54 | model.out_channels = out_channels 55 | return model 56 | 57 | 58 | @registry.BACKBONES.register("R-50-FPN-RETINANET") 59 | @registry.BACKBONES.register("R-101-FPN-RETINANET") 60 | def build_resnet_fpn_p3p7_backbone(cfg): 61 | body = resnet.ResNet(cfg) 62 | in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS 63 | out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS 64 | 
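    # P6/P7 input channels: C5 (the raw stage-5 backbone output, in_channels_stage2 * 8
    # channels) when RETINANET.USE_C5 is set, otherwise P5 (already reduced to
    # out_channels by the FPN laterals); LastLevelP6P7 picks C5 vs P5 accordingly via use_P5.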
in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \ 65 | else out_channels 66 | fpn = fpn_module.FPN( 67 | in_channels_list=[ 68 | 0, 69 | in_channels_stage2 * 2, 70 | in_channels_stage2 * 4, 71 | in_channels_stage2 * 8, 72 | ], 73 | out_channels=out_channels, 74 | conv_block=conv_with_kaiming_uniform( 75 | cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU 76 | ), 77 | top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels), 78 | ) 79 | model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)])) 80 | model.out_channels = out_channels 81 | return model 82 | 83 | 84 | def build_backbone(cfg): 85 | assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \ 86 | "cfg.MODEL.BACKBONE.CONV_BODY: {} are not registered in registry".format( 87 | cfg.MODEL.BACKBONE.CONV_BODY 88 | ) 89 | return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg) 90 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | 7 | class FPN(nn.Module): 8 | """ 9 | Module that adds FPN on top of a list of feature maps. 10 | The feature maps are currently supposed to be in increasing depth 11 | order, and must be consecutive 12 | """ 13 | 14 | def __init__( 15 | self, in_channels_list, out_channels, conv_block, top_blocks=None 16 | ): 17 | """ 18 | Arguments: 19 | in_channels_list (list[int]): number of channels for each feature map that 20 | will be fed 21 | out_channels (int): number of channels of the FPN representation 22 | top_blocks (nn.Module or None): if provided, an extra operation will 23 | be performed on the output of the last (smallest resolution) 24 | FPN output, and the result will extend the result list 25 | """ 26 | super(FPN, self).__init__() 27 | self.inner_blocks = [] 28 | self.layer_blocks = [] 29 | for idx, in_channels in enumerate(in_channels_list, 1): 30 | inner_block = "fpn_inner{}".format(idx) 31 | layer_block = "fpn_layer{}".format(idx) 32 | 33 | if in_channels == 0: 34 | continue 35 | inner_block_module = conv_block(in_channels, out_channels, 1) 36 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 37 | self.add_module(inner_block, inner_block_module) 38 | self.add_module(layer_block, layer_block_module) 39 | self.inner_blocks.append(inner_block) 40 | self.layer_blocks.append(layer_block) 41 | self.top_blocks = top_blocks 42 | 43 | def forward(self, x): 44 | """ 45 | Arguments: 46 | x (list[Tensor]): feature maps for each feature level. 47 | Returns: 48 | results (tuple[Tensor]): feature maps after FPN layers. 49 | They are ordered from highest resolution first. 
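            As a rough worked example (assuming a ResNet-FPN with out_channels=256,
            an 800x800 input and LastLevelMaxPool as the top block): the returned
            maps have strides 4, 8, 16 and 32, i.e. spatial sizes 200, 100, 50 and
            25, plus a 13x13 map appended by the max-pool top block.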
50 | """ 51 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 52 | results = [] 53 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 54 | for feature, inner_block, layer_block in zip( 55 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 56 | ): 57 | if not inner_block: 58 | continue 59 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 60 | inner_lateral = getattr(self, inner_block)(feature) 61 | # TODO use size instead of scale to make it robust to different sizes 62 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 63 | # mode='bilinear', align_corners=False) 64 | last_inner = inner_lateral + inner_top_down 65 | results.insert(0, getattr(self, layer_block)(last_inner)) 66 | 67 | if isinstance(self.top_blocks, LastLevelP6P7): 68 | last_results = self.top_blocks(x[-1], results[-1]) 69 | results.extend(last_results) 70 | elif isinstance(self.top_blocks, LastLevelMaxPool): 71 | last_results = self.top_blocks(results[-1]) 72 | results.extend(last_results) 73 | 74 | return tuple(results) 75 | 76 | 77 | class LastLevelMaxPool(nn.Module): 78 | def forward(self, x): 79 | return [F.max_pool2d(x, 1, 2, 0)] 80 | 81 | 82 | class LastLevelP6P7(nn.Module): 83 | """ 84 | This module is used in RetinaNet to generate extra layers, P6 and P7. 85 | """ 86 | def __init__(self, in_channels, out_channels): 87 | super(LastLevelP6P7, self).__init__() 88 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 89 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 90 | for module in [self.p6, self.p7]: 91 | nn.init.kaiming_uniform_(module.weight, a=1) 92 | nn.init.constant_(module.bias, 0) 93 | self.use_P5 = in_channels == out_channels 94 | 95 | def forward(self, c5, p5): 96 | x = p5 if self.use_P5 else c5 97 | p6 = self.p6(x) 98 | p7 = self.p7(F.relu(p6)) 99 | return [p6, p7] 100 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/backbone/vgg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Variant of the resnet module that takes cfg as an argument. 4 | Example usage. Strings may be specified in the config file. 5 | model = ResNet( 6 | "StemWithFixedBatchNorm", 7 | "BottleneckWithFixedBatchNorm", 8 | "ResNet50StagesTo4", 9 | ) 10 | OR: 11 | model = ResNet( 12 | "StemWithGN", 13 | "BottleneckWithGN", 14 | "ResNet50StagesTo4", 15 | ) 16 | Custom implementations may be written in user code and hooked in via the 17 | `register_*` functions. 
18 | """ 19 | from collections import namedtuple 20 | 21 | import torch 22 | import torch.nn.functional as F 23 | from torch import nn 24 | 25 | import torchvision.models as models 26 | from maskrcnn_benchmark.layers import FrozenBatchNorm2d 27 | from maskrcnn_benchmark.layers import Conv2d 28 | from maskrcnn_benchmark.layers import DFConv2d 29 | from maskrcnn_benchmark.modeling.make_layers import group_norm 30 | from maskrcnn_benchmark.utils.registry import Registry 31 | 32 | 33 | class VGG16(nn.Module): 34 | def __init__(self, cfg): 35 | super(VGG16, self).__init__() 36 | vgg = models.vgg16(pretrained=True) 37 | self.conv_body = nn.Sequential(*list(vgg.features._modules.values())[:-1]) 38 | 39 | def forward(self, x): 40 | output = [] 41 | output.append(self.conv_body(x)) 42 | return output 43 | 44 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentace of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative example. 
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.uint8 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.uint8 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 
60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .detectors import build_detection_model 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/detector/detectors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .generalized_rcnn import GeneralizedRCNN 3 | 4 | 5 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN} 6 | 7 | 8 | def build_detection_model(cfg): 9 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 10 | return meta_arch(cfg) 11 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/detector/generalized_rcnn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Implements the Generalized R-CNN framework 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | 9 | from maskrcnn_benchmark.structures.image_list import to_image_list 10 | 11 | from ..backbone import build_backbone 12 | from ..rpn.rpn import build_rpn 13 | from ..roi_heads.roi_heads import build_roi_heads 14 | 15 | 16 | class GeneralizedRCNN(nn.Module): 17 | """ 18 | Main class for Generalized R-CNN. Currently supports boxes and masks. 19 | It consists of three main parts: 20 | - backbone 21 | - rpn 22 | - heads: takes the features + the proposals from the RPN and computes 23 | detections / masks from it. 
24 | """ 25 | 26 | def __init__(self, cfg): 27 | super(GeneralizedRCNN, self).__init__() 28 | self.cfg = cfg.clone() 29 | self.backbone = build_backbone(cfg) 30 | self.rpn = build_rpn(cfg, self.backbone.out_channels) 31 | self.roi_heads = build_roi_heads(cfg, self.backbone.out_channels) 32 | 33 | def forward(self, images, targets=None, logger=None): 34 | """ 35 | Arguments: 36 | images (list[Tensor] or ImageList): images to be processed 37 | targets (list[BoxList]): ground-truth boxes present in the image (optional) 38 | 39 | Returns: 40 | result (list[BoxList] or dict[Tensor]): the output from the model. 41 | During training, it returns a dict[Tensor] which contains the losses. 42 | During testing, it returns list[BoxList] contains additional fields 43 | like `scores`, `labels` and `mask` (for Mask R-CNN models). 44 | 45 | """ 46 | if self.training and targets is None: 47 | raise ValueError("In training mode, targets should be passed") 48 | images = to_image_list(images) 49 | features = self.backbone(images.tensors) 50 | proposals, proposal_losses = self.rpn(images, features, targets) 51 | if self.roi_heads: 52 | x, result, detector_losses = self.roi_heads(features, proposals, targets, logger) 53 | else: 54 | # RPN-only models don't have roi_heads 55 | x = features 56 | result = proposals 57 | detector_losses = {} 58 | 59 | if self.training: 60 | losses = {} 61 | losses.update(detector_losses) 62 | if not self.cfg.MODEL.RELATION_ON: 63 | # During the relationship training stage, the rpn_head should be fixed, and no loss. 64 | losses.update(proposal_losses) 65 | return losses 66 | 67 | return result 68 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | from maskrcnn_benchmark.config import cfg 10 | from maskrcnn_benchmark.layers import Conv2d 11 | 12 | 13 | def get_group_gn(dim, dim_per_gp, num_groups): 14 | """get number of groups used by GroupNorm, based on number of channels.""" 15 | assert dim_per_gp == -1 or num_groups == -1, \ 16 | "GroupNorm: can only specify G or C/G." 
17 | 18 | if dim_per_gp > 0: 19 | assert dim % dim_per_gp == 0, \ 20 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 21 | group_gn = dim // dim_per_gp 22 | else: 23 | assert dim % num_groups == 0, \ 24 | "dim: {}, num_groups: {}".format(dim, num_groups) 25 | group_gn = num_groups 26 | 27 | return group_gn 28 | 29 | 30 | def group_norm(out_channels, affine=True, divisor=1): 31 | out_channels = out_channels // divisor 32 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 33 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 34 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 35 | return torch.nn.GroupNorm( 36 | get_group_gn(out_channels, dim_per_gp, num_groups), 37 | out_channels, 38 | eps, 39 | affine 40 | ) 41 | 42 | 43 | def make_conv3x3( 44 | in_channels, 45 | out_channels, 46 | dilation=1, 47 | stride=1, 48 | use_gn=False, 49 | use_relu=False, 50 | kaiming_init=True 51 | ): 52 | conv = Conv2d( 53 | in_channels, 54 | out_channels, 55 | kernel_size=3, 56 | stride=stride, 57 | padding=dilation, 58 | dilation=dilation, 59 | bias=False if use_gn else True 60 | ) 61 | if kaiming_init: 62 | nn.init.kaiming_normal_( 63 | conv.weight, mode="fan_out", nonlinearity="relu" 64 | ) 65 | else: 66 | torch.nn.init.normal_(conv.weight, std=0.01) 67 | if not use_gn: 68 | nn.init.constant_(conv.bias, 0) 69 | module = [conv,] 70 | if use_gn: 71 | module.append(group_norm(out_channels)) 72 | if use_relu: 73 | module.append(nn.ReLU(inplace=True)) 74 | if len(module) > 1: 75 | return nn.Sequential(*module) 76 | return conv 77 | 78 | 79 | def make_fc(dim_in, hidden_dim, use_gn=False): 80 | ''' 81 | Caffe2 implementation uses XavierFill, which in fact 82 | corresponds to kaiming_uniform_ in PyTorch 83 | ''' 84 | if use_gn: 85 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 86 | nn.init.kaiming_uniform_(fc.weight, a=1) 87 | return nn.Sequential(fc, group_norm(hidden_dim)) 88 | fc = nn.Linear(dim_in, hidden_dim) 89 | nn.init.kaiming_uniform_(fc.weight, a=1) 90 | nn.init.constant_(fc.bias, 0) 91 | return fc 92 | 93 | 94 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 95 | def make_conv( 96 | in_channels, out_channels, kernel_size, stride=1, dilation=1 97 | ): 98 | conv = Conv2d( 99 | in_channels, 100 | out_channels, 101 | kernel_size=kernel_size, 102 | stride=stride, 103 | padding=dilation * (kernel_size - 1) // 2, 104 | dilation=dilation, 105 | bias=False if use_gn else True 106 | ) 107 | # Caffe2 implementation uses XavierFill, which in fact 108 | # corresponds to kaiming_uniform_ in PyTorch 109 | nn.init.kaiming_uniform_(conv.weight, a=1) 110 | if not use_gn: 111 | nn.init.constant_(conv.bias, 0) 112 | module = [conv,] 113 | if use_gn: 114 | module.append(group_norm(out_channels)) 115 | if use_relu: 116 | module.append(nn.ReLU(inplace=True)) 117 | if len(module) > 1: 118 | return nn.Sequential(*module) 119 | return conv 120 | 121 | return make_conv 122 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
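# Editorial usage sketch (mirroring how backbone.py uses these registries): builders
# register themselves under a string key and are later looked up via the config, e.g.
#
#   @registry.BACKBONES.register("R-50-FPN")
#   def build_resnet_fpn_backbone(cfg):
#       ...
#
#   build_fn = registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY]
#   model = build_fn(cfg)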
2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | RPN_HEADS = Registry() 7 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 8 | ROI_BOX_PREDICTOR = Registry() 9 | ROI_ATTRIBUTE_FEATURE_EXTRACTORS = Registry() 10 | ROI_ATTRIBUTE_PREDICTOR = Registry() 11 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry() 12 | ROI_KEYPOINT_PREDICTOR = Registry() 13 | ROI_MASK_FEATURE_EXTRACTORS = Registry() 14 | ROI_MASK_PREDICTOR = Registry() 15 | ROI_RELATION_FEATURE_EXTRACTORS = Registry() 16 | ROI_RELATION_PREDICTOR = Registry() 17 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/attribute_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_attribute_feature_extractors import make_roi_attribute_feature_extractor 6 | from .roi_attribute_predictors import make_roi_attribute_predictor 7 | from .loss import make_roi_attribute_loss_evaluator 8 | 9 | def add_attribute_logits(proposals, attri_logits): 10 | slice_idxs = [0] 11 | for i in range(len(proposals)): 12 | slice_idxs.append(len(proposals[i])+slice_idxs[-1]) 13 | proposals[i].add_field("attribute_logits", attri_logits[slice_idxs[i]:slice_idxs[i+1]]) 14 | return proposals 15 | 16 | class ROIAttributeHead(torch.nn.Module): 17 | """ 18 | Generic ATTRIBUTE Head class. 
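    Extracts per-proposal attribute features and, except in predcls mode, predicts
    attribute logits that are attached to each proposal as an "attribute_logits" field.
    When MODEL.RELATION_ON is set the head is kept fixed and contributes no loss;
    otherwise the loss evaluator is applied during training.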
19 | """ 20 | 21 | def __init__(self, cfg, in_channels): 22 | super(ROIAttributeHead, self).__init__() 23 | self.cfg = cfg.clone() 24 | self.feature_extractor = make_roi_attribute_feature_extractor(cfg, in_channels, half_out=self.cfg.MODEL.ATTRIBUTE_ON) 25 | self.predictor = make_roi_attribute_predictor(cfg, self.feature_extractor.out_channels) 26 | self.loss_evaluator = make_roi_attribute_loss_evaluator(cfg) 27 | 28 | def forward(self, features, proposals, targets=None): 29 | """ 30 | features: extracted from box_head 31 | """ 32 | # Attribute head is fixed when we train the relation head 33 | if self.cfg.MODEL.RELATION_ON: 34 | if self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_BOX and self.cfg.MODEL.ROI_RELATION_HEAD.USE_GT_OBJECT_LABEL: 35 | # mode==predcls 36 | # no need to predict attribute, get grond truth 37 | x = self.feature_extractor(features, proposals) 38 | return x, proposals, {} 39 | # mode==sgcls or sgdet 40 | else: 41 | x = self.feature_extractor(features, proposals) 42 | attri_logits = self.predictor(x) 43 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 44 | proposals = add_attribute_logits(proposals, attri_logits) 45 | return x, proposals, {} 46 | 47 | # Train/Test the attribute head 48 | x = self.feature_extractor(features, proposals) 49 | attri_logits = self.predictor(x) 50 | assert sum([len(p) for p in proposals]) == attri_logits.shape[0] 51 | proposals = add_attribute_logits(proposals, attri_logits) 52 | 53 | if not self.training: 54 | return x, proposals, {} 55 | 56 | # proposals need to contain the attributes fields 57 | loss_attribute = self.loss_evaluator(proposals, attri_logits) 58 | return x, proposals, dict(loss_attribute=loss_attribute) 59 | 60 | def build_roi_attribute_head(cfg, in_channels): 61 | """ 62 | Constructs a new attribute head. 63 | By default, uses ROIAttributeHead, but if it turns out not to be enough, just register a new class 64 | and make it a parameter in the config 65 | """ 66 | return ROIAttributeHead(cfg, in_channels) 67 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import smooth_l1_loss 6 | from maskrcnn_benchmark.modeling.box_coder import BoxCoder 7 | from maskrcnn_benchmark.modeling.matcher import Matcher 8 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 9 | from maskrcnn_benchmark.modeling.utils import cat 10 | 11 | 12 | class AttributeHeadLossComputation(object): 13 | """ 14 | Computes the loss for attribute head 15 | """ 16 | 17 | def __init__( 18 | self, 19 | loss_weight=0.1, 20 | num_attri_cat=201, 21 | max_num_attri=10, 22 | attribute_sampling=True, 23 | attribute_bgfg_ratio=5, 24 | use_binary_loss=True, 25 | pos_weight=1, 26 | ): 27 | self.loss_weight = loss_weight 28 | self.num_attri_cat = num_attri_cat 29 | self.max_num_attri = max_num_attri 30 | self.attribute_sampling = attribute_sampling 31 | self.attribute_bgfg_ratio = attribute_bgfg_ratio 32 | self.use_binary_loss = use_binary_loss 33 | self.pos_weight = pos_weight 34 | 35 | def __call__(self, proposals, attri_logits): 36 | """ 37 | Calculcate attribute loss 38 | """ 39 | attributes = cat([proposal.get_field("attributes") for proposal in proposals], dim=0) 40 | assert attributes.shape[0] == attri_logits.shape[0] 41 | 42 | # generate attribute targets 43 | attribute_targets, selected_idxs = self.generate_attributes_target(attributes) 44 | 45 | attri_logits = attri_logits[selected_idxs] 46 | attribute_targets = attribute_targets[selected_idxs] 47 | 48 | attribute_loss = self.attribute_loss(attri_logits, attribute_targets) 49 | 50 | return attribute_loss * self.loss_weight 51 | 52 | 53 | def generate_attributes_target(self, attributes): 54 | """ 55 | from list of attribute indexs to [1,0,1,0,0,1] form 56 | """ 57 | assert self.max_num_attri == attributes.shape[1] 58 | num_obj = attributes.shape[0] 59 | 60 | with_attri_idx = (attributes.sum(-1) > 0).long() 61 | without_attri_idx = 1 - with_attri_idx 62 | num_pos = int(with_attri_idx.sum()) 63 | num_neg = int(without_attri_idx.sum()) 64 | assert num_pos + num_neg == num_obj 65 | 66 | if self.attribute_sampling: 67 | num_neg = min(num_neg, num_pos * self.attribute_bgfg_ratio) if num_pos > 0 else 1 68 | 69 | attribute_targets = torch.zeros((num_obj, self.num_attri_cat), device=attributes.device).float() 70 | if not self.use_binary_loss: 71 | attribute_targets[without_attri_idx > 0, 0] = 1.0 72 | 73 | pos_idxs = torch.nonzero(with_attri_idx).squeeze(1) 74 | perm = torch.randperm(num_obj - num_pos, device=attributes.device)[:num_neg] 75 | neg_idxs = torch.nonzero(without_attri_idx).squeeze(1)[perm] 76 | selected_idxs = torch.cat((pos_idxs, neg_idxs), dim=0) 77 | assert selected_idxs.shape[0] == num_neg + num_pos 78 | 79 | for idx in torch.nonzero(with_attri_idx).squeeze(1).tolist(): 80 | for k in range(self.max_num_attri): 81 | att_id = int(attributes[idx, k]) 82 | if att_id == 0: 83 | break 84 | else: 85 | attribute_targets[idx, att_id] = 1 86 | 87 | return attribute_targets, selected_idxs 88 | 89 | def attribute_loss(self, logits, labels): 90 | if self.use_binary_loss: 91 | all_loss = F.binary_cross_entropy_with_logits(logits, labels, pos_weight=torch.FloatTensor([self.pos_weight] * self.num_attri_cat).cuda()) 92 | return all_loss 93 | else: 94 | # soft cross entropy 95 | # cross entropy attribute deteriorate the box head, even with 0.1 weight (although buttom-up top-down use cross entropy attribute) 96 | all_loss = -F.softmax(logits, dim=-1).log() 97 | all_loss = (all_loss * labels).sum(-1) / labels.sum(-1) 98 | return 
all_loss.mean() 99 | 100 | 101 | def make_roi_attribute_loss_evaluator(cfg): 102 | loss_evaluator = AttributeHeadLossComputation( 103 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_LOSS_WEIGHT, 104 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES, 105 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.MAX_ATTRIBUTES, 106 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_SAMPLE, 107 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.ATTRIBUTE_BGFG_RATIO, 108 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.USE_BINARY_LOSS, 109 | cfg.MODEL.ROI_ATTRIBUTE_HEAD.POS_WEIGHT, 110 | ) 111 | 112 | return loss_evaluator 113 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/attribute_head/roi_attribute_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark.modeling import registry 3 | from torch import nn 4 | 5 | 6 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_attributes = config.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES  # use the `config` argument; no module-level cfg is imported here 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.att_score = nn.Linear(num_inputs, num_attributes) 16 | 17 | nn.init.normal_(self.att_score.weight, mean=0, std=0.01) 18 | nn.init.constant_(self.att_score.bias, 0) 19 | 20 | def forward(self, x): 21 | x = self.avgpool(x) 22 | x = x.view(x.size(0), -1) 23 | att_logit = self.att_score(x) 24 | 25 | return att_logit 26 | 27 | 28 | @registry.ROI_ATTRIBUTE_PREDICTOR.register("FPNPredictor") 29 | class FPNPredictor(nn.Module): 30 | def __init__(self, cfg, in_channels): 31 | super(FPNPredictor, self).__init__() 32 | num_attributes = cfg.MODEL.ROI_ATTRIBUTE_HEAD.NUM_ATTRIBUTES 33 | representation_size = in_channels 34 | 35 | self.att_score = nn.Linear(representation_size, num_attributes) 36 | 37 | nn.init.normal_(self.att_score.weight, std=0.01) 38 | nn.init.constant_(self.att_score.bias, 0) 39 | 40 | def forward(self, x): 41 | if x.ndimension() == 4: 42 | assert list(x.shape[2:]) == [1, 1] 43 | x = x.view(x.size(0), -1) 44 | 45 | att_logit = self.att_score(x) 46 | 47 | return att_logit 48 | 49 | 50 | def make_roi_attribute_predictor(cfg, in_channels): 51 | func = registry.ROI_ATTRIBUTE_PREDICTOR[cfg.MODEL.ROI_ATTRIBUTE_HEAD.PREDICTOR] 52 | return func(cfg, in_channels) 53 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
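For the AttributeHeadLossComputation listed above, generate_attributes_target turns each row of zero-padded attribute indices into a multi-hot target. A small self-contained illustration with invented ids (num_attri_cat=201, max_num_attri=10):

import torch
attributes = torch.tensor([[3, 7, 0, 0, 0, 0, 0, 0, 0, 0],   # object annotated with attributes 3 and 7
                           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])  # object with no attributes
targets = torch.zeros(2, 201)
for i in range(attributes.shape[0]):
    for k in range(attributes.shape[1]):
        att_id = int(attributes[i, k])
        if att_id == 0:
            break                                             # indices are zero-padded, so stop at the first 0
        targets[i, att_id] = 1
assert targets[0, 3] == 1 and targets[0, 7] == 1 and targets.sum() == 2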
2 | import torch 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import smooth_l1_loss 6 | from maskrcnn_benchmark.modeling.box_coder import BoxCoder 7 | from maskrcnn_benchmark.modeling.matcher import Matcher 8 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 9 | from maskrcnn_benchmark.modeling.balanced_positive_negative_sampler import ( 10 | BalancedPositiveNegativeSampler 11 | ) 12 | from maskrcnn_benchmark.modeling.utils import cat 13 | 14 | 15 | class FastRCNNLossComputation(object): 16 | """ 17 | Computes the loss for Faster R-CNN. 18 | Also supports FPN 19 | """ 20 | 21 | def __init__(self, cls_agnostic_bbox_reg=False): 22 | self.cls_agnostic_bbox_reg = cls_agnostic_bbox_reg 23 | 24 | def assign_label_to_proposals(self, proposals, targets): 25 | for img_idx, (target, proposal) in enumerate(zip(targets, proposals)): 26 | match_quality_matrix = boxlist_iou(target, proposal) 27 | matched_idxs = self.proposal_matcher(match_quality_matrix) 28 | # Fast RCNN only need "labels" field for selecting the targets 29 | target = target.copy_with_fields(["labels", "attributes"]) 30 | matched_targets = target[matched_idxs.clamp(min=0)] 31 | 32 | labels_per_image = matched_targets.get_field("labels").to(dtype=torch.int64) 33 | attris_per_image = matched_targets.get_field("attributes").to(dtype=torch.int64) 34 | 35 | labels_per_image[matched_idxs < 0] = 0 36 | attris_per_image[matched_idxs < 0, :] = 0 37 | proposals[img_idx].add_field("labels", labels_per_image) 38 | proposals[img_idx].add_field("attributes", attris_per_image) 39 | return proposals 40 | 41 | 42 | def __call__(self, class_logits, box_regression, proposals): 43 | """ 44 | Computes the loss for Faster R-CNN. 45 | This requires that the subsample method has been called beforehand. 
46 | 47 | Arguments: 48 | class_logits (list[Tensor]) 49 | box_regression (list[Tensor]) 50 | proposals (list[BoxList]) 51 | 52 | Returns: 53 | classification_loss (Tensor) 54 | box_loss (Tensor) 55 | """ 56 | 57 | class_logits = cat(class_logits, dim=0) 58 | box_regression = cat(box_regression, dim=0) 59 | device = class_logits.device 60 | 61 | labels = cat([proposal.get_field("labels") for proposal in proposals], dim=0) 62 | regression_targets = cat([proposal.get_field("regression_targets") for proposal in proposals], dim=0) 63 | 64 | classification_loss = F.cross_entropy(class_logits, labels.long()) 65 | 66 | # get indices that correspond to the regression targets for 67 | # the corresponding ground truth labels, to be used with 68 | # advanced indexing 69 | sampled_pos_inds_subset = torch.nonzero(labels > 0).squeeze(1) 70 | labels_pos = labels[sampled_pos_inds_subset] 71 | if self.cls_agnostic_bbox_reg: 72 | map_inds = torch.tensor([4, 5, 6, 7], device=device) 73 | else: 74 | map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3], device=device) 75 | 76 | box_loss = smooth_l1_loss( 77 | box_regression[sampled_pos_inds_subset[:, None], map_inds], 78 | regression_targets[sampled_pos_inds_subset], 79 | size_average=False, 80 | beta=1, 81 | ) 82 | box_loss = box_loss / labels.numel() 83 | 84 | return classification_loss, box_loss 85 | 86 | 87 | def make_roi_box_loss_evaluator(cfg): 88 | cls_agnostic_bbox_reg = cfg.MODEL.CLS_AGNOSTIC_BBOX_REG 89 | 90 | loss_evaluator = FastRCNNLossComputation(cls_agnostic_bbox_reg) 91 | 92 | return loss_evaluator 93 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
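The advanced indexing with map_inds in the box loss above picks out the 4 regression channels that belong to each positive proposal's ground-truth class. A worked example with invented class ids:

import torch
labels_pos = torch.tensor([2, 5])                               # gt classes of two positive proposals
map_inds = 4 * labels_pos[:, None] + torch.tensor([0, 1, 2, 3])
# map_inds == tensor([[ 8,  9, 10, 11],
#                     [20, 21, 22, 23]])
# box_regression[sampled_pos_inds_subset[:, None], map_inds] therefore selects exactly
# the 4 box deltas predicted for class 2 (first proposal) and class 5 (second proposal).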
2 | from maskrcnn_benchmark.modeling import registry 3 | from torch import nn 4 | 5 | 6 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 7 | class FastRCNNPredictor(nn.Module): 8 | def __init__(self, config, in_channels): 9 | super(FastRCNNPredictor, self).__init__() 10 | assert in_channels is not None 11 | num_inputs = in_channels 12 | 13 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 14 | self.avgpool = nn.AdaptiveAvgPool2d(1) 15 | self.cls_score = nn.Linear(num_inputs, num_classes) 16 | num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 17 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 34 | class FPNPredictor(nn.Module): 35 | def __init__(self, cfg, in_channels): 36 | super(FPNPredictor, self).__init__() 37 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 38 | representation_size = in_channels 39 | 40 | self.cls_score = nn.Linear(representation_size, num_classes) 41 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 42 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 43 | 44 | nn.init.normal_(self.cls_score.weight, std=0.01) 45 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 46 | for l in [self.cls_score, self.bbox_pred]: 47 | nn.init.constant_(l.bias, 0) 48 | 49 | def forward(self, x): 50 | if x.ndimension() == 4: 51 | assert list(x.shape[2:]) == [1, 1] 52 | x = x.view(x.size(0), -1) 53 | cls_logit = self.cls_score(x) 54 | bbox_pred = self.bbox_pred(x) 55 | 56 | return cls_logit, bbox_pred 57 | 58 | 59 | def make_roi_box_predictor(cfg, in_channels): 60 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 61 | return func(cfg, in_channels) 62 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg, in_channels): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 14 | self.predictor = make_roi_keypoint_predictor( 15 | cfg, self.feature_extractor.out_channels) 16 | self.post_processor = 
make_roi_keypoint_post_processor(cfg) 17 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 18 | 19 | def forward(self, features, proposals, targets=None): 20 | """ 21 | Arguments: 22 | features (list[Tensor]): feature-maps from possibly several levels 23 | proposals (list[BoxList]): proposal boxes 24 | targets (list[BoxList], optional): the ground-truth targets. 25 | 26 | Returns: 27 | x (Tensor): the result of the feature extractor 28 | proposals (list[BoxList]): during training, the original proposals 29 | are returned. During testing, the predicted boxlists are returned 30 | with the `mask` field set 31 | losses (dict[Tensor]): During training, returns the losses for the 32 | head. During testing, returns an empty dict. 33 | """ 34 | if self.training: 35 | with torch.no_grad(): 36 | proposals = self.loss_evaluator.subsample(proposals, targets) 37 | 38 | x = self.feature_extractor(features, proposals) 39 | kp_logits = self.predictor(x) 40 | 41 | if not self.training: 42 | result = self.post_processor(kp_logits, proposals) 43 | return x, result, {} 44 | 45 | loss_kp = self.loss_evaluator(proposals, kp_logits) 46 | 47 | return x, proposals, dict(loss_kp=loss_kp) 48 | 49 | 50 | def build_roi_keypoint_head(cfg, in_channels): 51 | return ROIKeypointHead(cfg, in_channels) 52 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling import registry 5 | from maskrcnn_benchmark.modeling.poolers import Pooler 6 | 7 | from maskrcnn_benchmark.layers import Conv2d 8 | 9 | 10 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 11 | class KeypointRCNNFeatureExtractor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(KeypointRCNNFeatureExtractor, self).__init__() 14 | 15 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 16 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 17 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 18 | pooler = Pooler( 19 | output_size=(resolution, resolution), 20 | scales=scales, 21 | sampling_ratio=sampling_ratio, 22 | ) 23 | self.pooler = pooler 24 | 25 | input_features = in_channels 26 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 27 | next_feature = input_features 28 | self.blocks = [] 29 | for layer_idx, layer_features in enumerate(layers, 1): 30 | layer_name = "conv_fcn{}".format(layer_idx) 31 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 32 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 33 | nn.init.constant_(module.bias, 0) 34 | self.add_module(layer_name, module) 35 | next_feature = layer_features 36 | self.blocks.append(layer_name) 37 | self.out_channels = layer_features 38 | 39 | def forward(self, x, proposals): 40 | x = self.pooler(x, proposals) 41 | for layer_name in self.blocks: 42 | x = F.relu(getattr(self, layer_name)(x)) 43 | return x 44 | 45 | 46 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 47 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 48 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 49 | ] 50 | return func(cfg, in_channels) 51 | -------------------------------------------------------------------------------- 
/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from maskrcnn_benchmark import layers 4 | from maskrcnn_benchmark.modeling import registry 5 | 6 | 7 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 8 | class KeypointRCNNPredictor(nn.Module): 9 | def __init__(self, cfg, in_channels): 10 | super(KeypointRCNNPredictor, self).__init__() 11 | input_features = in_channels 12 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 13 | deconv_kernel = 4 14 | self.kps_score_lowres = layers.ConvTranspose2d( 15 | input_features, 16 | num_keypoints, 17 | deconv_kernel, 18 | stride=2, 19 | padding=deconv_kernel // 2 - 1, 20 | ) 21 | nn.init.kaiming_normal_( 22 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 23 | ) 24 | nn.init.constant_(self.kps_score_lowres.bias, 0) 25 | self.up_scale = 2 26 | self.out_channels = num_keypoints 27 | 28 | def forward(self, x): 29 | x = self.kps_score_lowres(x) 30 | x = layers.interpolate( 31 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 32 | ) 33 | return x 34 | 35 | 36 | def make_roi_keypoint_predictor(cfg, in_channels): 37 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 38 | return func(cfg, in_channels) 39 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 
17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg, in_channels): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels) 41 | self.predictor = make_roi_mask_predictor( 42 | cfg, self.feature_extractor.out_channels) 43 | self.post_processor = make_roi_mask_post_processor(cfg) 44 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 45 | 46 | def forward(self, features, proposals, targets=None): 47 | """ 48 | Arguments: 49 | features (list[Tensor]): feature-maps from possibly several levels 50 | proposals (list[BoxList]): proposal boxes 51 | targets (list[BoxList], optional): the ground-truth targets. 52 | 53 | Returns: 54 | x (Tensor): the result of the feature extractor 55 | proposals (list[BoxList]): during training, the original proposals 56 | are returned. During testing, the predicted boxlists are returned 57 | with the `mask` field set 58 | losses (dict[Tensor]): During training, returns the losses for the 59 | head. During testing, returns an empty dict. 60 | """ 61 | 62 | if self.training: 63 | # during training, only focus on positive boxes 64 | all_proposals = proposals 65 | proposals, positive_inds = keep_only_positive_boxes(proposals) 66 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 67 | x = features 68 | x = x[torch.cat(positive_inds, dim=0)] 69 | else: 70 | x = self.feature_extractor(features, proposals) 71 | mask_logits = self.predictor(x) 72 | 73 | if not self.training: 74 | result = self.post_processor(mask_logits, proposals) 75 | return x, result, {} 76 | 77 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 78 | 79 | return x, all_proposals, dict(loss_mask=loss_mask) 80 | 81 | 82 | def build_roi_mask_head(cfg, in_channels): 83 | return ROIMaskHead(cfg, in_channels) 84 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
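The filtering done by keep_only_positive_boxes above, reduced to its tensor core with invented labels:

import torch
labels = torch.tensor([0, 2, 0, 1])        # per-proposal labels for one image
inds_mask = labels > 0                     # tensor([False, True, False, True])
inds = inds_mask.nonzero().squeeze(1)      # tensor([1, 3]) -> proposals 1 and 3 are kept
# positive_inds stores the boolean mask so shared box features can later be indexed the same way.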
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor 6 | from maskrcnn_benchmark.modeling import registry 7 | from maskrcnn_benchmark.modeling.poolers import Pooler 8 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 9 | 10 | 11 | registry.ROI_MASK_FEATURE_EXTRACTORS.register( 12 | "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor 13 | ) 14 | 15 | 16 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor") 17 | class MaskRCNNFPNFeatureExtractor(nn.Module): 18 | """ 19 | Heads for FPN for classification 20 | """ 21 | 22 | def __init__(self, cfg, in_channels): 23 | """ 24 | Arguments: 25 | num_classes (int): number of output classes 26 | input_size (int): number of channels of the input once it's flattened 27 | representation_size (int): size of the intermediate representation 28 | """ 29 | super(MaskRCNNFPNFeatureExtractor, self).__init__() 30 | 31 | resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION 32 | scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES 33 | sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO 34 | pooler = Pooler( 35 | output_size=(resolution, resolution), 36 | scales=scales, 37 | sampling_ratio=sampling_ratio, 38 | ) 39 | input_size = in_channels 40 | self.pooler = pooler 41 | 42 | use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN 43 | layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS 44 | dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION 45 | 46 | next_feature = input_size 47 | self.blocks = [] 48 | for layer_idx, layer_features in enumerate(layers, 1): 49 | layer_name = "mask_fcn{}".format(layer_idx) 50 | module = make_conv3x3( 51 | next_feature, layer_features, 52 | dilation=dilation, stride=1, use_gn=use_gn 53 | ) 54 | self.add_module(layer_name, module) 55 | next_feature = layer_features 56 | self.blocks.append(layer_name) 57 | self.out_channels = layer_features 58 | 59 | def forward(self, x, proposals): 60 | x = self.pooler(x, proposals) 61 | 62 | for layer_name in self.blocks: 63 | x = F.relu(getattr(self, layer_name)(x)) 64 | 65 | return x 66 | 67 | 68 | def make_roi_mask_feature_extractor(cfg, in_channels): 69 | func = registry.ROI_MASK_FEATURE_EXTRACTORS[ 70 | cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR 71 | ] 72 | return func(cfg, in_channels) 73 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
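For reference, the layer-building loop in MaskRCNNFPNFeatureExtractor above unrolls as follows under the usual FPN mask setting (the CONV_LAYERS value is an assumption stated for illustration, not read from a config shown here):

# MODEL.ROI_MASK_HEAD.CONV_LAYERS = (256, 256, 256, 256) gives
#   mask_fcn1: make_conv3x3(in_channels, 256)
#   mask_fcn2: make_conv3x3(256, 256)
#   mask_fcn3: make_conv3x3(256, 256)
#   mask_fcn4: make_conv3x3(256, 256)
# and self.out_channels ends up as 256, which the mask predictor then consumes.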
2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from maskrcnn_benchmark.layers import Conv2d 6 | from maskrcnn_benchmark.layers import ConvTranspose2d 7 | from maskrcnn_benchmark.modeling import registry 8 | 9 | 10 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 11 | class MaskRCNNC4Predictor(nn.Module): 12 | def __init__(self, cfg, in_channels): 13 | super(MaskRCNNC4Predictor, self).__init__() 14 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 16 | num_inputs = in_channels 17 | 18 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 19 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 20 | 21 | for name, param in self.named_parameters(): 22 | if "bias" in name: 23 | nn.init.constant_(param, 0) 24 | elif "weight" in name: 25 | # Caffe2 implementation uses MSRAFill, which in fact 26 | # corresponds to kaiming_normal_ in PyTorch 27 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 28 | 29 | def forward(self, x): 30 | x = F.relu(self.conv5_mask(x)) 31 | return self.mask_fcn_logits(x) 32 | 33 | 34 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 35 | class MaskRCNNConv1x1Predictor(nn.Module): 36 | def __init__(self, cfg, in_channels): 37 | super(MaskRCNNConv1x1Predictor, self).__init__() 38 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 39 | num_inputs = in_channels 40 | 41 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 42 | 43 | for name, param in self.named_parameters(): 44 | if "bias" in name: 45 | nn.init.constant_(param, 0) 46 | elif "weight" in name: 47 | # Caffe2 implementation uses MSRAFill, which in fact 48 | # corresponds to kaiming_normal_ in PyTorch 49 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 50 | 51 | def forward(self, x): 52 | return self.mask_fcn_logits(x) 53 | 54 | 55 | def make_roi_mask_predictor(cfg, in_channels): 56 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 57 | return func(cfg, in_channels) 58 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/relation_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/relation_head/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | from .attribute_head.attribute_head import build_roi_attribute_head 7 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 8 | from .relation_head.relation_head import build_roi_relation_head 9 | 10 | 11 | class CombinedROIHeads(torch.nn.ModuleDict): 12 | """ 13 | Combines a set of individual heads (for box prediction or masks) into a single 14 | head. 
15 | """ 16 | 17 | def __init__(self, cfg, heads): 18 | super(CombinedROIHeads, self).__init__(heads) 19 | self.cfg = cfg.clone() 20 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 21 | self.mask.feature_extractor = self.box.feature_extractor 22 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 23 | self.keypoint.feature_extractor = self.box.feature_extractor 24 | 25 | def forward(self, features, proposals, targets=None, logger=None): 26 | losses = {} 27 | x, detections, loss_box = self.box(features, proposals, targets) 28 | if not self.cfg.MODEL.RELATION_ON: 29 | # During the relationship training stage, the bbox_proposal_network should be fixed, and no loss. 30 | losses.update(loss_box) 31 | 32 | if self.cfg.MODEL.ATTRIBUTE_ON: 33 | # Attribute head don't have a separate feature extractor 34 | z, detections, loss_attribute = self.attribute(features, detections, targets) 35 | losses.update(loss_attribute) 36 | 37 | if self.cfg.MODEL.MASK_ON: 38 | mask_features = features 39 | # optimization: during training, if we share the feature extractor between 40 | # the box and the mask heads, then we can reuse the features already computed 41 | if ( 42 | self.training 43 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 44 | ): 45 | mask_features = x 46 | # During training, self.box() will return the unaltered proposals as "detections" 47 | # this makes the API consistent during training and testing 48 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 49 | losses.update(loss_mask) 50 | 51 | if self.cfg.MODEL.KEYPOINT_ON: 52 | keypoint_features = features 53 | # optimization: during training, if we share the feature extractor between 54 | # the box and the mask heads, then we can reuse the features already computed 55 | if ( 56 | self.training 57 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 58 | ): 59 | keypoint_features = x 60 | # During training, self.box() will return the unaltered proposals as "detections" 61 | # this makes the API consistent during training and testing 62 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 63 | losses.update(loss_keypoint) 64 | 65 | if self.cfg.MODEL.RELATION_ON: 66 | # it may be not safe to share features due to post processing 67 | # During training, self.box() will return the unaltered proposals as "detections" 68 | # this makes the API consistent during training and testing 69 | x, detections, loss_relation = self.relation(features, detections, targets, logger) 70 | losses.update(loss_relation) 71 | 72 | return x, detections, losses 73 | 74 | 75 | def build_roi_heads(cfg, in_channels): 76 | # individually create the heads, that will be combined together 77 | # afterwards 78 | roi_heads = [] 79 | if cfg.MODEL.RETINANET_ON: 80 | return [] 81 | 82 | if not cfg.MODEL.RPN_ONLY: 83 | roi_heads.append(("box", build_roi_box_head(cfg, in_channels))) 84 | if cfg.MODEL.MASK_ON: 85 | roi_heads.append(("mask", build_roi_mask_head(cfg, in_channels))) 86 | if cfg.MODEL.KEYPOINT_ON: 87 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels))) 88 | if cfg.MODEL.RELATION_ON: 89 | roi_heads.append(("relation", build_roi_relation_head(cfg, in_channels))) 90 | if cfg.MODEL.ATTRIBUTE_ON: 91 | roi_heads.append(("attribute", build_roi_attribute_head(cfg, in_channels))) 92 | 93 | # combine individual heads in a single module 94 | if roi_heads: 95 | roi_heads = CombinedROIHeads(cfg, roi_heads) 96 | 97 
| return roi_heads 98 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/modeling/rpn/retinanet/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains specific functions for computing losses on the RetinaNet 3 | file 4 | """ 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | from ..utils import concat_box_prediction_layers 10 | 11 | from maskrcnn_benchmark.layers import smooth_l1_loss 12 | from maskrcnn_benchmark.layers import SigmoidFocalLoss 13 | from maskrcnn_benchmark.modeling.matcher import Matcher 14 | from maskrcnn_benchmark.modeling.utils import cat 15 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 16 | from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist 17 | from maskrcnn_benchmark.modeling.rpn.loss import RPNLossComputation 18 | 19 | class RetinaNetLossComputation(RPNLossComputation): 20 | """ 21 | This class computes the RetinaNet loss. 
22 | """ 23 | 24 | def __init__(self, proposal_matcher, box_coder, 25 | generate_labels_func, 26 | sigmoid_focal_loss, 27 | bbox_reg_beta=0.11, 28 | regress_norm=1.0): 29 | """ 30 | Arguments: 31 | proposal_matcher (Matcher) 32 | box_coder (BoxCoder) 33 | """ 34 | self.proposal_matcher = proposal_matcher 35 | self.box_coder = box_coder 36 | self.box_cls_loss_func = sigmoid_focal_loss 37 | self.bbox_reg_beta = bbox_reg_beta 38 | self.copied_fields = ['labels'] 39 | self.generate_labels_func = generate_labels_func 40 | self.discard_cases = ['between_thresholds'] 41 | self.regress_norm = regress_norm 42 | 43 | def __call__(self, anchors, box_cls, box_regression, targets): 44 | """ 45 | Arguments: 46 | anchors (list[BoxList]) 47 | box_cls (list[Tensor]) 48 | box_regression (list[Tensor]) 49 | targets (list[BoxList]) 50 | 51 | Returns: 52 | retinanet_cls_loss (Tensor) 53 | retinanet_regression_loss (Tensor 54 | """ 55 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 56 | labels, regression_targets = self.prepare_targets(anchors, targets) 57 | 58 | N = len(labels) 59 | box_cls, box_regression = \ 60 | concat_box_prediction_layers(box_cls, box_regression) 61 | 62 | labels = torch.cat(labels, dim=0) 63 | regression_targets = torch.cat(regression_targets, dim=0) 64 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 65 | 66 | retinanet_regression_loss = smooth_l1_loss( 67 | box_regression[pos_inds], 68 | regression_targets[pos_inds], 69 | beta=self.bbox_reg_beta, 70 | size_average=False, 71 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 72 | 73 | labels = labels.int() 74 | 75 | retinanet_cls_loss = self.box_cls_loss_func( 76 | box_cls, 77 | labels 78 | ) / (pos_inds.numel() + N) 79 | 80 | return retinanet_cls_loss, retinanet_regression_loss 81 | 82 | 83 | def generate_retinanet_labels(matched_targets): 84 | labels_per_image = matched_targets.get_field("labels") 85 | return labels_per_image 86 | 87 | 88 | def make_retinanet_loss_evaluator(cfg, box_coder): 89 | matcher = Matcher( 90 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 91 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 92 | allow_low_quality_matches=True, 93 | ) 94 | sigmoid_focal_loss = SigmoidFocalLoss( 95 | cfg.MODEL.RETINANET.LOSS_GAMMA, 96 | cfg.MODEL.RETINANET.LOSS_ALPHA 97 | ) 98 | 99 | loss_evaluator = RetinaNetLossComputation( 100 | matcher, 101 | box_coder, 102 | generate_retinanet_labels, 103 | sigmoid_focal_loss, 104 | bbox_reg_beta = cfg.MODEL.RETINANET.BBOX_REG_BETA, 105 | regress_norm = cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 106 | ) 107 | return loss_evaluator 108 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Utility functions minipulating the prediction layers 4 | """ 5 | 6 | from ..utils import cat 7 | 8 | import torch 9 | 10 | def permute_and_flatten(layer, N, A, C, H, W): 11 | layer = layer.view(N, -1, C, H, W) 12 | layer = layer.permute(0, 3, 4, 1, 2) 13 | layer = layer.reshape(N, -1, C) 14 | return layer 15 | 16 | 17 | def concat_box_prediction_layers(box_cls, box_regression): 18 | box_cls_flattened = [] 19 | box_regression_flattened = [] 20 | # for each feature level, permute the outputs to make them be in the 21 | # same format as the labels. 
Note that the labels are computed for 22 | # all feature levels concatenated, so we keep the same representation 23 | # for the objectness and the box_regression 24 | for box_cls_per_level, box_regression_per_level in zip( 25 | box_cls, box_regression 26 | ): 27 | N, AxC, H, W = box_cls_per_level.shape 28 | Ax4 = box_regression_per_level.shape[1] 29 | A = Ax4 // 4 30 | C = AxC // A 31 | box_cls_per_level = permute_and_flatten( 32 | box_cls_per_level, N, A, C, H, W 33 | ) 34 | box_cls_flattened.append(box_cls_per_level) 35 | 36 | box_regression_per_level = permute_and_flatten( 37 | box_regression_per_level, N, A, 4, H, W 38 | ) 39 | box_regression_flattened.append(box_regression_per_level) 40 | # concatenate on the first dimension (representing the feature levels), to 41 | # take into account the way the labels were generated (with all feature maps 42 | # being concatenated as well) 43 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 44 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 45 | return box_cls, box_regression 46 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau 5 | 6 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
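A shape walk-through of permute_and_flatten / concat_box_prediction_layers above, using invented sizes for a single feature level:

import torch
N, A, C, H, W = 2, 3, 4, 5, 5                 # batch, anchors per location, classes, feature map size
box_cls_level = torch.randn(N, A * C, H, W)
layer = box_cls_level.view(N, -1, C, H, W)    # (2, 3, 4, 5, 5)
layer = layer.permute(0, 3, 4, 1, 2)          # (2, 5, 5, 3, 4): one row per (location, anchor)
layer = layer.reshape(N, -1, C)               # (2, 75, 4)
# Concatenating all levels on dim=1 and reshaping to (-1, C) then yields one
# classification row per anchor in the whole batch, matching how the labels are laid out.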
2 | import torch 3 | 4 | from .lr_scheduler import WarmupMultiStepLR, WarmupReduceLROnPlateau, WarmupCosineLR, WarmupCosineMStepLR, WarmupConsrantCosineLR 5 | 6 | 7 | def make_optimizer(cfg, model, logger, slow_heads=None, slow_ratio=5.0, rl_factor=1.0): 8 | params = [] 9 | for key, value in model.named_parameters(): 10 | if not value.requires_grad: 11 | continue 12 | lr = cfg.SOLVER.BASE_LR 13 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 14 | if "bias" in key: 15 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 16 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 17 | if slow_heads is not None: 18 | for item in slow_heads: 19 | if item in key: 20 | logger.info("SLOW HEADS: {} is slow down by ratio of {}.".format(key, str(slow_ratio))) 21 | lr = lr / slow_ratio 22 | break 23 | params += [{"params": [value], "lr": lr * rl_factor, "weight_decay": weight_decay}] 24 | 25 | optimizer = torch.optim.SGD(params, lr=cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM) 26 | return optimizer 27 | 28 | 29 | def make_lr_scheduler(cfg, optimizer, logger=None): 30 | if cfg.SOLVER.SCHEDULE.TYPE == "WarmupMultiStepLR": 31 | return WarmupMultiStepLR( 32 | optimizer, 33 | cfg.SOLVER.STEPS, 34 | cfg.SOLVER.GAMMA, 35 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 36 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 37 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 38 | ) 39 | 40 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupReduceLROnPlateau": 41 | return WarmupReduceLROnPlateau( 42 | optimizer, 43 | cfg.SOLVER.SCHEDULE.FACTOR, 44 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 45 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 46 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 47 | patience=cfg.SOLVER.SCHEDULE.PATIENCE, 48 | threshold=cfg.SOLVER.SCHEDULE.THRESHOLD, 49 | cooldown=cfg.SOLVER.SCHEDULE.COOLDOWN, 50 | logger=logger, 51 | ) 52 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupCosineLR": 53 | return WarmupCosineLR( 54 | optimizer, 55 | T_max=cfg.SOLVER.MAX_ITER, 56 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 57 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 58 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 59 | eta_min=cfg.SOLVER.ETA_MIN * cfg.SOLVER.IMS_PER_BATCH, 60 | ) 61 | 62 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupCosineMstepLR": 63 | return WarmupCosineMStepLR( 64 | optimizer, 65 | cfg.SOLVER.STEPS, 66 | cfg.SOLVER.GAMMA, 67 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 68 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 69 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 70 | eta_min=cfg.SOLVER.ETA_MIN * cfg.SOLVER.IMS_PER_BATCH, 71 | ) 72 | elif cfg.SOLVER.SCHEDULE.TYPE == "WarmupConsrantCosineLR": 73 | return WarmupConsrantCosineLR( 74 | optimizer, 75 | cfg.SOLVER.STEPS, 76 | T_max=cfg.SOLVER.MAX_ITER, 77 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 78 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 79 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 80 | eta_min=cfg.SOLVER.ETA_MIN * cfg.SOLVER.IMS_PER_BATCH, 81 | ) 82 | else: 83 | raise ValueError("Invalid Schedule Type") 84 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 
(c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | if tensors.dim() == 3: 45 | tensors = tensors[None] 46 | assert tensors.dim() == 4 47 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 48 | return ImageList(tensors, image_sizes) 49 | elif isinstance(tensors, (tuple, list)): 50 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 51 | 52 | # TODO Ideally, just remove this and let me model handle arbitrary 53 | # input sizs 54 | if size_divisible > 0: 55 | import math 56 | 57 | stride = size_divisible 58 | max_size = list(max_size) 59 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 60 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 61 | max_size = tuple(max_size) 62 | 63 | batch_shape = (len(tensors),) + max_size 64 | batched_imgs = tensors[0].new(*batch_shape).zero_() 65 | for img, pad_img in zip(tensors, batched_imgs): 66 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 67 | 68 | image_sizes = [im.shape[-2:] for im in tensors] 69 | 70 | return ImageList(batched_imgs, image_sizes) 71 | else: 72 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 73 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
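A hand-worked example of the padding performed by to_image_list above (image sizes invented):

import torch
from maskrcnn_benchmark.structures.image_list import to_image_list

imgs = [torch.randn(3, 480, 640), torch.randn(3, 520, 600)]
batch = to_image_list(imgs, size_divisible=32)
# max_size starts as (3, 520, 640); 520 rounds up to 544 and 640 is already a multiple of 32,
# so batch.tensors has shape (2, 3, 544, 640) while batch.image_sizes keeps the
# original per-image sizes [(480, 640), (520, 600)] for later unpadding.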
2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/comm.py: -------------------------------------------------------------------------------- 1 | """ 2 | This file contains primitives for multi-gpu communication. 3 | This is useful when doing distributed training. 4 | """ 5 | 6 | import pickle 7 | import time 8 | 9 | import torch 10 | import torch.distributed as dist 11 | 12 | 13 | def get_world_size(): 14 | if not dist.is_available(): 15 | return 1 16 | if not dist.is_initialized(): 17 | return 1 18 | return dist.get_world_size() 19 | 20 | 21 | def get_rank(): 22 | if not dist.is_available(): 23 | return 0 24 | if not dist.is_initialized(): 25 | return 0 26 | return dist.get_rank() 27 | 28 | 29 | def is_main_process(): 30 | return get_rank() == 0 31 | 32 | 33 | def synchronize(): 34 | """ 35 | Helper function to synchronize (barrier) among all processes when 36 | using distributed training 37 | """ 38 | if not dist.is_available(): 39 | return 40 | if not dist.is_initialized(): 41 | return 42 | world_size = dist.get_world_size() 43 | if world_size == 1: 44 | return 45 | dist.barrier() 46 | 47 | 48 | def all_gather(data): 49 | """ 50 | Run all_gather on arbitrary picklable data (not necessarily tensors) 51 | Args: 52 | data: any picklable object 53 | Returns: 54 | list[data]: list of data gathered from each rank 55 | """ 56 | to_device = "cuda" 57 | #to_device = torch.device("cpu") 58 | 59 | world_size = get_world_size() 60 | if world_size == 1: 61 | return [data] 62 | 63 | # serialized to a Tensor 64 | buffer = pickle.dumps(data) 65 | storage = torch.ByteStorage.from_buffer(buffer) 66 | tensor = torch.ByteTensor(storage).to(to_device) 67 | 68 | # obtain Tensor size of each rank 69 | local_size = torch.LongTensor([tensor.numel()]).to(to_device) 70 | size_list = [torch.LongTensor([0]).to(to_device) for _ in range(world_size)] 71 | dist.all_gather(size_list, local_size) 72 | size_list = [int(size.item()) for size in size_list] 73 | max_size = max(size_list) 74 | 75 | # receiving Tensor from all ranks 76 | # we pad the tensor because torch all_gather does not support 77 | # gathering tensors of different shapes 78 | tensor_list = [] 79 | for _ in size_list: 80 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to(to_device)) 81 | if local_size != max_size: 82 | padding = torch.ByteTensor(size=(max_size - local_size,)).to(to_device) 83 | tensor = torch.cat((tensor, padding), dim=0) 84 | dist.all_gather(tensor_list, tensor) 85 | 86 | data_list = [] 87 | for size, tensor in zip(size_list, tensor_list): 88 | buffer = tensor.cpu().numpy().tobytes()[:size] 89 | data_list.append(pickle.loads(buffer)) 90 | 91 | return data_list 92 | 93 | 94 | def reduce_dict(input_dict, average=True): 95 | """ 96 | Args: 97 | input_dict (dict): all the values will be reduced 98 | average (bool): whether to do average or sum 99 | Reduce the values in the dictionary from all processes so that process with rank 100 | 0 has the averaged results. Returns a dict with the same fields as 101 | input_dict, after reduction. 
102 | """ 103 | world_size = get_world_size() 104 | if world_size < 2: 105 | return input_dict 106 | with torch.no_grad(): 107 | names = [] 108 | values = [] 109 | # sort the keys so that they are consistent across processes 110 | for k in sorted(input_dict.keys()): 111 | names.append(k) 112 | values.append(input_dict[k]) 113 | values = torch.stack(values, dim=0) 114 | dist.reduce(values, dst=0) 115 | if dist.get_rank() == 0 and average: 116 | # only main process gets accumulated, so only divide by 117 | # world_size in this case 118 | values /= world_size 119 | reduced_dict = {k: v for k, v in zip(names, values)} 120 | return reduced_dict 121 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatiblity between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | if torch._six.PY3: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | DEBUG_PRINT_ON = True 7 | 8 | def debug_print(logger, info): 9 | if DEBUG_PRINT_ON: 10 | logger.info('#'*20+' '+info+' '+'#'*20) 11 | 12 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 13 | logger = logging.getLogger(name) 14 | logger.setLevel(logging.DEBUG) 15 | # don't log results for the non-master process 16 | if distributed_rank > 0: 17 | return logger 18 | ch = logging.StreamHandler(stream=sys.stdout) 19 | ch.setLevel(logging.DEBUG) 20 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 21 | ch.setFormatter(formatter) 22 | logger.addHandler(ch) 23 | 24 | if save_dir: 25 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 26 | fh.setLevel(logging.DEBUG) 27 | fh.setFormatter(formatter) 28 | logger.addHandler(fh) 29 | 30 | return logger 31 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | 7 | 8 | class SmoothedValue(object): 9 | """Track a series of values and provide access to smoothed values over a 10 | window or the global series average. 
11 | """ 12 | 13 | def __init__(self, window_size=20): 14 | self.deque = deque(maxlen=window_size) 15 | self.series = [] 16 | self.total = 0.0 17 | self.count = 0 18 | 19 | def update(self, value): 20 | self.deque.append(value) 21 | self.series.append(value) 22 | self.count += 1 23 | self.total += value 24 | 25 | @property 26 | def median(self): 27 | d = torch.tensor(list(self.deque)) 28 | return d.median().item() 29 | 30 | @property 31 | def avg(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.mean().item() 34 | 35 | @property 36 | def global_avg(self): 37 | return self.total / self.count 38 | 39 | 40 | class MetricLogger(object): 41 | def __init__(self, delimiter="\t"): 42 | self.meters = defaultdict(SmoothedValue) 43 | self.delimiter = delimiter 44 | 45 | def update(self, **kwargs): 46 | for k, v in kwargs.items(): 47 | if isinstance(v, torch.Tensor): 48 | v = v.item() 49 | assert isinstance(v, (float, int)) 50 | self.meters[k].update(v) 51 | 52 | def __getattr__(self, attr): 53 | if attr in self.meters: 54 | return self.meters[attr] 55 | if attr in self.__dict__: 56 | return self.__dict__[attr] 57 | raise AttributeError("'{}' object has no attribute '{}'".format( 58 | type(self).__name__, attr)) 59 | 60 | def __str__(self): 61 | loss_str = [] 62 | for name, meter in self.meters.items(): 63 | loss_str.append( 64 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 65 | ) 66 | return self.delimiter.join(loss_str) 67 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import json 4 | import logging 5 | import os 6 | from .comm import is_main_process 7 | import numpy as np 8 | 9 | from maskrcnn_benchmark.structures.bounding_box import BoxList 10 | from maskrcnn_benchmark.structures.boxlist_ops import boxlist_iou 11 | 12 | def mkdir(path): 13 | try: 14 | os.makedirs(path) 15 | except OSError as e: 16 | if e.errno != errno.EEXIST: 17 | raise 18 | 19 | 20 | def save_labels(dataset_list, output_dir): 21 | if is_main_process(): 22 | logger = logging.getLogger(__name__) 23 | 24 | ids_to_labels = {} 25 | for dataset in dataset_list: 26 | if hasattr(dataset, 'categories'): 27 | ids_to_labels.update(dataset.categories) 28 | else: 29 | logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format( 30 | dataset.__class__.__name__)) 31 | 32 | if ids_to_labels: 33 | labels_file = os.path.join(output_dir, 'labels.json') 34 | logger.info("Saving labels mapping into {}".format(labels_file)) 35 | with open(labels_file, 'w') as f: 36 | json.dump(ids_to_labels, f, indent=2) 37 | 38 | 39 | def save_config(cfg, path): 40 | if is_main_process(): 41 | with open(path, 'w') as f: 42 | f.write(cfg.dump()) 43 | 44 | 45 | def intersect_2d(x1, x2): 46 | """ 47 | Given two arrays [m1, n], [m2,n], returns a [m1, m2] array where each entry is True if those 48 | rows match. 
49 | :param x1: [m1, n] numpy array 50 | :param x2: [m2, n] numpy array 51 | :return: [m1, m2] bool array of the intersections 52 | """ 53 | if x1.shape[1] != x2.shape[1]: 54 | raise ValueError("Input arrays must have same #columns") 55 | 56 | # This performs a matrix multiplication-esque thing between the two arrays 57 | # Instead of summing, we want the equality, so we reduce in that way 58 | res = (x1[..., None] == x2.T[None, ...]).all(1) 59 | return res 60 | 61 | def argsort_desc(scores): 62 | """ 63 | Returns the indices that sort scores descending in a smart way 64 | :param scores: Numpy array of arbitrary size 65 | :return: an array of size [numel(scores), dim(scores)] where each row is the index you'd 66 | need to get the score. 67 | """ 68 | return np.column_stack(np.unravel_index(np.argsort(-scores.ravel()), scores.shape)) 69 | 70 | def bbox_overlaps(boxes1, boxes2): 71 | """ 72 | Parameters: 73 | boxes1 (m, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 74 | boxes2 (n, 4) [List or np.array] : bounding boxes of (x1,y1,x2,y2) 75 | Return: 76 | iou (m, n) [np.array] 77 | """ 78 | boxes1 = BoxList(boxes1, (0, 0), 'xyxy') 79 | boxes2 = BoxList(boxes2, (0, 0), 'xyxy') 80 | iou = boxlist_iou(boxes1, boxes2).cpu().numpy() 81 | return iou 82 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import OrderedDict 3 | import logging 4 | 5 | import torch 6 | 7 | from maskrcnn_benchmark.utils.imports import import_file 8 | 9 | 10 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping): 11 | """ 12 | Strategy: suppose that the models that we will create will have prefixes appended 13 | to each of its keys, for example due to an extra level of nesting that the original 14 | pre-trained weights from ImageNet won't contain. For example, model.state_dict() 15 | might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains 16 | res2.conv1.weight. We thus want to match both parameters together. 17 | For that, we look for each model weight, look among all loaded keys if there is one 18 | that is a suffix of the current weight name, and use it if that's the case. 19 | If multiple matches exist, take the one with longest size 20 | of the corresponding name. For example, for the same model as before, the pretrained 21 | weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case, 22 | we want to match backbone[0].body.conv1.weight to conv1.weight, and 23 | backbone[0].body.res2.conv1.weight to res2.conv1.weight. 
24 | """ 25 | logger = logging.getLogger(__name__) 26 | current_keys = sorted(list(model_state_dict.keys())) 27 | loaded_keys = sorted(list(loaded_state_dict.keys())) 28 | # get a matrix of string matches, where each (i, j) entry correspond to the size of the 29 | # loaded_key string, if it matches 30 | # NOTE: Kaihua Tang, since some modules of current model will be initialized from assigned layer of 31 | # loaded model, we use load_mapping to do such operation 32 | mapped_current_keys = current_keys.copy() 33 | for i, key in enumerate(mapped_current_keys): 34 | for source_key, target_key in load_mapping.items(): 35 | if source_key in key: 36 | mapped_current_keys[i] = key.replace(source_key, target_key) 37 | logger.info("MAPPING {} in current model to {} in loaded model.".format(key, mapped_current_keys[i])) 38 | 39 | match_matrix = [ 40 | len(j) if i.endswith(j) else 0 for i in mapped_current_keys for j in loaded_keys 41 | ] 42 | match_matrix = torch.as_tensor(match_matrix).view( 43 | len(current_keys), len(loaded_keys) 44 | ) 45 | max_match_size, idxs = match_matrix.max(1) 46 | # remove indices that correspond to no-match 47 | idxs[max_match_size == 0] = -1 48 | 49 | # used for logging 50 | max_size = max([len(key) for key in current_keys]) if current_keys else 1 51 | max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1 52 | log_str_template = "REMATCHING! {: <{}} loaded from {: <{}} of shape {}" 53 | for idx_new, idx_old in enumerate(idxs.tolist()): 54 | if idx_old == -1: 55 | key = current_keys[idx_new] 56 | logger.info("NO-MATCHING of current module: {} of shape {}".format(key, 57 | tuple(model_state_dict[key].shape))) 58 | continue 59 | key = current_keys[idx_new] 60 | key_old = loaded_keys[idx_old] 61 | model_state_dict[key] = loaded_state_dict[key_old] 62 | # add a control gate for this logger (it's too large) 63 | if ((not key.startswith('module.')) and key != key_old) or (key.startswith('module.') and key[7:] != key_old): 64 | logger.info( 65 | log_str_template.format( 66 | key, 67 | max_size, 68 | key_old, 69 | max_size_loaded, 70 | tuple(loaded_state_dict[key_old].shape), 71 | ) 72 | ) 73 | 74 | 75 | def strip_prefix_if_present(state_dict, prefix): 76 | keys = sorted(state_dict.keys()) 77 | if not all(key.startswith(prefix) for key in keys): 78 | return state_dict 79 | stripped_state_dict = OrderedDict() 80 | for key, value in state_dict.items(): 81 | stripped_state_dict[key.replace(prefix, "")] = value 82 | return stripped_state_dict 83 | 84 | 85 | def load_state_dict(model, loaded_state_dict, load_mapping): 86 | model_state_dict = model.state_dict() 87 | # if the state_dict comes from a model that was wrapped in a 88 | # DataParallel or DistributedDataParallel during serialization, 89 | # remove the "module" prefix before performing the matching 90 | loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.") 91 | align_and_update_state_dicts(model_state_dict, loaded_state_dict, load_mapping) 92 | 93 | # use strict loading 94 | model.load_state_dict(model_state_dict) 95 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import os 3 | import sys 4 | 5 | try: 6 | from torch.hub import _download_url_to_file 7 | from torch.hub import urlparse 8 | from torch.hub import HASH_REGEX 9 | except ImportError: 10 | from torch.utils.model_zoo import _download_url_to_file 11 | from torch.utils.model_zoo import urlparse 12 | from torch.utils.model_zoo import HASH_REGEX 13 | 14 | from maskrcnn_benchmark.utils.comm import is_main_process 15 | from maskrcnn_benchmark.utils.comm import synchronize 16 | 17 | 18 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 19 | # but with a few improvements and modifications 20 | def cache_url(url, model_dir=None, progress=True): 21 | r"""Loads the Torch serialized object at the given URL. 22 | If the object is already present in `model_dir`, it's deserialized and 23 | returned. The filename part of the URL should follow the naming convention 24 | ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more 25 | digits of the SHA256 hash of the contents of the file. The hash is used to 26 | ensure unique names and to verify the contents of the file. 27 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 28 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 29 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 30 | Args: 31 | url (string): URL of the object to download 32 | model_dir (string, optional): directory in which to save the object 33 | progress (bool, optional): whether or not to display a progress bar to stderr 34 | Example: 35 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 36 | """ 37 | if model_dir is None: 38 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 39 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 40 | if not os.path.exists(model_dir): 41 | os.makedirs(model_dir) 42 | parts = urlparse(url) 43 | filename = os.path.basename(parts.path) 44 | if filename == "model_final.pkl": 45 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 46 | # so make the full path the filename by replacing / with _ 47 | filename = parts.path.replace("/", "_") 48 | cached_file = os.path.join(model_dir, filename) 49 | if not os.path.exists(cached_file) and is_main_process(): 50 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 51 | hash_prefix = HASH_REGEX.search(filename) 52 | if hash_prefix is not None: 53 | hash_prefix = hash_prefix.group(1) 54 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 55 | # which matches the hash PyTorch uses. So we skip the hash matching 56 | # if the hash_prefix is less than 6 characters 57 | if len(hash_prefix) < 6: 58 | hash_prefix = None 59 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 60 | synchronize() 61 | return cached_file 62 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing module registration. It extends a dictionary 12 | and provides a register function.
13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Access of a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /sg-benchmark/maskrcnn_benchmark/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | import time 5 | import datetime 6 | 7 | 8 | class Timer(object): 9 | def __init__(self): 10 | self.reset() 11 | 12 | @property 13 | def average_time(self): 14 | return self.total_time / self.calls if self.calls > 0 else 0.0 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.add(time.time() - self.start_time) 23 | if average: 24 | return self.average_time 25 | else: 26 | return self.diff 27 | 28 | def add(self, time_diff): 29 | self.diff = time_diff 30 | self.total_time += self.diff 31 | self.calls += 1 32 | 33 | def reset(self): 34 | self.total_time = 0.0 35 | self.calls = 0 36 | self.start_time = 0.0 37 | self.diff = 0.0 38 | 39 | def avg_time_str(self): 40 | time_str = str(datetime.timedelta(seconds=self.average_time)) 41 | return time_str 42 | 43 | 44 | def get_time_str(time_diff): 45 | time_str = str(datetime.timedelta(seconds=time_diff)) 46 | return time_str 47 | -------------------------------------------------------------------------------- /sg-benchmark/requirements.txt: -------------------------------------------------------------------------------- 1 | ninja 2 | yacs 3 | cython 4 | matplotlib 5 | tqdm 6 | -------------------------------------------------------------------------------- /sg-benchmark/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
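# Build note (a sketch of the usual maskrcnn-benchmark workflow, not taken from this repo's
# docs): the C++/CUDA extensions declared below are normally compiled in-place with
#
#   python setup.py build develop
#
# and, judging from the get_extensions() check below, exporting FORCE_CUDA=1 forces the CUDA
# ops to be built even when no GPU is visible at build time (e.g. inside a container build).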
2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | version="0.1", 62 | author="fmassa", 63 | url="https://github.com/facebookresearch/maskrcnn-benchmark", 64 | description="object detection in pytorch", 65 | packages=find_packages(exclude=("configs", "tests",)), 66 | # install_requires=requirements, 67 | ext_modules=get_extensions(), 68 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 69 | ) 70 | -------------------------------------------------------------------------------- /sg-benchmark/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CYVincent/Scene-Graph-Transformer-CogTree/955d76403d6b17e4b35cb09b4669d8b3990a94ee/sg-benchmark/tools/__init__.py -------------------------------------------------------------------------------- /sg-benchmark/tools/detector_pretest_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
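# Hypothetical launch command for this evaluation script (GPU count and paths are
# placeholders, not taken from the repository docs):
#
#   python -m torch.distributed.launch --nproc_per_node=2 \
#       tools/detector_pretest_net.py \
#       --config-file configs/e2e_relation_X_101_32_8_FPN_1x.yaml \
#       MODEL.WEIGHT /path/to/checkpoint.pth OUTPUT_DIR /path/to/output
#
# torch.distributed.launch supplies --local_rank and WORLD_SIZE, which the code below reads;
# trailing KEY VALUE pairs end up in args.opts and are merged via cfg.merge_from_list().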
2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (no not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | # Check if we can enable mixed-precision via apex.amp 21 | try: 22 | from apex import amp 23 | except ImportError: 24 | raise ImportError('Use APEX for mixed precision via apex.amp') 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 29 | parser.add_argument( 30 | "--config-file", 31 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 32 | metavar="FILE", 33 | help="path to config file", 34 | ) 35 | parser.add_argument("--local_rank", type=int, default=0) 36 | parser.add_argument( 37 | "opts", 38 | help="Modify config options using the command-line", 39 | default=None, 40 | nargs=argparse.REMAINDER, 41 | ) 42 | 43 | args = parser.parse_args() 44 | 45 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 46 | distributed = num_gpus > 1 47 | 48 | if distributed: 49 | torch.cuda.set_device(args.local_rank) 50 | torch.distributed.init_process_group( 51 | backend="nccl", init_method="env://" 52 | ) 53 | synchronize() 54 | 55 | cfg.merge_from_file(args.config_file) 56 | cfg.merge_from_list(args.opts) 57 | cfg.freeze() 58 | 59 | save_dir = "" 60 | logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) 61 | logger.info("Using {} GPUs".format(num_gpus)) 62 | logger.info(cfg) 63 | 64 | logger.info("Collecting env info (might take some time)") 65 | logger.info("\n" + collect_env_info()) 66 | 67 | model = build_detection_model(cfg) 68 | model.to(cfg.MODEL.DEVICE) 69 | 70 | # Initialize mixed-precision if necessary 71 | use_mixed_precision = cfg.DTYPE == 'float16' 72 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 73 | 74 | output_dir = cfg.OUTPUT_DIR 75 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 76 | _ = checkpointer.load(cfg.MODEL.WEIGHT) 77 | 78 | iou_types = ("bbox",) 79 | if cfg.MODEL.MASK_ON: 80 | iou_types = iou_types + ("segm",) 81 | if cfg.MODEL.KEYPOINT_ON: 82 | iou_types = iou_types + ("keypoints",) 83 | if cfg.MODEL.RELATION_ON: 84 | iou_types = iou_types + ("relations", ) 85 | if cfg.MODEL.ATTRIBUTE_ON: 86 | iou_types = iou_types + ("attributes", ) 87 | 88 | output_folders = [None] * len(cfg.DATASETS.TEST) 89 | dataset_names = cfg.DATASETS.TEST 90 | if cfg.OUTPUT_DIR: 91 | for idx, dataset_name in enumerate(dataset_names): 92 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 93 | mkdir(output_folder) 94 | output_folders[idx] = output_folder 95 | data_loaders_val = make_data_loader(cfg, mode='val', is_distributed=distributed) # mode=val for fast visualization 96 | for output_folder, dataset_name, 
data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 97 | inference( 98 | cfg, 99 | model, 100 | data_loader_val, 101 | dataset_name=dataset_name, 102 | iou_types=iou_types, 103 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 104 | device=cfg.MODEL.DEVICE, 105 | expected_results=cfg.TEST.EXPECTED_RESULTS, 106 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 107 | output_folder=output_folder, 108 | ) 109 | synchronize() 110 | 111 | 112 | if __name__ == "__main__": 113 | main() 114 | -------------------------------------------------------------------------------- /sg-benchmark/tools/relation_test_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Set up custom environment before nearly anything else is imported 3 | # NOTE: this should be the first import (no not reorder) 4 | from maskrcnn_benchmark.utils.env import setup_environment # noqa F401 isort:skip 5 | 6 | import argparse 7 | import os 8 | 9 | import torch 10 | from maskrcnn_benchmark.config import cfg 11 | from maskrcnn_benchmark.data import make_data_loader 12 | from maskrcnn_benchmark.engine.inference import inference 13 | from maskrcnn_benchmark.modeling.detector import build_detection_model 14 | from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer 15 | from maskrcnn_benchmark.utils.collect_env import collect_env_info 16 | from maskrcnn_benchmark.utils.comm import synchronize, get_rank 17 | from maskrcnn_benchmark.utils.logger import setup_logger 18 | from maskrcnn_benchmark.utils.miscellaneous import mkdir 19 | 20 | # Check if we can enable mixed-precision via apex.amp 21 | try: 22 | from apex import amp 23 | except ImportError: 24 | raise ImportError('Use APEX for mixed precision via apex.amp') 25 | 26 | 27 | def main(): 28 | parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference") 29 | parser.add_argument( 30 | "--config-file", 31 | default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml", 32 | metavar="FILE", 33 | help="path to config file", 34 | ) 35 | parser.add_argument("--local_rank", type=int, default=0) 36 | parser.add_argument( 37 | "opts", 38 | help="Modify config options using the command-line", 39 | default=None, 40 | nargs=argparse.REMAINDER, 41 | ) 42 | 43 | args = parser.parse_args() 44 | 45 | num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1 46 | distributed = num_gpus > 1 47 | 48 | if distributed: 49 | torch.cuda.set_device(args.local_rank) 50 | torch.distributed.init_process_group( 51 | backend="nccl", init_method="env://" 52 | ) 53 | synchronize() 54 | 55 | cfg.merge_from_file(args.config_file) 56 | cfg.merge_from_list(args.opts) 57 | cfg.freeze() 58 | 59 | save_dir = "" 60 | logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank()) 61 | logger.info("Using {} GPUs".format(num_gpus)) 62 | logger.info(cfg) 63 | 64 | logger.info("Collecting env info (might take some time)") 65 | logger.info("\n" + collect_env_info()) 66 | 67 | model = build_detection_model(cfg) 68 | model.to(cfg.MODEL.DEVICE) 69 | 70 | # Initialize mixed-precision if necessary 71 | use_mixed_precision = cfg.DTYPE == 'float16' 72 | amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE) 73 | 74 | output_dir = cfg.OUTPUT_DIR 75 | checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir) 76 | _ = 
checkpointer.load(cfg.MODEL.WEIGHT) 77 | 78 | iou_types = ("bbox",) 79 | if cfg.MODEL.MASK_ON: 80 | iou_types = iou_types + ("segm",) 81 | if cfg.MODEL.KEYPOINT_ON: 82 | iou_types = iou_types + ("keypoints",) 83 | if cfg.MODEL.RELATION_ON: 84 | iou_types = iou_types + ("relations", ) 85 | if cfg.MODEL.ATTRIBUTE_ON: 86 | iou_types = iou_types + ("attributes", ) 87 | output_folders = [None] * len(cfg.DATASETS.TEST) 88 | dataset_names = cfg.DATASETS.TEST 89 | if cfg.OUTPUT_DIR: 90 | for idx, dataset_name in enumerate(dataset_names): 91 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) 92 | mkdir(output_folder) 93 | output_folders[idx] = output_folder 94 | data_loaders_val = make_data_loader(cfg, mode="test", is_distributed=distributed) 95 | for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val): 96 | inference( 97 | cfg, 98 | model, 99 | data_loader_val, 100 | dataset_name=dataset_name, 101 | iou_types=iou_types, 102 | box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY, 103 | device=cfg.MODEL.DEVICE, 104 | expected_results=cfg.TEST.EXPECTED_RESULTS, 105 | expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL, 106 | output_folder=output_folder, 107 | ) 108 | synchronize() 109 | 110 | 111 | if __name__ == "__main__": 112 | main() 113 | --------------------------------------------------------------------------------
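The logging and timing helpers collected above are normally driven from a training or evaluation loop. The following is a minimal, self-contained sketch (it assumes the maskrcnn_benchmark package is installed; the loop and metric names are illustrative, not taken from the repository):

import torch

from maskrcnn_benchmark.utils.metric_logger import MetricLogger
from maskrcnn_benchmark.utils.timer import Timer, get_time_str

meters = MetricLogger(delimiter="  ")  # keeps one SmoothedValue per metric name
timer = Timer()

timer.tic()
for iteration in range(1, 101):
    loss = torch.rand(1)               # stand-in for a real forward/backward pass
    meters.update(loss=loss, lr=0.01)  # tensors are converted to floats inside update()
    if iteration % 20 == 0:
        # MetricLogger.__str__ reports "name: median (global average)" for each meter
        print("iter {:4d}  {}".format(iteration, str(meters)))
print("total time: {}".format(get_time_str(timer.toc(average=False))))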