├── maskrcnn_benchmark ├── utils │ ├── __init__.py │ ├── README.md │ ├── collect_env.py │ ├── miscellaneous.py │ ├── amp.py │ ├── cv2_util.py │ ├── logger.py │ ├── imports.py │ ├── env.py │ ├── registry.py │ ├── ema.py │ ├── pretrain_model_loading.py │ ├── shallow_contrastive_loss_helper.py │ ├── model_zoo.py │ └── big_model_loading.py ├── modeling │ ├── __init__.py │ ├── roi_heads │ │ ├── box_head │ │ │ ├── __init__.py │ │ │ ├── roi_box_predictors.py │ │ │ └── box_head.py │ │ ├── mask_head │ │ │ ├── __init__.py │ │ │ ├── hourglass.py │ │ │ └── mask_head.py │ │ ├── keypoint_head │ │ │ ├── roi_keypoint_predictors.py │ │ │ ├── keypoint_head.py │ │ │ └── roi_keypoint_feature_extractors.py │ │ └── __init__.py │ ├── .DS_Store │ ├── language_backbone │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ ├── __init__.py │ │ ├── test_clip_tokenizer.py │ │ ├── build.py │ │ ├── backbone.py │ │ ├── bert_model.py │ │ ├── word_utils.py │ │ └── hfpt_tokenizer.py │ ├── registry.py │ ├── detector │ │ └── __init__.py │ ├── rpn │ │ ├── __init__.py │ │ └── transformer.py │ ├── backbone │ │ ├── mixer.py │ │ └── ops.py │ ├── balanced_positive_negative_sampler.py │ ├── utils.py │ └── box_coder.py ├── structures │ ├── __init__.py │ └── image_list.py ├── data │ ├── datasets │ │ ├── evaluation │ │ │ ├── od_eval.py │ │ │ ├── flickr │ │ │ │ └── __init__.py │ │ │ ├── lvis │ │ │ │ └── _change_lvis_annotation.py │ │ │ ├── vg │ │ │ │ └── __init__.py │ │ │ ├── voc │ │ │ │ └── __init__.py │ │ │ ├── coco │ │ │ │ └── __init__.py │ │ │ ├── od_to_grounding │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── flickr.py │ │ ├── phrasecut.py │ │ ├── object365.py │ │ ├── concat_dataset.py │ │ ├── duplicate_dataset.py │ │ ├── __init__.py │ │ ├── list_dataset.py │ │ ├── background.py │ │ ├── imagenet.py │ │ ├── refexp.py │ │ └── gqa.py │ ├── __init__.py │ ├── transforms │ │ ├── __init__.py │ │ └── build.py │ ├── samplers │ │ ├── __init__.py │ │ ├── iteration_based_batch_sampler.py │ │ └── distributed.py │ └── collate_batch.py ├── __init__.py ├── engine │ └── __init__.py ├── config │ └── __init__.py ├── solver │ └── __init__.py ├── layers │ ├── nms.py │ ├── smooth_l1_loss.py │ ├── evonorm.py │ ├── __init__.py │ ├── se.py │ ├── roi_pool.py │ ├── iou_loss.py │ └── roi_align.py └── csrc │ ├── ml_nms.h │ ├── cpu │ ├── vision.h │ └── nms_cpu.cpp │ ├── SigmoidFocalLoss.h │ ├── nms.h │ ├── vision.cpp │ ├── ROIPool.h │ ├── ROIAlign.h │ ├── deform_pool.h │ └── cuda │ └── deform_pool_cuda.cu ├── docs ├── lead.png ├── word_cloud_od.png └── benchmark_example_od.png ├── configs ├── pretrain │ ├── _coco.yaml │ ├── glip_Swin_T_O365.yaml │ ├── glip_A_Swin_T_O365.yaml │ ├── glip_Swin_T_O365_GoldG.yaml │ └── glip_Swin_L.yaml ├── flickr │ ├── test.yaml │ └── val.yaml ├── lvis │ ├── val.yaml │ └── minival.yaml ├── odinw_35 │ ├── _all.json │ ├── pothole.yaml │ ├── pistols_export.yaml │ ├── WildfireSmoke.yaml │ ├── Packages_Raw.yaml │ ├── ThermalCheetah.yaml │ ├── MaskWearing_raw.yaml │ └── CottontailRabbits.yaml └── odinw_13 │ ├── pothole.yaml │ └── pistols_export.yaml ├── CODE_OF_CONDUCT.md ├── .gitignore ├── LICENSE ├── SUPPORT.md ├── odinw └── download.py ├── setup.py ├── SECURITY.md └── tools └── cityscapes └── instances2dict_with_polygons.py /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/od_eval.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/lead.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/GLIP/HEAD/docs/lead.png -------------------------------------------------------------------------------- /docs/word_cloud_od.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/GLIP/HEAD/docs/word_cloud_od.png -------------------------------------------------------------------------------- /configs/pretrain/_coco.yaml: -------------------------------------------------------------------------------- 1 | DATASETS: 2 | TRAIN: ("coco_2017_train",) 3 | TEST: ("coco_2017_val", ) -------------------------------------------------------------------------------- /docs/benchmark_example_od.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/GLIP/HEAD/docs/benchmark_example_od.png -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/flickr/__init__.py: -------------------------------------------------------------------------------- 1 | from .flickr_eval import FlickrEvaluator 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/GLIP/HEAD/maskrcnn_benchmark/modeling/.DS_Store -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .build import make_data_loader 3 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .defaults import _C as cfg 3 | from .paths_catalog import try_to_find -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/microsoft/GLIP/HEAD/maskrcnn_benchmark/modeling/language_backbone/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/flickr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data as data 4 | from maskrcnn_benchmark.data.datasets.modulated_coco import ModulatedDataset 5 | 6 | 7 | class FlickrDataset(ModulatedDataset): 8 | pass 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/phrasecut.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data as data 4 | from maskrcnn_benchmark.data.datasets.modulated_coco import ModulatedDataset 5 | 6 | 7 | class PhrasecutDetection(ModulatedDataset): 8 | pass 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/object365.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data as data 4 | from maskrcnn_benchmark.data.datasets.coco_dt import CocoDetectionTSV 5 | 6 | 7 | class Object365DetectionTSV(CocoDetectionTSV): 8 | pass 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone as build_language_backbone 2 | from .build import build_tokenizer 3 | 4 | from .hfpt_tokenizer import HFPTTokenizer 5 | from .simple_tokenizer import SimpleTokenizer 6 | from .clip_model import CLIPTransformer 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1
| # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | 7 | LANGUAGE_BACKBONES = Registry() 8 | 9 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 10 | RPN_HEADS = Registry() 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/test_clip_tokenizer.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.modeling.language_backbone import build_tokenizer 2 | 3 | if __name__ == '__main__': 4 | 5 | tokenizer2 = build_tokenizer("clip") 6 | tokenized2 = tokenizer2( 7 | ["Detectest : fishid. jellyfishioasod. penguinasd. puffin.asd shark. starfish. round stingray"]) 8 | print(tokenized2) 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark import _C 3 | 4 | try: 5 | import torchvision 6 | from torchvision.ops import nms 7 | except: 8 | nms = _C.nms 9 | 10 | ml_nms = _C.ml_nms 11 | soft_nms = _C.soft_nms 12 | 13 | # nms.__doc__ = """ 14 | # This function performs Non-maximum suppresion""" 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/lvis/_change_lvis_annotation.py: -------------------------------------------------------------------------------- 1 | path = "DATASET/coco/annotations/lvis_v1_minival.json" 2 | import json 3 | with open(path) as f: 4 | all = json.load(f) 5 | 6 | for i in all["images"]: 7 | i["file_name"] = "/".join(i["coco_url"].split("/")[-2:]) 8 | 9 | with open("DATASET/coco/annotations/lvis_v1_minival_inserted_image_name.json", "w") as f: 10 | json.dump(all, f) -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Microsoft Open Source Code of Conduct 2 | 3 | This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). 4 | 5 | Resources: 6 | 7 | - [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/) 8 | - [Microsoft Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) 9 | - Contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with questions or concerns 10 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_rcnn import GeneralizedRCNN 2 | from .generalized_vl_rcnn import GeneralizedVLRCNN 3 | 4 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, 5 | "GeneralizedVLRCNN": GeneralizedVLRCNN 6 | } 7 | 8 | 9 | def build_detection_model(cfg): 10 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 11 | return meta_arch(cfg) 12 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import errno 3 | import os 4 | from .comm import is_main_process 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | 13 | 14 | def save_config(cfg, path): 15 | if is_main_process(): 16 | with open(path, 'w') as f: 17 | f.write(cfg.dump()) 18 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/amp.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | @contextmanager 4 | def nullcontext(enter_result=None, **kwargs): 5 | yield enter_result 6 | 7 | try: 8 | from torch.cuda.amp import autocast, GradScaler, custom_fwd, custom_bwd 9 | except: 10 | print('[Warning] Library for automatic mixed precision is not found, AMP is disabled!!') 11 | GradScaler = nullcontext 12 | autocast = nullcontext 13 | custom_fwd = nullcontext 14 | custom_bwd = nullcontext -------------------------------------------------------------------------------- /configs/flickr/test.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | ATSS: 3 | NUM_CLASSES: 8 # Placeholder 4 | FCOS: 5 | NUM_CLASSES: 8 # Placeholder 6 | ROI_BOX_HEAD: 7 | NUM_CLASSES: 8 # Placeholder 8 | DYHEAD: 9 | NUM_CLASSES: 8 # Placeholder 10 | DATASETS: 11 | TRAIN: ("flickr30k_test", ) 12 | TEST: ("flickr30k_test", ) 13 | FLICKR_GT_TYPE: "separate" 14 | 15 | INPUT: 16 | MIN_SIZE_TRAIN: 800 17 | MAX_SIZE_TRAIN: 1333 18 | MIN_SIZE_TEST: 800 19 | MAX_SIZE_TEST: 1333 20 | DATALOADER: 21 | SIZE_DIVISIBILITY: 32 22 | ASPECT_RATIO_GROUPING: False -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. 
/ 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/vg/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .vg_eval import do_vg_evaluation 4 | 5 | 6 | def vg_evaluation(dataset, predictions, output_folder, box_only, eval_attributes=False, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | logger.info("performing vg evaluation, ignored iou_types.") 9 | return do_vg_evaluation( 10 | dataset=dataset, 11 | predictions=predictions, 12 | output_folder=output_folder, 13 | box_only=box_only, 14 | eval_attributes=eval_attributes, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | *.pyc 3 | build/ 4 | DATASET/ 5 | OUTPUT/ 6 | MODEL/ 7 | 8 | # compilation and distribution 9 | __pycache__ 10 | _ext 11 | *.so 12 | maskrcnn_benchmark.egg-info/ 13 | dist/ 14 | 15 | # pytorch/python/numpy formats 16 | *.pth 17 | *.pkl 18 | *.npy 19 | 20 | # ipython/jupyter notebooks 21 | *.ipynb 22 | **/.ipynb_checkpoints/ 23 | 24 | # Editor temporaries 25 | *.swn 26 | *.swo 27 | *.swp 28 | *~ 29 | 30 | # Pycharm editor settings 31 | .idea 32 | 33 | # vscode editor settings 34 | .vscode 35 | 36 | # MacOS 37 | .DS_Store 38 | 39 | # Custom 40 | *.custom.py 41 | 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /configs/flickr/val.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | ATSS: 3 | NUM_CLASSES: 8 # Placeholder 4 | FCOS: 5 | NUM_CLASSES: 8 # Placeholder 6 | ROI_BOX_HEAD: 7 | NUM_CLASSES: 8 # Placeholder 8 | DYHEAD: 9 | NUM_CLASSES: 8 # Placeholder 10 | DATASETS: 11 | TRAIN: ("flickr30k_val", ) 12 | TEST: ("flickr30k_val", ) 13 | FLICKR_GT_TYPE: "separate" 14 | 15 | INPUT: 16 | MIN_SIZE_TRAIN: 800 17 | MAX_SIZE_TRAIN: 1333 18 | MIN_SIZE_TEST: 800 19 | MAX_SIZE_TEST: 1333 20 | DATALOADER: 21 | SIZE_DIVISIBILITY: 32 22 | ASPECT_RATIO_GROUPING: False 23 | SOLVER: 24 | WARMUP_ITERS: 0 25 | MAX_EPOCH: 12 26 | CHECKPOINT_PERIOD: 100 27 | TEST: 28 | IMS_PER_BATCH: 8 -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .coco_eval import do_coco_evaluation 2 | 3 | 4 | def coco_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only=False, 9 | iou_types=("bbox",), 10 | expected_results=(), 11 | expected_results_sigma_tol=4, 12 | ): 13 | return do_coco_evaluation( 14 | dataset=dataset, 15 | predictions=predictions, 16 | box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/build.py: -------------------------------------------------------------------------------- 1 | from .simple_tokenizer import SimpleTokenizer 2 | 3 | 4 | def build_tokenizer(tokenizer_name): 5 | tokenizer = None 6 | if tokenizer_name == 'clip': 7 | tokenizer = SimpleTokenizer() 8 | elif 'hf_' in tokenizer_name: 9 | from .hfpt_tokenizer import HFPTTokenizer 10 | 11 | tokenizer = HFPTTokenizer(pt_name=tokenizer_name[3:]) 12 | elif 'hfc_' in tokenizer_name: 13 | from .hfpt_tokenizer import HFPTTokenizer 14 | tokenizer = HFPTTokenizer(pt_name=tokenizer_name[4:]) 15 | else: 16 | raise ValueError('Unknown tokenizer') 17 | 18 | return tokenizer 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/od_to_grounding/__init__.py: -------------------------------------------------------------------------------- 1 | from .od_eval import do_od_evaluation 2 | 3 | 4 | def od_to_grounding_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only=False, 9 | iou_types=("bbox",), 10 | expected_results=(), 11 | expected_results_sigma_tol=4, ): 12 | return do_od_evaluation( 13 | dataset=dataset, 14 | predictions=predictions, 15 | box_only=box_only, 16 | output_folder=output_folder, 17 | iou_types=iou_types, 18 | expected_results=expected_results, 19 | expected_results_sigma_tol=expected_results_sigma_tol, 20 | ) 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatibility between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /configs/lvis/val.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | ATSS: 3 | NUM_CLASSES: 8 # these fields are not used; just a placeholder 4 | FCOS: 5 | NUM_CLASSES: 8 6 | ROI_BOX_HEAD: 7 | NUM_CLASSES: 8 8 | DYHEAD: 9 | NUM_CLASSES: 8 10 | DATASETS: 11 | REGISTER: 12 | lvis_evaluation_mini_val: 13 | img_dir: "coco" 14 | ann_file: "coco/annotations/lvis_v1_minival_inserted_image_name.json" 15 |
lvis_evaluation_val: 16 | img_dir: "coco" 17 | ann_file: "coco/annotations/lvis_od_val.json" 18 | TRAIN: ("lvis_evaluation_val",) 19 | TEST: ("lvis_evaluation_val",) 20 | 21 | INPUT: 22 | MIN_SIZE_TRAIN: 800 23 | MAX_SIZE_TRAIN: 1333 24 | MIN_SIZE_TEST: 800 25 | MAX_SIZE_TEST: 1333 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | ASPECT_RATIO_GROUPING: False 29 | TEST: 30 | IMS_PER_BATCH: 8 31 | -------------------------------------------------------------------------------- /configs/lvis/minival.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | ATSS: 3 | NUM_CLASSES: 8 # these fields are not used; just a placeholder 4 | FCOS: 5 | NUM_CLASSES: 8 6 | ROI_BOX_HEAD: 7 | NUM_CLASSES: 8 8 | DYHEAD: 9 | NUM_CLASSES: 8 10 | DATASETS: 11 | REGISTER: 12 | lvis_evaluation_mini_val: 13 | img_dir: "coco" 14 | ann_file: "coco/annotations/lvis_v1_minival_inserted_image_name.json" 15 | lvis_evaluation_val: 16 | img_dir: "coco" 17 | ann_file: "coco/annotations/lvis_od_val.json" 18 | TRAIN: ("lvis_evaluation_mini_val",) 19 | TEST: ("lvis_evaluation_mini_val",) 20 | 21 | INPUT: 22 | MIN_SIZE_TRAIN: 800 23 | MAX_SIZE_TRAIN: 1333 24 | MIN_SIZE_TEST: 800 25 | MAX_SIZE_TEST: 1333 26 | DATALOADER: 27 | SIZE_DIVISIBILITY: 32 28 | ASPECT_RATIO_GROUPING: False 29 | TEST: 30 | IMS_PER_BATCH: 8 31 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.DEBUG) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import bisect 3 | 4 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 5 | 6 | 7 | class ConcatDataset(_ConcatDataset): 8 | """ 9 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 10 | method for querying the sizes of the image 11 | """ 12 | 13 | def get_idxs(self, idx): 14 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 15 | if dataset_idx == 0: 16 | sample_idx = idx 17 | else: 18 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 19 | return dataset_idx, sample_idx 20 | 21 | def get_img_info(self, idx): 22 | dataset_idx, sample_idx = self.get_idxs(idx) 23 | return self.datasets[dataset_idx].get_img_info(sample_idx) 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor ml_nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const at::Tensor& labels, 13 | const float threshold) { 14 | 15 | if (dets.device().is_cuda()) { 16 | #ifdef WITH_CUDA 17 | // TODO raise error if not compiled with CUDA 18 | if (dets.numel() == 0) 19 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 20 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 21 | return ml_nms_cuda(b, threshold); 22 | #else 23 | AT_ERROR("Not compiled with GPU support"); 24 | #endif 25 | } 26 | AT_ERROR("CPU version not implemented"); 27 | } 28 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY37: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | # from .rpn import build_rpn 3 | from .rpn import RPNModule 4 | from .retina import RetinaNetModule 5 | from .fcos import FCOSModule 6 | from .atss import ATSSModule 7 | from .dyhead import DyHeadModule 8 | from .vldyhead import VLDyHeadModule 9 | 10 | _RPN_META_ARCHITECTURES = {"RPN": RPNModule, 11 | "RETINA": RetinaNetModule, 12 | "FCOS": FCOSModule, 13 | "ATSS": ATSSModule, 14 | "DYHEAD": DyHeadModule, 15 | "VLDYHEAD": VLDyHeadModule 16 | } 17 | 18 | 19 | def build_rpn(cfg): 20 | """ 21 | This gives the gist of it. Not super important because it doesn't change as much 22 | """ 23 | rpn_arch = _RPN_META_ARCHITECTURES[cfg.MODEL.RPN_ARCHITECTURE] 24 | return rpn_arch(cfg) 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | 6 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input, 7 | const at::Tensor& rois, 8 | const float spatial_scale, 9 | const int pooled_height, 10 | const int pooled_width, 11 | const int sampling_ratio); 12 | 13 | 14 | at::Tensor nms_cpu(const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float threshold); 17 | 18 | 19 | std::pair<at::Tensor, at::Tensor> soft_nms_cpu(const at::Tensor& dets, 20 | const at::Tensor& scores, 21 | const float threshold, 22 | const float sigma); -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/duplicate_dataset.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import TypeVar, Optional, Iterator 3 | 4 | import torch 5 | from torch.utils.data import Sampler, Dataset 6 | import torch.distributed as dist 7 | import random 8 | import numpy as np 9 | 10 | 11 | def create_duplicate_dataset(DatasetBaseClass): 12 | class DupDataset(DatasetBaseClass): 13 | 14 | def __init__(self, copy, **kwargs): 15 | super(DupDataset, self).__init__(**kwargs) 16 | 17 | self.copy = copy 18 | self.length = super(DupDataset, self).__len__() 19 | 20 | def __len__(self): 21 | return self.copy * self.length 22 | 23 | def __getitem__(self, index): 24 | true_index = index % self.length 25 | return super(DupDataset, self).__getitem__(true_index) 26 | 27 | def get_img_info(self, index): 28 | true_index = index % self.length 29 | return super(DupDataset, self).get_img_info(true_index) 30 | 31 | return DupDataset 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .coco import COCODataset 3 | from .voc import PascalVOCDataset 4 | from .concat_dataset import ConcatDataset 5 | from .background import Background 6 | from .tsv import TSVDataset, ODTSVDataset 7 | 8 | from .modulated_coco import ModulatedDataset, CocoDetection, CocoGrounding 9 | from .flickr import FlickrDataset 10 | from .refexp import RefExpDataset 11 | from .mixed import MixedDataset 12 | from .gqa import GQADataset 13 | 14 | from .coco_dt import CocoDetectionTSV 15 | from .caption import CaptionTSV 16 | from .lvis import LvisDetection 17 | from .pseudo_data import PseudoData 18 | from .phrasecut import PhrasecutDetection 19 | 20 | __all__ = ["COCODataset", "TSVDataset", "ODTSVDataset", "ConcatDataset", "PascalVOCDataset", "Background", 21 | "ModulatedDataset", "MixedDataset", "CocoDetection", "FlickrDataset", "RefExpDataset", "GQADataset", 22 | "CocoDetectionTSV", "CocoGrounding", "CaptionTSV", "LvisDetection", "PseudoData", "PhrasecutDetection" 23 | ] 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/mixer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class MixedOperationRandom(nn.Module): 5 | def __init__(self, search_ops): 6 | super(MixedOperationRandom, self).__init__() 7 | self.ops = nn.ModuleList(search_ops) 8 | self.num_ops = len(search_ops) 9 | 10 | def forward(self, x, x_path=None): 11 | if x_path is None: 12 | output = sum(op(x) for op in self.ops) / self.num_ops 13 | else: 14 | assert isinstance(x_path, (int, float)) and 0 <= x_path < self.num_ops or isinstance(x_path, torch.Tensor) 15 | if isinstance(x_path, (int, float)): 16 | x_path = int(x_path) 17 | assert 0 <= x_path < self.num_ops 18 | output = self.ops[x_path](x) 19 | elif isinstance(x_path, torch.Tensor): 20 | assert x_path.size(0) == x.size(0), 'batch_size should match length of y_idx' 21 | output = torch.cat([self.ops[int(x_path[i].item())](x.narrow(0, i, 1)) 22 | for i in range(x.size(0))], dim=0) 23 | return output -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Microsoft Corporation. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | // Interface for Python 10 | at::Tensor SigmoidFocalLoss_forward( 11 | const at::Tensor& logits, 12 | const at::Tensor& targets, 13 | const int num_classes, 14 | const float gamma, 15 | const float alpha) { 16 | if (logits.device().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor SigmoidFocalLoss_backward( 27 | const at::Tensor& logits, 28 | const at::Tensor& targets, 29 | const at::Tensor& d_losses, 30 | const int num_classes, 31 | const float gamma, 32 | const float alpha) { 33 | if (logits.device().is_cuda()) { 34 | #ifdef WITH_CUDA 35 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 36 | #else 37 | AT_ERROR("Not compiled with GPU support"); 38 | #endif 39 | } 40 | AT_ERROR("Not implemented on the CPU"); 41 | } 42 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /SUPPORT.md: -------------------------------------------------------------------------------- 1 | # TODO: The maintainer of this repo has not yet edited this file 2 | 3 | **REPO OWNER**: Do you want Customer Service & Support (CSS) support for this product/project? 4 | 5 | - **No CSS support:** Fill out this template with information about how to file issues and get help. 6 | - **Yes CSS support:** Fill out an intake form at [aka.ms/spot](https://aka.ms/spot). CSS will work with/help you to determine next steps. More details also available at [aka.ms/onboardsupport](https://aka.ms/onboardsupport). 7 | - **Not sure?** Fill out a SPOT intake as though the answer were "Yes". CSS will help you decide. 8 | 9 | *Then remove this first heading from this SUPPORT.MD file before publishing your repo.* 10 | 11 | # Support 12 | 13 | ## How to file issues and get help 14 | 15 | This project uses GitHub Issues to track bugs and feature requests. Please search the existing 16 | issues before filing new issues to avoid duplicates. For new issues, file your bug or 17 | feature request as a new Issue. 18 | 19 | For help and questions about using this project, please **REPO MAINTAINER: INSERT INSTRUCTIONS HERE 20 | FOR HOW TO ENGAGE REPO OWNERS OR COMMUNITY FOR HELP. COULD BE A STACK OVERFLOW TAG OR OTHER 21 | CHANNEL. WHERE WILL YOU HELP PEOPLE?**. 22 | 23 | ## Microsoft Support Policy 24 | 25 | Support for this **PROJECT or PRODUCT** is limited to the resources listed above. 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 
11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | at::Tensor nms(const at::Tensor& dets, 11 | const at::Tensor& scores, 12 | const float threshold) { 13 | 14 | if (dets.device().is_cuda()) { 15 | #ifdef WITH_CUDA 16 | // TODO raise error if not compiled with CUDA 17 | if (dets.numel() == 0) 18 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 19 | auto b = at::cat({dets, scores.unsqueeze(1)}, 1); 20 | return nms_cuda(b, threshold); 21 | #else 22 | AT_ERROR("Not compiled with GPU support"); 23 | #endif 24 | } 25 | 26 | at::Tensor result = nms_cpu(dets, scores, threshold); 27 | return result; 28 | } 29 | 30 | 31 | std::pair soft_nms(const at::Tensor& dets, 32 | const at::Tensor& scores, 33 | const float threshold, 34 | const float sigma) { 35 | 36 | if (dets.device().is_cuda()) { 37 | #ifdef WITH_CUDA 38 | AT_ERROR("Soft NMS Does Not have GPU support"); 39 | #endif 40 | } 41 | 42 | std::pair result = soft_nms_cpu(dets, scores, threshold, sigma); 43 | 44 | return result; 45 | } -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark import layers 5 | 6 | 7 | class KeypointRCNNPredictor(nn.Module): 8 | def __init__(self, cfg): 9 | super(KeypointRCNNPredictor, self).__init__() 10 | input_features = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS[-1] 11 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 12 | deconv_kernel = 4 13 | self.kps_score_lowres = layers.ConvTranspose2d( 14 | input_features, 15 | num_keypoints, 16 | deconv_kernel, 17 | stride=2, 18 | padding=deconv_kernel // 2 - 1, 19 | ) 20 | nn.init.kaiming_normal_( 21 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 22 | ) 23 | nn.init.constant_(self.kps_score_lowres.bias, 0) 24 | self.up_scale = 2 25 | 26 | def forward(self, x): 27 | x = self.kps_score_lowres(x) 28 | x = layers.interpolate( 29 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 30 | ) 31 | return x 32 | 33 | 34 | _ROI_KEYPOINT_PREDICTOR = {"KeypointRCNNPredictor": KeypointRCNNPredictor} 35 | 36 | 37 | def 
make_roi_keypoint_predictor(cfg): 38 | func = _ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 39 | return func(cfg) -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/evonorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class EvoNorm2d(nn.Module): 6 | __constants__ = ['num_features', 'eps', 'nonlinearity'] 7 | 8 | def __init__(self, num_features, eps=1e-5, nonlinearity=True, group=32): 9 | super(EvoNorm2d, self).__init__() 10 | 11 | self.num_features = num_features 12 | self.eps = eps 13 | self.nonlinearity = nonlinearity 14 | self.group = group 15 | 16 | self.weight = nn.Parameter(torch.Tensor(1, num_features, 1, 1)) 17 | self.bias = nn.Parameter(torch.Tensor(1, num_features, 1, 1)) 18 | if self.nonlinearity: 19 | self.v = nn.Parameter(torch.Tensor(1, num_features, 1, 1)) 20 | 21 | self.reset_parameters() 22 | 23 | def reset_parameters(self): 24 | nn.init.ones_(self.weight) 25 | nn.init.zeros_(self.bias) 26 | if self.nonlinearity: 27 | nn.init.ones_(self.v) 28 | 29 | def group_std(self, x, groups=32): 30 | N, C, H, W = x.shape 31 | x = torch.reshape(x, (N, groups, C // groups, H, W)) 32 | std = torch.std(x, (3, 4), keepdim=True) 33 | return torch.reshape(std + self.eps, (N, C, 1, 1)) 34 | 35 | def forward(self, x): 36 | if self.nonlinearity: 37 | num = x * torch.sigmoid(self.v * x) 38 | return num / self.group_std(x, self.group) * self.weight + self.bias 39 | else: 40 | return x * self.weight + self.bias -------------------------------------------------------------------------------- /odinw/download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | argparser = argparse.ArgumentParser() 5 | argparser.add_argument("--dataset_names", default="all", type=str) # "all" or names joined by comma 6 | argparser.add_argument("--dataset_path", default="DATASET/odinw", type=str) 7 | args = argparser.parse_args() 8 | 9 | root = "https://huggingface.co/GLIPModel/GLIP/tree/main/odinw_35" 10 | 11 | all_datasets = ["AerialMaritimeDrone", "AmericanSignLanguageLetters", "Aquarium", "BCCD", "ChessPieces", "CottontailRabbits", "DroneControl", "EgoHands", "HardHatWorkers", "MaskWearing", "MountainDewCommercial", "NorthAmericaMushrooms", "OxfordPets", "PKLot", "Packages", "PascalVOC", "Raccoon", "ShellfishOpenImages", "ThermalCheetah", "UnoCards", "VehiclesOpenImages", "WildfireSmoke", "boggleBoards", "brackishUnderwater", "dice", "openPoetryVision", "pistols", "plantdoc", "pothole", "selfdrivingCar", "thermalDogsAndPeople", "vector", "websiteScreenshots"] 12 | 13 | datasets_to_download = [] 14 | if args.dataset_names == "all": 15 | datasets_to_download = all_datasets 16 | else: 17 | datasets_to_download = args.dataset_names.split(",") 18 | 19 | for dataset in datasets_to_download: 20 | if dataset in all_datasets: 21 | print("Downloading dataset: ", dataset) 22 | os.system("wget " + root + "/" + dataset + ".zip" + " -O " + args.dataset_path + "/" + dataset + ".zip") 23 | os.system("unzip " + args.dataset_path + "/" + dataset + ".zip -d " + args.dataset_path) 24 | os.system("rm " + args.dataset_path + "/" + dataset + ".zip") 25 | else: 26 | print("Dataset not found: ", dataset) 27 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/backbone.py: 
-------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.modeling import registry 6 | from . import bert_model 7 | from . import rnn_model 8 | from . import clip_model 9 | from . import word_utils 10 | 11 | 12 | @registry.LANGUAGE_BACKBONES.register("bert-base-uncased") 13 | def build_bert_backbone(cfg): 14 | body = bert_model.BertEncoder(cfg) 15 | model = nn.Sequential(OrderedDict([("body", body)])) 16 | return model 17 | 18 | 19 | @registry.LANGUAGE_BACKBONES.register("roberta-base") 20 | def build_bert_backbone(cfg): 21 | body = bert_model.BertEncoder(cfg) 22 | model = nn.Sequential(OrderedDict([("body", body)])) 23 | return model 24 | 25 | 26 | @registry.LANGUAGE_BACKBONES.register("rnn") 27 | def build_rnn_backbone(cfg): 28 | body = rnn_model.RNNEnoder(cfg) 29 | model = nn.Sequential(OrderedDict([("body", body)])) 30 | return model 31 | 32 | 33 | @registry.LANGUAGE_BACKBONES.register("clip") 34 | def build_clip_backbone(cfg): 35 | body = clip_model.CLIPTransformer(cfg) 36 | model = nn.Sequential(OrderedDict([("body", body)])) 37 | return model 38 | 39 | 40 | def build_backbone(cfg): 41 | assert cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE in registry.LANGUAGE_BACKBONES, \ 42 | "cfg.MODEL.LANGUAGE_BACKBONE.TYPE: {} is not registered in registry".format( 43 | cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE 44 | ) 45 | return registry.LANGUAGE_BACKBONES[cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE](cfg) 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing registering modules, it extends a dictionary 12 | and provides a register function. 13 | 14 | E.g. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): used as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ...
27 | 28 | Accessing a module is just like using a dictionary, e.g.: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /configs/odinw_35/_all.json: -------------------------------------------------------------------------------- 1 | ["configs/odinw_35/AerialMaritimeDrone_large.yaml","configs/odinw_35/AerialMaritimeDrone_tiled.yaml","configs/odinw_35/AmericanSignLanguageLetters_American_Sign_Language_Letters.v1-v1.coco.yaml","configs/odinw_35/Aquarium_Aquarium_Combined.v2-raw-1024.coco.yaml","configs/odinw_35/BCCD_BCCD.v3-raw.coco.yaml","configs/odinw_35/ChessPieces_Chess_Pieces.v23-raw.coco.yaml","configs/odinw_35/CottontailRabbits.yaml","configs/odinw_35/DroneControl_Drone_Control.v3-raw.coco.yaml","configs/odinw_35/EgoHands_generic.yaml","configs/odinw_35/EgoHands_specific.yaml","configs/odinw_35/HardHatWorkers_raw.yaml","configs/odinw_35/MaskWearing_raw.yaml","configs/odinw_35/MountainDewCommercial.yaml","configs/odinw_35/NorthAmericaMushrooms_North_American_Mushrooms.v1-416x416.coco.yaml","configs/odinw_35/OxfordPets_by-breed.yaml","configs/odinw_35/OxfordPets_by-species.yaml","configs/odinw_35/PKLot_640.yaml","configs/odinw_35/Packages_Raw.yaml","configs/odinw_35/PascalVOC.yaml","configs/odinw_35/Raccoon_Raccoon.v2-raw.coco.yaml","configs/odinw_35/ShellfishOpenImages_raw.yaml","configs/odinw_35/ThermalCheetah.yaml","configs/odinw_35/UnoCards_raw.yaml","configs/odinw_35/VehiclesOpenImages_416x416.yaml","configs/odinw_35/WildfireSmoke.yaml","configs/odinw_35/boggleBoards_416x416AutoOrient_export_.yaml","configs/odinw_35/brackishUnderwater_960x540.yaml","configs/odinw_35/dice_mediumColor_export.yaml","configs/odinw_35/openPoetryVision_512x512.yaml","configs/odinw_35/pistols_export.yaml","configs/odinw_35/plantdoc_416x416.yaml","configs/odinw_35/pothole.yaml","configs/odinw_35/selfdrivingCar_fixedLarge_export_.yaml","configs/odinw_35/thermalDogsAndPeople.yaml","configs/odinw_35/websiteScreenshots.yaml"] -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm2d 5 | from .misc import Conv2d, _NewEmptyTensorOp 6 | from .misc import ConvTranspose2d 7 | from .misc import DFConv2d 8 | from .misc import interpolate 9 | from .misc import Scale 10 | from .nms import nms 11 | from .nms import ml_nms 12 | from .nms import soft_nms 13 | from .roi_align import ROIAlign 14 | from .roi_align import roi_align 15 | from .roi_align import ROIAlignV2 16 | from .roi_pool import ROIPool 17 | from .roi_pool import roi_pool 18 | from .smooth_l1_loss import smooth_l1_loss 19 | from .sigmoid_focal_loss import SigmoidFocalLoss, TokenSigmoidFocalLoss 20 | from .iou_loss import IOULoss, IOUWHLoss 21 | from .deform_conv import DeformConv, ModulatedDeformConv 22 | from .dropblock import DropBlock2D, DropBlock3D 23 | from .evonorm import EvoNorm2d 24 | from .dyrelu import DYReLU, swish 25 | from .se import SELayer, SEBlock 26 | from .dyhead import DyHead 27 | from .set_loss import HungarianMatcher, SetCriterion 28 | 29 | __all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 30 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", 31 | "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", 32 | "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", 33 | "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", 34 | "HungarianMatcher", "SetCriterion", "ROIAlignV2", "_NewEmptyTensorOp"] 35 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | if len(cfg.AUGMENT.MULT_MIN_SIZE_TRAIN)>0: 8 | min_size = cfg.AUGMENT.MULT_MIN_SIZE_TRAIN 9 | else: 10 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 11 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 12 | flip_horizontal_prob = cfg.AUGMENT.FLIP_PROB_TRAIN 13 | flip_vertical_prob = cfg.AUGMENT.VERTICAL_FLIP_PROB_TRAIN 14 | brightness = cfg.AUGMENT.BRIGHTNESS 15 | contrast = cfg.AUGMENT.CONTRAST 16 | saturation = cfg.AUGMENT.SATURATION 17 | hue = cfg.AUGMENT.HUE 18 | 19 | crop_prob = cfg.AUGMENT.CROP_PROB 20 | min_ious = cfg.AUGMENT.CROP_MIN_IOUS 21 | min_crop_size = cfg.AUGMENT.CROP_MIN_SIZE 22 | 23 | else: 24 | min_size = cfg.INPUT.MIN_SIZE_TEST 25 | max_size = cfg.INPUT.MAX_SIZE_TEST 26 | flip_horizontal_prob = 0.0 27 | 28 | fix_res = cfg.INPUT.FIX_RES 29 | if cfg.INPUT.FORMAT != '': 30 | input_format = cfg.INPUT.FORMAT 31 | elif cfg.INPUT.TO_BGR255: 32 | input_format = 'bgr255' 33 | normalize_transform = T.Normalize( 34 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, format=input_format 35 | ) 36 | 37 | transform = T.Compose( 38 | [ 39 | T.Resize(min_size, max_size, restrict=fix_res), 40 | T.RandomHorizontalFlip(flip_horizontal_prob), 41 | T.ToTensor(), 42 | normalize_transform, 43 | ] 44 | ) 45 | return transform 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include "nms.h" 3 | #include "ml_nms.h" 4 | #include "ROIAlign.h" 5 | #include "ROIPool.h" 6 | #include "SigmoidFocalLoss.h" 7 | #include "deform_conv.h" 8 | #include "deform_pool.h" 9 | 10 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 11 | m.def("nms", &nms, "non-maximum suppression"); 12 | m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression"); 13 | m.def("soft_nms", &soft_nms, "soft non-maximum suppression"); 14 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 15 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 16 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 17 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 18 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 19 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 20 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 21 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 22 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 23 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 24 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 25 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 26 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 27 | } 28 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/background.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import json 4 | from PIL import Image 5 | 6 | import torch 7 | import torchvision 8 | import torch.utils.data as data 9 | from maskrcnn_benchmark.structures.bounding_box import BoxList 10 | 11 | class Background(data.Dataset): 12 | """ Background 13 | 14 | Args: 15 | root (string): Root directory where images are downloaded to. 16 | annFile (string): Path to json annotation file. 17 | transform (callable, optional): A function/transform that takes in an PIL image 18 | and returns a transformed version. E.g, ``transforms.ToTensor`` 19 | """ 20 | 21 | def __init__(self, ann_file, root, remove_images_without_annotations=None, transforms=None): 22 | self.root = root 23 | 24 | with open(ann_file, 'r') as f: 25 | self.ids = json.load(f)['images'] 26 | self.transform = transforms 27 | 28 | def __getitem__(self, index): 29 | """ 30 | Args: 31 | index (int): Index 32 | 33 | Returns: 34 | tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``. 
35 | """ 36 | im_info = self.ids[index] 37 | path = im_info['file_name'] 38 | fp = os.path.join(self.root, path) 39 | 40 | img = Image.open(fp).convert('RGB') 41 | if self.transform is not None: 42 | img, _ = self.transform(img, None) 43 | null_target = BoxList(torch.zeros((0,4)), (img.shape[-1], img.shape[-2])) 44 | null_target.add_field('labels', torch.zeros(0)) 45 | 46 | return img, null_target, index 47 | 48 | def __len__(self): 49 | return len(self.ids) 50 | 51 | def get_img_info(self, index): 52 | im_info = self.ids[index] 53 | return im_info -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/ema.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from collections import OrderedDict 3 | import torch 4 | 5 | 6 | class ModelEma: 7 | def __init__(self, model, decay=0.9999, device=''): 8 | self.ema = deepcopy(model) 9 | self.ema.eval() 10 | self.decay = decay 11 | self.device = device 12 | if device: 13 | self.ema.to(device=device) 14 | self.ema_is_dp = hasattr(self.ema, 'module') 15 | for p in self.ema.parameters(): 16 | p.requires_grad_(False) 17 | 18 | def load_checkpoint(self, checkpoint): 19 | if isinstance(checkpoint, str): 20 | checkpoint = torch.load(checkpoint) 21 | 22 | assert isinstance(checkpoint, dict) 23 | if 'model_ema' in checkpoint: 24 | new_state_dict = OrderedDict() 25 | for k, v in checkpoint['model_ema'].items(): 26 | if self.ema_is_dp: 27 | name = k if k.startswith('module') else 'module.' + k 28 | else: 29 | name = k.replace('module.', '') if k.startswith('module') else k 30 | new_state_dict[name] = v 31 | self.ema.load_state_dict(new_state_dict) 32 | 33 | def state_dict(self): 34 | return self.ema.state_dict() 35 | 36 | def update(self, model): 37 | pre_module = hasattr(model, 'module') and not self.ema_is_dp 38 | with torch.no_grad(): 39 | curr_msd = model.state_dict() 40 | for k, ema_v in self.ema.state_dict().items(): 41 | k = 'module.' + k if pre_module else k 42 | model_v = curr_msd[k].detach() 43 | if self.device: 44 | model_v = model_v.to(device=self.device) 45 | ema_v.copy_(ema_v * self.decay + (1. - self.decay) * model_v) 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
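A hedged sketch of how the ModelEma helper above (utils/ema.py) is usually wired into a training loop; the tiny model, optimizer, and loss below are stand-ins for the real detector and objective.

import torch
from torch import nn
from maskrcnn_benchmark.utils.ema import ModelEma

model = nn.Linear(16, 4)                       # stand-in for the detector
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
model_ema = ModelEma(model, decay=0.999)       # keeps a frozen, exponentially smoothed copy

for _ in range(10):
    x = torch.randn(8, 16)
    loss = model(x).pow(2).mean()              # stand-in loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    model_ema.update(model)                    # EMA weights track the live weights after each step

smoothed_weights = model_ema.state_dict()      # typically used for evaluation or checkpointing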
2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | 11 | std::tuple ROIPool_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width) { 16 | if (input.device().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 19 | #else 20 | AT_ERROR("Not compiled with GPU support"); 21 | #endif 22 | } 23 | AT_ERROR("Not implemented on the CPU"); 24 | } 25 | 26 | at::Tensor ROIPool_backward(const at::Tensor& grad, 27 | const at::Tensor& input, 28 | const at::Tensor& rois, 29 | const at::Tensor& argmax, 30 | const float spatial_scale, 31 | const int pooled_height, 32 | const int pooled_width, 33 | const int batch_size, 34 | const int channels, 35 | const int height, 36 | const int width) { 37 | if (grad.device().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | 4 | #include "cpu/vision.h" 5 | 6 | #ifdef WITH_CUDA 7 | #include "cuda/vision.h" 8 | #endif 9 | 10 | // Interface for Python 11 | at::Tensor ROIAlign_forward(const at::Tensor& input, 12 | const at::Tensor& rois, 13 | const float spatial_scale, 14 | const int pooled_height, 15 | const int pooled_width, 16 | const int sampling_ratio) { 17 | if (input.device().is_cuda()) { 18 | #ifdef WITH_CUDA 19 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 20 | #else 21 | AT_ERROR("Not compiled with GPU support"); 22 | #endif 23 | } 24 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 25 | } 26 | 27 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 28 | const at::Tensor& rois, 29 | const float spatial_scale, 30 | const int pooled_height, 31 | const int pooled_width, 32 | const int batch_size, 33 | const int channels, 34 | const int height, 35 | const int width, 36 | const int sampling_ratio) { 37 | if (grad.device().is_cuda()) { 38 | #ifdef WITH_CUDA 39 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 40 | #else 41 | AT_ERROR("Not compiled with GPU support"); 42 | #endif 43 | } 44 | AT_ERROR("Not implemented on the CPU"); 45 | } 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/pretrain_model_loading.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from collections import OrderedDict 6 | 7 | def _remove_bn_statics(state_dict): 8 | layer_keys = sorted(state_dict.keys()) 9 | remove_list = [] 10 | for key in layer_keys: 11 | if 'running_mean' in key or 'running_var' in key or 'num_batches_tracked' in key: 12 | remove_list.append(key) 13 | for key in remove_list: 14 | del state_dict[key] 
15 | return state_dict 16 | 17 | def _rename_conv_weights_for_deformable_conv_layers(state_dict, cfg): 18 | import re 19 | layer_keys = sorted(state_dict.keys()) 20 | for ix, stage_with_dcn in enumerate(cfg.MODEL.RESNETS.STAGE_WITH_DCN, 1): 21 | if not stage_with_dcn: 22 | continue 23 | for old_key in layer_keys: 24 | pattern = ".*layer{}.*conv2.*".format(ix) 25 | r = re.match(pattern, old_key) 26 | if r is None: 27 | continue 28 | for param in ["weight", "bias"]: 29 | if old_key.find(param) is -1: 30 | continue 31 | if 'unit01' in old_key: 32 | continue 33 | new_key = old_key.replace( 34 | "conv2.{}".format(param), "conv2.conv.{}".format(param) 35 | ) 36 | print("pattern: {}, old_key: {}, new_key: {}".format( 37 | pattern, old_key, new_key 38 | )) 39 | state_dict[new_key] = state_dict[old_key] 40 | del state_dict[old_key] 41 | return state_dict 42 | 43 | 44 | def load_pretrain_format(cfg, f): 45 | model = torch.load(f) 46 | model = _remove_bn_statics(model) 47 | model = _rename_conv_weights_for_deformable_conv_layers(model, cfg) 48 | 49 | return dict(model=model) 50 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/se.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SELayer(nn.Module): 5 | def __init__(self, channel, reduction=16): 6 | super(SELayer, self).__init__() 7 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 8 | self.fc = nn.Sequential( 9 | nn.Linear(channel, channel // reduction, bias=False), 10 | nn.ReLU(inplace=True), 11 | nn.Linear(channel // reduction, channel, bias=False), 12 | nn.Sigmoid() 13 | ) 14 | 15 | def forward(self, x): 16 | b, c, _, _ = x.size() 17 | y = self.avg_pool(x).view(b, c) 18 | y = self.fc(y).view(b, c, 1, 1) 19 | return x * y.expand_as(x) 20 | 21 | 22 | class SEBlock(nn.Module): 23 | def __init__(self, channels, reduction=16, 24 | use_conv=True, mid_activation=nn.ReLU(inplace=True), out_activation=nn.Sigmoid()): 25 | super(SEBlock, self).__init__() 26 | self.use_conv = use_conv 27 | mid_channels = channels // reduction 28 | 29 | self.pool = nn.AdaptiveAvgPool2d(output_size=1) 30 | if use_conv: 31 | self.conv1 = nn.Conv2d(channels, mid_channels, kernel_size=1, bias=True) 32 | else: 33 | self.fc1 = nn.Linear(channels, mid_channels) 34 | self.activ = mid_activation 35 | if use_conv: 36 | self.conv2 = nn.Conv2d(mid_channels, channels, kernel_size=1, bias=True) 37 | else: 38 | self.fc2 = nn.Linear(mid_channels, channels) 39 | self.sigmoid = out_activation 40 | 41 | def forward(self, x): 42 | w = self.pool(x) 43 | if not self.use_conv: 44 | w = w.view(x.size(0), -1) 45 | w = self.conv1(w) if self.use_conv else self.fc1(w) 46 | w = self.activ(w) 47 | w = self.conv2(w) if self.use_conv else self.fc2(w) 48 | w = self.sigmoid(w) 49 | if not self.use_conv: 50 | w = w.unsqueeze(2).unsqueeze(3) 51 | x = x * w 52 | return x -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn, Tensor 4 | 5 | import copy 6 | from typing import Optional, List 7 | 8 | 9 | def _get_clones(module, N): 10 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 11 | 12 | 13 | def _get_activation_fn(activation): 14 | """Return an activation function given a string""" 15 | if activation == "relu": 16 | return F.relu 17 | 
if activation == "gelu": 18 | return F.gelu 19 | if activation == "glu": 20 | return F.glu 21 | raise RuntimeError(F"activation should be relu/gelu, not {activation}.") 22 | 23 | 24 | class TransformerEncoderLayer(nn.Module): 25 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, 26 | activation="relu", normalize_before=False): 27 | super(TransformerEncoderLayer, self).__init__() 28 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 29 | # Implementation of Feedforward model 30 | self.linear1 = nn.Linear(d_model, dim_feedforward) 31 | self.dropout = nn.Dropout(dropout) 32 | self.linear2 = nn.Linear(dim_feedforward, d_model) 33 | 34 | self.norm1 = nn.LayerNorm(d_model) 35 | self.norm2 = nn.LayerNorm(d_model) 36 | self.dropout1 = nn.Dropout(dropout) 37 | self.dropout2 = nn.Dropout(dropout) 38 | 39 | self.activation = _get_activation_fn(activation) 40 | self.normalize_before = normalize_before 41 | 42 | def forward(self, src, 43 | src_mask: Optional[Tensor] = None, 44 | src_key_padding_mask: Optional[Tensor] = None): 45 | src2 = self.self_attn(src, src, src, attn_mask=src_mask, 46 | key_padding_mask=src_key_padding_mask)[0] 47 | src = src + self.dropout1(src2) 48 | src = self.norm1(src) 49 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 50 | src = src + self.dropout2(src2) 51 | src = self.norm2(src) 52 | return src 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include "cpu/vision.h" 4 | 5 | #ifdef WITH_CUDA 6 | #include "cuda/vision.h" 7 | #endif 8 | 9 | 10 | // Interface for Python 11 | void deform_psroi_pooling_forward( 12 | at::Tensor input, 13 | at::Tensor bbox, 14 | at::Tensor trans, 15 | at::Tensor out, 16 | at::Tensor top_count, 17 | const int no_trans, 18 | const float spatial_scale, 19 | const int output_dim, 20 | const int group_size, 21 | const int pooled_size, 22 | const int part_size, 23 | const int sample_per_part, 24 | const float trans_std) 25 | { 26 | if (input.device().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return deform_psroi_pooling_cuda_forward( 29 | input, bbox, trans, out, top_count, 30 | no_trans, spatial_scale, output_dim, group_size, 31 | pooled_size, part_size, sample_per_part, trans_std 32 | ); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | 41 | void deform_psroi_pooling_backward( 42 | at::Tensor out_grad, 43 | at::Tensor input, 44 | at::Tensor bbox, 45 | at::Tensor trans, 46 | at::Tensor top_count, 47 | at::Tensor input_grad, 48 | at::Tensor trans_grad, 49 | const int no_trans, 50 | const float spatial_scale, 51 | const int output_dim, 52 | const int group_size, 53 | const int pooled_size, 54 | const int part_size, 55 | const int sample_per_part, 56 | const float trans_std) 57 | { 58 | if (input.device().is_cuda()) { 59 | #ifdef WITH_CUDA 60 | return deform_psroi_pooling_cuda_backward( 61 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 62 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 63 | part_size, sample_per_part, trans_std 64 | ); 65 | #else 66 | AT_ERROR("Not compiled with GPU support"); 67 | #endif 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | 
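A hedged usage sketch for the TransformerEncoderLayer defined in modeling/rpn/transformer.py above; tensors follow the (sequence, batch, embedding) layout expected by nn.MultiheadAttention, and all sizes are illustrative.

import torch
from maskrcnn_benchmark.modeling.rpn.transformer import TransformerEncoderLayer

layer = TransformerEncoderLayer(d_model=256, nhead=8, dim_feedforward=1024, dropout=0.1)
src = torch.randn(50, 2, 256)                   # 50 tokens, batch of 2, 256-dim features
out = layer(src)                                # same shape as src

# optional padding mask: True marks positions that attention should ignore
pad_mask = torch.zeros(2, 50, dtype=torch.bool)
pad_mask[:, 40:] = True
out = layer(src, src_key_padding_mask=pad_mask)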
-------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg) 14 | self.predictor = make_roi_keypoint_predictor(cfg) 15 | self.post_processor = make_roi_keypoint_post_processor(cfg) 16 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 17 | 18 | def forward(self, features, proposals, targets=None): 19 | """ 20 | Arguments: 21 | features (list[Tensor]): feature-maps from possibly several levels 22 | proposals (list[BoxList]): proposal boxes 23 | targets (list[BoxList], optional): the ground-truth targets. 24 | 25 | Returns: 26 | x (Tensor): the result of the feature extractor 27 | proposals (list[BoxList]): during training, the original proposals 28 | are returned. During testing, the predicted boxlists are returned 29 | with the `mask` field set 30 | losses (dict[Tensor]): During training, returns the losses for the 31 | head. 
During testing, returns an empty dict. 32 | """ 33 | if self.training: 34 | with torch.no_grad(): 35 | proposals = self.loss_evaluator.subsample(proposals, targets) 36 | 37 | x = self.feature_extractor(features, proposals) 38 | kp_logits = self.predictor(x) 39 | 40 | if not self.training: 41 | result = self.post_processor(kp_logits, proposals) 42 | return x, result, {} 43 | 44 | loss_kp = self.loss_evaluator(proposals, kp_logits) 45 | 46 | return x, proposals, dict(loss_kp=loss_kp) 47 | 48 | 49 | def build_roi_keypoint_head(cfg): 50 | return ROIKeypointHead(cfg) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #!/usr/bin/env python 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages 9 | from setuptools import setup 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | from torch.utils.cpp_extension import CppExtension 12 | from torch.utils.cpp_extension import CUDAExtension 13 | 14 | requirements = ["torch", "torchvision"] 15 | 16 | 17 | def get_extensions(): 18 | this_dir = os.path.dirname(os.path.abspath(__file__)) 19 | extensions_dir = os.path.join(this_dir, "maskrcnn_benchmark", "csrc") 20 | 21 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 22 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 23 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 24 | 25 | sources = main_file + source_cpu 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if torch.cuda.is_available() and CUDA_HOME is not None: 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | sources = [os.path.join(extensions_dir, s) for s in sources] 43 | 44 | include_dirs = [extensions_dir] 45 | 46 | ext_modules = [ 47 | extension( 48 | "maskrcnn_benchmark._C", 49 | sources, 50 | include_dirs=include_dirs, 51 | define_macros=define_macros, 52 | extra_compile_args=extra_compile_args, 53 | ) 54 | ] 55 | 56 | return ext_modules 57 | 58 | 59 | setup( 60 | name="maskrcnn_benchmark", 61 | description="object detection in pytorch", 62 | packages=find_packages(exclude=("configs", "tests",)), 63 | # install_requires=requirements, 64 | ext_modules=get_extensions(), 65 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension.with_options(use_ninja=False)}, 66 | ) 67 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import json 4 | from PIL import Image 5 | 6 | import torch.utils.data as data 7 | 8 | def pil_loader(path): 9 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 10 | with open(path, 'rb') as f: 11 | img = Image.open(f) 12 | return img.convert('RGB') 13 | 14 | class ImageNet(data.Dataset): 15 | """ ImageNet 16 | 17 | Args: 18 | root (string): Root directory where images are downloaded to. 19 | annFile (string): Path to json annotation file. 
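A hedged sketch of how the extension declared in setup.py above is reached from Python once it has been compiled (for example via `python setup.py build_ext --inplace` or `pip install -e .`). The nms binding used here has a CPU path in csrc/cpu/nms_cpu.cpp, so no GPU build is assumed; the box values are made up.

import torch
from maskrcnn_benchmark import _C   # the "maskrcnn_benchmark._C" module built by setup.py

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = _C.nms(boxes, scores, 0.5)   # indices of boxes surviving IoU-0.5 suppression
print(keep)                         # tensor([0, 2]): box 1 overlaps box 0 too much and is dropped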
20 | transform (callable, optional): A function/transform that takes in an PIL image 21 | and returns a transformed version. E.g, ``transforms.ToTensor`` 22 | """ 23 | 24 | def __init__(self, ann_file, root, remove_images_without_annotations=None, transforms=None): 25 | 26 | 27 | self.root = root 28 | self.transform = transforms 29 | 30 | meta_file = os.path.join(root, ann_file) 31 | assert os.path.exists(meta_file), 'meta file %s under root %s not found' % (os.path.basename(meta_file), root) 32 | 33 | with open(meta_file, 'r') as f: 34 | meta = json.load(f) 35 | 36 | self.classes = meta['classes'] 37 | self.class_to_idx = meta['class_to_idx'] 38 | self.samples = meta['samples'] 39 | self.num_sample = len(self.samples) 40 | self.allsamples = self.samples 41 | 42 | def select_class(self, cls): 43 | new_samples = [sample for sample in self.allsamples if sample[-1] in cls] 44 | self.samples = new_samples 45 | self.num_sample = len(self.samples) 46 | 47 | def __getitem__(self, index): 48 | """ 49 | Args: 50 | index (int): Index 51 | 52 | Returns: 53 | tuple: (sample, target) where target is class_index of the target class. 54 | """ 55 | img_path, target = self.samples[index] 56 | sample = pil_loader(self.root + '/' + img_path) 57 | if self.transform is not None: 58 | sample = self.transform(sample) 59 | 60 | return sample, target, index 61 | 62 | def __len__(self): 63 | return len(self.samples) -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/shallow_contrastive_loss_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import maskrcnn_benchmark.utils.dist as dist 3 | 4 | 5 | def normalized_positive_map(positive_map): 6 | positive_map = positive_map.float() 7 | positive_map_num_pos = positive_map.sum(2) 8 | positive_map_num_pos[positive_map_num_pos == 0] = 1e-6 9 | positive_map = positive_map / positive_map_num_pos.unsqueeze(-1) 10 | return positive_map 11 | 12 | 13 | def pad_tensor_given_dim_length(tensor, dim, length, padding_value=0, batch_first=True): 14 | new_size = list(tensor.size()[:dim]) + [length] + list(tensor.size()[dim + 1:]) 15 | out_tensor = tensor.data.new(*new_size).fill_(padding_value) 16 | if batch_first: 17 | out_tensor[:, :tensor.size(1), ...] = tensor 18 | else: 19 | out_tensor[:tensor.size(0), ...] = tensor 20 | return out_tensor 21 | 22 | 23 | def pad_random_negative_tensor_given_length(positive_tensor, negative_padding_tensor, length=None): 24 | assert positive_tensor.shape[0] + negative_padding_tensor.shape[0] == length 25 | return torch.cat((positive_tensor, negative_padding_tensor), dim=0) 26 | 27 | 28 | def gather_tensors(tensor): 29 | """ 30 | Performs all_gather operation on the provided tensors. 31 | *** Warning ***: torch.distributed.all_gather has no gradient. 
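A hedged, worked example for normalized_positive_map above (utils/shallow_contrastive_loss_helper.py): each box-to-token map is rescaled so that its positive entries sum to one. The shapes and token indices are made up for illustration, and the import path is assumed from the repository layout.

import torch
from maskrcnn_benchmark.utils.shallow_contrastive_loss_helper import normalized_positive_map

positive_map = torch.zeros(1, 2, 5)   # (batch, boxes, text tokens)
positive_map[0, 0, 1] = 1             # box 0 grounds to token 1
positive_map[0, 1, 2:4] = 1           # box 1 grounds to tokens 2 and 3

norm = normalized_positive_map(positive_map)
# norm[0, 0] -> [0, 1, 0, 0, 0]       a single positive token keeps weight 1
# norm[0, 1] -> [0, 0, 0.5, 0.5, 0]   two positive tokens share the weight equally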
32 | """ 33 | if not dist.is_dist_avail_and_initialized(): 34 | return torch.stack([tensor], dim=0) 35 | 36 | total = dist.get_world_size() 37 | rank = torch.distributed.get_rank() 38 | # gathered_normalized_img_emb = [torch.zeros_like(normalized_img_emb) for _ in range(total)] 39 | # torch.distributed.all_gather(gathered_normalized_img_emb, normalized_img_emb) 40 | 41 | tensors_gather = [ 42 | torch.zeros_like(tensor) 43 | for _ in range(total) 44 | ] 45 | torch.distributed.all_gather(tensors_gather, tensor, async_op=False) 46 | 47 | # need to do this to restore propagation of the gradients 48 | tensors_gather[rank] = tensor 49 | output = torch.stack(tensors_gather, dim=0) 50 | return output 51 | 52 | 53 | def convert_to_roi_format(boxes): 54 | concat_boxes = boxes.bbox 55 | device, dtype = concat_boxes.device, concat_boxes.dtype 56 | ids = torch.full((len(boxes), 1), 0, dtype=dtype, device=device) 57 | rois = torch.cat([ids, concat_boxes], dim=1) 58 | return rois -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/hourglass.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from 
maskrcnn_benchmark.modeling.make_layers import make_conv3x3 4 | 5 | 6 | class Residual(nn.Module): 7 | def __init__(self, inp_dim, out_dim, use_gn=False): 8 | super(Residual, self).__init__() 9 | self.relu = nn.ReLU() 10 | # self.bn1 = nn.BatchNorm2d(inp_dim) 11 | self.conv1 = make_conv3x3(inp_dim, int(out_dim / 2), 1, use_relu=False, use_gn=use_gn) 12 | # self.bn2 = nn.BatchNorm2d(int(out_dim / 2)) 13 | self.conv2 = make_conv3x3(int(out_dim / 2), int(out_dim / 2), 3, use_relu=False, use_gn=use_gn) 14 | # self.bn3 = nn.BatchNorm2d(int(out_dim / 2)) 15 | self.conv3 = make_conv3x3(int(out_dim / 2), out_dim, 1, use_relu=False, use_gn=use_gn) 16 | if inp_dim == out_dim: 17 | self.need_skip = False 18 | else: 19 | self.need_skip = True 20 | self.skip_layer = make_conv3x3(inp_dim, out_dim, 1, use_relu=False, use_gn=False) 21 | 22 | def forward(self, x): 23 | if self.need_skip: 24 | residual = self.skip_layer(x) 25 | else: 26 | residual = x 27 | out = x 28 | # out = self.bn1(out) 29 | out = self.relu(out) 30 | out = self.conv1(out) 31 | # out = self.bn2(out) 32 | out = self.relu(out) 33 | out = self.conv2(out) 34 | # out = self.bn3(out) 35 | out = self.relu(out) 36 | out = self.conv3(out) 37 | out += residual 38 | return out 39 | 40 | 41 | class Hourglass(nn.Module): 42 | def __init__(self, n, f, gn=False, increase=0): 43 | super(Hourglass, self).__init__() 44 | nf = f + increase 45 | self.up1 = Residual(f, f) 46 | # Lower branch 47 | self.pool1 = nn.MaxPool2d(2, 2) 48 | self.low1 = Residual(f, nf) 49 | self.n = n 50 | # Recursive hourglass 51 | if self.n > 1: 52 | self.low2 = Hourglass(n-1, nf, gn=gn) 53 | else: 54 | self.low2 = Residual(nf, nf, gn) 55 | self.low3 = Residual(nf, f, gn) 56 | self.up2 = nn.Upsample(scale_factor=2, mode='nearest') 57 | 58 | def forward(self, x): 59 | up1 = self.up1(x) 60 | pool1 = self.pool1(x) 61 | low1 = self.low1(pool1) 62 | low2 = self.low2(low1) 63 | low3 = self.low3(low2) 64 | up2 = self.up2(low3) 65 | return up1 + up2 -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.data import datasets 2 | 3 | from .coco import coco_evaluation 4 | from .voc import voc_evaluation 5 | from .vg import vg_evaluation 6 | from .box_aug import im_detect_bbox_aug 7 | from .od_to_grounding import od_to_grounding_evaluation 8 | 9 | 10 | def evaluate(dataset, predictions, output_folder, **kwargs): 11 | """evaluate dataset using different methods based on dataset type. 12 | Args: 13 | dataset: Dataset object 14 | predictions(list[BoxList]): each item in the list represents the 15 | prediction results for one image. 16 | output_folder: output folder, to save evaluation files or results. 17 | **kwargs: other args. 
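A hedged usage sketch for the Hourglass module above (roi_heads/mask_head/hourglass.py). It pools n times and upsamples n times, so the input spatial size is assumed divisible by 2**n; the channel count and resolution below are placeholders.

import torch
from maskrcnn_benchmark.modeling.roi_heads.mask_head.hourglass import Hourglass

hg = Hourglass(n=2, f=64)          # two pool/upsample stages, 64 feature channels
x = torch.randn(1, 64, 32, 32)     # e.g. a 32x32 RoI feature map
y = hg(x)                          # skip branch (up1) + recursive branch (up2) are summed
assert y.shape == x.shape          # the module preserves the input resolution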
18 | Returns: 19 | evaluation result 20 | """ 21 | args = dict( 22 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 23 | ) 24 | if isinstance(dataset, datasets.COCODataset) or isinstance(dataset, datasets.TSVDataset): 25 | return coco_evaluation(**args) 26 | # elif isinstance(dataset, datasets.VGTSVDataset): 27 | # return vg_evaluation(**args) 28 | elif isinstance(dataset, datasets.PascalVOCDataset): 29 | return voc_evaluation(**args) 30 | elif isinstance(dataset, datasets.CocoDetectionTSV): 31 | return od_to_grounding_evaluation(**args) 32 | elif isinstance(dataset, datasets.LvisDetection): 33 | pass 34 | else: 35 | dataset_name = dataset.__class__.__name__ 36 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 37 | 38 | 39 | def evaluate_mdetr(dataset, predictions, output_folder, cfg): 40 | 41 | args = dict( 42 | dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs 43 | ) 44 | if isinstance(dataset, datasets.COCODataset) or isinstance(dataset, datasets.TSVDataset): 45 | return coco_evaluation(**args) 46 | # elif isinstance(dataset, datasets.VGTSVDataset): 47 | # return vg_evaluation(**args) 48 | elif isinstance(dataset, datasets.PascalVOCDataset): 49 | return voc_evaluation(**args) 50 | elif isinstance(dataset, datasets.CocoDetectionTSV): 51 | return od_to_grounding_evaluation(**args) 52 | elif isinstance(dataset, datasets.LvisDetection): 53 | pass 54 | else: 55 | dataset_name = dataset.__class__.__name__ 56 | raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name)) 57 | -------------------------------------------------------------------------------- /configs/pretrain/glip_Swin_T_O365.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedVLRCNN" 3 | WEIGHT: "swin_tiny_patch4_window7_224.pth" 4 | RPN_ONLY: True 5 | RPN_ARCHITECTURE: "VLDYHEAD" 6 | 7 | BACKBONE: 8 | CONV_BODY: "SWINT-FPN-RETINANET" 9 | OUT_CHANNELS: 256 10 | FREEZE_CONV_BODY_AT: -1 11 | 12 | LANGUAGE_BACKBONE: 13 | FREEZE: False 14 | MODEL_TYPE: "bert-base-uncased" # "roberta-base", "clip" 15 | MASK_SPECIAL: False 16 | 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_SIZES: (64, 128, 256, 512, 1024) 20 | ANCHOR_STRIDE: (8, 16, 32, 64, 128) 21 | ASPECT_RATIOS: (1.0,) 22 | SCALES_PER_OCTAVE: 1 23 | 24 | DYHEAD: 25 | CHANNELS: 256 26 | NUM_CONVS: 6 27 | USE_GN: True 28 | USE_DYRELU: True 29 | USE_DFCONV: True 30 | USE_DYFUSE: True 31 | TOPK: 9 # topk for selecting candidate positive samples from each level 32 | SCORE_AGG: "MEAN" 33 | LOG_SCALE: 0.0 34 | 35 | FUSE_CONFIG: 36 | EARLY_FUSE_ON: True 37 | TYPE: "MHA-B" 38 | USE_CLASSIFICATION_LOSS: False 39 | USE_TOKEN_LOSS: False 40 | USE_CONTRASTIVE_ALIGN_LOSS: False 41 | CONTRASTIVE_HIDDEN_DIM: 64 42 | USE_DOT_PRODUCT_TOKEN_LOSS: True 43 | USE_FUSED_FEATURES_DOT_PRODUCT: True 44 | USE_LAYER_SCALE: True 45 | CLAMP_MIN_FOR_UNDERFLOW: True 46 | CLAMP_MAX_FOR_OVERFLOW: True 47 | CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True 48 | CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True 49 | CLAMP_DOT_PRODUCT: True 50 | 51 | USE_CHECKPOINT: True 52 | 53 | TEST: 54 | DURING_TRAINING: False 55 | IMS_PER_BATCH: 64 56 | 57 | # use for grounding model 58 | DATASETS: 59 | TRAIN: ("object365_dt_train", ) 60 | TEST: ("coco_2017_val", ) 61 | DISABLE_SHUFFLE: False 62 | ADD_DET_PROMPT: False 63 | RANDOM_SAMPLE_NEG: 85 64 | CONTROL_PROB: (0.0, 0.0, 0.5, 0.0) 65 | 66 | SEPARATION_TOKENS: ". 
" 67 | 68 | INPUT: 69 | PIXEL_MEAN: [ 103.530, 116.280, 123.675 ] 70 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 71 | MIN_SIZE_TRAIN: 800 72 | MAX_SIZE_TRAIN: 1333 73 | MIN_SIZE_TEST: 800 74 | MAX_SIZE_TEST: 1333 75 | 76 | AUGMENT: 77 | MULT_MIN_SIZE_TRAIN: (480,560,640,720,800) 78 | 79 | DATALOADER: 80 | SIZE_DIVISIBILITY: 32 81 | 82 | SOLVER: 83 | OPTIMIZER: ADAMW 84 | BASE_LR: 0.0001 85 | LANG_LR: 0.00001 86 | WEIGHT_DECAY: 0.0001 87 | STEPS: (0.67, 0.89) 88 | MAX_EPOCH: 30 89 | IMS_PER_BATCH: 64 90 | WARMUP_ITERS: 2000 91 | WARMUP_FACTOR: 0.001 92 | USE_AMP: True 93 | MODEL_EMA: 0.999 94 | FIND_UNUSED_PARAMETERS: False 95 | 96 | CLIP_GRADIENTS: 97 | ENABLED: True 98 | CLIP_TYPE: "full_model" 99 | CLIP_VALUE: 1.0 100 | NORM_TYPE: 2.0 -------------------------------------------------------------------------------- /configs/pretrain/glip_A_Swin_T_O365.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedVLRCNN" 3 | WEIGHT: "swin_tiny_patch4_window7_224.pth" 4 | RPN_ONLY: True 5 | RPN_ARCHITECTURE: "VLDYHEAD" 6 | 7 | BACKBONE: 8 | CONV_BODY: "SWINT-FPN-RETINANET" 9 | OUT_CHANNELS: 256 10 | FREEZE_CONV_BODY_AT: -1 11 | 12 | LANGUAGE_BACKBONE: 13 | FREEZE: False 14 | MODEL_TYPE: "bert-base-uncased" # "roberta-base", "clip" 15 | MASK_SPECIAL: False 16 | 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_SIZES: (64, 128, 256, 512, 1024) 20 | ANCHOR_STRIDE: (8, 16, 32, 64, 128) 21 | ASPECT_RATIOS: (1.0,) 22 | SCALES_PER_OCTAVE: 1 23 | 24 | DYHEAD: 25 | CHANNELS: 256 26 | NUM_CONVS: 6 27 | USE_GN: True 28 | USE_DYRELU: True 29 | USE_DFCONV: True 30 | USE_DYFUSE: True 31 | TOPK: 9 # topk for selecting candidate positive samples from each level 32 | SCORE_AGG: "MEAN" 33 | LOG_SCALE: 0.0 34 | 35 | FUSE_CONFIG: 36 | EARLY_FUSE_ON: False 37 | TYPE: "MHA-B" 38 | USE_CLASSIFICATION_LOSS: False 39 | USE_TOKEN_LOSS: False 40 | USE_CONTRASTIVE_ALIGN_LOSS: False 41 | CONTRASTIVE_HIDDEN_DIM: 64 42 | USE_DOT_PRODUCT_TOKEN_LOSS: True 43 | USE_FUSED_FEATURES_DOT_PRODUCT: False 44 | USE_LAYER_SCALE: True 45 | CLAMP_MIN_FOR_UNDERFLOW: True 46 | CLAMP_MAX_FOR_OVERFLOW: True 47 | CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True 48 | CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True 49 | CLAMP_DOT_PRODUCT: True 50 | 51 | USE_CHECKPOINT: True 52 | 53 | TEST: 54 | DURING_TRAINING: False 55 | IMS_PER_BATCH: 64 56 | 57 | # use for grounding model 58 | DATASETS: 59 | TRAIN: ("object365_dt_train", ) 60 | TEST: ("coco_2017_val", ) 61 | DISABLE_SHUFFLE: False 62 | ADD_DET_PROMPT: False 63 | RANDOM_SAMPLE_NEG: 85 64 | CONTROL_PROB: (0.0, 0.0, 0.5, 0.0) 65 | 66 | SEPARATION_TOKENS: ". 
" 67 | 68 | INPUT: 69 | PIXEL_MEAN: [ 103.530, 116.280, 123.675 ] 70 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 71 | MIN_SIZE_TRAIN: 800 72 | MAX_SIZE_TRAIN: 1333 73 | MIN_SIZE_TEST: 800 74 | MAX_SIZE_TEST: 1333 75 | 76 | AUGMENT: 77 | MULT_MIN_SIZE_TRAIN: (480,560,640,720,800) 78 | 79 | DATALOADER: 80 | SIZE_DIVISIBILITY: 32 81 | 82 | SOLVER: 83 | OPTIMIZER: ADAMW 84 | BASE_LR: 0.0001 85 | LANG_LR: 0.00001 86 | WEIGHT_DECAY: 0.0001 87 | STEPS: (0.67, 0.89) 88 | MAX_EPOCH: 30 89 | IMS_PER_BATCH: 64 90 | WARMUP_ITERS: 2000 91 | WARMUP_FACTOR: 0.001 92 | USE_AMP: True 93 | MODEL_EMA: 0.999 94 | FIND_UNUSED_PARAMETERS: False 95 | 96 | CLIP_GRADIENTS: 97 | ENABLED: True 98 | CLIP_TYPE: "full_model" 99 | CLIP_VALUE: 1.0 100 | NORM_TYPE: 2.0 -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 
33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | assert tensors.dim() == 4 45 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 46 | return ImageList(tensors, image_sizes) 47 | elif isinstance(tensors, (tuple, list)): 48 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 49 | 50 | # TODO Ideally, just remove this and let me model handle arbitrary 51 | # input sizs 52 | if size_divisible > 0: 53 | import math 54 | 55 | stride = size_divisible 56 | max_size = list(max_size) 57 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 58 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 59 | max_size = tuple(max_size) 60 | 61 | batch_shape = (len(tensors),) + max_size 62 | batched_imgs = tensors[0].new(*batch_shape).zero_() 63 | for img, pad_img in zip(tensors, batched_imgs): 64 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 65 | 66 | image_sizes = [im.shape[-2:] for im in tensors] 67 | 68 | return ImageList(batched_imgs, image_sizes) 69 | else: 70 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 71 | -------------------------------------------------------------------------------- /configs/pretrain/glip_Swin_T_O365_GoldG.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedVLRCNN" 3 | WEIGHT: "swin_tiny_patch4_window7_224.pth" 4 | RPN_ONLY: True 5 | RPN_ARCHITECTURE: "VLDYHEAD" 6 | 7 | BACKBONE: 8 | CONV_BODY: "SWINT-FPN-RETINANET" 9 | OUT_CHANNELS: 256 10 | FREEZE_CONV_BODY_AT: -1 11 | 12 | LANGUAGE_BACKBONE: 13 | FREEZE: False 14 | MODEL_TYPE: "bert-base-uncased" # "roberta-base", "clip" 15 | MASK_SPECIAL: False 16 | 17 | RPN: 18 | USE_FPN: True 19 | ANCHOR_SIZES: (64, 128, 256, 512, 1024) 20 | ANCHOR_STRIDE: (8, 16, 32, 64, 128) 21 | ASPECT_RATIOS: (1.0,) 22 | SCALES_PER_OCTAVE: 1 23 | 24 | DYHEAD: 25 | CHANNELS: 256 26 | NUM_CONVS: 6 27 | USE_GN: True 28 | USE_DYRELU: True 29 | USE_DFCONV: True 30 | USE_DYFUSE: True 31 | TOPK: 9 # topk for selecting candidate positive samples from each level 32 | SCORE_AGG: "MEAN" 33 | LOG_SCALE: 0.0 34 | 35 | FUSE_CONFIG: 36 | EARLY_FUSE_ON: True 37 | TYPE: "MHA-B" 38 | USE_CLASSIFICATION_LOSS: False 39 | USE_TOKEN_LOSS: False 40 | USE_CONTRASTIVE_ALIGN_LOSS: False 41 | CONTRASTIVE_HIDDEN_DIM: 64 42 | USE_DOT_PRODUCT_TOKEN_LOSS: True 43 | USE_FUSED_FEATURES_DOT_PRODUCT: True 44 | USE_LAYER_SCALE: True 45 | CLAMP_MIN_FOR_UNDERFLOW: True 46 | CLAMP_MAX_FOR_OVERFLOW: True 47 | CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True 48 | CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True 49 | CLAMP_DOT_PRODUCT: True 50 | 51 | USE_CHECKPOINT: True 52 | 53 | TEST: 54 | DURING_TRAINING: False 55 | IMS_PER_BATCH: 64 56 | 57 | # use for grounding model 58 | DATASETS: 59 | TRAIN: ("object365_dt_train", "mixed_train_no_coco", "flickr30k_train", ) 60 | TEST: ("coco_2017_val", ) 61 | DISABLE_SHUFFLE: False 62 | ADD_DET_PROMPT: False 63 | RANDOM_SAMPLE_NEG: 85 64 | CONTROL_PROB: (0.0, 0.0, 0.5, 0.0) 65 | 66 | SEPARATION_TOKENS: ". 
" 67 | 68 | INPUT: 69 | PIXEL_MEAN: [ 103.530, 116.280, 123.675 ] 70 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 71 | MIN_SIZE_TRAIN: 800 72 | MAX_SIZE_TRAIN: 1333 73 | MIN_SIZE_TEST: 800 74 | MAX_SIZE_TEST: 1333 75 | 76 | AUGMENT: 77 | MULT_MIN_SIZE_TRAIN: (480,560,640,720,800) 78 | 79 | DATALOADER: 80 | SIZE_DIVISIBILITY: 32 81 | 82 | SOLVER: 83 | OPTIMIZER: ADAMW 84 | BASE_LR: 0.0001 85 | LANG_LR: 0.00001 86 | WEIGHT_DECAY: 0.0001 87 | STEPS: (0.67, 0.89) 88 | MAX_EPOCH: 30 89 | IMS_PER_BATCH: 64 90 | WARMUP_ITERS: 2000 91 | WARMUP_FACTOR: 0.001 92 | USE_AMP: True 93 | MODEL_EMA: 0.999 94 | FIND_UNUSED_PARAMETERS: False 95 | 96 | CLIP_GRADIENTS: 97 | ENABLED: True 98 | CLIP_TYPE: "full_model" 99 | CLIP_VALUE: 1.0 100 | NORM_TYPE: 2.0 -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Security 4 | 5 | Microsoft takes the security of our software products and services seriously, which includes all source code repositories managed through our GitHub organizations, which include [Microsoft](https://github.com/Microsoft), [Azure](https://github.com/Azure), [DotNet](https://github.com/dotnet), [AspNet](https://github.com/aspnet), [Xamarin](https://github.com/xamarin), and [our GitHub organizations](https://opensource.microsoft.com/). 6 | 7 | If you believe you have found a security vulnerability in any Microsoft-owned repository that meets [Microsoft's definition of a security vulnerability](https://docs.microsoft.com/en-us/previous-versions/tn-archive/cc751383(v=technet.10)), please report it to us as described below. 8 | 9 | ## Reporting Security Issues 10 | 11 | **Please do not report security vulnerabilities through public GitHub issues.** 12 | 13 | Instead, please report them to the Microsoft Security Response Center (MSRC) at [https://msrc.microsoft.com/create-report](https://msrc.microsoft.com/create-report). 14 | 15 | If you prefer to submit without logging in, send email to [secure@microsoft.com](mailto:secure@microsoft.com). If possible, encrypt your message with our PGP key; please download it from the [Microsoft Security Response Center PGP Key page](https://www.microsoft.com/en-us/msrc/pgp-key-msrc). 16 | 17 | You should receive a response within 24 hours. If for some reason you do not, please follow up via email to ensure we received your original message. Additional information can be found at [microsoft.com/msrc](https://www.microsoft.com/msrc). 18 | 19 | Please include the requested information listed below (as much as you can provide) to help us better understand the nature and scope of the possible issue: 20 | 21 | * Type of issue (e.g. buffer overflow, SQL injection, cross-site scripting, etc.) 22 | * Full paths of source file(s) related to the manifestation of the issue 23 | * The location of the affected source code (tag/branch/commit or direct URL) 24 | * Any special configuration required to reproduce the issue 25 | * Step-by-step instructions to reproduce the issue 26 | * Proof-of-concept or exploit code (if possible) 27 | * Impact of the issue, including how an attacker might exploit the issue 28 | 29 | This information will help us triage your report more quickly. 30 | 31 | If you are reporting for a bug bounty, more complete reports can contribute to a higher bounty award. Please visit our [Microsoft Bug Bounty Program](https://microsoft.com/msrc/bounty) page for more details about our active programs. 
32 | 33 | ## Preferred Languages 34 | 35 | We prefer all communications to be in English. 36 | 37 | ## Policy 38 | 39 | Microsoft follows the principle of [Coordinated Vulnerability Disclosure](https://www.microsoft.com/en-us/msrc/cvd). 40 | 41 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #include "cpu/vision.h" 3 | 4 | 5 | template 6 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, 7 | const at::Tensor& scores, 8 | const float threshold) { 9 | AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor"); 10 | AT_ASSERTM(!scores.device().is_cuda(), "scores must be a CPU tensor"); 11 | AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores"); 12 | 13 | if (dets.numel() == 0) { 14 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 15 | } 16 | 17 | auto x1_t = dets.select(1, 0).contiguous(); 18 | auto y1_t = dets.select(1, 1).contiguous(); 19 | auto x2_t = dets.select(1, 2).contiguous(); 20 | auto y2_t = dets.select(1, 3).contiguous(); 21 | 22 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 23 | 24 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 25 | 26 | auto ndets = dets.size(0); 27 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 28 | 29 | auto suppressed = suppressed_t.data_ptr(); 30 | auto order = order_t.data_ptr(); 31 | auto x1 = x1_t.data_ptr(); 32 | auto y1 = y1_t.data_ptr(); 33 | auto x2 = x2_t.data_ptr(); 34 | auto y2 = y2_t.data_ptr(); 35 | auto areas = areas_t.data_ptr(); 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) 40 | continue; 41 | auto ix1 = x1[i]; 42 | auto iy1 = y1[i]; 43 | auto ix2 = x2[i]; 44 | auto iy2 = y2[i]; 45 | auto iarea = areas[i]; 46 | 47 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 48 | auto j = order[_j]; 49 | if (suppressed[j] == 1) 50 | continue; 51 | auto xx1 = std::max(ix1, x1[j]); 52 | auto yy1 = std::max(iy1, y1[j]); 53 | auto xx2 = std::min(ix2, x2[j]); 54 | auto yy2 = std::min(iy2, y2[j]); 55 | 56 | auto w = std::max(static_cast(0), xx2 - xx1 + 1); 57 | auto h = std::max(static_cast(0), yy2 - yy1 + 1); 58 | auto inter = w * h; 59 | auto ovr = inter / (iarea + areas[j] - inter); 60 | if (ovr >= threshold) 61 | suppressed[j] = 1; 62 | } 63 | } 64 | return at::nonzero(suppressed_t == 0).squeeze(1); 65 | } 66 | 67 | at::Tensor nms_cpu(const at::Tensor& dets, 68 | const at::Tensor& scores, 69 | const float threshold) { 70 | at::Tensor result; 71 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 72 | result = nms_cpu_kernel(dets, scores, threshold); 73 | }); 74 | return result; 75 | } 76 | -------------------------------------------------------------------------------- /tools/cityscapes/instances2dict_with_polygons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # 3 | # Convert instances from png files to a dictionary 4 | # This files is created according to https://github.com/facebookresearch/Detectron/issues/111 5 | 6 | from __future__ import print_function, absolute_import, division 7 | import os, sys 8 | 9 | sys.path.append( os.path.normpath( os.path.join( os.path.dirname( __file__ ) , '..' 
, 'helpers' ) ) ) 10 | from csHelpers import * 11 | 12 | # Cityscapes imports 13 | from cityscapesscripts.evaluation.instance import * 14 | from cityscapesscripts.helpers.csHelpers import * 15 | import cv2 16 | from maskrcnn_benchmark.utils import cv2_util 17 | 18 | 19 | def instances2dict_with_polygons(imageFileList, verbose=False): 20 | imgCount = 0 21 | instanceDict = {} 22 | 23 | if not isinstance(imageFileList, list): 24 | imageFileList = [imageFileList] 25 | 26 | if verbose: 27 | print("Processing {} images...".format(len(imageFileList))) 28 | 29 | for imageFileName in imageFileList: 30 | # Load image 31 | img = Image.open(imageFileName) 32 | 33 | # Image as numpy array 34 | imgNp = np.array(img) 35 | 36 | # Initialize label categories 37 | instances = {} 38 | for label in labels: 39 | instances[label.name] = [] 40 | 41 | # Loop through all instance ids in instance image 42 | for instanceId in np.unique(imgNp): 43 | if instanceId < 1000: 44 | continue 45 | instanceObj = Instance(imgNp, instanceId) 46 | instanceObj_dict = instanceObj.toDict() 47 | 48 | #instances[id2label[instanceObj.labelID].name].append(instanceObj.toDict()) 49 | if id2label[instanceObj.labelID].hasInstances: 50 | mask = (imgNp == instanceId).astype(np.uint8) 51 | contour, hier = cv2_util.findContours( 52 | mask.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE) 53 | 54 | polygons = [c.reshape(-1).tolist() for c in contour] 55 | instanceObj_dict['contours'] = polygons 56 | 57 | instances[id2label[instanceObj.labelID].name].append(instanceObj_dict) 58 | 59 | imgKey = os.path.abspath(imageFileName) 60 | instanceDict[imgKey] = instances 61 | imgCount += 1 62 | 63 | if verbose: 64 | print("\rImages Processed: {}".format(imgCount), end=' ') 65 | sys.stdout.flush() 66 | 67 | if verbose: 68 | print("") 69 | 70 | return instanceDict 71 | 72 | def main(argv): 73 | fileList = [] 74 | if (len(argv) > 2): 75 | for arg in argv: 76 | if ("png" in arg): 77 | fileList.append(arg) 78 | instances2dict_with_polygons(fileList, True) 79 | 80 | if __name__ == "__main__": 81 | main(sys.argv[1:]) 82 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/ops.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | def conv7x7(in_planes, out_planes, stride=1, groups=1, dilation=1): 8 | """7x7 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=7, stride=stride, 10 | padding=3*dilation, groups=groups, bias=False, dilation=dilation) 11 | 12 | 13 | def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1): 14 | """5x5 convolution with padding""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=5, stride=stride, 16 | padding=2*dilation, groups=groups, bias=False, dilation=dilation) 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=dilation, groups=groups, bias=False, dilation=dilation) 23 | 24 | 25 | def conv1x1(in_planes, out_planes, stride=1): 26 | """1x1 convolution""" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 28 | 29 | 30 | def maxpool(**kwargs): 31 | return nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 32 | 33 | 34 | def avgpool(**kwargs): 35 | return nn.AvgPool2d(kernel_size=3, stride=2, 
padding=1) 36 | 37 | def dropout(prob): 38 | return nn.Dropout(prob) 39 | 40 | 41 | conv3x3sep = lambda i, o, s=1: conv3x3(i, o, s, groups=i) 42 | conv3x3g2 = lambda i, o, s=1: conv3x3(i, o, s, groups=2) 43 | conv3x3g4 = lambda i, o, s=1: conv3x3(i, o, s, groups=4) 44 | conv3x3g8 = lambda i, o, s=1: conv3x3(i, o, s, groups=8) 45 | conv3x3dw = lambda i, o, s=1: conv3x3(i, o, s, groups=i) 46 | 47 | conv3x3d2 = lambda i, o, s=1: conv3x3(i, o, s, dilation=2) 48 | conv3x3d3 = lambda i, o, s=1: conv3x3(i, o, s, dilation=3) 49 | conv3x3d4 = lambda i, o, s=1: conv3x3(i, o, s, dilation=4) 50 | 51 | 52 | conv5x5sep = lambda i, o, s=1: conv5x5(i, o, s, groups=i) 53 | conv5x5g2 = lambda i, o, s=1: conv5x5(i, o, s, groups=2) 54 | conv5x5g4 = lambda i, o, s=1: conv5x5(i, o, s, groups=4) 55 | conv5x5g8 = lambda i, o, s=1: conv5x5(i, o, s, groups=8) 56 | conv5x5dw = lambda i, o, s=1: conv5x5(i, o, s, groups=i) 57 | 58 | 59 | conv5x5d2 = lambda i, o, s=1: conv5x5(i, o, s, dilation=2) 60 | conv5x5d3 = lambda i, o, s=1: conv5x5(i, o, s, dilation=3) 61 | conv5x5d4 = lambda i, o, s=1: conv5x5(i, o, s, dilation=4) 62 | 63 | conv7x7sep = lambda i, o, s=1: conv7x7(i, o, s, groups=i) 64 | conv7x7g2 = lambda i, o, s=1: conv7x7(i, o, s, groups=2) 65 | conv7x7g4 = lambda i, o, s=1: conv7x7(i, o, s, groups=4) 66 | conv7x7g8 = lambda i, o, s=1: conv7x7(i, o, s, groups=8) 67 | conv7x7dw = lambda i, o, s=1: conv7x7(i, o, s, groups=i) 68 | 69 | conv7x7d2 = lambda i, o, s=1: conv7x7(i, o, s, dilation=2) 70 | conv7x7d3 = lambda i, o, s=1: conv7x7(i, o, s, dilation=3) 71 | conv7x7d4 = lambda i, o, s=1: conv7x7(i, o, s, dilation=4) -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentace of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image. 24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list the negative example. 
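A hedged sketch of the naming convention for the convolution helpers in modeling/backbone/ops.py above: the suffix encodes groups (gN), depthwise/separable (dw, sep) and dilation (dN), and padding is chosen so that stride-1 calls preserve the spatial size. The shapes below are illustrative.

import torch
from maskrcnn_benchmark.modeling.backbone.ops import conv3x3, conv3x3dw, conv5x5d2

x = torch.randn(1, 16, 32, 32)
y1 = conv3x3(16, 32)(x)            # plain 3x3, stride 1 -> (1, 32, 32, 32)
y2 = conv3x3(16, 32, stride=2)(x)  # stride 2 halves the resolution -> (1, 32, 16, 16)
y3 = conv3x3dw(16, 16)(x)          # depthwise: groups == in_channels
y4 = conv5x5d2(16, 32)(x)          # 5x5 with dilation 2; padding 4 keeps 32x32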
34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.bool 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.bool 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | 18 | 19 | def permute_and_flatten(layer, N, A, C, H, W): 20 | layer = layer.view(N, -1, C, H, W) 21 | layer = layer.permute(0, 3, 4, 1, 2) 22 | layer = layer.reshape(N, -1, C) 23 | return layer 24 | 25 | 26 | def concat_box_prediction_layers(box_regression, box_cls=None, token_logits=None): 27 | box_regression_flattened = [] 28 | box_cls_flattened = [] 29 | token_logit_flattened = [] 30 | 31 | # for each feature level, permute the outputs to make them be in the 32 | # same format as the labels. 
Note that the labels are computed for 33 | # all feature levels concatenated, so we keep the same representation 34 | # for the objectness and the box_regression 35 | for box_cls_per_level, box_regression_per_level in zip( 36 | box_cls, box_regression 37 | ): 38 | N, AxC, H, W = box_cls_per_level.shape 39 | Ax4 = box_regression_per_level.shape[1] 40 | A = Ax4 // 4 41 | C = AxC // A 42 | box_cls_per_level = permute_and_flatten( 43 | box_cls_per_level, N, A, C, H, W 44 | ) 45 | box_cls_flattened.append(box_cls_per_level) 46 | 47 | box_regression_per_level = permute_and_flatten( 48 | box_regression_per_level, N, A, 4, H, W 49 | ) 50 | box_regression_flattened.append(box_regression_per_level) 51 | 52 | if token_logits is not None: 53 | for token_logit_per_level in token_logits: 54 | N, AXT, H, W = token_logit_per_level.shape 55 | T = AXT // A 56 | token_logit_per_level = permute_and_flatten( 57 | token_logit_per_level, N, A, T, H, W 58 | ) 59 | token_logit_flattened.append(token_logit_per_level) 60 | 61 | # concatenate on the first dimension (representing the feature levels), to 62 | # take into account the way the labels were generated (with all feature maps 63 | # being concatenated as well) 64 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 65 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 66 | 67 | token_logits_stacked = None 68 | if token_logits is not None: 69 | # stacked 70 | token_logits_stacked = cat(token_logit_flattened, dim=1) 71 | 72 | return box_regression, box_cls, token_logits_stacked 73 | 74 | 75 | def round_channels(channels, divisor=8): 76 | rounded_channels = max(int(channels + divisor / 2.0) // divisor * divisor, divisor) 77 | if float(rounded_channels) < 0.9 * channels: 78 | rounded_channels += divisor 79 | return rounded_channels 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | from maskrcnn_benchmark.utils.comm import shared_random_seed 10 | 11 | 12 | class DistributedSampler(Sampler): 13 | """Sampler that restricts data loading to a subset of the dataset. 14 | It is especially useful in conjunction with 15 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 16 | process can pass a DistributedSampler instance as a DataLoader sampler, 17 | and load a subset of the original dataset that is exclusive to it. 18 | .. note:: 19 | Dataset is assumed to be of constant size. 20 | Arguments: 21 | dataset: Dataset used for sampling. 22 | num_replicas (optional): Number of processes participating in 23 | distributed training. 24 | rank (optional): Rank of the current process within num_replicas. 
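        Example (a minimal sketch of the usual training-loop pattern; ``dataset``,
        ``world_size``, ``rank`` and ``num_epochs`` are placeholders)::

            sampler = DistributedSampler(dataset, num_replicas=world_size, rank=rank)
            loader = torch.utils.data.DataLoader(dataset, batch_size=2, sampler=sampler)
            for epoch in range(num_epochs):
                sampler.set_epoch(epoch)   # re-seeds the deterministic shuffle
                for batch in loader:
                    ...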
25 | """ 26 | 27 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, use_random=False): 28 | if num_replicas is None: 29 | if not dist.is_available(): 30 | raise RuntimeError("Requires distributed package to be available") 31 | num_replicas = dist.get_world_size() 32 | if rank is None: 33 | if not dist.is_available(): 34 | raise RuntimeError("Requires distributed package to be available") 35 | rank = dist.get_rank() 36 | self.dataset = dataset 37 | self.num_replicas = num_replicas 38 | self.rank = rank 39 | self.epoch = 0 40 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 41 | self.total_size = self.num_samples * self.num_replicas 42 | self.shuffle = shuffle 43 | self.use_random = use_random 44 | 45 | def __iter__(self): 46 | if self.shuffle: 47 | # deterministically shuffle based on epoch 48 | _seed = self.epoch 49 | if self.use_random: 50 | _seed = int(shared_random_seed()) 51 | g = torch.Generator() 52 | g.manual_seed(_seed) 53 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 54 | else: 55 | indices = torch.arange(len(self.dataset)).tolist() 56 | 57 | # add extra samples to make it evenly divisible 58 | indices += indices[: (self.total_size - len(indices))] 59 | assert len(indices) == self.total_size 60 | 61 | # subsample 62 | offset = self.num_samples * self.rank 63 | indices = indices[offset : offset + self.num_samples] 64 | assert len(indices) == self.num_samples 65 | 66 | return iter(indices) 67 | 68 | def __len__(self): 69 | return self.num_samples 70 | 71 | def set_epoch(self, epoch): 72 | self.epoch = epoch 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | def __init__(self, loss_type="iou"): 7 | super(IOULoss, self).__init__() 8 | self.loss_type = loss_type 9 | 10 | def forward(self, pred, target, weight=None): 11 | pred_left = pred[:, 0] 12 | pred_top = pred[:, 1] 13 | pred_right = pred[:, 2] 14 | pred_bottom = pred[:, 3] 15 | 16 | target_left = target[:, 0] 17 | target_top = target[:, 1] 18 | target_right = target[:, 2] 19 | target_bottom = target[:, 3] 20 | 21 | target_area = (target_left + target_right) * \ 22 | (target_top + target_bottom) 23 | pred_area = (pred_left + pred_right) * \ 24 | (pred_top + pred_bottom) 25 | 26 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 27 | g_w_intersect = torch.max(pred_left, target_left) + torch.max( 28 | pred_right, target_right) 29 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 30 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 31 | ac_uion = g_w_intersect * g_h_intersect + 1e-7 32 | area_intersect = w_intersect * h_intersect 33 | area_union = target_area + pred_area - area_intersect 34 | ious = (area_intersect + 1.0) / (area_union + 1.0) 35 | gious = ious - (ac_uion - area_union) / ac_uion 36 | if self.loss_type == 'iou': 37 | losses = -torch.log(ious) 38 | elif self.loss_type == 'linear_iou': 39 | losses = 1 - ious 40 | elif self.loss_type == 'giou': 41 | losses = 1 - gious 42 | else: 43 | raise NotImplementedError 44 | 45 | if weight is not None and weight.sum() > 0: 46 | return (losses * weight).sum() 47 | else: 48 | assert losses.numel() != 0 49 | return losses.sum() 50 | 51 | 52 | class 
IOUWHLoss(nn.Module): # used for anchor guiding 53 | def __init__(self, reduction='none'): 54 | super(IOUWHLoss, self).__init__() 55 | self.reduction = reduction 56 | 57 | def forward(self, pred, target): 58 | orig_shape = pred.shape 59 | pred = pred.view(-1, 4) 60 | target = target.view(-1, 4) 61 | target[:, :2] = 0 62 | tl = torch.max((target[:, :2] - pred[:, 2:] / 2), 63 | (target[:, :2] - target[:, 2:] / 2)) 64 | 65 | br = torch.min((target[:, :2] + pred[:, 2:] / 2), 66 | (target[:, :2] + target[:, 2:] / 2)) 67 | 68 | area_p = torch.prod(pred[:, 2:], 1) 69 | area_g = torch.prod(target[:, 2:], 1) 70 | 71 | en = (tl < br).type(tl.type()).prod(dim=1) 72 | area_i = torch.prod(br - tl, 1) * en 73 | U = area_p + area_g - area_i + 1e-16 74 | iou = area_i / U 75 | 76 | loss = 1 - iou ** 2 77 | if self.reduction == 'mean': 78 | loss = loss.mean() 79 | elif self.reduction == 'sum': 80 | loss = loss.sum() 81 | 82 | return loss 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | from maskrcnn_benchmark.utils.amp import custom_fwd, custom_bwd 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | self.onnx = cfg.MODEL.ONNX 23 | 24 | @custom_fwd(cast_inputs=torch.float32) 25 | def forward(self, features, proposals, targets=None): 26 | """ 27 | Arguments: 28 | features (list[Tensor]): feature-maps from possibly several levels 29 | proposals (list[BoxList]): proposal boxes 30 | targets (list[BoxList], optional): the ground-truth targets. 31 | 32 | Returns: 33 | x (Tensor): the result of the feature extractor 34 | proposals (list[BoxList]): during training, the subsampled proposals 35 | are returned. During testing, the predicted boxlists are returned 36 | losses (dict[Tensor]): During training, returns the losses for the 37 | head. During testing, returns an empty dict. 38 | """ 39 | 40 | if self.training: 41 | # Faster R-CNN subsamples during training the proposals with a fixed 42 | # positive / negative ratio 43 | with torch.no_grad(): 44 | proposals = self.loss_evaluator.subsample(proposals, targets) 45 | 46 | # extract features that will be fed to the final classifier. 
The 47 | # feature_extractor generally corresponds to the pooler + heads 48 | x = self.feature_extractor(features, proposals) 49 | # final classifier that converts the features into predictions 50 | class_logits, box_regression = self.predictor(x) 51 | 52 | if self.onnx: 53 | return x, (class_logits, box_regression, [box.bbox for box in proposals]), {} 54 | 55 | if not self.training: 56 | result = self.post_processor((class_logits, box_regression), proposals) 57 | return x, result, {} 58 | 59 | loss_classifier, loss_box_reg = self.loss_evaluator( 60 | [class_logits], [box_regression] 61 | ) 62 | return ( 63 | x, 64 | proposals, 65 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 66 | ) 67 | 68 | 69 | def build_roi_box_head(cfg): 70 | """ 71 | Constructs a new box head. 72 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 73 | and make it a parameter in the config 74 | """ 75 | return ROIBoxHead(cfg) 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | try: 6 | from torch.hub import _download_url_to_file 7 | from torch.hub import urlparse 8 | from torch.hub import HASH_REGEX 9 | except ImportError: 10 | from torch.utils.model_zoo import _download_url_to_file 11 | from torch.utils.model_zoo import urlparse 12 | from torch.utils.model_zoo import HASH_REGEX 13 | 14 | from maskrcnn_benchmark.utils.comm import is_main_process 15 | from maskrcnn_benchmark.utils.comm import synchronize 16 | 17 | 18 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 19 | # but with a few improvements and modifications 20 | def cache_url(url, model_dir='model', progress=True): 21 | r"""Loads the Torch serialized object at the given URL. 22 | If the object is already present in `model_dir`, it's deserialized and 23 | returned. The filename part of the URL should follow the naming convention 24 | ``filename-.ext`` where ```` is the first eight or more 25 | digits of the SHA256 hash of the contents of the file. The hash is used to 26 | ensure unique names and to verify the contents of the file. 27 | The default value of `model_dir` is ``$TORCH_HOME/models`` where 28 | ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be 29 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
30 | Args: 31 | url (string): URL of the object to download 32 | model_dir (string, optional): directory in which to save the object 33 | progress (bool, optional): whether or not to display a progress bar to stderr 34 | Example: 35 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 36 | """ 37 | if model_dir is None: 38 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 39 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 40 | if not os.path.exists(model_dir): 41 | os.makedirs(model_dir, exist_ok=True) 42 | parts = urlparse(url) 43 | filename = os.path.basename(parts.path) 44 | if filename == "model_final.pkl": 45 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 46 | # so make the full path the filename by replacing / with _ 47 | filename = parts.path.replace("/", "_") 48 | cached_file = os.path.join(model_dir, filename) 49 | if not os.path.exists(cached_file): 50 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 51 | hash_prefix = HASH_REGEX.search(filename) 52 | if hash_prefix is not None: 53 | hash_prefix = hash_prefix.group(1) 54 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 55 | # which matches the hash PyTorch uses. So we skip the hash matching 56 | # if the hash_prefix is less than 6 characters 57 | if len(hash_prefix) < 6: 58 | hash_prefix = None 59 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 60 | synchronize() 61 | return cached_file 62 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
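# This module wraps the ROIAlign operator used by the pooling layers:
#   * _ROIAlign is a custom autograd Function backed by the compiled
#     _C.roi_align_forward / _C.roi_align_backward kernels.
#   * When torchvision is importable, roi_align is taken from torchvision.ops
#     instead (see the try/except below); ROIAlignV2 always calls torchvision
#     with aligned=True.
# Illustrative usage (a sketch only; rois are rows of [batch_idx, x1, y1, x2, y2]
# and the 1/16 scale assumes stride-16 feature maps):
#   pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0 / 16, sampling_ratio=2)
#   pooled = pooler(feature_map, rois)   # -> (num_rois, C, 7, 7)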
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | class _ROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | output = _C.roi_align_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 20 | ) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None 44 | 45 | try: 46 | import torchvision 47 | from torchvision.ops import roi_align 48 | except: 49 | roi_align = _ROIAlign.apply 50 | 51 | class ROIAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio): 53 | super(ROIAlign, self).__init__() 54 | self.output_size = output_size 55 | self.spatial_scale = spatial_scale 56 | self.sampling_ratio = sampling_ratio 57 | 58 | def forward(self, input, rois): 59 | return roi_align( 60 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 61 | ) 62 | 63 | def __repr__(self): 64 | tmpstr = self.__class__.__name__ + "(" 65 | tmpstr += "output_size=" + str(self.output_size) 66 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 67 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 68 | tmpstr += ")" 69 | return tmpstr 70 | 71 | class ROIAlignV2(nn.Module): 72 | def __init__(self, output_size, spatial_scale, sampling_ratio): 73 | super(ROIAlignV2, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | 78 | def forward(self, input, rois): 79 | return torchvision.ops.roi_align( 80 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, aligned=True 81 | ) 82 | 83 | def __repr__(self): 84 | tmpstr = self.__class__.__name__ + "(" 85 | tmpstr += "output_size=" + str(self.output_size) 86 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 87 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 88 | tmpstr += ")" 89 | return tmpstr 90 | -------------------------------------------------------------------------------- /configs/pretrain/glip_Swin_L.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedVLRCNN" 3 | WEIGHT: "swin_large_patch4_window12_384_22k.pth" 4 | RPN_ONLY: True 5 | RPN_ARCHITECTURE: "VLDYHEAD" 6 | 7 | BACKBONE: 8 | CONV_BODY: "SWINT-FPN-RETINANET" 9 | OUT_CHANNELS: 256 10 | 11 | SWINT: 12 | EMBED_DIM: 192 13 | DEPTHS: (2, 2, 18, 2) 14 | NUM_HEADS: (6, 12, 24, 48) 15 | WINDOW_SIZE: 12 16 | OUT_CHANNELS: (192, 384, 768, 1536) 17 | DROP_PATH_RATE: 0.4 18 | 19 | LANGUAGE_BACKBONE: 20 | FREEZE: False 21 | MODEL_TYPE: "bert-base-uncased" # "roberta-base", "clip" 22 | MASK_SPECIAL: False 23 | 24 | RPN: 25 | USE_FPN: True 
26 | ANCHOR_SIZES: (64, 128, 256, 512, 1024) 27 | ANCHOR_STRIDE: (8, 16, 32, 64, 128) 28 | ASPECT_RATIOS: (1.0,) 29 | SCALES_PER_OCTAVE: 1 30 | 31 | DYHEAD: 32 | CHANNELS: 256 33 | NUM_CONVS: 8 34 | USE_GN: True 35 | USE_DYRELU: True 36 | USE_DFCONV: True 37 | USE_DYFUSE: True 38 | TOPK: 9 # topk for selecting candidate positive samples from each level 39 | SCORE_AGG: "MEAN" 40 | LOG_SCALE: 0.0 41 | 42 | USE_CHECKPOINT: True 43 | FUSE_CONFIG: 44 | USE_FUSED_FEATURES_DOT_PRODUCT: True 45 | EARLY_FUSE_ON: True 46 | TYPE: "MHA-B" 47 | USE_CLASSIFICATION_LOSS: False 48 | USE_TOKEN_LOSS: False 49 | USE_CONTRASTIVE_ALIGN_LOSS: False 50 | CONTRASTIVE_HIDDEN_DIM: 64 51 | USE_DOT_PRODUCT_TOKEN_LOSS: True 52 | USE_LAYER_SCALE: True 53 | CLAMP_MIN_FOR_UNDERFLOW: True 54 | CLAMP_MAX_FOR_OVERFLOW: True 55 | CLAMP_BERTATTN_MIN_FOR_UNDERFLOW: True 56 | CLAMP_BERTATTN_MAX_FOR_OVERFLOW: True 57 | CLAMP_DOT_PRODUCT: True 58 | 59 | DATASETS: 60 | 61 | TRAIN: ("mixed_train_no_coco",) # Place holder dataset for now. To be updated in the next version 62 | TEST: ("coco_2017_val", ) 63 | 64 | ONE_HOT: False 65 | FLICKR_COPY: 8 # 0.15 * 8 = ~1.2M 66 | MIXED_COPY: 4 # 0.6 * 4 = ~2.4M 67 | OBJECT365_COPY: 2 # 1.4 * 2 = ~2.8M 68 | VG_COPY: 3 # 0.4 * 3 = ~1.2M 69 | IN_COPY: 2 # 0.67 * 2 = ~1.33M 70 | OI_COPY: 1 # 2M * 1 = 2M 71 | 72 | DISABLE_SHUFFLE: False 73 | ADD_DET_PROMPT: False 74 | RANDOM_SAMPLE_NEG: 85 75 | CONTROL_PROB: (0.0, 0.0, 0.5, 0.0) 76 | FURTHER_SCREEN: True 77 | CAPTION_CONF: 0.5 78 | CAPTION_NMS: -1.0 79 | CAPTION_MIN_BOX: 1 80 | 81 | SEPARATION_TOKENS: ". " 82 | 83 | PACK_RANDOM_CAPTION_NUMBER: 20 84 | NO_RANDOM_PACK_PROBABILITY: 0.4 85 | RANDOM_PACK_PROB: 0.5 86 | CAPTION_FORMAT_VERSION: "v2" 87 | 88 | INPUT: 89 | PIXEL_MEAN: [ 103.530, 116.280, 123.675 ] 90 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 91 | MIN_SIZE_TRAIN: 800 92 | MAX_SIZE_TRAIN: 1333 93 | MIN_SIZE_TEST: 800 94 | MAX_SIZE_TEST: 1333 95 | 96 | AUGMENT: 97 | MULT_MIN_SIZE_TRAIN: (480,560,640,720,800) 98 | 99 | DATALOADER: 100 | SIZE_DIVISIBILITY: 32 101 | 102 | SOLVER: 103 | OPTIMIZER: ADAMW 104 | BASE_LR: 0.0001 105 | LANG_LR: 0.00001 106 | WEIGHT_DECAY: 0.01 107 | WEIGHT_DECAY_SCHEDULE: True 108 | STEPS: (0.67, 0.89) 109 | MAX_ITER: 1000000 110 | IMS_PER_BATCH: 64 111 | WARMUP_ITERS: 2000 112 | WARMUP_FACTOR: 0.001 113 | 114 | FIND_UNUSED_PARAMETERS: False 115 | 116 | CLIP_GRADIENTS: 117 | ENABLED: True 118 | CLIP_TYPE: "full_model" 119 | CLIP_VALUE: 1.0 120 | NORM_TYPE: 2.0 121 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/big_model_loading.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from collections import OrderedDict 6 | 7 | 8 | def tf2th(conv_weights): 9 | """Possibly convert HWIO to OIHW.""" 10 | if conv_weights.ndim == 4: 11 | conv_weights = conv_weights.transpose([3, 2, 0, 1]) 12 | return torch.from_numpy(conv_weights) 13 | 14 | 15 | def _rename_conv_weights_for_deformable_conv_layers(state_dict, cfg): 16 | import re 17 | layer_keys = sorted(state_dict.keys()) 18 | for ix, stage_with_dcn in enumerate(cfg.MODEL.RESNETS.STAGE_WITH_DCN, 1): 19 | if not stage_with_dcn: 20 | continue 21 | for old_key in layer_keys: 22 | pattern = ".*block{}.*conv2.*".format(ix) 23 | r = re.match(pattern, old_key) 24 | if r is None: 25 | continue 26 | for param in ["weight", "bias"]: 27 | if old_key.find(param) is -1: 28 | continue 29 | if 'unit01' in old_key: 30 | continue 
31 | new_key = old_key.replace( 32 | "conv2.{}".format(param), "conv2.conv.{}".format(param) 33 | ) 34 | print("pattern: {}, old_key: {}, new_key: {}".format( 35 | pattern, old_key, new_key 36 | )) 37 | # Calculate SD conv weight 38 | w = state_dict[old_key] 39 | v, m = torch.var_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False) 40 | w = (w - m) / torch.sqrt(v + 1e-10) 41 | 42 | state_dict[new_key] = w 43 | del state_dict[old_key] 44 | return state_dict 45 | 46 | 47 | def load_big_format(cfg, f): 48 | model = OrderedDict() 49 | weights = np.load(f) 50 | 51 | cmap = {'a':1, 'b':2, 'c':3} 52 | for key, val in weights.items(): 53 | old_key = key.replace('resnet/', '') 54 | if 'root_block' in old_key: 55 | new_key = 'root.conv.weight' 56 | elif '/proj/standardized_conv2d/kernel' in old_key: 57 | key_pattern = old_key.replace('/proj/standardized_conv2d/kernel', '').replace('resnet/', '') 58 | bname, uname, cidx = key_pattern.split('/') 59 | new_key = '{}.downsample.{}.conv{}.weight'.format(bname,uname,cmap[cidx]) 60 | elif '/standardized_conv2d/kernel' in old_key: 61 | key_pattern = old_key.replace('/standardized_conv2d/kernel', '').replace('resnet/', '') 62 | bname, uname, cidx = key_pattern.split('/') 63 | new_key = '{}.{}.conv{}.weight'.format(bname,uname,cmap[cidx]) 64 | elif '/group_norm/gamma' in old_key: 65 | key_pattern = old_key.replace('/group_norm/gamma', '').replace('resnet/', '') 66 | bname, uname, cidx = key_pattern.split('/') 67 | new_key = '{}.{}.gn{}.weight'.format(bname,uname,cmap[cidx]) 68 | elif '/group_norm/beta' in old_key: 69 | key_pattern = old_key.replace('/group_norm/beta', '').replace('resnet/', '') 70 | bname, uname, cidx = key_pattern.split('/') 71 | new_key = '{}.{}.gn{}.bias'.format(bname,uname,cmap[cidx]) 72 | else: 73 | print('Unknown key {}'.format(old_key)) 74 | continue 75 | print('Map {} -> {}'.format(key, new_key)) 76 | model[new_key] = tf2th(val) 77 | 78 | model = _rename_conv_weights_for_deformable_conv_layers(model, cfg) 79 | 80 | return dict(model=model) 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/bert_model.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | 6 | # from pytorch_pretrained_bert.modeling import BertModel 7 | from transformers import BertConfig, RobertaConfig, RobertaModel, BertModel 8 | 9 | 10 | class BertEncoder(nn.Module): 11 | def __init__(self, cfg): 12 | super(BertEncoder, self).__init__() 13 | self.cfg = cfg 14 | self.bert_name = cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE 15 | print("LANGUAGE BACKBONE USE GRADIENT CHECKPOINTING: ", self.cfg.MODEL.LANGUAGE_BACKBONE.USE_CHECKPOINT) 16 | 17 | if self.bert_name == "bert-base-uncased": 18 | config = BertConfig.from_pretrained(self.bert_name) 19 | config.gradient_checkpointing = self.cfg.MODEL.LANGUAGE_BACKBONE.USE_CHECKPOINT 20 | self.model = BertModel.from_pretrained(self.bert_name, add_pooling_layer=False, config=config) 21 | self.language_dim = 768 22 | elif self.bert_name == "roberta-base": 23 | config = RobertaConfig.from_pretrained(self.bert_name) 24 | config.gradient_checkpointing = self.cfg.MODEL.LANGUAGE_BACKBONE.USE_CHECKPOINT 25 | self.model = RobertaModel.from_pretrained(self.bert_name, add_pooling_layer=False, config=config) 26 | self.language_dim = 768 27 | else: 28 | raise NotImplementedError 29 | 30 | self.num_layers = cfg.MODEL.LANGUAGE_BACKBONE.N_LAYERS 
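    # forward() expects the tokenizer output as a dict with "input_ids" and
    # "attention_mask" tensors of shape (num_phrases, seq_len). It returns a dict
    # with "aggregate" (mask-averaged sentence embedding), "embedded" (per-token
    # features zeroed at padding positions), "masks" and "hidden" (the last hidden
    # layer). When USE_DOT_PRODUCT_TOKEN_LOSS is disabled, the sequence is first
    # truncated to the longest non-padded length in the batch.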
31 | 32 | def forward(self, x): 33 | input = x["input_ids"] 34 | mask = x["attention_mask"] 35 | 36 | if self.cfg.MODEL.DYHEAD.FUSE_CONFIG.USE_DOT_PRODUCT_TOKEN_LOSS: 37 | # with padding, always 256 38 | outputs = self.model( 39 | input_ids=input, 40 | attention_mask=mask, 41 | output_hidden_states=True, 42 | ) 43 | # outputs has 13 layers, 1 input layer and 12 hidden layers 44 | encoded_layers = outputs.hidden_states[1:] 45 | features = None 46 | features = torch.stack(encoded_layers[-self.num_layers:], 1).mean(1) 47 | 48 | # language embedding has shape [len(phrase), seq_len, language_dim] 49 | features = features / self.num_layers 50 | 51 | embedded = features * mask.unsqueeze(-1).float() 52 | aggregate = embedded.sum(1) / (mask.sum(-1).unsqueeze(-1).float()) 53 | 54 | else: 55 | # without padding, only consider positive_tokens 56 | max_len = (input != 0).sum(1).max().item() 57 | outputs = self.model( 58 | input_ids=input[:, :max_len], 59 | attention_mask=mask[:, :max_len], 60 | output_hidden_states=True, 61 | ) 62 | # outputs has 13 layers, 1 input layer and 12 hidden layers 63 | encoded_layers = outputs.hidden_states[1:] 64 | 65 | features = None 66 | features = torch.stack(encoded_layers[-self.num_layers:], 1).mean(1) 67 | # language embedding has shape [len(phrase), seq_len, language_dim] 68 | features = features / self.num_layers 69 | 70 | embedded = features * mask[:, :max_len].unsqueeze(-1).float() 71 | aggregate = embedded.sum(1) / (mask.sum(-1).unsqueeze(-1).float()) 72 | 73 | ret = { 74 | "aggregate": aggregate, 75 | "embedded": embedded, 76 | "masks": mask, 77 | "hidden": encoded_layers[-1] 78 | } 79 | return ret 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/refexp.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import defaultdict 3 | from pathlib import Path 4 | 5 | import torch 6 | import torch.utils.data 7 | 8 | import maskrcnn_benchmark.utils.dist as dist 9 | from maskrcnn_benchmark.layers.set_loss import generalized_box_iou 10 | 11 | from .modulated_coco import ModulatedDataset 12 | 13 | 14 | class RefExpDataset(ModulatedDataset): 15 | pass 16 | 17 | 18 | class RefExpEvaluator(object): 19 | def __init__(self, refexp_gt, iou_types, k=(1, 5, 10), thresh_iou=0.5): 20 | assert isinstance(k, (list, tuple)) 21 | refexp_gt = copy.deepcopy(refexp_gt) 22 | self.refexp_gt = refexp_gt 23 | self.iou_types = iou_types 24 | self.img_ids = self.refexp_gt.imgs.keys() 25 | self.predictions = {} 26 | self.k = k 27 | self.thresh_iou = thresh_iou 28 | 29 | def accumulate(self): 30 | pass 31 | 32 | def update(self, predictions): 33 | self.predictions.update(predictions) 34 | 35 | def synchronize_between_processes(self): 36 | all_predictions = dist.all_gather(self.predictions) 37 | merged_predictions = {} 38 | for p in all_predictions: 39 | merged_predictions.update(p) 40 | self.predictions = merged_predictions 41 | 42 | def summarize(self): 43 | if dist.is_main_process(): 44 | dataset2score = { 45 | "refcoco": {k: 0.0 for k in self.k}, 46 | "refcoco+": {k: 0.0 for k in self.k}, 47 | "refcocog": {k: 0.0 for k in self.k}, 48 | } 49 | dataset2count = {"refcoco": 0.0, "refcoco+": 0.0, "refcocog": 0.0} 50 | for image_id in self.img_ids: 51 | ann_ids = self.refexp_gt.getAnnIds(imgIds=image_id) 52 | assert len(ann_ids) == 1 53 | img_info = self.refexp_gt.loadImgs(image_id)[0] 54 | 55 | target = self.refexp_gt.loadAnns(ann_ids[0]) 56 | prediction = 
self.predictions[image_id] 57 | assert prediction is not None 58 | sorted_scores_boxes = sorted( 59 | zip(prediction["scores"].tolist(), prediction["boxes"].tolist()), reverse=True 60 | ) 61 | sorted_scores, sorted_boxes = zip(*sorted_scores_boxes) 62 | sorted_boxes = torch.cat([torch.as_tensor(x).view(1, 4) for x in sorted_boxes]) 63 | target_bbox = target[0]["bbox"] 64 | converted_bbox = [ 65 | target_bbox[0], 66 | target_bbox[1], 67 | target_bbox[2] + target_bbox[0], 68 | target_bbox[3] + target_bbox[1], 69 | ] 70 | giou = generalized_box_iou(sorted_boxes, torch.as_tensor(converted_bbox).view(-1, 4)) 71 | for k in self.k: 72 | if max(giou[:k]) >= self.thresh_iou: 73 | dataset2score[img_info["dataset_name"]][k] += 1.0 74 | dataset2count[img_info["dataset_name"]] += 1.0 75 | 76 | for key, value in dataset2score.items(): 77 | for k in self.k: 78 | try: 79 | value[k] /= dataset2count[key] 80 | except: 81 | pass 82 | results = {} 83 | for key, value in dataset2score.items(): 84 | results[key] = sorted([v for k, v in value.items()]) 85 | print(f" Dataset: {key} - Precision @ 1, 5, 10: {results[key]} \n") 86 | 87 | return results 88 | return None 89 | -------------------------------------------------------------------------------- /configs/odinw_35/pothole.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | CAPTION_PROMPT: '[{"prefix": "there are some ", "name": "holes", "suffix": " on 4 | the road"}]' 5 | GENERAL_COPY: 16 6 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "pothole", "supercategory": "potholes"}]' 7 | PREDEFINED_TEXT: odinw/pothole/category_description.json 8 | REGISTER: 9 | test: {ann_file: odinw/pothole/test/annotations_without_background.json, img_dir: odinw/pothole/test} 10 | train: {ann_file: odinw/pothole/train/annotations_without_background.json, img_dir: odinw/pothole/train} 11 | train_10_3: {ann_file: odinw/pothole/train/fewshot_train_shot10_seed3.json, img_dir: odinw/pothole/train} 12 | train_10_30: {ann_file: odinw/pothole/train/fewshot_train_shot10_seed30.json, 13 | img_dir: odinw/pothole/train} 14 | train_10_300: {ann_file: odinw/pothole/train/fewshot_train_shot10_seed300.json, 15 | img_dir: odinw/pothole/train} 16 | train_1_3: {ann_file: odinw/pothole/train/fewshot_train_shot1_seed3.json, img_dir: odinw/pothole/train} 17 | train_1_30: {ann_file: odinw/pothole/train/fewshot_train_shot1_seed30.json, img_dir: odinw/pothole/train} 18 | train_1_300: {ann_file: odinw/pothole/train/fewshot_train_shot1_seed300.json, 19 | img_dir: odinw/pothole/train} 20 | train_3_3: {ann_file: odinw/pothole/train/fewshot_train_shot3_seed3.json, img_dir: odinw/pothole/train} 21 | train_3_30: {ann_file: odinw/pothole/train/fewshot_train_shot3_seed30.json, img_dir: odinw/pothole/train} 22 | train_3_300: {ann_file: odinw/pothole/train/fewshot_train_shot3_seed300.json, 23 | img_dir: odinw/pothole/train} 24 | train_5_3: {ann_file: odinw/pothole/train/fewshot_train_shot5_seed3.json, img_dir: odinw/pothole/train} 25 | train_5_30: {ann_file: odinw/pothole/train/fewshot_train_shot5_seed30.json, img_dir: odinw/pothole/train} 26 | train_5_300: {ann_file: odinw/pothole/train/fewshot_train_shot5_seed300.json, 27 | img_dir: odinw/pothole/train} 28 | val: {ann_file: odinw/pothole/valid/annotations_without_background.json, img_dir: odinw/pothole/valid} 29 | val_10_3: {ann_file: odinw/pothole/valid/fewshot_val_shot10_seed3.json, img_dir: odinw/pothole/valid} 30 | val_10_30: {ann_file: 
odinw/pothole/valid/fewshot_val_shot10_seed30.json, img_dir: odinw/pothole/valid} 31 | val_10_300: {ann_file: odinw/pothole/valid/fewshot_val_shot10_seed300.json, img_dir: odinw/pothole/valid} 32 | val_1_3: {ann_file: odinw/pothole/valid/fewshot_val_shot1_seed3.json, img_dir: odinw/pothole/valid} 33 | val_1_30: {ann_file: odinw/pothole/valid/fewshot_val_shot1_seed30.json, img_dir: odinw/pothole/valid} 34 | val_1_300: {ann_file: odinw/pothole/valid/fewshot_val_shot1_seed300.json, img_dir: odinw/pothole/valid} 35 | val_3_3: {ann_file: odinw/pothole/valid/fewshot_val_shot3_seed3.json, img_dir: odinw/pothole/valid} 36 | val_3_30: {ann_file: odinw/pothole/valid/fewshot_val_shot3_seed30.json, img_dir: odinw/pothole/valid} 37 | val_3_300: {ann_file: odinw/pothole/valid/fewshot_val_shot3_seed300.json, img_dir: odinw/pothole/valid} 38 | val_5_3: {ann_file: odinw/pothole/valid/fewshot_val_shot5_seed3.json, img_dir: odinw/pothole/valid} 39 | val_5_30: {ann_file: odinw/pothole/valid/fewshot_val_shot5_seed30.json, img_dir: odinw/pothole/valid} 40 | val_5_300: {ann_file: odinw/pothole/valid/fewshot_val_shot5_seed300.json, img_dir: odinw/pothole/valid} 41 | TEST: ("val",) 42 | TRAIN: ("train",) 43 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 44 | MODEL: 45 | ATSS: {NUM_CLASSES: 2} 46 | DYHEAD: {NUM_CLASSES: 2} 47 | FCOS: {NUM_CLASSES: 2} 48 | ROI_BOX_HEAD: {NUM_CLASSES: 2} 49 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 50 | TEST: {IMS_PER_BATCH: 8} 51 | -------------------------------------------------------------------------------- /configs/odinw_35/pistols_export.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | GENERAL_COPY: 16 4 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "pistol", "supercategory": "Guns"}]' 5 | PREDEFINED_TEXT: odinw/pothole/category_description.json 6 | REGISTER: 7 | test: {ann_file: odinw/pistols/export/test_annotations_without_background.json, 8 | img_dir: odinw/pistols/export} 9 | train: {ann_file: odinw/pistols/export/train_annotations_without_background.json, 10 | img_dir: odinw/pistols/export} 11 | train_10_3: {ann_file: odinw/pistols/export/fewshot_train_shot10_seed3.json, img_dir: odinw/pistols/export} 12 | train_10_30: {ann_file: odinw/pistols/export/fewshot_train_shot10_seed30.json, 13 | img_dir: odinw/pistols/export} 14 | train_10_300: {ann_file: odinw/pistols/export/fewshot_train_shot10_seed300.json, 15 | img_dir: odinw/pistols/export} 16 | train_1_3: {ann_file: odinw/pistols/export/fewshot_train_shot1_seed3.json, img_dir: odinw/pistols/export} 17 | train_1_30: {ann_file: odinw/pistols/export/fewshot_train_shot1_seed30.json, img_dir: odinw/pistols/export} 18 | train_1_300: {ann_file: odinw/pistols/export/fewshot_train_shot1_seed300.json, 19 | img_dir: odinw/pistols/export} 20 | train_3_3: {ann_file: odinw/pistols/export/fewshot_train_shot3_seed3.json, img_dir: odinw/pistols/export} 21 | train_3_30: {ann_file: odinw/pistols/export/fewshot_train_shot3_seed30.json, img_dir: odinw/pistols/export} 22 | train_3_300: {ann_file: odinw/pistols/export/fewshot_train_shot3_seed300.json, 23 | img_dir: odinw/pistols/export} 24 | train_5_3: {ann_file: odinw/pistols/export/fewshot_train_shot5_seed3.json, img_dir: odinw/pistols/export} 25 | train_5_30: {ann_file: odinw/pistols/export/fewshot_train_shot5_seed30.json, img_dir: odinw/pistols/export} 26 | train_5_300: {ann_file: 
odinw/pistols/export/fewshot_train_shot5_seed300.json, 27 | img_dir: odinw/pistols/export} 28 | val: {ann_file: odinw/pistols/export/val_annotations_without_background.json, 29 | img_dir: odinw/pistols/export} 30 | val_10_3: {ann_file: odinw/pistols/export/fewshot_val_shot10_seed3.json, img_dir: odinw/pistols/export} 31 | val_10_30: {ann_file: odinw/pistols/export/fewshot_val_shot10_seed30.json, img_dir: odinw/pistols/export} 32 | val_10_300: {ann_file: odinw/pistols/export/fewshot_val_shot10_seed300.json, img_dir: odinw/pistols/export} 33 | val_1_3: {ann_file: odinw/pistols/export/fewshot_val_shot1_seed3.json, img_dir: odinw/pistols/export} 34 | val_1_30: {ann_file: odinw/pistols/export/fewshot_val_shot1_seed30.json, img_dir: odinw/pistols/export} 35 | val_1_300: {ann_file: odinw/pistols/export/fewshot_val_shot1_seed300.json, img_dir: odinw/pistols/export} 36 | val_3_3: {ann_file: odinw/pistols/export/fewshot_val_shot3_seed3.json, img_dir: odinw/pistols/export} 37 | val_3_30: {ann_file: odinw/pistols/export/fewshot_val_shot3_seed30.json, img_dir: odinw/pistols/export} 38 | val_3_300: {ann_file: odinw/pistols/export/fewshot_val_shot3_seed300.json, img_dir: odinw/pistols/export} 39 | val_5_3: {ann_file: odinw/pistols/export/fewshot_val_shot5_seed3.json, img_dir: odinw/pistols/export} 40 | val_5_30: {ann_file: odinw/pistols/export/fewshot_val_shot5_seed30.json, img_dir: odinw/pistols/export} 41 | val_5_300: {ann_file: odinw/pistols/export/fewshot_val_shot5_seed300.json, img_dir: odinw/pistols/export} 42 | TEST: ("val",) 43 | TRAIN: ("train",) 44 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 45 | MODEL: 46 | ATSS: {NUM_CLASSES: 297} 47 | DYHEAD: {NUM_CLASSES: 297} 48 | FCOS: {NUM_CLASSES: 297} 49 | ROI_BOX_HEAD: {NUM_CLASSES: 297} 50 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 51 | TEST: {IMS_PER_BATCH: 8} 52 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/word_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Language-related data loading helper functions and class wrappers. 
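Example (an illustrative sketch only; ``corpus.txt`` is a placeholder file with
one sentence per line)::

    corpus = Corpus()
    corpus.load_file("corpus.txt")                             # builds the word dictionary
    ids = corpus.tokenize("a dog on the grass", max_len=20)    # padded LongTensor of word ids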
3 | """ 4 | 5 | import re 6 | import torch 7 | import codecs 8 | 9 | UNK_TOKEN = '' 10 | PAD_TOKEN = '' 11 | END_TOKEN = '' 12 | SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)') 13 | 14 | 15 | class Dictionary(object): 16 | def __init__(self): 17 | self.word2idx = {} 18 | self.idx2word = [] 19 | 20 | def add_word(self, word): 21 | if word not in self.word2idx: 22 | self.idx2word.append(word) 23 | self.word2idx[word] = len(self.idx2word) - 1 24 | return self.word2idx[word] 25 | 26 | def __len__(self): 27 | return len(self.idx2word) 28 | 29 | def __getitem__(self, a): 30 | if isinstance(a, int): 31 | return self.idx2word[a] 32 | elif isinstance(a, list): 33 | return [self.idx2word[x] for x in a] 34 | elif isinstance(a, str): 35 | return self.word2idx[a] 36 | else: 37 | raise TypeError("Query word/index argument must be int or str") 38 | 39 | def __contains__(self, word): 40 | return word in self.word2idx 41 | 42 | 43 | class Corpus(object): 44 | def __init__(self): 45 | self.dictionary = Dictionary() 46 | 47 | def set_max_len(self, value): 48 | self.max_len = value 49 | 50 | def load_file(self, filename): 51 | with codecs.open(filename, 'r', 'utf-8') as f: 52 | for line in f: 53 | line = line.strip() 54 | self.add_to_corpus(line) 55 | self.dictionary.add_word(UNK_TOKEN) 56 | self.dictionary.add_word(PAD_TOKEN) 57 | 58 | def add_to_corpus(self, line): 59 | """Tokenizes a text line.""" 60 | # Add words to the dictionary 61 | words = line.split() 62 | # tokens = len(words) 63 | for word in words: 64 | word = word.lower() 65 | self.dictionary.add_word(word) 66 | 67 | def tokenize(self, line, max_len=20): 68 | # Tokenize line contents 69 | words = SENTENCE_SPLIT_REGEX.split(line.strip()) 70 | # words = [w.lower() for w in words if len(w) > 0] 71 | words = [w.lower() for w in words if (len(w) > 0 and w != ' ')] ## do not include space as a token 72 | 73 | if words[-1] == '.': 74 | words = words[:-1] 75 | 76 | if max_len > 0: 77 | if len(words) > max_len: 78 | words = words[:max_len] 79 | elif len(words) < max_len: 80 | # words = [PAD_TOKEN] * (max_len - len(words)) + words 81 | words = words + [END_TOKEN] + [PAD_TOKEN] * (max_len - len(words) - 1) 82 | 83 | tokens = len(words) ## for end token 84 | ids = torch.LongTensor(tokens) 85 | token = 0 86 | for word in words: 87 | if word not in self.dictionary: 88 | word = UNK_TOKEN 89 | # print(word, type(word), word.encode('ascii','ignore').decode('ascii'), type(word.encode('ascii','ignore').decode('ascii'))) 90 | if type(word) != type('a'): 91 | print(word, type(word), word.encode('ascii', 'ignore').decode('ascii'), 92 | type(word.encode('ascii', 'ignore').decode('ascii'))) 93 | word = word.encode('ascii', 'ignore').decode('ascii') 94 | ids[token] = self.dictionary[word] 95 | token += 1 96 | # ids[token] = self.dictionary[END_TOKEN] 97 | return ids 98 | 99 | def __len__(self): 100 | return len(self.dictionary) 101 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/hfpt_tokenizer.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List 2 | 3 | from transformers import AutoTokenizer 4 | import torch 5 | 6 | 7 | class HFPTTokenizer(object): 8 | def __init__(self, pt_name=None): 9 | 10 | self.pt_name = pt_name 11 | self.added_sep_token = 0 12 | self.added_cls_token = 0 13 | self.enable_add_tokens = False 14 | self.gpt_special_case = ((not self.enable_add_tokens) and ('gpt' in self.pt_name)) 15 | 16 | if (pt_name is None): 
17 | self.tokenizer = AutoTokenizer.from_pretrained('bert-base-cased') 18 | else: 19 | self.tokenizer = AutoTokenizer.from_pretrained(pt_name) 20 | 21 | # Adding tokens to GPT causing NaN training loss. 22 | # Disable for now until further investigation. 23 | if (self.enable_add_tokens): 24 | if (self.tokenizer.sep_token is None): 25 | self.tokenizer.add_special_tokens({'sep_token': ''}) 26 | self.added_sep_token = 1 27 | 28 | if (self.tokenizer.cls_token is None): 29 | self.tokenizer.add_special_tokens({'cls_token': ''}) 30 | self.added_cls_token = 1 31 | 32 | if (self.gpt_special_case): 33 | self.tokenizer.pad_token = self.tokenizer.eos_token 34 | self.tokenizer.sep_token = self.tokenizer.eos_token 35 | 36 | def get_eot_token(self): 37 | return self.tokenizer.encode(self.tokenizer.sep_token, add_special_tokens=False)[0] 38 | 39 | def get_sot_token(self): 40 | return self.tokenizer.encode(self.tokenizer.cls_token, add_special_tokens=False)[0] 41 | 42 | def get_eot_token_list(self): 43 | return self.tokenizer.encode(self.tokenizer.sep_token, add_special_tokens=False) 44 | 45 | def get_sot_token_list(self): 46 | return self.tokenizer.encode(self.tokenizer.cls_token, add_special_tokens=False) 47 | 48 | def get_tokenizer_obj(self): 49 | return self.tokenizer 50 | 51 | # Language model needs to know if new tokens 52 | # were added to the dictionary. 53 | def check_added_tokens(self): 54 | return self.added_sep_token + self.added_cls_token 55 | 56 | def tokenize(self, texts: Union[str, List[str]], context_length: int = 77): 57 | if isinstance(texts, str): 58 | texts = [texts] 59 | 60 | padding = 'max_length' 61 | 62 | seqstart = [] 63 | seqtok = [] 64 | seqend = [] 65 | 66 | max_length = context_length 67 | 68 | if (self.added_cls_token > 0): 69 | seqstart = self.get_sot_token_list() 70 | max_length = max_length - 1 71 | 72 | if (self.added_sep_token > 0): 73 | seqend = self.get_eot_token_list() 74 | max_length = max_length - 1 75 | 76 | tokens = self.tokenizer( 77 | texts, padding=padding, 78 | truncation=True, 79 | max_length=max_length 80 | )['input_ids'] 81 | 82 | for i in range(len(tokens)): 83 | tokens[i] = seqstart + tokens[i] + seqend 84 | 85 | if (self.gpt_special_case): 86 | for i in range(len(tokens)): 87 | tokens[i][-1] = self.get_eot_token() 88 | 89 | # print(str(tokens)) 90 | 91 | result = torch.Tensor(tokens).type(torch.LongTensor) 92 | 93 | return result 94 | 95 | def get_vocab_size(self): 96 | return self.tokenizer.vocab_size 97 | 98 | def __call__(self, texts: Union[str, List[str]], context_length: int = 77): 99 | return self.tokenize(texts, context_length) 100 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000. 
/ 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
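# Mask head for the second-stage ROI heads. During training,
# keep_only_positive_boxes() first drops proposals whose "labels" field is <= 0;
# ROIMaskHead then pools mask features (reusing the box-head features when
# MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR is on), runs the predictor
# (the "VL*" predictors additionally consume the language features), and returns
# the mask loss at training time or boxlists with a "mask" field at test time.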
2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None, 46 | language_dict_features=None, 47 | positive_map_label_to_token=None 48 | ): 49 | """ 50 | Arguments: 51 | features (list[Tensor]): feature-maps from possibly several levels 52 | proposals (list[BoxList]): proposal boxes 53 | targets (list[BoxList], optional): the ground-truth targets. 54 | language_dict_features: language features: hidden, embedding, mask, ... 55 | 56 | Returns: 57 | x (Tensor): the result of the feature extractor 58 | proposals (list[BoxList]): during training, the original proposals 59 | are returned. During testing, the predicted boxlists are returned 60 | with the `mask` field set 61 | losses (dict[Tensor]): During training, returns the losses for the 62 | head. During testing, returns an empty dict. 63 | """ 64 | if self.training: 65 | # during training, only focus on positive boxes 66 | all_proposals = proposals 67 | proposals, positive_inds = keep_only_positive_boxes(proposals) 68 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 69 | x = features 70 | x = x[torch.cat(positive_inds, dim=0)] 71 | else: 72 | x = self.feature_extractor(features, proposals) 73 | if self.cfg.MODEL.ROI_MASK_HEAD.PREDICTOR.startswith("VL"): 74 | mask_logits = self.predictor(x, language_dict_features) 75 | else: 76 | mask_logits = self.predictor(x) 77 | 78 | if not self.training: 79 | result = self.post_processor(mask_logits, proposals, positive_map_label_to_token) 80 | return x, result, {} 81 | 82 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 83 | 84 | return x, all_proposals, dict(loss_mask=loss_mask) 85 | 86 | 87 | def build_roi_mask_head(cfg): 88 | return ROIMaskHead(cfg) 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
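# Builds the second-stage (ROI) heads. CombinedROIHeads chains the box, mask and
# keypoint heads and lets the mask/keypoint heads reuse the box head's feature
# extractor when the corresponding SHARE_BOX_FEATURE_EXTRACTOR flag is set.
# build_roi_heads() only adds the box head when MODEL.BOX_ON is set and the model
# is not RPN_ONLY, and returns None when no head is enabled.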
2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 7 | 8 | 9 | class CombinedROIHeads(torch.nn.ModuleDict): 10 | """ 11 | Combines a set of individual heads (for box prediction or masks) into a single 12 | head. 13 | """ 14 | 15 | def __init__(self, cfg, heads): 16 | super(CombinedROIHeads, self).__init__(heads) 17 | self.cfg = cfg.clone() 18 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 19 | self.mask.feature_extractor = self.box.feature_extractor 20 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 21 | self.keypoint.feature_extractor = self.box.feature_extractor 22 | 23 | def forward(self, features, proposals, targets=None, language_dict_features=None, positive_map_label_to_token=None): 24 | losses = {} 25 | detections = proposals 26 | if self.cfg.MODEL.BOX_ON: 27 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 28 | x, detections, loss_box = self.box(features, proposals, targets) 29 | losses.update(loss_box) 30 | 31 | if self.cfg.MODEL.MASK_ON: 32 | mask_features = features 33 | # optimization: during training, if we share the feature extractor between 34 | # the box and the mask heads, then we can reuse the features already computed 35 | if ( 36 | self.training 37 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 38 | ): 39 | mask_features = x 40 | # During training, self.box() will return the unaltered proposals as "detections" 41 | # this makes the API consistent during training and testing 42 | x, detections, loss_mask = self.mask( 43 | mask_features, detections, targets, 44 | language_dict_features=language_dict_features, 45 | positive_map_label_to_token=positive_map_label_to_token) 46 | losses.update(loss_mask) 47 | 48 | if self.cfg.MODEL.KEYPOINT_ON: 49 | keypoint_features = features 50 | # optimization: during training, if we share the feature extractor between 51 | # the box and the mask heads, then we can reuse the features already computed 52 | if ( 53 | self.training 54 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 55 | ): 56 | keypoint_features = x 57 | # During training, self.box() will return the unaltered proposals as "detections" 58 | # this makes the API consistent during training and testing 59 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 60 | losses.update(loss_keypoint) 61 | return x, detections, losses 62 | 63 | 64 | def build_roi_heads(cfg): 65 | # individually create the heads, that will be combined together 66 | # afterwards 67 | # if cfg.MODEL.RPN_ONLY: 68 | # return None 69 | 70 | roi_heads = [] 71 | if cfg.MODEL.BOX_ON and not cfg.MODEL.RPN_ONLY: 72 | roi_heads.append(("box", build_roi_box_head(cfg))) 73 | if cfg.MODEL.MASK_ON: 74 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 75 | if cfg.MODEL.KEYPOINT_ON: 76 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg))) 77 | 78 | # combine individual heads in a single module 79 | if roi_heads: 80 | roi_heads = CombinedROIHeads(cfg, roi_heads) 81 | else: 82 | roi_heads = None 83 | 84 | return roi_heads -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/gqa.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import torch 5 | 
import torchvision 6 | 7 | from .modulated_coco import ConvertCocoPolysToMask, ModulatedDataset 8 | 9 | 10 | class GQADataset(ModulatedDataset): 11 | pass 12 | 13 | 14 | class GQAQuestionAnswering(torchvision.datasets.CocoDetection): 15 | def __init__(self, img_folder, ann_file, transforms, return_masks, return_tokens, tokenizer, ann_folder): 16 | super(GQAQuestionAnswering, self).__init__(img_folder, ann_file) 17 | self._transforms = transforms 18 | self.prepare = ConvertCocoPolysToMask(return_masks, return_tokens, tokenizer=tokenizer) 19 | with open(ann_folder / "gqa_answer2id.json", "r") as f: 20 | self.answer2id = json.load(f) 21 | with open(ann_folder / "gqa_answer2id_by_type.json", "r") as f: 22 | self.answer2id_by_type = json.load(f) 23 | self.type2id = {"obj": 0, "attr": 1, "rel": 2, "global": 3, "cat": 4} 24 | 25 | def __getitem__(self, idx): 26 | img, target = super(GQAQuestionAnswering, self).__getitem__(idx) 27 | image_id = self.ids[idx] 28 | coco_img = self.coco.loadImgs(image_id)[0] 29 | caption = coco_img["caption"] 30 | dataset_name = coco_img["dataset_name"] 31 | questionId = coco_img["questionId"] 32 | target = {"image_id": image_id, "annotations": target, "caption": caption} 33 | img, target = self.prepare(img, target) 34 | if self._transforms is not None: 35 | img, target = self._transforms(img, target) 36 | target["dataset_name"] = dataset_name 37 | target["questionId"] = questionId 38 | 39 | if coco_img["answer"] not in self.answer2id: 40 | answer = "unknown" 41 | else: 42 | answer = coco_img["answer"] 43 | 44 | target["answer"] = torch.as_tensor(self.answer2id[answer], dtype=torch.long) 45 | target["answer_type"] = torch.as_tensor(self.type2id[coco_img["question_type"]], dtype=torch.long) 46 | 47 | if coco_img["answer"] not in self.answer2id_by_type["answer_attr"]: 48 | answer = "unknown" 49 | else: 50 | answer = coco_img["answer"] 51 | target["answer_attr"] = torch.as_tensor( 52 | self.answer2id_by_type["answer_attr"][answer] if coco_img["question_type"] == "attr" else -100, 53 | dtype=torch.long, 54 | ) 55 | 56 | if coco_img["answer"] not in self.answer2id_by_type["answer_global"]: 57 | answer = "unknown" 58 | else: 59 | answer = coco_img["answer"] 60 | target["answer_global"] = torch.as_tensor( 61 | self.answer2id_by_type["answer_global"][answer] if coco_img["question_type"] == "global" else -100, 62 | dtype=torch.long, 63 | ) 64 | 65 | if coco_img["answer"] not in self.answer2id_by_type["answer_rel"]: 66 | answer = "unknown" 67 | else: 68 | answer = coco_img["answer"] 69 | target["answer_rel"] = torch.as_tensor( 70 | self.answer2id_by_type["answer_rel"][answer] if coco_img["question_type"] == "rel" else -100, 71 | dtype=torch.long, 72 | ) 73 | 74 | if coco_img["answer"] not in self.answer2id_by_type["answer_cat"]: 75 | answer = "unknown" 76 | else: 77 | answer = coco_img["answer"] 78 | target["answer_cat"] = torch.as_tensor( 79 | self.answer2id_by_type["answer_cat"][answer] if coco_img["question_type"] == "cat" else -100, 80 | dtype=torch.long, 81 | ) 82 | 83 | if coco_img["answer"] not in self.answer2id_by_type["answer_obj"]: 84 | answer = "unknown" 85 | else: 86 | answer = coco_img["answer"] 87 | target["answer_obj"] = torch.as_tensor( 88 | self.answer2id_by_type["answer_obj"][answer] if coco_img["question_type"] == "obj" else -100, 89 | dtype=torch.long, 90 | ) 91 | return img, target 92 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/csrc/cuda/deform_pool_cuda.cu: 
-------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 3 | 4 | // based on 5 | // author: Charles Shang 6 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 7 | 8 | #include <ATen/ATen.h> 9 | #include <ATen/cuda/CUDAContext.h> 10 | 11 | #include <THC/THC.h> 12 | #include <THC/THCDeviceUtils.cuh> 13 | 14 | #include <vector> 15 | #include <iostream> 16 | #include <cmath> 17 | 18 | 19 | void DeformablePSROIPoolForward( 20 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 21 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 22 | const int height, const int width, const int num_bbox, 23 | const int channels_trans, const int no_trans, const float spatial_scale, 24 | const int output_dim, const int group_size, const int pooled_size, 25 | const int part_size, const int sample_per_part, const float trans_std); 26 | 27 | void DeformablePSROIPoolBackwardAcc( 28 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 29 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 30 | at::Tensor trans_grad, const int batch, const int channels, 31 | const int height, const int width, const int num_bbox, 32 | const int channels_trans, const int no_trans, const float spatial_scale, 33 | const int output_dim, const int group_size, const int pooled_size, 34 | const int part_size, const int sample_per_part, const float trans_std); 35 | 36 | void deform_psroi_pooling_cuda_forward( 37 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 38 | at::Tensor top_count, const int no_trans, const float spatial_scale, 39 | const int output_dim, const int group_size, const int pooled_size, 40 | const int part_size, const int sample_per_part, const float trans_std) 41 | { 42 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 43 | 44 | const int batch = input.size(0); 45 | const int channels = input.size(1); 46 | const int height = input.size(2); 47 | const int width = input.size(3); 48 | const int channels_trans = no_trans ? 2 : trans.size(1); 49 | 50 | const int num_bbox = bbox.size(0); 51 | if (num_bbox != out.size(0)) 52 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 53 | out.size(0), num_bbox); 54 | 55 | DeformablePSROIPoolForward( 56 | input, bbox, trans, out, top_count, batch, channels, height, width, 57 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 58 | pooled_size, part_size, sample_per_part, trans_std); 59 | } 60 | 61 | void deform_psroi_pooling_cuda_backward( 62 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 63 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 64 | const int no_trans, const float spatial_scale, const int output_dim, 65 | const int group_size, const int pooled_size, const int part_size, 66 | const int sample_per_part, const float trans_std) 67 | { 68 | TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 69 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 70 | 71 | const int batch = input.size(0); 72 | const int channels = input.size(1); 73 | const int height = input.size(2); 74 | const int width = input.size(3); 75 | const int channels_trans = no_trans ?
2 : trans.size(1); 76 | 77 | const int num_bbox = bbox.size(0); 78 | if (num_bbox != out_grad.size(0)) 79 | AT_ERROR("Output shape and bbox number wont match: (%d vs %d).", 80 | out_grad.size(0), num_bbox); 81 | 82 | DeformablePSROIPoolBackwardAcc( 83 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 84 | channels, height, width, num_bbox, channels_trans, no_trans, 85 | spatial_scale, output_dim, group_size, pooled_size, part_size, 86 | sample_per_part, trans_std); 87 | } 88 | -------------------------------------------------------------------------------- /configs/odinw_35/WildfireSmoke.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | GENERAL_COPY: 16 4 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "smoke", "supercategory": "Smoke"}]' 5 | PREDEFINED_TEXT: odinw/pothole/category_description.json 6 | REGISTER: 7 | test: {ann_file: odinw/WildfireSmoke/test/annotations_without_background.json, 8 | img_dir: odinw/WildfireSmoke/test} 9 | train: {ann_file: odinw/WildfireSmoke/train/annotations_without_background.json, 10 | img_dir: odinw/WildfireSmoke/train} 11 | train_10_3: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot10_seed3.json, 12 | img_dir: odinw/WildfireSmoke/train} 13 | train_10_30: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot10_seed30.json, 14 | img_dir: odinw/WildfireSmoke/train} 15 | train_10_300: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot10_seed300.json, 16 | img_dir: odinw/WildfireSmoke/train} 17 | train_1_3: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot1_seed3.json, 18 | img_dir: odinw/WildfireSmoke/train} 19 | train_1_30: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot1_seed30.json, 20 | img_dir: odinw/WildfireSmoke/train} 21 | train_1_300: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot1_seed300.json, 22 | img_dir: odinw/WildfireSmoke/train} 23 | train_3_3: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot3_seed3.json, 24 | img_dir: odinw/WildfireSmoke/train} 25 | train_3_30: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot3_seed30.json, 26 | img_dir: odinw/WildfireSmoke/train} 27 | train_3_300: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot3_seed300.json, 28 | img_dir: odinw/WildfireSmoke/train} 29 | train_5_3: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot5_seed3.json, 30 | img_dir: odinw/WildfireSmoke/train} 31 | train_5_30: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot5_seed30.json, 32 | img_dir: odinw/WildfireSmoke/train} 33 | train_5_300: {ann_file: odinw/WildfireSmoke/train/fewshot_train_shot5_seed300.json, 34 | img_dir: odinw/WildfireSmoke/train} 35 | val: {ann_file: odinw/WildfireSmoke/valid/annotations_without_background.json, 36 | img_dir: odinw/WildfireSmoke/valid} 37 | val_10_3: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot10_seed3.json, 38 | img_dir: odinw/WildfireSmoke/valid} 39 | val_10_30: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot10_seed30.json, 40 | img_dir: odinw/WildfireSmoke/valid} 41 | val_10_300: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot10_seed300.json, 42 | img_dir: odinw/WildfireSmoke/valid} 43 | val_1_3: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot1_seed3.json, img_dir: odinw/WildfireSmoke/valid} 44 | val_1_30: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot1_seed30.json, 45 | img_dir: odinw/WildfireSmoke/valid} 46 | val_1_300: {ann_file: 
odinw/WildfireSmoke/valid/fewshot_val_shot1_seed300.json, 47 | img_dir: odinw/WildfireSmoke/valid} 48 | val_3_3: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot3_seed3.json, img_dir: odinw/WildfireSmoke/valid} 49 | val_3_30: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot3_seed30.json, 50 | img_dir: odinw/WildfireSmoke/valid} 51 | val_3_300: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot3_seed300.json, 52 | img_dir: odinw/WildfireSmoke/valid} 53 | val_5_3: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot5_seed3.json, img_dir: odinw/WildfireSmoke/valid} 54 | val_5_30: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot5_seed30.json, 55 | img_dir: odinw/WildfireSmoke/valid} 56 | val_5_300: {ann_file: odinw/WildfireSmoke/valid/fewshot_val_shot5_seed300.json, 57 | img_dir: odinw/WildfireSmoke/valid} 58 | TEST: ("val",) 59 | TRAIN: ("train",) 60 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 61 | MODEL: 62 | ATSS: {NUM_CLASSES: 2} 63 | DYHEAD: {NUM_CLASSES: 2} 64 | FCOS: {NUM_CLASSES: 2} 65 | ROI_BOX_HEAD: {NUM_CLASSES: 2} 66 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 67 | TEST: {IMS_PER_BATCH: 8} 68 | -------------------------------------------------------------------------------- /configs/odinw_35/Packages_Raw.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | CAPTION_PROMPT: '[{"prefix": "there is a ", "name": "package", "suffix": " on the 4 | porch"}]' 5 | GENERAL_COPY: 16 6 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "package", "supercategory": "packages"}]' 7 | PREDEFINED_TEXT: odinw/pothole/category_description.json 8 | REGISTER: 9 | test: {ann_file: odinw/Packages/Raw/test/annotations_without_background.json, 10 | img_dir: odinw/Packages/Raw/test} 11 | train: {ann_file: odinw/Packages/Raw/train/annotations_without_background.json, 12 | img_dir: odinw/Packages/Raw/train} 13 | train_10_3: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot10_seed3.json, 14 | img_dir: odinw/Packages/Raw/train} 15 | train_10_30: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot10_seed30.json, 16 | img_dir: odinw/Packages/Raw/train} 17 | train_10_300: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot10_seed300.json, 18 | img_dir: odinw/Packages/Raw/train} 19 | train_1_3: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot1_seed3.json, 20 | img_dir: odinw/Packages/Raw/train} 21 | train_1_30: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot1_seed30.json, 22 | img_dir: odinw/Packages/Raw/train} 23 | train_1_300: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot1_seed300.json, 24 | img_dir: odinw/Packages/Raw/train} 25 | train_3_3: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot3_seed3.json, 26 | img_dir: odinw/Packages/Raw/train} 27 | train_3_30: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot3_seed30.json, 28 | img_dir: odinw/Packages/Raw/train} 29 | train_3_300: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot3_seed300.json, 30 | img_dir: odinw/Packages/Raw/train} 31 | train_5_3: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot5_seed3.json, 32 | img_dir: odinw/Packages/Raw/train} 33 | train_5_30: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot5_seed30.json, 34 | img_dir: odinw/Packages/Raw/train} 35 | train_5_300: {ann_file: odinw/Packages/Raw/train/fewshot_train_shot5_seed300.json, 36 | img_dir: odinw/Packages/Raw/train} 37 | val: 
{ann_file: odinw/Packages/Raw/valid/annotations_without_background.json, 38 | img_dir: odinw/Packages/Raw/valid} 39 | val_10_3: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot10_seed3.json, img_dir: odinw/Packages/Raw/valid} 40 | val_10_30: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot10_seed30.json, 41 | img_dir: odinw/Packages/Raw/valid} 42 | val_10_300: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot10_seed300.json, 43 | img_dir: odinw/Packages/Raw/valid} 44 | val_1_3: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot1_seed3.json, img_dir: odinw/Packages/Raw/valid} 45 | val_1_30: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot1_seed30.json, img_dir: odinw/Packages/Raw/valid} 46 | val_1_300: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot1_seed300.json, 47 | img_dir: odinw/Packages/Raw/valid} 48 | val_3_3: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot3_seed3.json, img_dir: odinw/Packages/Raw/valid} 49 | val_3_30: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot3_seed30.json, img_dir: odinw/Packages/Raw/valid} 50 | val_3_300: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot3_seed300.json, 51 | img_dir: odinw/Packages/Raw/valid} 52 | val_5_3: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot5_seed3.json, img_dir: odinw/Packages/Raw/valid} 53 | val_5_30: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot5_seed30.json, img_dir: odinw/Packages/Raw/valid} 54 | val_5_300: {ann_file: odinw/Packages/Raw/valid/fewshot_val_shot5_seed300.json, 55 | img_dir: odinw/Packages/Raw/valid} 56 | TEST: ("val",) 57 | TRAIN: ("train",) 58 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 59 | MODEL: 60 | ATSS: {NUM_CLASSES: 2} 61 | DYHEAD: {NUM_CLASSES: 2} 62 | FCOS: {NUM_CLASSES: 2} 63 | ROI_BOX_HEAD: {NUM_CLASSES: 2} 64 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 65 | TEST: {IMS_PER_BATCH: 8} 66 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from maskrcnn_benchmark.structures.image_list import to_image_list 4 | 5 | import pdb 6 | class BatchCollator(object): 7 | """ 8 | From a list of samples from the dataset, 9 | returns the batched images and targets. 10 | This should be passed to the DataLoader 11 | """ 12 | 13 | def __init__(self, size_divisible=0): 14 | self.size_divisible = size_divisible 15 | 16 | def __call__(self, batch): 17 | transposed_batch = list(zip(*batch)) 18 | 19 | images = to_image_list(transposed_batch[0], self.size_divisible) 20 | targets = transposed_batch[1] 21 | img_ids = transposed_batch[2] 22 | positive_map = None 23 | positive_map_eval = None 24 | greenlight_map = None 25 | 26 | if isinstance(targets[0], dict): 27 | return images, targets, img_ids, positive_map, positive_map_eval 28 | 29 | if "greenlight_map" in transposed_batch[1][0].fields(): 30 | greenlight_map = torch.stack([i.get_field("greenlight_map") for i in transposed_batch[1]], dim = 0) 31 | 32 | if "positive_map" in transposed_batch[1][0].fields(): 33 | # we batch the positive maps here 34 | # Since in general each batch element will have a different number of boxes, 35 | # we collapse a single batch dimension to avoid padding. This is sufficient for our purposes. 
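# Illustrative example (hypothetical sizes): for two images whose `positive_map`
# fields have shapes (3, 20) and (2, 18), the code below builds a single (5, 20)
# bool tensor -- boxes are stacked along dim 0 and each row is right-padded with
# False up to `max_len` tokens -- which is then cast to float.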
36 | max_len = max([v.get_field("positive_map").shape[1] for v in transposed_batch[1]]) 37 | nb_boxes = sum([v.get_field("positive_map").shape[0] for v in transposed_batch[1]]) 38 | batched_pos_map = torch.zeros((nb_boxes, max_len), dtype=torch.bool) 39 | cur_count = 0 40 | for v in transposed_batch[1]: 41 | cur_pos = v.get_field("positive_map") 42 | batched_pos_map[cur_count: cur_count + len(cur_pos), : cur_pos.shape[1]] = cur_pos 43 | cur_count += len(cur_pos) 44 | 45 | assert cur_count == len(batched_pos_map) 46 | positive_map = batched_pos_map.float() 47 | 48 | 49 | if "positive_map_eval" in transposed_batch[1][0].fields(): 50 | # we batch the positive maps here 51 | # Since in general each batch element will have a different number of boxes, 52 | # we collapse a single batch dimension to avoid padding. This is sufficient for our purposes. 53 | max_len = max([v.get_field("positive_map_eval").shape[1] for v in transposed_batch[1]]) 54 | nb_boxes = sum([v.get_field("positive_map_eval").shape[0] for v in transposed_batch[1]]) 55 | batched_pos_map = torch.zeros((nb_boxes, max_len), dtype=torch.bool) 56 | cur_count = 0 57 | for v in transposed_batch[1]: 58 | cur_pos = v.get_field("positive_map_eval") 59 | batched_pos_map[cur_count: cur_count + len(cur_pos), : cur_pos.shape[1]] = cur_pos 60 | cur_count += len(cur_pos) 61 | 62 | assert cur_count == len(batched_pos_map) 63 | # assert batched_pos_map.sum().item() == sum([v["positive_map"].sum().item() for v in batch[1]]) 64 | positive_map_eval = batched_pos_map.float() 65 | 66 | 67 | return images, targets, img_ids, positive_map, positive_map_eval, greenlight_map 68 | 69 | 70 | class BBoxAugCollator(object): 71 | """ 72 | From a list of samples from the dataset, 73 | returns the images and targets. 74 | Images should be converted to batched images in `im_detect_bbox_aug` 75 | """ 76 | 77 | def __call__(self, batch): 78 | # return list(zip(*batch)) 79 | transposed_batch = list(zip(*batch)) 80 | 81 | images = transposed_batch[0] 82 | targets = transposed_batch[1] 83 | img_ids = transposed_batch[2] 84 | positive_map = None 85 | positive_map_eval = None 86 | 87 | if isinstance(targets[0], dict): 88 | return images, targets, img_ids, positive_map, positive_map_eval 89 | 90 | return images, targets, img_ids, positive_map, positive_map_eval 91 | 92 | 93 | 94 | -------------------------------------------------------------------------------- /configs/odinw_13/pothole.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | ASPECT_RATIO_GROUPING: false 3 | SIZE_DIVISIBILITY: 32 4 | DATASETS: 5 | GENERAL_COPY: 16 6 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "pothole", "supercategory": "potholes"}]' 7 | PREDEFINED_TEXT: odinw/pothole/category_description.json 8 | REGISTER: 9 | test: 10 | ann_file: odinw/pothole/test/annotations_without_background.json 11 | img_dir: odinw/pothole/test 12 | train: 13 | ann_file: odinw/pothole/train/annotations_without_background.json 14 | img_dir: odinw/pothole/train 15 | train_10_3: 16 | ann_file: odinw/pothole/train/fewshot_train_shot10_seed3.json 17 | img_dir: odinw/pothole/train 18 | train_10_30: 19 | ann_file: odinw/pothole/train/fewshot_train_shot10_seed30.json 20 | img_dir: odinw/pothole/train 21 | train_10_300: 22 | ann_file: odinw/pothole/train/fewshot_train_shot10_seed300.json 23 | img_dir: odinw/pothole/train 24 | train_1_3: 25 | ann_file: odinw/pothole/train/fewshot_train_shot1_seed3.json 26 | img_dir: odinw/pothole/train 27 | train_1_30: 28 | ann_file: 
odinw/pothole/train/fewshot_train_shot1_seed30.json 29 | img_dir: odinw/pothole/train 30 | train_1_300: 31 | ann_file: odinw/pothole/train/fewshot_train_shot1_seed300.json 32 | img_dir: odinw/pothole/train 33 | train_3_3: 34 | ann_file: odinw/pothole/train/fewshot_train_shot3_seed3.json 35 | img_dir: odinw/pothole/train 36 | train_3_30: 37 | ann_file: odinw/pothole/train/fewshot_train_shot3_seed30.json 38 | img_dir: odinw/pothole/train 39 | train_3_300: 40 | ann_file: odinw/pothole/train/fewshot_train_shot3_seed300.json 41 | img_dir: odinw/pothole/train 42 | train_5_3: 43 | ann_file: odinw/pothole/train/fewshot_train_shot5_seed3.json 44 | img_dir: odinw/pothole/train 45 | train_5_30: 46 | ann_file: odinw/pothole/train/fewshot_train_shot5_seed30.json 47 | img_dir: odinw/pothole/train 48 | train_5_300: 49 | ann_file: odinw/pothole/train/fewshot_train_shot5_seed300.json 50 | img_dir: odinw/pothole/train 51 | val: 52 | ann_file: odinw/pothole/valid/annotations_without_background.json 53 | img_dir: odinw/pothole/valid 54 | val_10_3: 55 | ann_file: odinw/pothole/valid/fewshot_val_shot10_seed3.json 56 | img_dir: odinw/pothole/valid 57 | val_10_30: 58 | ann_file: odinw/pothole/valid/fewshot_val_shot10_seed30.json 59 | img_dir: odinw/pothole/valid 60 | val_10_300: 61 | ann_file: odinw/pothole/valid/fewshot_val_shot10_seed300.json 62 | img_dir: odinw/pothole/valid 63 | val_1_3: 64 | ann_file: odinw/pothole/valid/fewshot_val_shot1_seed3.json 65 | img_dir: odinw/pothole/valid 66 | val_1_30: 67 | ann_file: odinw/pothole/valid/fewshot_val_shot1_seed30.json 68 | img_dir: odinw/pothole/valid 69 | val_1_300: 70 | ann_file: odinw/pothole/valid/fewshot_val_shot1_seed300.json 71 | img_dir: odinw/pothole/valid 72 | val_3_3: 73 | ann_file: odinw/pothole/valid/fewshot_val_shot3_seed3.json 74 | img_dir: odinw/pothole/valid 75 | val_3_30: 76 | ann_file: odinw/pothole/valid/fewshot_val_shot3_seed30.json 77 | img_dir: odinw/pothole/valid 78 | val_3_300: 79 | ann_file: odinw/pothole/valid/fewshot_val_shot3_seed300.json 80 | img_dir: odinw/pothole/valid 81 | val_5_3: 82 | ann_file: odinw/pothole/valid/fewshot_val_shot5_seed3.json 83 | img_dir: odinw/pothole/valid 84 | val_5_30: 85 | ann_file: odinw/pothole/valid/fewshot_val_shot5_seed30.json 86 | img_dir: odinw/pothole/valid 87 | val_5_300: 88 | ann_file: odinw/pothole/valid/fewshot_val_shot5_seed300.json 89 | img_dir: odinw/pothole/valid 90 | TEST: ("val",) 91 | TRAIN: ("train",) 92 | INPUT: 93 | MAX_SIZE_TEST: 1333 94 | MAX_SIZE_TRAIN: 1333 95 | MIN_SIZE_TEST: 800 96 | MIN_SIZE_TRAIN: 800 97 | MODEL: 98 | ATSS: 99 | NUM_CLASSES: 2 100 | DYHEAD: 101 | NUM_CLASSES: 2 102 | FCOS: 103 | NUM_CLASSES: 2 104 | ROI_BOX_HEAD: 105 | NUM_CLASSES: 2 106 | SOLVER: 107 | CHECKPOINT_PERIOD: 100 108 | MAX_EPOCH: 12 109 | WARMUP_ITERS: 0 110 | TEST: 111 | IMS_PER_BATCH: 8 112 | -------------------------------------------------------------------------------- /configs/odinw_35/ThermalCheetah.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | GENERAL_COPY: 16 4 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "cheetah", "supercategory": "cheetah"}, {"id": 5 | 2, "name": "human", "supercategory": "cheetah"}]' 6 | PREDEFINED_TEXT: odinw/pothole/category_description.json 7 | REGISTER: 8 | test: {ann_file: odinw/ThermalCheetah/test/annotations_without_background.json, 9 | img_dir: odinw/ThermalCheetah/test} 10 | train: {ann_file: 
odinw/ThermalCheetah/train/annotations_without_background.json, 11 | img_dir: odinw/ThermalCheetah/train} 12 | train_10_3: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot10_seed3.json, 13 | img_dir: odinw/ThermalCheetah/train} 14 | train_10_30: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot10_seed30.json, 15 | img_dir: odinw/ThermalCheetah/train} 16 | train_10_300: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot10_seed300.json, 17 | img_dir: odinw/ThermalCheetah/train} 18 | train_1_3: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot1_seed3.json, 19 | img_dir: odinw/ThermalCheetah/train} 20 | train_1_30: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot1_seed30.json, 21 | img_dir: odinw/ThermalCheetah/train} 22 | train_1_300: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot1_seed300.json, 23 | img_dir: odinw/ThermalCheetah/train} 24 | train_3_3: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot3_seed3.json, 25 | img_dir: odinw/ThermalCheetah/train} 26 | train_3_30: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot3_seed30.json, 27 | img_dir: odinw/ThermalCheetah/train} 28 | train_3_300: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot3_seed300.json, 29 | img_dir: odinw/ThermalCheetah/train} 30 | train_5_3: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot5_seed3.json, 31 | img_dir: odinw/ThermalCheetah/train} 32 | train_5_30: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot5_seed30.json, 33 | img_dir: odinw/ThermalCheetah/train} 34 | train_5_300: {ann_file: odinw/ThermalCheetah/train/fewshot_train_shot5_seed300.json, 35 | img_dir: odinw/ThermalCheetah/train} 36 | val: {ann_file: odinw/ThermalCheetah/valid/annotations_without_background.json, 37 | img_dir: odinw/ThermalCheetah/valid} 38 | val_10_3: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot10_seed3.json, 39 | img_dir: odinw/ThermalCheetah/valid} 40 | val_10_30: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot10_seed30.json, 41 | img_dir: odinw/ThermalCheetah/valid} 42 | val_10_300: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot10_seed300.json, 43 | img_dir: odinw/ThermalCheetah/valid} 44 | val_1_3: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot1_seed3.json, img_dir: odinw/ThermalCheetah/valid} 45 | val_1_30: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot1_seed30.json, 46 | img_dir: odinw/ThermalCheetah/valid} 47 | val_1_300: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot1_seed300.json, 48 | img_dir: odinw/ThermalCheetah/valid} 49 | val_3_3: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot3_seed3.json, img_dir: odinw/ThermalCheetah/valid} 50 | val_3_30: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot3_seed30.json, 51 | img_dir: odinw/ThermalCheetah/valid} 52 | val_3_300: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot3_seed300.json, 53 | img_dir: odinw/ThermalCheetah/valid} 54 | val_5_3: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot5_seed3.json, img_dir: odinw/ThermalCheetah/valid} 55 | val_5_30: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot5_seed30.json, 56 | img_dir: odinw/ThermalCheetah/valid} 57 | val_5_300: {ann_file: odinw/ThermalCheetah/valid/fewshot_val_shot5_seed300.json, 58 | img_dir: odinw/ThermalCheetah/valid} 59 | TEST: ("val",) 60 | TRAIN: ("train",) 61 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 62 | MODEL: 63 | ATSS: {NUM_CLASSES: 3} 64 | DYHEAD: {NUM_CLASSES: 3} 65 | FCOS: {NUM_CLASSES: 3} 66 | 
ROI_BOX_HEAD: {NUM_CLASSES: 3} 67 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 68 | TEST: {IMS_PER_BATCH: 8} 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling.poolers import Pooler 5 | 6 | from maskrcnn_benchmark.layers import Conv2d 7 | from maskrcnn_benchmark.layers import ConvTranspose2d 8 | 9 | 10 | class KeypointRCNNFeatureExtractor(nn.Module): 11 | def __init__(self, cfg): 12 | super(KeypointRCNNFeatureExtractor, self).__init__() 13 | 14 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 15 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 16 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 17 | pooler = Pooler( 18 | output_size=(resolution, resolution), 19 | scales=scales, 20 | sampling_ratio=sampling_ratio, 21 | ) 22 | self.pooler = pooler 23 | 24 | input_features = cfg.MODEL.BACKBONE.OUT_CHANNELS 25 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 26 | next_feature = input_features 27 | self.blocks = [] 28 | for layer_idx, layer_features in enumerate(layers, 1): 29 | layer_name = "conv_fcn{}".format(layer_idx) 30 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 31 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 32 | nn.init.constant_(module.bias, 0) 33 | self.add_module(layer_name, module) 34 | next_feature = layer_features 35 | self.blocks.append(layer_name) 36 | 37 | def forward(self, x, proposals): 38 | x = self.pooler(x, proposals) 39 | for layer_name in self.blocks: 40 | x = F.relu(getattr(self, layer_name)(x)) 41 | return x 42 | 43 | class KeypointRCNNFeature2XZoomExtractor(nn.Module): 44 | def __init__(self, cfg): 45 | super(KeypointRCNNFeature2XZoomExtractor, self).__init__() 46 | 47 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 48 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 49 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 50 | pooler = Pooler( 51 | output_size=(resolution, resolution), 52 | scales=scales, 53 | sampling_ratio=sampling_ratio, 54 | ) 55 | self.pooler = pooler 56 | 57 | input_features = cfg.MODEL.BACKBONE.OUT_CHANNELS 58 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 59 | next_feature = input_features 60 | self.blocks = [] 61 | for layer_idx, layer_features in enumerate(layers, 1): 62 | layer_name = "conv_fcn{}".format(layer_idx) 63 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 64 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 65 | nn.init.constant_(module.bias, 0) 66 | self.add_module(layer_name, module) 67 | if layer_idx==len(layers)//2: 68 | deconv_kernel = 4 69 | kps_upsacle = ConvTranspose2d(layer_features, layer_features, deconv_kernel, 70 | stride=2, padding=deconv_kernel//2-1) 71 | nn.init.kaiming_normal_(kps_upsacle.weight, mode="fan_out", nonlinearity="relu") 72 | nn.init.constant_(kps_upsacle.bias, 0) 73 | self.add_module("conv_fcn_upscale", kps_upsacle) 74 | self.blocks.append("conv_fcn_upscale") 75 | 76 | next_feature = layer_features 77 | self.blocks.append(layer_name) 78 | 79 | def forward(self, x, proposals): 80 | x = self.pooler(x, proposals) 81 | for layer_name in self.blocks: 82 | x = F.relu(getattr(self, 
layer_name)(x)) 83 | return x 84 | 85 | 86 | _ROI_KEYPOINT_FEATURE_EXTRACTORS = { 87 | "KeypointRCNNFeatureExtractor": KeypointRCNNFeatureExtractor, 88 | "KeypointRCNNFeature2XZoomExtractor": KeypointRCNNFeature2XZoomExtractor 89 | } 90 | 91 | 92 | def make_roi_keypoint_feature_extractor(cfg): 93 | func = _ROI_KEYPOINT_FEATURE_EXTRACTORS[ 94 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 95 | ] 96 | return func(cfg) -------------------------------------------------------------------------------- /configs/odinw_13/pistols_export.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: 2 | ASPECT_RATIO_GROUPING: false 3 | SIZE_DIVISIBILITY: 32 4 | DATASETS: 5 | GENERAL_COPY: 16 6 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "pistol", "supercategory": "Guns"}]' 7 | PREDEFINED_TEXT: odinw/pothole/category_description.json 8 | REGISTER: 9 | test: 10 | ann_file: odinw/pistols/export/test_annotations_without_background.json 11 | img_dir: odinw/pistols/export 12 | train: 13 | ann_file: odinw/pistols/export/train_annotations_without_background.json 14 | img_dir: odinw/pistols/export 15 | train_10_3: 16 | ann_file: odinw/pistols/export/fewshot_train_shot10_seed3.json 17 | img_dir: odinw/pistols/export 18 | train_10_30: 19 | ann_file: odinw/pistols/export/fewshot_train_shot10_seed30.json 20 | img_dir: odinw/pistols/export 21 | train_10_300: 22 | ann_file: odinw/pistols/export/fewshot_train_shot10_seed300.json 23 | img_dir: odinw/pistols/export 24 | train_1_3: 25 | ann_file: odinw/pistols/export/fewshot_train_shot1_seed3.json 26 | img_dir: odinw/pistols/export 27 | train_1_30: 28 | ann_file: odinw/pistols/export/fewshot_train_shot1_seed30.json 29 | img_dir: odinw/pistols/export 30 | train_1_300: 31 | ann_file: odinw/pistols/export/fewshot_train_shot1_seed300.json 32 | img_dir: odinw/pistols/export 33 | train_3_3: 34 | ann_file: odinw/pistols/export/fewshot_train_shot3_seed3.json 35 | img_dir: odinw/pistols/export 36 | train_3_30: 37 | ann_file: odinw/pistols/export/fewshot_train_shot3_seed30.json 38 | img_dir: odinw/pistols/export 39 | train_3_300: 40 | ann_file: odinw/pistols/export/fewshot_train_shot3_seed300.json 41 | img_dir: odinw/pistols/export 42 | train_5_3: 43 | ann_file: odinw/pistols/export/fewshot_train_shot5_seed3.json 44 | img_dir: odinw/pistols/export 45 | train_5_30: 46 | ann_file: odinw/pistols/export/fewshot_train_shot5_seed30.json 47 | img_dir: odinw/pistols/export 48 | train_5_300: 49 | ann_file: odinw/pistols/export/fewshot_train_shot5_seed300.json 50 | img_dir: odinw/pistols/export 51 | val: 52 | ann_file: odinw/pistols/export/val_annotations_without_background.json 53 | img_dir: odinw/pistols/export 54 | val_10_3: 55 | ann_file: odinw/pistols/export/fewshot_val_shot10_seed3.json 56 | img_dir: odinw/pistols/export 57 | val_10_30: 58 | ann_file: odinw/pistols/export/fewshot_val_shot10_seed30.json 59 | img_dir: odinw/pistols/export 60 | val_10_300: 61 | ann_file: odinw/pistols/export/fewshot_val_shot10_seed300.json 62 | img_dir: odinw/pistols/export 63 | val_1_3: 64 | ann_file: odinw/pistols/export/fewshot_val_shot1_seed3.json 65 | img_dir: odinw/pistols/export 66 | val_1_30: 67 | ann_file: odinw/pistols/export/fewshot_val_shot1_seed30.json 68 | img_dir: odinw/pistols/export 69 | val_1_300: 70 | ann_file: odinw/pistols/export/fewshot_val_shot1_seed300.json 71 | img_dir: odinw/pistols/export 72 | val_3_3: 73 | ann_file: odinw/pistols/export/fewshot_val_shot3_seed3.json 74 | img_dir: odinw/pistols/export 75 | val_3_30: 76 | ann_file: 
odinw/pistols/export/fewshot_val_shot3_seed30.json 77 | img_dir: odinw/pistols/export 78 | val_3_300: 79 | ann_file: odinw/pistols/export/fewshot_val_shot3_seed300.json 80 | img_dir: odinw/pistols/export 81 | val_5_3: 82 | ann_file: odinw/pistols/export/fewshot_val_shot5_seed3.json 83 | img_dir: odinw/pistols/export 84 | val_5_30: 85 | ann_file: odinw/pistols/export/fewshot_val_shot5_seed30.json 86 | img_dir: odinw/pistols/export 87 | val_5_300: 88 | ann_file: odinw/pistols/export/fewshot_val_shot5_seed300.json 89 | img_dir: odinw/pistols/export 90 | TEST: ("val",) 91 | TRAIN: ("train",) 92 | INPUT: 93 | MAX_SIZE_TEST: 1333 94 | MAX_SIZE_TRAIN: 1333 95 | MIN_SIZE_TEST: 800 96 | MIN_SIZE_TRAIN: 800 97 | MODEL: 98 | ATSS: 99 | NUM_CLASSES: 297 100 | DYHEAD: 101 | NUM_CLASSES: 297 102 | FCOS: 103 | NUM_CLASSES: 297 104 | ROI_BOX_HEAD: 105 | NUM_CLASSES: 297 106 | SOLVER: 107 | CHECKPOINT_PERIOD: 100 108 | MAX_EPOCH: 12 109 | WARMUP_ITERS: 0 110 | TEST: 111 | IMS_PER_BATCH: 8 112 | -------------------------------------------------------------------------------- /configs/odinw_35/MaskWearing_raw.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | GENERAL_COPY: 16 4 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "mask", "supercategory": "People"}, {"id": 5 | 2, "name": "no-mask", "supercategory": "People"}]' 6 | PREDEFINED_TEXT: odinw/pothole/category_description.json 7 | REGISTER: 8 | test: {ann_file: odinw/MaskWearing/raw/test/annotations_without_background.json, 9 | img_dir: odinw/MaskWearing/raw/test} 10 | train: {ann_file: odinw/MaskWearing/raw/train/annotations_without_background.json, 11 | img_dir: odinw/MaskWearing/raw/train} 12 | train_10_3: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot10_seed3.json, 13 | img_dir: odinw/MaskWearing/raw/train} 14 | train_10_30: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot10_seed30.json, 15 | img_dir: odinw/MaskWearing/raw/train} 16 | train_10_300: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot10_seed300.json, 17 | img_dir: odinw/MaskWearing/raw/train} 18 | train_1_3: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot1_seed3.json, 19 | img_dir: odinw/MaskWearing/raw/train} 20 | train_1_30: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot1_seed30.json, 21 | img_dir: odinw/MaskWearing/raw/train} 22 | train_1_300: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot1_seed300.json, 23 | img_dir: odinw/MaskWearing/raw/train} 24 | train_3_3: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot3_seed3.json, 25 | img_dir: odinw/MaskWearing/raw/train} 26 | train_3_30: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot3_seed30.json, 27 | img_dir: odinw/MaskWearing/raw/train} 28 | train_3_300: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot3_seed300.json, 29 | img_dir: odinw/MaskWearing/raw/train} 30 | train_5_3: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot5_seed3.json, 31 | img_dir: odinw/MaskWearing/raw/train} 32 | train_5_30: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot5_seed30.json, 33 | img_dir: odinw/MaskWearing/raw/train} 34 | train_5_300: {ann_file: odinw/MaskWearing/raw/train/fewshot_train_shot5_seed300.json, 35 | img_dir: odinw/MaskWearing/raw/train} 36 | val: {ann_file: odinw/MaskWearing/raw/valid/annotations_without_background.json, 37 | img_dir: odinw/MaskWearing/raw/valid} 38 | val_10_3: {ann_file: 
odinw/MaskWearing/raw/valid/fewshot_val_shot10_seed3.json, 39 | img_dir: odinw/MaskWearing/raw/valid} 40 | val_10_30: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot10_seed30.json, 41 | img_dir: odinw/MaskWearing/raw/valid} 42 | val_10_300: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot10_seed300.json, 43 | img_dir: odinw/MaskWearing/raw/valid} 44 | val_1_3: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot1_seed3.json, 45 | img_dir: odinw/MaskWearing/raw/valid} 46 | val_1_30: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot1_seed30.json, 47 | img_dir: odinw/MaskWearing/raw/valid} 48 | val_1_300: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot1_seed300.json, 49 | img_dir: odinw/MaskWearing/raw/valid} 50 | val_3_3: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot3_seed3.json, 51 | img_dir: odinw/MaskWearing/raw/valid} 52 | val_3_30: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot3_seed30.json, 53 | img_dir: odinw/MaskWearing/raw/valid} 54 | val_3_300: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot3_seed300.json, 55 | img_dir: odinw/MaskWearing/raw/valid} 56 | val_5_3: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot5_seed3.json, 57 | img_dir: odinw/MaskWearing/raw/valid} 58 | val_5_30: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot5_seed30.json, 59 | img_dir: odinw/MaskWearing/raw/valid} 60 | val_5_300: {ann_file: odinw/MaskWearing/raw/valid/fewshot_val_shot5_seed300.json, 61 | img_dir: odinw/MaskWearing/raw/valid} 62 | TEST: ("val",) 63 | TRAIN: ("train",) 64 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 65 | MODEL: 66 | ATSS: {NUM_CLASSES: 3} 67 | DYHEAD: {NUM_CLASSES: 3} 68 | FCOS: {NUM_CLASSES: 3} 69 | ROI_BOX_HEAD: {NUM_CLASSES: 3} 70 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 71 | TEST: {IMS_PER_BATCH: 8} 72 | -------------------------------------------------------------------------------- /configs/odinw_35/CottontailRabbits.yaml: -------------------------------------------------------------------------------- 1 | DATALOADER: {ASPECT_RATIO_GROUPING: false, SIZE_DIVISIBILITY: 32} 2 | DATASETS: 3 | GENERAL_COPY: 16 4 | OVERRIDE_CATEGORY: '[{"id": 1, "name": "rabbit", "supercategory": "Cottontail-Rabbit"}]' 5 | PREDEFINED_TEXT: odinw/pothole/category_description.json 6 | REGISTER: 7 | test: {ann_file: odinw/CottontailRabbits/test/annotations_without_background.json, 8 | img_dir: odinw/CottontailRabbits/test} 9 | train: {ann_file: odinw/CottontailRabbits/train/annotations_without_background.json, 10 | img_dir: odinw/CottontailRabbits/train} 11 | train_10_3: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot10_seed3.json, 12 | img_dir: odinw/CottontailRabbits/train} 13 | train_10_30: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot10_seed30.json, 14 | img_dir: odinw/CottontailRabbits/train} 15 | train_10_300: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot10_seed300.json, 16 | img_dir: odinw/CottontailRabbits/train} 17 | train_1_3: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot1_seed3.json, 18 | img_dir: odinw/CottontailRabbits/train} 19 | train_1_30: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot1_seed30.json, 20 | img_dir: odinw/CottontailRabbits/train} 21 | train_1_300: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot1_seed300.json, 22 | img_dir: odinw/CottontailRabbits/train} 23 | train_3_3: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot3_seed3.json, 24 | 
img_dir: odinw/CottontailRabbits/train} 25 | train_3_30: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot3_seed30.json, 26 | img_dir: odinw/CottontailRabbits/train} 27 | train_3_300: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot3_seed300.json, 28 | img_dir: odinw/CottontailRabbits/train} 29 | train_5_3: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot5_seed3.json, 30 | img_dir: odinw/CottontailRabbits/train} 31 | train_5_30: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot5_seed30.json, 32 | img_dir: odinw/CottontailRabbits/train} 33 | train_5_300: {ann_file: odinw/CottontailRabbits/train/fewshot_train_shot5_seed300.json, 34 | img_dir: odinw/CottontailRabbits/train} 35 | val: {ann_file: odinw/CottontailRabbits/valid/annotations_without_background.json, 36 | img_dir: odinw/CottontailRabbits/valid} 37 | val_10_3: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot10_seed3.json, 38 | img_dir: odinw/CottontailRabbits/valid} 39 | val_10_30: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot10_seed30.json, 40 | img_dir: odinw/CottontailRabbits/valid} 41 | val_10_300: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot10_seed300.json, 42 | img_dir: odinw/CottontailRabbits/valid} 43 | val_1_3: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot1_seed3.json, 44 | img_dir: odinw/CottontailRabbits/valid} 45 | val_1_30: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot1_seed30.json, 46 | img_dir: odinw/CottontailRabbits/valid} 47 | val_1_300: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot1_seed300.json, 48 | img_dir: odinw/CottontailRabbits/valid} 49 | val_3_3: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot3_seed3.json, 50 | img_dir: odinw/CottontailRabbits/valid} 51 | val_3_30: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot3_seed30.json, 52 | img_dir: odinw/CottontailRabbits/valid} 53 | val_3_300: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot3_seed300.json, 54 | img_dir: odinw/CottontailRabbits/valid} 55 | val_5_3: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot5_seed3.json, 56 | img_dir: odinw/CottontailRabbits/valid} 57 | val_5_30: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot5_seed30.json, 58 | img_dir: odinw/CottontailRabbits/valid} 59 | val_5_300: {ann_file: odinw/CottontailRabbits/valid/fewshot_val_shot5_seed300.json, 60 | img_dir: odinw/CottontailRabbits/valid} 61 | TEST: ("val",) 62 | TRAIN: ("train",) 63 | INPUT: {MAX_SIZE_TEST: 1333, MAX_SIZE_TRAIN: 1333, MIN_SIZE_TEST: 800, MIN_SIZE_TRAIN: 800} 64 | MODEL: 65 | ATSS: {NUM_CLASSES: 2} 66 | DYHEAD: {NUM_CLASSES: 2} 67 | FCOS: {NUM_CLASSES: 2} 68 | ROI_BOX_HEAD: {NUM_CLASSES: 2} 69 | SOLVER: {CHECKPOINT_PERIOD: 100, MAX_EPOCH: 12, WARMUP_ITERS: 0} 70 | TEST: {IMS_PER_BATCH: 8} 71 | --------------------------------------------------------------------------------
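The ODinW configs above all follow the same convention: each few-shot split is registered as `<split>_<shot>_<seed>` (so `train_1_3` points at `fewshot_train_shot1_seed3.json` and `val_10_300` at `fewshot_val_shot10_seed300.json`), and `NUM_CLASSES` is usually the category count plus one for the background slot (the pistols export config sets 297 instead). Below is a minimal sketch of selecting one of these registered splits; it assumes the standard yacs `cfg` object from `maskrcnn_benchmark.config`, and the final training call is a hypothetical stand-in for the repository's training entry point.

# Minimal sketch (assumptions noted above): load a per-dataset config and point
# training at its 1-shot / seed-3 split while evaluating on the full "val" split.
from maskrcnn_benchmark.config import cfg

cfg.merge_from_file("configs/odinw_13/pothole.yaml")
cfg.merge_from_list([
    "DATASETS.TRAIN", '("train_1_3",)',   # key registered under DATASETS.REGISTER
    "DATASETS.TEST", '("val",)',
])
cfg.freeze()
# train(cfg)  # hypothetical entry point standing in for the repo's training script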