├── README.md
├── RUN
│   ├── autoprompt
│   │   ├── make_auto_hybrid.sh
│   │   └── make_auto_mlm.sh
│   └── polyp_test
│       ├── zero_shot_hybrid.sh
│       └── zero_shot_lama.sh
├── autoprompt_json
│   ├── hybrid_clinicdb_path_prompt_top1.json
│   ├── hybrid_clinicdb_path_prompt_top2.json
│   ├── hybrid_clinicdb_path_prompt_top3.json
│   ├── hybrid_colondb_path_prompt_top1.json
│   ├── hybrid_colondb_path_prompt_top2.json
│   ├── hybrid_colondb_path_prompt_top3.json
│   ├── hybrid_cvc300_path_prompt_top1.json
│   ├── hybrid_cvc300_path_prompt_top2.json
│   ├── hybrid_cvc300_path_prompt_top3.json
│   ├── hybrid_kvasir_path_prompt_top1.json
│   ├── hybrid_kvasir_path_prompt_top2.json
│   └── hybrid_kvasir_path_prompt_top3.json
├── configs
│   └── glip_Swin_T_O365_GoldG_polyp_kvasir.yaml
├── figures
│   ├── finetune_res.PNG
│   ├── framework.PNG
│   └── zeroshot_res.PNG
├── make_autopromptsv2.py
├── maskrcnn_benchmark
│   ├── _C.cpython-38-x86_64-linux-gnu.so
│   ├── __init__.py
│   ├── __pycache__
│   │   └── __init__.cpython-38.pyc
│   ├── config
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── defaults.cpython-38.pyc
│   │   │   └── paths_catalog.cpython-38.pyc
│   │   ├── defaults.py
│   │   └── paths_catalog.py
│   ├── csrc
│   │   ├── ROIAlign.h
│   │   ├── ROIPool.h
│   │   ├── SigmoidFocalLoss.h
│   │   ├── cpu
│   │   │   ├── ROIAlign_cpu.cpp
│   │   │   ├── nms_cpu.cpp
│   │   │   ├── soft_nms.cpp
│   │   │   └── vision.h
│   │   ├── cuda
│   │   │   ├── ROIAlign_cuda.cu
│   │   │   ├── ROIPool_cuda.cu
│   │   │   ├── SigmoidFocalLoss_cuda.cu
│   │   │   ├── deform_conv_cuda.cu
│   │   │   ├── deform_conv_kernel_cuda.cu
│   │   │   ├── deform_pool_cuda.cu
│   │   │   ├── deform_pool_kernel_cuda.cu
│   │   │   ├── ml_nms.cu
│   │   │   ├── nms.cu
│   │   │   └── vision.h
│   │   ├── deform_conv.h
│   │   ├── deform_pool.h
│   │   ├── ml_nms.h
│   │   ├── nms.h
│   │   └── vision.cpp
│   ├── data
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── build.cpython-38.pyc
│   │   │   └── collate_batch.cpython-38.pyc
│   │   ├── build.py
│   │   ├── collate_batch.py
│   │   ├── datasets
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   ├── background.cpython-38.pyc
│   │   │   │   ├── box_label_loader.cpython-38.pyc
│   │   │   │   ├── caption.cpython-38.pyc
│   │   │   │   ├── coco.cpython-38.pyc
│   │   │   │   ├── coco_dt.cpython-38.pyc
│   │   │   │   ├── concat_dataset.cpython-38.pyc
│   │   │   │   ├── duplicate_dataset.cpython-38.pyc
│   │   │   │   ├── flickr.cpython-38.pyc
│   │   │   │   ├── gqa.cpython-38.pyc
│   │   │   │   ├── lvis.cpython-38.pyc
│   │   │   │   ├── mixed.cpython-38.pyc
│   │   │   │   ├── modulated_coco.cpython-38.pyc
│   │   │   │   ├── od_to_grounding.cpython-38.pyc
│   │   │   │   ├── phrasecut.cpython-38.pyc
│   │   │   │   ├── pseudo_data.cpython-38.pyc
│   │   │   │   ├── refexp.cpython-38.pyc
│   │   │   │   ├── tsv.cpython-38.pyc
│   │   │   │   └── voc.cpython-38.pyc
│   │   │   ├── background.py
│   │   │   ├── box_label_loader.py
│   │   │   ├── caption.py
│   │   │   ├── coco.py
│   │   │   ├── coco_dt.py
│   │   │   ├── concat_dataset.py
│   │   │   ├── custom_distributed_sampler.py
│   │   │   ├── duplicate_dataset.py
│   │   │   ├── evaluation
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   │   └── box_aug.cpython-38.pyc
│   │   │   │   ├── box_aug.py
│   │   │   │   ├── coco
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   │   │   └── coco_eval.cpython-38.pyc
│   │   │   │   │   └── coco_eval.py
│   │   │   │   ├── flickr
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   │   │   └── flickr_eval.cpython-38.pyc
│   │   │   │   │   └── flickr_eval.py
│   │   │   │   ├── lvis
│   │   │   │   │   ├── _change_lvis_annotation.py
│   │   │   │   │   ├── lvis.py
│   │   │   │   │   └── lvis_eval.py
│   │   │   │   ├── od_eval.py
│   │   │   │   ├── od_to_grounding
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   │   │   └── od_eval.cpython-38.pyc
│   │   │   │   │   └── od_eval.py
│   │   │   │   ├── vg
│   │   │   │   │   ├── __init__.py
│   │   │   │   │   ├── __pycache__
│   │   │   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   │   │   └── vg_eval.cpython-38.pyc
│   │   │   │   │   └── vg_eval.py
│   │   │   │   └── voc
│   │   │   │       ├── __init__.py
│   │   │   │       ├── __pycache__
│   │   │   │       │   ├── __init__.cpython-38.pyc
│   │   │   │       │   └── voc_eval.cpython-38.pyc
│   │   │   │       └── voc_eval.py
│   │   │   ├── flickr.py
│   │   │   ├── gqa.py
│   │   │   ├── imagenet.py
│   │   │   ├── list_dataset.py
│   │   │   ├── lvis.py
│   │   │   ├── mixed.py
│   │   │   ├── mixup.py
│   │   │   ├── modulated_coco.py
│   │   │   ├── object365.py
│   │   │   ├── od_to_grounding.py
│   │   │   ├── phrasecut.py
│   │   │   ├── pseudo_data.py
│   │   │   ├── refexp.py
│   │   │   ├── tsv.py
│   │   │   ├── vg.py
│   │   │   └── voc.py
│   │   ├── samplers
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   ├── distributed.cpython-38.pyc
│   │   │   │   ├── grouped_batch_sampler.cpython-38.pyc
│   │   │   │   └── iteration_based_batch_sampler.cpython-38.pyc
│   │   │   ├── distributed.py
│   │   │   ├── grouped_batch_sampler.py
│   │   │   └── iteration_based_batch_sampler.py
│   │   └── transforms
│   │       ├── __init__.py
│   │       ├── __pycache__
│   │       │   ├── __init__.cpython-38.pyc
│   │       │   ├── build.cpython-38.pyc
│   │       │   └── transforms.cpython-38.pyc
│   │       ├── build.py
│   │       └── transforms.py
│   ├── engine
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── inference.cpython-38.pyc
│   │   │   ├── inference_dfuc_vqa.cpython-38.pyc
│   │   │   ├── inference_isbi_vqa.cpython-38.pyc
│   │   │   └── inference_vqa.cpython-38.pyc
│   │   ├── alter_trainer.py
│   │   ├── evolution.py
│   │   ├── inference.py
│   │   ├── inference_dfuc_vqa.py
│   │   ├── inference_isbi_vqa.py
│   │   ├── inference_vqa.py
│   │   ├── predictor.py
│   │   ├── predictor_glip.py
│   │   ├── singlepath_trainer.py
│   │   ├── stage_trainer.py
│   │   └── trainer.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── batch_norm.cpython-38.pyc
│   │   │   ├── deform_conv.cpython-38.pyc
│   │   │   ├── dropblock.cpython-38.pyc
│   │   │   ├── dyhead.cpython-38.pyc
│   │   │   ├── dyrelu.cpython-38.pyc
│   │   │   ├── evonorm.cpython-38.pyc
│   │   │   ├── iou_loss.cpython-38.pyc
│   │   │   ├── misc.cpython-38.pyc
│   │   │   ├── nms.cpython-38.pyc
│   │   │   ├── roi_align.cpython-38.pyc
│   │   │   ├── roi_pool.cpython-38.pyc
│   │   │   ├── se.cpython-38.pyc
│   │   │   ├── set_loss.cpython-38.pyc
│   │   │   ├── sigmoid_focal_loss.cpython-38.pyc
│   │   │   └── smooth_l1_loss.cpython-38.pyc
│   │   ├── batch_norm.py
│   │   ├── deform_conv.py
│   │   ├── deform_pool.py
│   │   ├── dropblock.py
│   │   ├── dyhead.py
│   │   ├── dyrelu.py
│   │   ├── evonorm.py
│   │   ├── iou_loss.py
│   │   ├── misc.py
│   │   ├── nms.py
│   │   ├── roi_align.py
│   │   ├── roi_pool.py
│   │   ├── se.py
│   │   ├── set_loss.py
│   │   ├── sigmoid_focal_loss.py
│   │   └── smooth_l1_loss.py
│   ├── modeling
│   │   ├── .DS_Store
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── balanced_positive_negative_sampler.cpython-38.pyc
│   │   │   ├── box_coder.cpython-38.pyc
│   │   │   ├── make_layers.cpython-38.pyc
│   │   │   ├── matcher.cpython-38.pyc
│   │   │   ├── poolers.cpython-38.pyc
│   │   │   ├── registry.cpython-38.pyc
│   │   │   └── utils.cpython-38.pyc
│   │   ├── backbone
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   ├── bifpn.cpython-38.pyc
│   │   │   │   ├── efficientdet.cpython-38.pyc
│   │   │   │   ├── efficientnet.cpython-38.pyc
│   │   │   │   ├── fbnet.cpython-38.pyc
│   │   │   │   ├── fpn.cpython-38.pyc
│   │   │   │   ├── resnet.cpython-38.pyc
│   │   │   │   ├── swint.cpython-38.pyc
│   │   │   │   ├── swint_v2.cpython-38.pyc
│   │   │   │   ├── swint_v2_vl.cpython-38.pyc
│   │   │   │   └── swint_vl.cpython-38.pyc
│   │   │   ├── bifpn.py
│   │   │   ├── blocks.py
│   │   │   ├── efficientdet.py
│   │   │   ├── efficientnet.py
│   │   │   ├── fbnet.py
│   │   │   ├── fpn.py
│   │   │   ├── mixer.py
│   │   │   ├── ops.py
│   │   │   ├── resnet.py
│   │   │   ├── swint.py
│   │   │   ├── swint_v2.py
│   │   │   ├── swint_v2_vl.py
│   │   │   └── swint_vl.py
│   │   ├── balanced_positive_negative_sampler.py
│   │   ├── box_coder.py
│   │   ├── detector
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   ├── generalized_rcnn.cpython-38.pyc
│   │   │   │   └── generalized_vl_rcnn.cpython-38.pyc
│   │   │   ├── generalized_rcnn.py
│   │   │   └── generalized_vl_rcnn.py
│   │   ├── language_backbone
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   ├── backbone.cpython-38.pyc
│   │   │   │   ├── bert_model.cpython-38.pyc
│   │   │   │   ├── build.cpython-38.pyc
│   │   │   │   ├── clip_model.cpython-38.pyc
│   │   │   │   ├── hfpt_tokenizer.cpython-38.pyc
│   │   │   │   ├── rnn_model.cpython-38.pyc
│   │   │   │   ├── simple_tokenizer.cpython-38.pyc
│   │   │   │   └── word_utils.cpython-38.pyc
│   │   │   ├── backbone.py
│   │   │   ├── bert_model.py
│   │   │   ├── bpe_simple_vocab_16e6.txt.gz
│   │   │   ├── build.py
│   │   │   ├── clip_model.py
│   │   │   ├── hfpt_tokenizer.py
│   │   │   ├── rnn_model.py
│   │   │   ├── simple_tokenizer.py
│   │   │   ├── test_clip_tokenizer.py
│   │   │   └── word_utils.py
│   │   ├── make_layers.py
│   │   ├── matcher.py
│   │   ├── poolers.py
│   │   ├── registry.py
│   │   ├── roi_heads
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   └── __init__.cpython-38.pyc
│   │   │   ├── box_head
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   │   ├── box_head.cpython-38.pyc
│   │   │   │   │   ├── inference.cpython-38.pyc
│   │   │   │   │   ├── loss.cpython-38.pyc
│   │   │   │   │   ├── roi_box_feature_extractors.cpython-38.pyc
│   │   │   │   │   └── roi_box_predictors.cpython-38.pyc
│   │   │   │   ├── box_head.py
│   │   │   │   ├── inference.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── roi_box_feature_extractors.py
│   │   │   │   └── roi_box_predictors.py
│   │   │   ├── keypoint_head
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── inference.cpython-38.pyc
│   │   │   │   │   ├── keypoint_head.cpython-38.pyc
│   │   │   │   │   ├── loss.cpython-38.pyc
│   │   │   │   │   ├── roi_keypoint_feature_extractors.cpython-38.pyc
│   │   │   │   │   └── roi_keypoint_predictors.cpython-38.pyc
│   │   │   │   ├── inference.py
│   │   │   │   ├── keypoint_head.py
│   │   │   │   ├── loss.py
│   │   │   │   ├── roi_keypoint_feature_extractors.py
│   │   │   │   └── roi_keypoint_predictors.py
│   │   │   └── mask_head
│   │   │       ├── __init__.py
│   │   │       ├── __pycache__
│   │   │       │   ├── __init__.cpython-38.pyc
│   │   │       │   ├── hourglass.cpython-38.pyc
│   │   │       │   ├── inference.cpython-38.pyc
│   │   │       │   ├── loss.cpython-38.pyc
│   │   │       │   ├── mask_head.cpython-38.pyc
│   │   │       │   ├── roi_mask_feature_extractors.cpython-38.pyc
│   │   │       │   └── roi_mask_predictors.cpython-38.pyc
│   │   │       ├── hourglass.py
│   │   │       ├── inference.py
│   │   │       ├── loss.py
│   │   │       ├── mask_head.py
│   │   │       ├── roi_mask_feature_extractors.py
│   │   │       └── roi_mask_predictors.py
│   │   ├── rpn
│   │   │   ├── __init__.py
│   │   │   ├── __pycache__
│   │   │   │   ├── __init__.cpython-38.pyc
│   │   │   │   ├── anchor_generator.cpython-38.pyc
│   │   │   │   ├── atss.cpython-38.pyc
│   │   │   │   ├── dyhead.cpython-38.pyc
│   │   │   │   ├── fcos.cpython-38.pyc
│   │   │   │   ├── inference.cpython-38.pyc
│   │   │   │   ├── loss.cpython-38.pyc
│   │   │   │   ├── modeling_bert.cpython-38.pyc
│   │   │   │   ├── retina.cpython-38.pyc
│   │   │   │   ├── rpn.cpython-38.pyc
│   │   │   │   └── vldyhead.cpython-38.pyc
│   │   │   ├── anchor_generator.py
│   │   │   ├── atss.py
│   │   │   ├── dyhead.py
│   │   │   ├── fcos.py
│   │   │   ├── inference.py
│   │   │   ├── loss.py
│   │   │   ├── modeling_bert.py
│   │   │   ├── retina.py
│   │   │   ├── rpn.py
│   │   │   ├── transformer.py
│   │   │   └── vldyhead.py
│   │   └── utils.py
│   ├── solver
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── lr_scheduler.py
│   ├── structures
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── bounding_box.cpython-38.pyc
│   │   │   ├── boxlist_ops.cpython-38.pyc
│   │   │   ├── image_list.cpython-38.pyc
│   │   │   ├── keypoint.cpython-38.pyc
│   │   │   └── segmentation_mask.cpython-38.pyc
│   │   ├── bounding_box.py
│   │   ├── boxlist_ops.py
│   │   ├── image_list.py
│   │   ├── keypoint.py
│   │   └── segmentation_mask.py
│   └── utils
│       ├── README.md
│       ├── __init__.py
│       ├── __pycache__
│       │   ├── __init__.cpython-38.pyc
│       │   ├── amp.cpython-38.pyc
│       │   ├── big_model_loading.cpython-38.pyc
│       │   ├── c2_model_loading.cpython-38.pyc
│       │   ├── checkpoint.cpython-38.pyc
│       │   ├── collect_env.cpython-38.pyc
│       │   ├── comm.cpython-38.pyc
│       │   ├── dist.cpython-38.pyc
│       │   ├── env.cpython-38.pyc
│       │   ├── fuse_helper.cpython-38.pyc
│       │   ├── imports.cpython-38.pyc
│       │   ├── logger.cpython-38.pyc
│       │   ├── mdetr_dist.cpython-38.pyc
│       │   ├── miscellaneous.cpython-38.pyc
│       │   ├── model_serialization.cpython-38.pyc
│       │   ├── model_zoo.cpython-38.pyc
│       │   ├── pretrain_model_loading.cpython-38.pyc
│       │   ├── registry.cpython-38.pyc
│       │   ├── shallow_contrastive_loss_helper.cpython-38.pyc
│       │   └── stats.cpython-38.pyc
│       ├── amp.py
│       ├── big_model_loading.py
│       ├── c2_model_loading.py
│       ├── checkpoint.py
│       ├── collect_env.py
│       ├── comm.py
│       ├── cv2_util.py
│       ├── dist.py
│       ├── ema.py
│       ├── env.py
│       ├── flops.py
│       ├── fuse_helper.py
│       ├── imports.py
│       ├── logger.py
│       ├── mdetr_dist.py
│       ├── metric_logger.py
│       ├── miscellaneous.py
│       ├── model_serialization.py
│       ├── model_zoo.py
│       ├── pretrain_model_loading.py
│       ├── registry.py
│       ├── shallow_contrastive_loss_helper.py
│       └── stats.py
├── requirements.txt
├── test.py
└── vqa_dataloader.py

/RUN/autoprompt/make_auto_hybrid.sh:
--------------------------------------------------------------------------------
python make_autopromptsv2.py --dataset 'kvasir' \
    --cls_names 'polyp' \
    --vqa_names 'bump' \
    --mode 'hybrid' \
    --real_cls_names 'bump'
--------------------------------------------------------------------------------

/RUN/autoprompt/make_auto_mlm.sh:
--------------------------------------------------------------------------------
python make_autopromptsv2.py --dataset 'kvasir' \
    --cls_names 'polyp' \
    --vqa_names 'wound' \
    --mode 'lama' \
    --real_cls_names 'bump'
--------------------------------------------------------------------------------

/RUN/polyp_test/zero_shot_hybrid.sh:
--------------------------------------------------------------------------------
#!/bin/bash
config_file=configs/medical/glip_Swin_T_O365_GoldG_polyp_colondb.yaml
odinw_configs=configs/medical/glip_Swin_T_O365_GoldG_polyp_colondb.yaml
output_dir=OUTPUTS/polyp/hybrid/zero_shot/
model_checkpoint=MODEL/glip_tiny_model_o365_goldg.pth
jsonFile=autoprompt_json/hybrid_colondb_path_prompt_top3.json

python test.py --json ${jsonFile} \
      --config-file ${config_file} --weight ${model_checkpoint} \
      --task_config ${odinw_configs} \
      OUTPUT_DIR ${output_dir} \
      TEST.IMS_PER_BATCH 2 SOLVER.IMS_PER_BATCH 2 \
      TEST.EVAL_TASK detection \
      DATASETS.TRAIN_DATASETNAME_SUFFIX _grounding \
      DATALOADER.DISTRIBUTE_CHUNK_AMONG_NODE False \
      DATASETS.USE_OVERRIDE_CATEGORY True \
      DATASETS.USE_CAPTION_PROMPT True
      # MODEL.RETINANET.DETECTIONS_PER_IMG 300 MODEL.FCOS.DETECTIONS_PER_IMG 300 MODEL.ATSS.DETECTIONS_PER_IMG 300 MODEL.ROI_HEADS.DETECTIONS_PER_IMG 300
--------------------------------------------------------------------------------

/RUN/polyp_test/zero_shot_lama.sh:
--------------------------------------------------------------------------------
#!/bin/bash
config_file=configs/medical/glip_Swin_T_O365_GoldG_polyp_kvasir.yaml
odinw_configs=configs/medical/glip_Swin_T_O365_GoldG_polyp_kvasir.yaml
output_dir=OUTPUTS/polyp/hybrid/zero_shot/
model_checkpoint=MODEL/glip_tiny_model_o365_goldg.pth
jsonFile=autoprompt_json/newtest/lama_kvasir_path_prompt_top1.json

python test.py --json ${jsonFile} \
      --config-file ${config_file} --weight ${model_checkpoint} \
      --task_config ${odinw_configs} \
      OUTPUT_DIR ${output_dir} \
      TEST.IMS_PER_BATCH 2 SOLVER.IMS_PER_BATCH 2 \
      TEST.EVAL_TASK detection \
      DATASETS.TRAIN_DATASETNAME_SUFFIX _grounding \
      DATALOADER.DISTRIBUTE_CHUNK_AMONG_NODE False \
      DATASETS.USE_OVERRIDE_CATEGORY True \
      DATASETS.USE_CAPTION_PROMPT True
      # MODEL.RETINANET.DETECTIONS_PER_IMG 300 MODEL.FCOS.DETECTIONS_PER_IMG 300 MODEL.ATSS.DETECTIONS_PER_IMG 300 MODEL.ROI_HEADS.DETECTIONS_PER_IMG 300
--------------------------------------------------------------------------------
" 91 | 92 | INPUT: 93 | PIXEL_MEAN: [ 103.530, 116.280, 123.675 ] 94 | PIXEL_STD: [ 57.375, 57.120, 58.395 ] 95 | # MIN_SIZE_TRAIN: 640 96 | # MAX_SIZE_TRAIN: 800 97 | # MIN_SIZE_TEST: 640 98 | # MAX_SIZE_TEST: 800 99 | MIN_SIZE_TRAIN: 800 100 | MAX_SIZE_TRAIN: 1333 101 | MIN_SIZE_TEST: 800 102 | MAX_SIZE_TEST: 1333 103 | 104 | AUGMENT: 105 | MULT_MIN_SIZE_TRAIN: (480,560,640,720,800) 106 | 107 | DATALOADER: 108 | SIZE_DIVISIBILITY: 32 109 | 110 | SOLVER: 111 | OPTIMIZER: ADAMW 112 | BASE_LR: 0.0001 113 | LANG_LR: 0.00001 114 | WEIGHT_DECAY: 0.0001 115 | STEPS: (0.67, 0.89) 116 | MAX_EPOCH: 30 117 | IMS_PER_BATCH: 8 118 | WARMUP_ITERS: 2000 119 | WARMUP_FACTOR: 0.001 120 | USE_AMP: True 121 | MODEL_EMA: 0.999 122 | FIND_UNUSED_PARAMETERS: False 123 | 124 | CLIP_GRADIENTS: 125 | ENABLED: True 126 | CLIP_TYPE: "full_model" 127 | CLIP_VALUE: 1.0 128 | NORM_TYPE: 2.0 -------------------------------------------------------------------------------- /figures/finetune_res.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/figures/finetune_res.PNG -------------------------------------------------------------------------------- /figures/framework.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/figures/framework.PNG -------------------------------------------------------------------------------- /figures/zeroshot_res.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/figures/zeroshot_res.PNG -------------------------------------------------------------------------------- /maskrcnn_benchmark/_C.cpython-38-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/_C.cpython-38-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /maskrcnn_benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 

/figures/finetune_res.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/figures/finetune_res.PNG
--------------------------------------------------------------------------------

/figures/framework.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/figures/framework.PNG
--------------------------------------------------------------------------------

/figures/zeroshot_res.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/figures/zeroshot_res.PNG
--------------------------------------------------------------------------------

/maskrcnn_benchmark/_C.cpython-38-x86_64-linux-gnu.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/_C.cpython-38-x86_64-linux-gnu.so
--------------------------------------------------------------------------------

/maskrcnn_benchmark/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
--------------------------------------------------------------------------------

/maskrcnn_benchmark/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/config/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .defaults import _C as cfg
from .paths_catalog import try_to_find
--------------------------------------------------------------------------------

/maskrcnn_benchmark/config/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/config/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/config/__pycache__/defaults.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/config/__pycache__/defaults.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/config/__pycache__/paths_catalog.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/config/__pycache__/paths_catalog.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/csrc/ROIAlign.h:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once

#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif

// Interface for Python
at::Tensor ROIAlign_forward(const at::Tensor& input,
                            const at::Tensor& rois,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int sampling_ratio) {
  if (input.device().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio);
}

at::Tensor ROIAlign_backward(const at::Tensor& grad,
                             const at::Tensor& rois,
                             const float spatial_scale,
                             const int pooled_height,
                             const int pooled_width,
                             const int batch_size,
                             const int channels,
                             const int height,
                             const int width,
                             const int sampling_ratio) {
  if (grad.device().is_cuda()) {
#ifdef WITH_CUDA
    return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}
--------------------------------------------------------------------------------
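
These csrc/ headers are bound to Python in csrc/vision.cpp (further below) and ship pre-built as the _C extension listed above. A hedged sketch of calling the raw forward op directly, assuming the usual maskrcnn-benchmark ROI layout [batch_index, x1, y1, x2, y2]; the tensors here are purely illustrative:

import torch
from maskrcnn_benchmark import _C  # the compiled _C.cpython-38-...so above

features = torch.randn(1, 256, 64, 64, device="cuda")
# One ROI on image 0, box (x1, y1, x2, y2) in input-image coordinates.
rois = torch.tensor([[0.0, 32.0, 32.0, 96.0, 96.0]], device="cuda")
# spatial_scale 0.25 maps image coords onto this feature map; 7x7 bins,
# 2 bilinear samples per bin.
pooled = _C.roi_align_forward(features, rois, 0.25, 7, 7, 2)
print(pooled.shape)  # torch.Size([1, 256, 7, 7])

In practice the repo's layers/roi_align.py wraps this call in an autograd Function so ROIAlign_backward is used automatically.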

/maskrcnn_benchmark/csrc/ROIPool.h:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once

#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif


std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input,
                                                   const at::Tensor& rois,
                                                   const float spatial_scale,
                                                   const int pooled_height,
                                                   const int pooled_width) {
  if (input.device().is_cuda()) {
#ifdef WITH_CUDA
    return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}

at::Tensor ROIPool_backward(const at::Tensor& grad,
                            const at::Tensor& input,
                            const at::Tensor& rois,
                            const at::Tensor& argmax,
                            const float spatial_scale,
                            const int pooled_height,
                            const int pooled_width,
                            const int batch_size,
                            const int channels,
                            const int height,
                            const int width) {
  if (grad.device().is_cuda()) {
#ifdef WITH_CUDA
    return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}
--------------------------------------------------------------------------------

/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
#pragma once

#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif

// Interface for Python
at::Tensor SigmoidFocalLoss_forward(
    const at::Tensor& logits,
    const at::Tensor& targets,
    const int num_classes,
    const float gamma,
    const float alpha) {
  if (logits.device().is_cuda()) {
#ifdef WITH_CUDA
    return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}

at::Tensor SigmoidFocalLoss_backward(
    const at::Tensor& logits,
    const at::Tensor& targets,
    const at::Tensor& d_losses,
    const int num_classes,
    const float gamma,
    const float alpha) {
  if (logits.device().is_cuda()) {
#ifdef WITH_CUDA
    return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}
--------------------------------------------------------------------------------
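
For reference, the CUDA kernel behind this header computes a one-vs-all sigmoid focal loss with integer class targets (0 = background, k = class k, matching logit column k-1). A hedged PyTorch restatement of the same per-element math:

import torch

def sigmoid_focal_loss(logits, targets, gamma=2.0, alpha=0.25):
    # logits: (N, num_classes); targets: (N,) int64 with 0 = background.
    num_classes = logits.shape[1]
    class_ids = torch.arange(1, num_classes + 1, device=logits.device).unsqueeze(0)
    t = targets.unsqueeze(1)
    p = torch.sigmoid(logits)
    # positive term: -alpha * (1 - p)^gamma * log(p) on the target's column
    pos = -alpha * (1 - p).pow(gamma) * p.clamp(min=1e-12).log() * (t == class_ids)
    # negative term: -(1 - alpha) * p^gamma * log(1 - p) everywhere else
    neg = -(1 - alpha) * p.pow(gamma) * (1 - p).clamp(min=1e-12).log() * ((t != class_ids) & (t >= 0))
    return pos + neg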

/maskrcnn_benchmark/csrc/cpu/nms_cpu.cpp:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "cpu/vision.h"


template <typename scalar_t>
at::Tensor nms_cpu_kernel(const at::Tensor& dets,
                          const at::Tensor& scores,
                          const float threshold) {
  AT_ASSERTM(!dets.device().is_cuda(), "dets must be a CPU tensor");
  AT_ASSERTM(!scores.device().is_cuda(), "scores must be a CPU tensor");
  AT_ASSERTM(dets.type() == scores.type(), "dets should have the same type as scores");

  if (dets.numel() == 0) {
    return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
  }

  auto x1_t = dets.select(1, 0).contiguous();
  auto y1_t = dets.select(1, 1).contiguous();
  auto x2_t = dets.select(1, 2).contiguous();
  auto y2_t = dets.select(1, 3).contiguous();

  at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);

  auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));

  auto ndets = dets.size(0);
  at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));

  auto suppressed = suppressed_t.data_ptr<uint8_t>();
  auto order = order_t.data_ptr<int64_t>();
  auto x1 = x1_t.data_ptr<scalar_t>();
  auto y1 = y1_t.data_ptr<scalar_t>();
  auto x2 = x2_t.data_ptr<scalar_t>();
  auto y2 = y2_t.data_ptr<scalar_t>();
  auto areas = areas_t.data_ptr<scalar_t>();

  for (int64_t _i = 0; _i < ndets; _i++) {
    auto i = order[_i];
    if (suppressed[i] == 1)
      continue;
    auto ix1 = x1[i];
    auto iy1 = y1[i];
    auto ix2 = x2[i];
    auto iy2 = y2[i];
    auto iarea = areas[i];

    for (int64_t _j = _i + 1; _j < ndets; _j++) {
      auto j = order[_j];
      if (suppressed[j] == 1)
        continue;
      auto xx1 = std::max(ix1, x1[j]);
      auto yy1 = std::max(iy1, y1[j]);
      auto xx2 = std::min(ix2, x2[j]);
      auto yy2 = std::min(iy2, y2[j]);

      auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
      auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
      auto inter = w * h;
      auto ovr = inter / (iarea + areas[j] - inter);
      if (ovr >= threshold)
        suppressed[j] = 1;
    }
  }
  return at::nonzero(suppressed_t == 0).squeeze(1);
}

at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold) {
  at::Tensor result;
  AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
    result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
  });
  return result;
}
--------------------------------------------------------------------------------
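
The kernel above is the classic greedy NMS with the legacy "+ 1" box-area convention. A Python equivalent, useful for sanity-checking the C++ path:

import torch

def nms_py(dets, scores, threshold):
    # dets: (N, 4) boxes as (x1, y1, x2, y2); mirrors nms_cpu_kernel,
    # including the +1 area convention.
    x1, y1, x2, y2 = dets.unbind(1)
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort(descending=True)
    keep = []
    while order.numel() > 0:
        i = order[0]
        keep.append(i.item())
        rest = order[1:]
        w = (torch.minimum(x2[i], x2[rest]) - torch.maximum(x1[i], x1[rest]) + 1).clamp(min=0)
        h = (torch.minimum(y2[i], y2[rest]) - torch.maximum(y1[i], y1[rest]) + 1).clamp(min=0)
        iou = w * h / (areas[i] + areas[rest] - w * h)
        order = rest[iou < threshold]  # the kernel suppresses at iou >= threshold
    return torch.tensor(keep, dtype=torch.long)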

/maskrcnn_benchmark/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include <torch/extension.h>


at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
                                const at::Tensor& rois,
                                const float spatial_scale,
                                const int pooled_height,
                                const int pooled_width,
                                const int sampling_ratio);


at::Tensor nms_cpu(const at::Tensor& dets,
                   const at::Tensor& scores,
                   const float threshold);


std::pair<at::Tensor, at::Tensor> soft_nms_cpu(const at::Tensor& dets,
                                               const at::Tensor& scores,
                                               const float threshold,
                                               const float sigma);
--------------------------------------------------------------------------------

/maskrcnn_benchmark/csrc/cuda/deform_pool_cuda.cu:
--------------------------------------------------------------------------------
// modify from
// https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c

// based on
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu

#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>

#include <THC/THC.h>
#include <THC/THCDeviceUtils.cuh>

#include <vector>
#include <iostream>
#include <cmath>


void DeformablePSROIPoolForward(
    const at::Tensor data, const at::Tensor bbox, const at::Tensor trans,
    at::Tensor out, at::Tensor top_count, const int batch, const int channels,
    const int height, const int width, const int num_bbox,
    const int channels_trans, const int no_trans, const float spatial_scale,
    const int output_dim, const int group_size, const int pooled_size,
    const int part_size, const int sample_per_part, const float trans_std);

void DeformablePSROIPoolBackwardAcc(
    const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox,
    const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad,
    at::Tensor trans_grad, const int batch, const int channels,
    const int height, const int width, const int num_bbox,
    const int channels_trans, const int no_trans, const float spatial_scale,
    const int output_dim, const int group_size, const int pooled_size,
    const int part_size, const int sample_per_part, const float trans_std);

void deform_psroi_pooling_cuda_forward(
    at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out,
    at::Tensor top_count, const int no_trans, const float spatial_scale,
    const int output_dim, const int group_size, const int pooled_size,
    const int part_size, const int sample_per_part, const float trans_std)
{
  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);
  const int channels_trans = no_trans ? 2 : trans.size(1);

  const int num_bbox = bbox.size(0);
  if (num_bbox != out.size(0))
    AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
             out.size(0), num_bbox);

  DeformablePSROIPoolForward(
      input, bbox, trans, out, top_count, batch, channels, height, width,
      num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size,
      pooled_size, part_size, sample_per_part, trans_std);
}

void deform_psroi_pooling_cuda_backward(
    at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans,
    at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad,
    const int no_trans, const float spatial_scale, const int output_dim,
    const int group_size, const int pooled_size, const int part_size,
    const int sample_per_part, const float trans_std)
{
  TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
  TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");

  const int batch = input.size(0);
  const int channels = input.size(1);
  const int height = input.size(2);
  const int width = input.size(3);
  const int channels_trans = no_trans ? 2 : trans.size(1);

  const int num_bbox = bbox.size(0);
  if (num_bbox != out_grad.size(0))
    AT_ERROR("Output shape and bbox number wont match: (%d vs %d).",
             out_grad.size(0), num_bbox);

  DeformablePSROIPoolBackwardAcc(
      out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch,
      channels, height, width, num_bbox, channels_trans, no_trans,
      spatial_scale, output_dim, group_size, pooled_size, part_size,
      sample_per_part, trans_std);
}
--------------------------------------------------------------------------------

/maskrcnn_benchmark/csrc/deform_pool.h:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif


// Interface for Python
void deform_psroi_pooling_forward(
    at::Tensor input,
    at::Tensor bbox,
    at::Tensor trans,
    at::Tensor out,
    at::Tensor top_count,
    const int no_trans,
    const float spatial_scale,
    const int output_dim,
    const int group_size,
    const int pooled_size,
    const int part_size,
    const int sample_per_part,
    const float trans_std)
{
  if (input.device().is_cuda()) {
#ifdef WITH_CUDA
    return deform_psroi_pooling_cuda_forward(
        input, bbox, trans, out, top_count,
        no_trans, spatial_scale, output_dim, group_size,
        pooled_size, part_size, sample_per_part, trans_std
    );
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}


void deform_psroi_pooling_backward(
    at::Tensor out_grad,
    at::Tensor input,
    at::Tensor bbox,
    at::Tensor trans,
    at::Tensor top_count,
    at::Tensor input_grad,
    at::Tensor trans_grad,
    const int no_trans,
    const float spatial_scale,
    const int output_dim,
    const int group_size,
    const int pooled_size,
    const int part_size,
    const int sample_per_part,
    const float trans_std)
{
  if (input.device().is_cuda()) {
#ifdef WITH_CUDA
    return deform_psroi_pooling_cuda_backward(
        out_grad, input, bbox, trans, top_count, input_grad, trans_grad,
        no_trans, spatial_scale, output_dim, group_size, pooled_size,
        part_size, sample_per_part, trans_std
    );
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("Not implemented on the CPU");
}
--------------------------------------------------------------------------------

/maskrcnn_benchmark/csrc/ml_nms.h:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif


at::Tensor ml_nms(const at::Tensor& dets,
                  const at::Tensor& scores,
                  const at::Tensor& labels,
                  const float threshold) {

  if (dets.device().is_cuda()) {
#ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1);
    return ml_nms_cuda(b, threshold);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }
  AT_ERROR("CPU version not implemented");
}
--------------------------------------------------------------------------------
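
ml_nms packs boxes, scores, and labels into one (N, 6) tensor for the CUDA kernel; the effect is class-aware NMS, where a box only suppresses boxes carrying the same label. A Python restatement of that behavior, reusing nms_py from the sketch above:

import torch

def ml_nms_py(dets, scores, labels, threshold):
    # Run plain greedy NMS independently within each label group.
    keep = []
    for lbl in labels.unique():
        idx = (labels == lbl).nonzero(as_tuple=True)[0]
        keep.append(idx[nms_py(dets[idx], scores[idx], threshold)])
    return torch.cat(keep)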

/maskrcnn_benchmark/csrc/nms.h:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#pragma once
#include "cpu/vision.h"

#ifdef WITH_CUDA
#include "cuda/vision.h"
#endif


at::Tensor nms(const at::Tensor& dets,
               const at::Tensor& scores,
               const float threshold) {

  if (dets.device().is_cuda()) {
#ifdef WITH_CUDA
    // TODO raise error if not compiled with CUDA
    if (dets.numel() == 0)
      return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
    auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
    return nms_cuda(b, threshold);
#else
    AT_ERROR("Not compiled with GPU support");
#endif
  }

  at::Tensor result = nms_cpu(dets, scores, threshold);
  return result;
}


std::pair<at::Tensor, at::Tensor> soft_nms(const at::Tensor& dets,
                                           const at::Tensor& scores,
                                           const float threshold,
                                           const float sigma) {

  if (dets.device().is_cuda()) {
#ifdef WITH_CUDA
    AT_ERROR("Soft NMS Does Not have GPU support");
#endif
  }

  std::pair<at::Tensor, at::Tensor> result = soft_nms_cpu(dets, scores, threshold, sigma);

  return result;
}
--------------------------------------------------------------------------------

/maskrcnn_benchmark/csrc/vision.cpp:
--------------------------------------------------------------------------------
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#include "nms.h"
#include "ml_nms.h"
#include "ROIAlign.h"
#include "ROIPool.h"
#include "SigmoidFocalLoss.h"
#include "deform_conv.h"
#include "deform_pool.h"

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("nms", &nms, "non-maximum suppression");
  m.def("ml_nms", &ml_nms, "multi-label non-maximum suppression");
  m.def("soft_nms", &soft_nms, "soft non-maximum suppression");
  m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward");
  m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward");
  m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward");
  m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward");
  m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward");
  m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward");
  m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward");
  m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input");
  m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters");
  m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward");
  m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward");
  m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward");
  m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward");
}
--------------------------------------------------------------------------------
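
With the module compiled, these bindings are callable directly from Python (the repo's layers/nms.py and friends wrap them). A quick hedged sanity check of the plain NMS binding; the boxes are illustrative:

import torch
from maskrcnn_benchmark import _C

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = _C.nms(boxes, scores, 0.5)  # CPU tensors dispatch to nms_cpu
print(keep)  # tensor([0, 2]): box 1 overlaps box 0 above the threshold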

/maskrcnn_benchmark/data/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .build import make_data_loader
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/__pycache__/build.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/__pycache__/build.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/__pycache__/collate_batch.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/__pycache__/collate_batch.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__init__.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from .coco import COCODataset
from .voc import PascalVOCDataset
from .concat_dataset import ConcatDataset
from .background import Background
from .tsv import TSVDataset, ODTSVDataset

from .modulated_coco import ModulatedDataset, CocoDetection, CocoGrounding
from .flickr import FlickrDataset
from .refexp import RefExpDataset
from .mixed import MixedDataset
from .gqa import GQADataset

from .coco_dt import CocoDetectionTSV
from .caption import CaptionTSV
from .lvis import LvisDetection
from .pseudo_data import PseudoData
from .phrasecut import PhrasecutDetection

__all__ = ["COCODataset", "TSVDataset", "ODTSVDataset", "ConcatDataset", "PascalVOCDataset", "Background",
           "ModulatedDataset", "MixedDataset", "CocoDetection", "FlickrDataset", "RefExpDataset", "GQADataset",
           "CocoDetectionTSV", "CocoGrounding", "CaptionTSV", "LvisDetection", "PseudoData", "PhrasecutDetection"
           ]
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/background.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/background.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/box_label_loader.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/box_label_loader.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/caption.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/caption.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/coco.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/coco.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/coco_dt.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/coco_dt.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/concat_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/concat_dataset.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/duplicate_dataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/duplicate_dataset.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/flickr.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/flickr.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/gqa.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/gqa.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/lvis.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/lvis.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/mixed.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/mixed.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/modulated_coco.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/modulated_coco.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/od_to_grounding.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/od_to_grounding.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/phrasecut.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/phrasecut.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/pseudo_data.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/pseudo_data.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/refexp.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/refexp.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/tsv.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/tsv.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/__pycache__/voc.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/__pycache__/voc.cpython-38.pyc
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/background.py:
--------------------------------------------------------------------------------
import os
import os.path
import json
from PIL import Image

import torch
import torchvision
import torch.utils.data as data
from maskrcnn_benchmark.structures.bounding_box import BoxList

class Background(data.Dataset):
    """ Background

    Args:
        root (string): Root directory where images are downloaded to.
        annFile (string): Path to json annotation file.
        transform (callable, optional): A function/transform that takes in an PIL image
            and returns a transformed version. E.g, ``transforms.ToTensor``
    """

    def __init__(self, ann_file, root, remove_images_without_annotations=None, transforms=None):
        self.root = root

        with open(ann_file, 'r') as f:
            self.ids = json.load(f)['images']
        self.transform = transforms

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is the object returned by ``coco.loadAnns``.
        """
        im_info = self.ids[index]
        path = im_info['file_name']
        fp = os.path.join(self.root, path)

        img = Image.open(fp).convert('RGB')
        if self.transform is not None:
            img, _ = self.transform(img, None)
        null_target = BoxList(torch.zeros((0, 4)), (img.shape[-1], img.shape[-2]))
        null_target.add_field('labels', torch.zeros(0))

        return img, null_target, index

    def __len__(self):
        return len(self.ids)

    def get_img_info(self, index):
        im_info = self.ids[index]
        return im_info
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/concat_dataset.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import bisect

from torch.utils.data.dataset import ConcatDataset as _ConcatDataset


class ConcatDataset(_ConcatDataset):
    """
    Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra
    method for querying the sizes of the image
    """

    def get_idxs(self, idx):
        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
        if dataset_idx == 0:
            sample_idx = idx
        else:
            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
        return dataset_idx, sample_idx

    def get_img_info(self, idx):
        dataset_idx, sample_idx = self.get_idxs(idx)
        return self.datasets[dataset_idx].get_img_info(sample_idx)
--------------------------------------------------------------------------------

/maskrcnn_benchmark/data/datasets/duplicate_dataset.py:
--------------------------------------------------------------------------------
import math
from typing import TypeVar, Optional, Iterator

import torch
from torch.utils.data import Sampler, Dataset
import torch.distributed as dist
import random
import numpy as np


def create_duplicate_dataset(DatasetBaseClass):
    class DupDataset(DatasetBaseClass):

        def __init__(self, copy, **kwargs):
            super(DupDataset, self).__init__(**kwargs)

            self.copy = copy
            self.length = super(DupDataset, self).__len__()

        def __len__(self):
            return self.copy * self.length

        def __getitem__(self, index):
            true_index = index % self.length
            return super(DupDataset, self).__getitem__(true_index)

        def get_img_info(self, index):
            true_index = index % self.length
            return super(DupDataset, self).get_img_info(true_index)

    return DupDataset
--------------------------------------------------------------------------------
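
create_duplicate_dataset is a class factory: the wrapped dataset reports copy times its true length and wraps indices modulo the real size, which is how DATASETS.GENERAL_COPY: 16 in the config above stretches a small training set. A hedged usage sketch (the COCODataset constructor arguments are illustrative):

from maskrcnn_benchmark.data.datasets import COCODataset
from maskrcnn_benchmark.data.datasets.duplicate_dataset import create_duplicate_dataset

# Build a COCODataset subclass whose length is multiplied by `copy`.
DupCOCO = create_duplicate_dataset(COCODataset)
dataset = DupCOCO(
    copy=16,
    ann_file="DATA/POLYP/annotations/Kvasir_train.json",  # illustrative paths
    root="DATA/POLYP/train/images",
    remove_images_without_annotations=True,
)
print(len(dataset))  # 16x the underlying image count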

/maskrcnn_benchmark/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
from maskrcnn_benchmark.data import datasets

from .coco import coco_evaluation
from .voc import voc_evaluation
from .vg import vg_evaluation
from .box_aug import im_detect_bbox_aug
from .od_to_grounding import od_to_grounding_evaluation


def evaluate(dataset, predictions, output_folder, **kwargs):
    """evaluate dataset using different methods based on dataset type.
    Args:
        dataset: Dataset object
        predictions(list[BoxList]): each item in the list represents the
            prediction results for one image.
        output_folder: output folder, to save evaluation files or results.
        **kwargs: other args.
    Returns:
        evaluation result
    """
    args = dict(
        dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
    )
    if isinstance(dataset, datasets.COCODataset) or isinstance(dataset, datasets.TSVDataset):
        return coco_evaluation(**args)
    # elif isinstance(dataset, datasets.VGTSVDataset):
    #     return vg_evaluation(**args)
    elif isinstance(dataset, datasets.PascalVOCDataset):
        return voc_evaluation(**args)
    elif isinstance(dataset, datasets.CocoDetectionTSV):
        return od_to_grounding_evaluation(**args)
    elif isinstance(dataset, datasets.LvisDetection):
        pass
    else:
        dataset_name = dataset.__class__.__name__
        raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))


def evaluate_mdetr(dataset, predictions, output_folder, **kwargs):

    args = dict(
        dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
    )
    if isinstance(dataset, datasets.COCODataset) or isinstance(dataset, datasets.TSVDataset):
        return coco_evaluation(**args)
    # elif isinstance(dataset, datasets.VGTSVDataset):
    #     return vg_evaluation(**args)
    elif isinstance(dataset, datasets.PascalVOCDataset):
        return voc_evaluation(**args)
    elif isinstance(dataset, datasets.CocoDetectionTSV):
        return od_to_grounding_evaluation(**args)
    elif isinstance(dataset, datasets.LvisDetection):
        pass
    else:
        dataset_name = dataset.__class__.__name__
        raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
--------------------------------------------------------------------------------
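
Callers never pick an evaluator by hand; evaluate dispatches on the dataset's concrete class. A hedged sketch of the call site (dataset and predictions are stand-ins for objects produced by the data loader and the inference engine):

from maskrcnn_benchmark.data.datasets.evaluation import evaluate

results = evaluate(
    dataset=dataset,            # e.g. a COCODataset instance
    predictions=predictions,    # list[BoxList], one entry per image
    output_folder="OUTPUTS/polyp/hybrid/zero_shot",
    box_only=False,
    iou_types=("bbox",),
)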
box_only=box_only, 17 | output_folder=output_folder, 18 | iou_types=iou_types, 19 | expected_results=expected_results, 20 | expected_results_sigma_tol=expected_results_sigma_tol, 21 | ) 22 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/coco/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/coco/__pycache__/coco_eval.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/coco/__pycache__/coco_eval.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/flickr/__init__.py: -------------------------------------------------------------------------------- 1 | from .flickr_eval import FlickrEvaluator 2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/flickr/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/flickr/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/flickr/__pycache__/flickr_eval.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/flickr/__pycache__/flickr_eval.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/lvis/_change_lvis_annotation.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | path = "DATASET/coco/annotations/lvis_v1_minival.json" 4 | with open(path) as f: 5 | anns = json.load(f) 6 | for i in anns["images"]: 7 | i["file_name"] = "/".join(i["coco_url"].split("/")[-2:]) 8 | 9 | with open("DATASET/coco/annotations/lvis_v1_minival_inserted_image_name.json", "w") as f: 10 | json.dump(anns, f) -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/od_eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/od_eval.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/od_to_grounding/__init__.py: -------------------------------------------------------------------------------- 1 | from .od_eval import do_od_evaluation 2 | 3 | 4 | def od_to_grounding_evaluation( 5 | dataset, 6 | predictions, 7 | output_folder, 8 | box_only=False, 9 | iou_types=("bbox",), 10 |
expected_results=(), 11 | expected_results_sigma_tol=4, ): 12 | return do_od_evaluation( 13 | dataset=dataset, 14 | predictions=predictions, 15 | box_only=box_only, 16 | output_folder=output_folder, 17 | iou_types=iou_types, 18 | expected_results=expected_results, 19 | expected_results_sigma_tol=expected_results_sigma_tol, 20 | ) 21 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/od_to_grounding/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/od_to_grounding/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/od_to_grounding/__pycache__/od_eval.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/od_to_grounding/__pycache__/od_eval.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/vg/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .vg_eval import do_vg_evaluation 4 | 5 | 6 | def vg_evaluation(dataset, predictions, output_folder, box_only, eval_attributes=False, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | logger.info("performing vg evaluation, ignored iou_types.") 9 | return do_vg_evaluation( 10 | dataset=dataset, 11 | predictions=predictions, 12 | output_folder=output_folder, 13 | box_only=box_only, 14 | eval_attributes=eval_attributes, 15 | logger=logger, 16 | ) 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/vg/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/vg/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/vg/__pycache__/vg_eval.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/vg/__pycache__/vg_eval.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from .voc_eval import do_voc_evaluation 4 | 5 | 6 | def voc_evaluation(dataset, predictions, output_folder, box_only, **_): 7 | logger = logging.getLogger("maskrcnn_benchmark.inference") 8 | if box_only: 9 | logger.warning("voc evaluation doesn't support box_only, ignored.") 10 | logger.info("performing voc evaluation, ignored iou_types.") 11 | return do_voc_evaluation( 12 | dataset=dataset, 13 | predictions=predictions, 14 | output_folder=output_folder, 15 | logger=logger, 16 | ) 17 | 
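For orientation, the per-dataset helpers above (coco_evaluation, od_to_grounding_evaluation, voc_evaluation) are all reached through the generic evaluate() entry point in evaluation/__init__.py. The snippet below is a minimal usage sketch, not code from the repo: run_eval is a hypothetical wrapper, and the dataset and predictions objects are assumed to come from the repo's data loading and inference pipeline.

from maskrcnn_benchmark.data.datasets.evaluation import evaluate

def run_eval(dataset, predictions, output_folder="eval_out"):
    # dataset: one of the types evaluate() dispatches on (COCODataset, PascalVOCDataset, ...)
    # predictions: list[BoxList], one entry per image, in dataset order
    return evaluate(
        dataset=dataset,
        predictions=predictions,
        output_folder=output_folder,
        box_only=False,
        iou_types=("bbox",),
    )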
-------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/voc/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/evaluation/voc/__pycache__/voc_eval.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/datasets/evaluation/voc/__pycache__/voc_eval.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/flickr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data as data 4 | from maskrcnn_benchmark.data.datasets.modulated_coco import ModulatedDataset 5 | 6 | 7 | class FlickrDataset(ModulatedDataset): 8 | pass 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/gqa.py: -------------------------------------------------------------------------------- 1 | import json 2 | from pathlib import Path 3 | 4 | import torch 5 | import torchvision 6 | 7 | from .modulated_coco import ConvertCocoPolysToMask, ModulatedDataset 8 | 9 | 10 | class GQADataset(ModulatedDataset): 11 | pass 12 | 13 | 14 | class GQAQuestionAnswering(torchvision.datasets.CocoDetection): 15 | def __init__(self, img_folder, ann_file, transforms, return_masks, return_tokens, tokenizer, ann_folder): 16 | super(GQAQuestionAnswering, self).__init__(img_folder, ann_file) 17 | self._transforms = transforms 18 | self.prepare = ConvertCocoPolysToMask(return_masks, return_tokens, tokenizer=tokenizer) 19 | with open(ann_folder / "gqa_answer2id.json", "r") as f: 20 | self.answer2id = json.load(f) 21 | with open(ann_folder / "gqa_answer2id_by_type.json", "r") as f: 22 | self.answer2id_by_type = json.load(f) 23 | self.type2id = {"obj": 0, "attr": 1, "rel": 2, "global": 3, "cat": 4} 24 | 25 | def __getitem__(self, idx): 26 | img, target = super(GQAQuestionAnswering, self).__getitem__(idx) 27 | image_id = self.ids[idx] 28 | coco_img = self.coco.loadImgs(image_id)[0] 29 | caption = coco_img["caption"] 30 | dataset_name = coco_img["dataset_name"] 31 | questionId = coco_img["questionId"] 32 | target = {"image_id": image_id, "annotations": target, "caption": caption} 33 | img, target = self.prepare(img, target) 34 | if self._transforms is not None: 35 | img, target = self._transforms(img, target) 36 | target["dataset_name"] = dataset_name 37 | target["questionId"] = questionId 38 | 39 | if coco_img["answer"] not in self.answer2id: 40 | answer = "unknown" 41 | else: 42 | answer = coco_img["answer"] 43 | 44 | target["answer"] = torch.as_tensor(self.answer2id[answer], dtype=torch.long) 45 | target["answer_type"] = torch.as_tensor(self.type2id[coco_img["question_type"]], dtype=torch.long) 46 | 47 | if coco_img["answer"] not in self.answer2id_by_type["answer_attr"]: 48 | answer = "unknown" 49 | else: 50 | answer = coco_img["answer"] 51 | target["answer_attr"] = torch.as_tensor( 52 | self.answer2id_by_type["answer_attr"][answer] if 
coco_img["question_type"] == "attr" else -100, 53 | dtype=torch.long, 54 | ) 55 | 56 | if coco_img["answer"] not in self.answer2id_by_type["answer_global"]: 57 | answer = "unknown" 58 | else: 59 | answer = coco_img["answer"] 60 | target["answer_global"] = torch.as_tensor( 61 | self.answer2id_by_type["answer_global"][answer] if coco_img["question_type"] == "global" else -100, 62 | dtype=torch.long, 63 | ) 64 | 65 | if coco_img["answer"] not in self.answer2id_by_type["answer_rel"]: 66 | answer = "unknown" 67 | else: 68 | answer = coco_img["answer"] 69 | target["answer_rel"] = torch.as_tensor( 70 | self.answer2id_by_type["answer_rel"][answer] if coco_img["question_type"] == "rel" else -100, 71 | dtype=torch.long, 72 | ) 73 | 74 | if coco_img["answer"] not in self.answer2id_by_type["answer_cat"]: 75 | answer = "unknown" 76 | else: 77 | answer = coco_img["answer"] 78 | target["answer_cat"] = torch.as_tensor( 79 | self.answer2id_by_type["answer_cat"][answer] if coco_img["question_type"] == "cat" else -100, 80 | dtype=torch.long, 81 | ) 82 | 83 | if coco_img["answer"] not in self.answer2id_by_type["answer_obj"]: 84 | answer = "unknown" 85 | else: 86 | answer = coco_img["answer"] 87 | target["answer_obj"] = torch.as_tensor( 88 | self.answer2id_by_type["answer_obj"][answer] if coco_img["question_type"] == "obj" else -100, 89 | dtype=torch.long, 90 | ) 91 | return img, target 92 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/imagenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path 3 | import json 4 | from PIL import Image 5 | 6 | import torch.utils.data as data 7 | 8 | def pil_loader(path): 9 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 10 | with open(path, 'rb') as f: 11 | img = Image.open(f) 12 | return img.convert('RGB') 13 | 14 | class ImageNet(data.Dataset): 15 | """ ImageNet 16 | 17 | Args: 18 | root (string): Root directory where images are downloaded to. 19 | annFile (string): Path to json annotation file. 20 | transform (callable, optional): A function/transform that takes in an PIL image 21 | and returns a transformed version. E.g, ``transforms.ToTensor`` 22 | """ 23 | 24 | def __init__(self, ann_file, root, remove_images_without_annotations=None, transforms=None): 25 | 26 | 27 | self.root = root 28 | self.transform = transforms 29 | 30 | meta_file = os.path.join(root, ann_file) 31 | assert os.path.exists(meta_file), 'meta file %s under root %s not found' % (os.path.basename(meta_file), root) 32 | 33 | with open(meta_file, 'r') as f: 34 | meta = json.load(f) 35 | 36 | self.classes = meta['classes'] 37 | self.class_to_idx = meta['class_to_idx'] 38 | self.samples = meta['samples'] 39 | self.num_sample = len(self.samples) 40 | self.allsamples = self.samples 41 | 42 | def select_class(self, cls): 43 | new_samples = [sample for sample in self.allsamples if sample[-1] in cls] 44 | self.samples = new_samples 45 | self.num_sample = len(self.samples) 46 | 47 | def __getitem__(self, index): 48 | """ 49 | Args: 50 | index (int): Index 51 | 52 | Returns: 53 | tuple: (sample, target) where target is class_index of the target class. 
54 | """ 55 | img_path, target = self.samples[index] 56 | sample = pil_loader(self.root + '/' + img_path) 57 | if self.transform is not None: 58 | sample = self.transform(sample) 59 | 60 | return sample, target, index 61 | 62 | def __len__(self): 63 | return len(self.samples) -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/list_dataset.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Simple dataset class that wraps a list of path names 4 | """ 5 | 6 | from PIL import Image 7 | 8 | from maskrcnn_benchmark.structures.bounding_box import BoxList 9 | 10 | 11 | class ListDataset(object): 12 | def __init__(self, image_lists, transforms=None): 13 | self.image_lists = image_lists 14 | self.transforms = transforms 15 | 16 | def __getitem__(self, item): 17 | img = Image.open(self.image_lists[item]).convert("RGB") 18 | 19 | # dummy target 20 | w, h = img.size 21 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 22 | 23 | if self.transforms is not None: 24 | img, target = self.transforms(img, target) 25 | 26 | return img, target 27 | 28 | def __len__(self): 29 | return len(self.image_lists) 30 | 31 | def get_img_info(self, item): 32 | """ 33 | Return the image dimensions for the image, without 34 | loading and pre-processing it 35 | """ 36 | pass 37 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/object365.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data as data 4 | from maskrcnn_benchmark.data.datasets.coco_dt import CocoDetectionTSV 5 | 6 | 7 | class Object365DetectionTSV(CocoDetectionTSV): 8 | pass 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/phrasecut.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | import torch.utils.data as data 4 | from maskrcnn_benchmark.data.datasets.modulated_coco import ModulatedDataset 5 | 6 | 7 | class PhrasecutDetection(ModulatedDataset): 8 | pass 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/datasets/refexp.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from collections import defaultdict 3 | from pathlib import Path 4 | 5 | import torch 6 | import torch.utils.data 7 | 8 | import maskrcnn_benchmark.utils.dist as dist 9 | from maskrcnn_benchmark.layers.set_loss import generalized_box_iou 10 | 11 | from .modulated_coco import ModulatedDataset 12 | 13 | 14 | class RefExpDataset(ModulatedDataset): 15 | pass 16 | 17 | 18 | class RefExpEvaluator(object): 19 | def __init__(self, refexp_gt, iou_types, k=(1, 5, 10), thresh_iou=0.5): 20 | assert isinstance(k, (list, tuple)) 21 | refexp_gt = copy.deepcopy(refexp_gt) 22 | self.refexp_gt = refexp_gt 23 | self.iou_types = iou_types 24 | self.img_ids = self.refexp_gt.imgs.keys() 25 | self.predictions = {} 26 | self.k = k 27 | self.thresh_iou = thresh_iou 28 | 29 | def accumulate(self): 30 | pass 31 | 32 | def update(self, predictions): 33 | self.predictions.update(predictions) 34 | 35 | def synchronize_between_processes(self): 36 | all_predictions = dist.all_gather(self.predictions) 37 | 
merged_predictions = {} 38 | for p in all_predictions: 39 | merged_predictions.update(p) 40 | self.predictions = merged_predictions 41 | 42 | def summarize(self): 43 | if dist.is_main_process(): 44 | dataset2score = { 45 | "refcoco": {k: 0.0 for k in self.k}, 46 | "refcoco+": {k: 0.0 for k in self.k}, 47 | "refcocog": {k: 0.0 for k in self.k}, 48 | } 49 | dataset2count = {"refcoco": 0.0, "refcoco+": 0.0, "refcocog": 0.0} 50 | for image_id in self.img_ids: 51 | ann_ids = self.refexp_gt.getAnnIds(imgIds=image_id) 52 | assert len(ann_ids) == 1 53 | img_info = self.refexp_gt.loadImgs(image_id)[0] 54 | 55 | target = self.refexp_gt.loadAnns(ann_ids[0]) 56 | prediction = self.predictions[image_id] 57 | assert prediction is not None 58 | sorted_scores_boxes = sorted( 59 | zip(prediction["scores"].tolist(), prediction["boxes"].tolist()), reverse=True 60 | ) 61 | sorted_scores, sorted_boxes = zip(*sorted_scores_boxes) 62 | sorted_boxes = torch.cat([torch.as_tensor(x).view(1, 4) for x in sorted_boxes]) 63 | target_bbox = target[0]["bbox"] 64 | converted_bbox = [ 65 | target_bbox[0], 66 | target_bbox[1], 67 | target_bbox[2] + target_bbox[0], 68 | target_bbox[3] + target_bbox[1], 69 | ] 70 | giou = generalized_box_iou(sorted_boxes, torch.as_tensor(converted_bbox).view(-1, 4)) 71 | for k in self.k: 72 | if max(giou[:k]) >= self.thresh_iou: 73 | dataset2score[img_info["dataset_name"]][k] += 1.0 74 | dataset2count[img_info["dataset_name"]] += 1.0 75 | 76 | for key, value in dataset2score.items(): 77 | for k in self.k: 78 | try: 79 | value[k] /= dataset2count[key] 80 | except ZeroDivisionError: 81 | pass  # no samples were evaluated for this dataset split 82 | results = {} 83 | for key, value in dataset2score.items(): 84 | results[key] = sorted([v for k, v in value.items()]) 85 | print(f" Dataset: {key} - Precision @ 1, 5, 10: {results[key]} \n") 86 | 87 | return results 88 | return None 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | from .distributed import DistributedSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 5 | 6 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/samplers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__pycache__/distributed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/samplers/__pycache__/distributed.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__pycache__/grouped_batch_sampler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/samplers/__pycache__/grouped_batch_sampler.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/__pycache__/iteration_based_batch_sampler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/samplers/__pycache__/iteration_based_batch_sampler.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 3 | # FIXME remove this once c10d fixes the bug it has 4 | import math 5 | import torch 6 | import torch.distributed as dist 7 | from torch.utils.data.sampler import Sampler 8 | 9 | from maskrcnn_benchmark.utils.comm import shared_random_seed 10 | 11 | 12 | class DistributedSampler(Sampler): 13 | """Sampler that restricts data loading to a subset of the dataset. 14 | It is especially useful in conjunction with 15 | :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each 16 | process can pass a DistributedSampler instance as a DataLoader sampler, 17 | and load a subset of the original dataset that is exclusive to it. 18 | .. note:: 19 | Dataset is assumed to be of constant size. 20 | Arguments: 21 | dataset: Dataset used for sampling. 22 | num_replicas (optional): Number of processes participating in 23 | distributed training. 24 | rank (optional): Rank of the current process within num_replicas. 
25 | """ 26 | 27 | def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True, use_random=False): 28 | if num_replicas is None: 29 | if not dist.is_available(): 30 | raise RuntimeError("Requires distributed package to be available") 31 | num_replicas = dist.get_world_size() 32 | if rank is None: 33 | if not dist.is_available(): 34 | raise RuntimeError("Requires distributed package to be available") 35 | rank = dist.get_rank() 36 | self.dataset = dataset 37 | self.num_replicas = num_replicas 38 | self.rank = rank 39 | self.epoch = 0 40 | self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) 41 | self.total_size = self.num_samples * self.num_replicas 42 | self.shuffle = shuffle 43 | self.use_random = use_random 44 | 45 | def __iter__(self): 46 | if self.shuffle: 47 | # deterministically shuffle based on epoch 48 | _seed = self.epoch 49 | if self.use_random: 50 | _seed = int(shared_random_seed()) 51 | g = torch.Generator() 52 | g.manual_seed(_seed) 53 | indices = torch.randperm(len(self.dataset), generator=g).tolist() 54 | else: 55 | indices = torch.arange(len(self.dataset)).tolist() 56 | 57 | # add extra samples to make it evenly divisible 58 | indices += indices[: (self.total_size - len(indices))] 59 | assert len(indices) == self.total_size 60 | 61 | # subsample 62 | offset = self.num_samples * self.rank 63 | indices = indices[offset : offset + self.num_samples] 64 | assert len(indices) == self.num_samples 65 | 66 | return iter(indices) 67 | 68 | def __len__(self): 69 | return self.num_samples 70 | 71 | def set_epoch(self, epoch): 72 | self.epoch = epoch 73 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/samplers/iteration_based_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch.utils.data.sampler import BatchSampler 3 | 4 | 5 | class IterationBasedBatchSampler(BatchSampler): 6 | """ 7 | Wraps a BatchSampler, resampling from it until 8 | a specified number of iterations have been sampled 9 | """ 10 | 11 | def __init__(self, batch_sampler, num_iterations, start_iter=0): 12 | self.batch_sampler = batch_sampler 13 | self.num_iterations = num_iterations 14 | self.start_iter = start_iter 15 | 16 | def __iter__(self): 17 | iteration = self.start_iter 18 | while iteration <= self.num_iterations: 19 | # if the underlying sampler has a set_epoch method, like 20 | # DistributedSampler, used for making each process see 21 | # a different split of the dataset, then set it 22 | if hasattr(self.batch_sampler.sampler, "set_epoch"): 23 | self.batch_sampler.sampler.set_epoch(iteration) 24 | for batch in self.batch_sampler: 25 | iteration += 1 26 | if iteration > self.num_iterations: 27 | break 28 | yield batch 29 | 30 | def __len__(self): 31 | return self.num_iterations 32 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .transforms import Compose 3 | from .transforms import Resize 4 | from .transforms import RandomHorizontalFlip 5 | from .transforms import ToTensor 6 | from .transforms import Normalize 7 | 8 | from .build import build_transforms 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/transforms/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__pycache__/build.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/transforms/__pycache__/build.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/__pycache__/transforms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/data/transforms/__pycache__/transforms.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from . import transforms as T 3 | 4 | 5 | def build_transforms(cfg, is_train=True): 6 | if is_train: 7 | if len(cfg.AUGMENT.MULT_MIN_SIZE_TRAIN) > 0: 8 | min_size = cfg.AUGMENT.MULT_MIN_SIZE_TRAIN 9 | else: 10 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 11 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 12 | flip_horizontal_prob = cfg.AUGMENT.FLIP_PROB_TRAIN 13 | flip_vertical_prob = cfg.AUGMENT.VERTICAL_FLIP_PROB_TRAIN 14 | brightness = cfg.AUGMENT.BRIGHTNESS 15 | contrast = cfg.AUGMENT.CONTRAST 16 | saturation = cfg.AUGMENT.SATURATION 17 | hue = cfg.AUGMENT.HUE 18 | 19 | crop_prob = cfg.AUGMENT.CROP_PROB 20 | min_ious = cfg.AUGMENT.CROP_MIN_IOUS 21 | min_crop_size = cfg.AUGMENT.CROP_MIN_SIZE 22 | 23 | else: 24 | min_size = cfg.INPUT.MIN_SIZE_TEST 25 | max_size = cfg.INPUT.MAX_SIZE_TEST 26 | flip_horizontal_prob = 0.0 27 | 28 | fix_res = cfg.INPUT.FIX_RES 29 | if cfg.INPUT.FORMAT != '':  # compare by value; "is not" on a string literal checks identity and is unreliable 30 | input_format = cfg.INPUT.FORMAT 31 | elif cfg.INPUT.TO_BGR255: 32 | input_format = 'bgr255'  # assumes FORMAT or TO_BGR255 is set; otherwise input_format is undefined below 33 | normalize_transform = T.Normalize( 34 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, format=input_format 35 | ) 36 | 37 | transform = T.Compose( 38 | [ 39 | T.Resize(min_size, max_size, restrict=fix_res), 40 | T.RandomHorizontalFlip(flip_horizontal_prob), 41 | T.ToTensor(), 42 | normalize_transform, 43 | ] 44 | ) 45 | return transform 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/engine/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/engine/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__pycache__/inference_dfuc_vqa.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/engine/__pycache__/inference_dfuc_vqa.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__pycache__/inference_isbi_vqa.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/engine/__pycache__/inference_isbi_vqa.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/engine/__pycache__/inference_vqa.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/engine/__pycache__/inference_vqa.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | import torch 3 | 4 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm2d 5 | from .misc import Conv2d, _NewEmptyTensorOp 6 | from .misc import ConvTranspose2d 7 | from .misc import DFConv2d 8 | from .misc import interpolate 9 | from .misc import Scale 10 | from .nms import nms 11 | from .nms import ml_nms 12 | from .nms import soft_nms 13 | from .roi_align import ROIAlign 14 | from .roi_align import roi_align 15 | from .roi_align import ROIAlignV2 16 | from .roi_pool import ROIPool 17 | from .roi_pool import roi_pool 18 | from .smooth_l1_loss import smooth_l1_loss 19 | from .sigmoid_focal_loss import SigmoidFocalLoss, TokenSigmoidFocalLoss 20 | from .iou_loss import IOULoss, IOUWHLoss 21 | from .deform_conv import DeformConv, ModulatedDeformConv 22 | from .dropblock import DropBlock2D, DropBlock3D 23 | from .evonorm import EvoNorm2d 24 | from .dyrelu import DYReLU, swish 25 | from .se import SELayer, SEBlock 26 | from .dyhead import DyHead 27 | from .set_loss import HungarianMatcher, SetCriterion 28 | 29 | __all__ = ["nms", "ml_nms", "soft_nms", "roi_align", "ROIAlign", "roi_pool", "ROIPool", 30 | "smooth_l1_loss", "Conv2d", "ConvTranspose2d", "interpolate", "swish", 31 | "FrozenBatchNorm2d", "NaiveSyncBatchNorm2d", "SigmoidFocalLoss", "TokenSigmoidFocalLoss", "IOULoss", 32 | "IOUWHLoss", "Scale", "DeformConv", "ModulatedDeformConv", "DyHead", 33 | "DropBlock2D", "DropBlock3D", "EvoNorm2d", "DYReLU", "SELayer", "SEBlock", 34 | "HungarianMatcher", "SetCriterion", "ROIAlignV2", "_NewEmptyTensorOp"] 35 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/batch_norm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/batch_norm.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/deform_conv.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/deform_conv.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/dropblock.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/dropblock.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/dyhead.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/dyhead.cpython-38.pyc -------------------------------------------------------------------------------- 
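The __all__ list above defines the public surface of maskrcnn_benchmark.layers. Below is a brief, hedged usage sketch for two of the exported layers; the tensor shapes are illustrative only, and FrozenBatchNorm2d is assumed to take the channel count as its sole required argument, as in upstream maskrcnn-benchmark.

import torch
from maskrcnn_benchmark.layers import nms, FrozenBatchNorm2d

# nms resolves to torchvision.ops.nms when torchvision is importable (see layers/nms.py)
boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, 0.5)  # indices of boxes kept after IoU-0.5 suppression

frozen_bn = FrozenBatchNorm2d(64)  # batch norm with fixed statistics, common when fine-tuning backbones
out = frozen_bn(torch.randn(2, 64, 32, 32))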
/maskrcnn_benchmark/layers/__pycache__/dyrelu.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/dyrelu.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/evonorm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/evonorm.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/iou_loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/iou_loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/misc.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/misc.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/nms.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/nms.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/roi_align.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/roi_align.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/roi_pool.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/roi_pool.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/se.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/se.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/set_loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/set_loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/sigmoid_focal_loss.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/sigmoid_focal_loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/__pycache__/smooth_l1_loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/layers/__pycache__/smooth_l1_loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/evonorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class EvoNorm2d(nn.Module): 6 | __constants__ = ['num_features', 'eps', 'nonlinearity'] 7 | 8 | def __init__(self, num_features, eps=1e-5, nonlinearity=True, group=32): 9 | super(EvoNorm2d, self).__init__() 10 | 11 | self.num_features = num_features 12 | self.eps = eps 13 | self.nonlinearity = nonlinearity 14 | self.group = group 15 | 16 | self.weight = nn.Parameter(torch.Tensor(1, num_features, 1, 1)) 17 | self.bias = nn.Parameter(torch.Tensor(1, num_features, 1, 1)) 18 | if self.nonlinearity: 19 | self.v = nn.Parameter(torch.Tensor(1, num_features, 1, 1)) 20 | 21 | self.reset_parameters() 22 | 23 | def reset_parameters(self): 24 | nn.init.ones_(self.weight) 25 | nn.init.zeros_(self.bias) 26 | if self.nonlinearity: 27 | nn.init.ones_(self.v) 28 | 29 | def group_std(self, x, groups=32): 30 | N, C, H, W = x.shape 31 | x = torch.reshape(x, (N, groups, C // groups, H, W)) 32 | std = torch.std(x, (3, 4), keepdim=True) 33 | return torch.reshape(std + self.eps, (N, C, 1, 1)) 34 | 35 | def forward(self, x): 36 | if self.nonlinearity: 37 | num = x * torch.sigmoid(self.v * x) 38 | return num / self.group_std(x, self.group) * self.weight + self.bias 39 | else: 40 | return x * self.weight + self.bias -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | def __init__(self, loss_type="iou"): 7 | super(IOULoss, self).__init__() 8 | self.loss_type = loss_type 9 | 10 | def forward(self, pred, target, weight=None): 11 | pred_left = pred[:, 0] 12 | pred_top = pred[:, 1] 13 | pred_right = pred[:, 2] 14 | pred_bottom = pred[:, 3] 15 | 16 | target_left = target[:, 0] 17 | target_top = target[:, 1] 18 | target_right = target[:, 2] 19 | target_bottom = target[:, 3] 20 | 21 | target_area = (target_left + target_right) * \ 22 | (target_top + target_bottom) 23 | pred_area = (pred_left + pred_right) * \ 24 | (pred_top + pred_bottom) 25 | 26 | w_intersect = torch.min(pred_left, target_left) + torch.min(pred_right, target_right) 27 | g_w_intersect = torch.max(pred_left, target_left) + torch.max( 28 | pred_right, target_right) 29 | h_intersect = torch.min(pred_bottom, target_bottom) + torch.min(pred_top, target_top) 30 | g_h_intersect = torch.max(pred_bottom, target_bottom) + torch.max(pred_top, target_top) 31 | ac_union = g_w_intersect * g_h_intersect + 1e-7 32 | area_intersect = w_intersect * h_intersect 33 | area_union = target_area + pred_area - area_intersect 34 | ious = (area_intersect + 1.0) / (area_union + 1.0) 35 | gious = ious - (ac_union - area_union) / ac_union 36 | if self.loss_type ==
'iou': 37 | losses = -torch.log(ious) 38 | elif self.loss_type == 'linear_iou': 39 | losses = 1 - ious 40 | elif self.loss_type == 'giou': 41 | losses = 1 - gious 42 | else: 43 | raise NotImplementedError 44 | 45 | if weight is not None and weight.sum() > 0: 46 | return (losses * weight).sum() 47 | else: 48 | assert losses.numel() != 0 49 | return losses.sum() 50 | 51 | 52 | class IOUWHLoss(nn.Module): # used for anchor guiding 53 | def __init__(self, reduction='none'): 54 | super(IOUWHLoss, self).__init__() 55 | self.reduction = reduction 56 | 57 | def forward(self, pred, target): 58 | orig_shape = pred.shape 59 | pred = pred.view(-1, 4) 60 | target = target.view(-1, 4) 61 | target[:, :2] = 0  # note: zeroes the caller's tensor in place 62 | tl = torch.max((target[:, :2] - pred[:, 2:] / 2), 63 | (target[:, :2] - target[:, 2:] / 2)) 64 | 65 | br = torch.min((target[:, :2] + pred[:, 2:] / 2), 66 | (target[:, :2] + target[:, 2:] / 2)) 67 | 68 | area_p = torch.prod(pred[:, 2:], 1) 69 | area_g = torch.prod(target[:, 2:], 1) 70 | 71 | en = (tl < br).type(tl.type()).prod(dim=1) 72 | area_i = torch.prod(br - tl, 1) * en 73 | U = area_p + area_g - area_i + 1e-16 74 | iou = area_i / U 75 | 76 | loss = 1 - iou ** 2 77 | if self.reduction == 'mean': 78 | loss = loss.mean() 79 | elif self.reduction == 'sum': 80 | loss = loss.sum() 81 | 82 | return loss 83 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from maskrcnn_benchmark import _C 3 | 4 | try: 5 | import torchvision 6 | from torchvision.ops import nms 7 | except ImportError: 8 | nms = _C.nms 9 | 10 | ml_nms = _C.ml_nms 11 | soft_nms = _C.soft_nms 12 | 13 | # nms.__doc__ = """ 14 | # This function performs Non-maximum suppression""" 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | class _ROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | output = _C.roi_align_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 20 | ) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None 44 | 45 | try: 46 | import torchvision 47 | from torchvision.ops import roi_align 48 | except ImportError: 49 | roi_align = _ROIAlign.apply 50 | 51 | class ROIAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio): 53 | super(ROIAlign, self).__init__() 54 | self.output_size = output_size 55 | self.spatial_scale = spatial_scale 56 | self.sampling_ratio = sampling_ratio 57 | 58 | def forward(self, input, rois): 59 | return roi_align( 60 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 61 | ) 62 | 63 | def __repr__(self): 64 | tmpstr = self.__class__.__name__ + "(" 65 | tmpstr += "output_size=" + str(self.output_size) 66 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 67 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 68 | tmpstr += ")" 69 | return tmpstr 70 | 71 | class ROIAlignV2(nn.Module): 72 | def __init__(self, output_size, spatial_scale, sampling_ratio): 73 | super(ROIAlignV2, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | 78 | def forward(self, input, rois): 79 | return torchvision.ops.roi_align( 80 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, aligned=True 81 | ) 82 | 83 | def __repr__(self): 84 | tmpstr = self.__class__.__name__ + "(" 85 | tmpstr += "output_size=" + str(self.output_size) 86 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 87 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 88 | tmpstr += ")" 89 | return tmpstr 90 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | import torch 3 | from torch import nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from torch.nn.modules.utils import _pair 7 | 8 | from maskrcnn_benchmark import _C 9 | 10 | 11 | class _ROIPool(Function): 12 | @staticmethod 13 | def forward(ctx, input, roi, output_size, spatial_scale): 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.input_shape = input.size() 17 | output, argmax = _C.roi_pool_forward( 18 | input, roi, spatial_scale, output_size[0], output_size[1] 19 | ) 20 | ctx.save_for_backward(input, roi, argmax) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | input, rois, argmax = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | bs, ch, h, w = ctx.input_shape 30 | grad_input = _C.roi_pool_backward( 31 | grad_output, 32 | input, 33 | rois, 34 | argmax, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | ) 43 | return grad_input, None, None, None 44 | 45 | 46 | roi_pool = _ROIPool.apply 47 | 48 | 49 | class ROIPool(nn.Module): 50 | def __init__(self, output_size, spatial_scale): 51 | super(ROIPool, self).__init__() 52 | self.output_size = output_size 53 | self.spatial_scale = spatial_scale 54 | 55 | def forward(self, input, rois): 56 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 57 | 58 | def __repr__(self): 59 | tmpstr = self.__class__.__name__ + "(" 60 | tmpstr += "output_size=" + str(self.output_size) 61 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 62 | tmpstr += ")" 63 | return tmpstr 64 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/se.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | 4 | class SELayer(nn.Module): 5 | def __init__(self, channel, reduction=16): 6 | super(SELayer, self).__init__() 7 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 8 | self.fc = nn.Sequential( 9 | nn.Linear(channel, channel // reduction, bias=False), 10 | nn.ReLU(inplace=True), 11 | nn.Linear(channel // reduction, channel, bias=False), 12 | nn.Sigmoid() 13 | ) 14 | 15 | def forward(self, x): 16 | b, c, _, _ = x.size() 17 | y = self.avg_pool(x).view(b, c) 18 | y = self.fc(y).view(b, c, 1, 1) 19 | return x * y.expand_as(x) 20 | 21 | 22 | class SEBlock(nn.Module): 23 | def __init__(self, channels, reduction=16, 24 | use_conv=True, mid_activation=nn.ReLU(inplace=True), out_activation=nn.Sigmoid()): 25 | super(SEBlock, self).__init__() 26 | self.use_conv = use_conv 27 | mid_channels = channels // reduction 28 | 29 | self.pool = nn.AdaptiveAvgPool2d(output_size=1) 30 | if use_conv: 31 | self.conv1 = nn.Conv2d(channels, mid_channels, kernel_size=1, bias=True) 32 | else: 33 | self.fc1 = nn.Linear(channels, mid_channels) 34 | self.activ = mid_activation 35 | if use_conv: 36 | self.conv2 = nn.Conv2d(mid_channels, channels, kernel_size=1, bias=True) 37 | else: 38 | self.fc2 = nn.Linear(mid_channels, channels) 39 | self.sigmoid = out_activation 40 | 41 | def forward(self, x): 42 | w = self.pool(x) 43 | if not self.use_conv: 44 | w = w.view(x.size(0), -1) 45 | w = self.conv1(w) if self.use_conv else self.fc1(w) 46 | w = self.activ(w) 47 | w = self.conv2(w) if self.use_conv else self.fc2(w) 48 | w = self.sigmoid(w) 49 | if not self.use_conv: 50 | w = w.unsqueeze(2).unsqueeze(3) 51 | x = x * w 52 | 
return x -------------------------------------------------------------------------------- /maskrcnn_benchmark/layers/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | # TODO maybe push this to nn? 6 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True): 7 | """ 8 | very similar to the smooth_l1_loss from pytorch, but with 9 | the extra beta parameter 10 | """ 11 | n = torch.abs(input - target) 12 | cond = n < beta 13 | loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta) 14 | if size_average: 15 | return loss.mean() 16 | return loss.sum() 17 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/.DS_Store -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/balanced_positive_negative_sampler.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/balanced_positive_negative_sampler.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/box_coder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/box_coder.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/make_layers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/make_layers.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/matcher.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/matcher.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/poolers.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/poolers.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/registry.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/registry.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/bifpn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/bifpn.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/efficientdet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/efficientdet.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/efficientnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/efficientnet.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/fbnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/fbnet.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/fpn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/fpn.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/resnet.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/resnet.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/swint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/swint.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/swint_v2.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/swint_v2.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/swint_v2_vl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/swint_v2_vl.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/__pycache__/swint_vl.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/backbone/__pycache__/swint_vl.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/mixer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class MixedOperationRandom(nn.Module): 5 | def __init__(self, search_ops): 6 | super(MixedOperationRandom, self).__init__() 7 | self.ops = nn.ModuleList(search_ops) 8 | self.num_ops = len(search_ops) 9 | 10 | def forward(self, x, x_path=None): 11 | if x_path is None: 12 | output = sum(op(x) for op in self.ops) / self.num_ops 13 | else: 14 | assert isinstance(x_path, (int, float)) and 0 <= x_path < self.num_ops or isinstance(x_path, torch.Tensor) 15 | if isinstance(x_path, (int, float)): 16 | x_path = int(x_path) 17 | assert 0 <= x_path < self.num_ops 18 | output = self.ops[x_path](x) 19 | elif isinstance(x_path, torch.Tensor): 20 | assert x_path.size(0) == x.size(0), 'batch_size should match length of y_idx' 21 | output = torch.cat([self.ops[int(x_path[i].item())](x.narrow(0, i, 1)) 22 | for i in range(x.size(0))], dim=0) 23 | return output -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/backbone/ops.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | def conv7x7(in_planes, out_planes, stride=1, groups=1, dilation=1): 8 | """7x7 convolution with padding""" 9 | return nn.Conv2d(in_planes, out_planes, kernel_size=7, stride=stride, 10 | padding=3*dilation, groups=groups, bias=False, dilation=dilation) 11 | 12 | 13 | def 
conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1): 14 | """5x5 convolution with padding""" 15 | return nn.Conv2d(in_planes, out_planes, kernel_size=5, stride=stride, 16 | padding=2*dilation, groups=groups, bias=False, dilation=dilation) 17 | 18 | 19 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 20 | """3x3 convolution with padding""" 21 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 22 | padding=dilation, groups=groups, bias=False, dilation=dilation) 23 | 24 | 25 | def conv1x1(in_planes, out_planes, stride=1): 26 | """1x1 convolution""" 27 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 28 | 29 | 30 | def maxpool(**kwargs): 31 | return nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 32 | 33 | 34 | def avgpool(**kwargs): 35 | return nn.AvgPool2d(kernel_size=3, stride=2, padding=1) 36 | 37 | def dropout(prob): 38 | return nn.Dropout(prob) 39 | 40 | 41 | conv3x3sep = lambda i, o, s=1: conv3x3(i, o, s, groups=i) 42 | conv3x3g2 = lambda i, o, s=1: conv3x3(i, o, s, groups=2) 43 | conv3x3g4 = lambda i, o, s=1: conv3x3(i, o, s, groups=4) 44 | conv3x3g8 = lambda i, o, s=1: conv3x3(i, o, s, groups=8) 45 | conv3x3dw = lambda i, o, s=1: conv3x3(i, o, s, groups=i) 46 | 47 | conv3x3d2 = lambda i, o, s=1: conv3x3(i, o, s, dilation=2) 48 | conv3x3d3 = lambda i, o, s=1: conv3x3(i, o, s, dilation=3) 49 | conv3x3d4 = lambda i, o, s=1: conv3x3(i, o, s, dilation=4) 50 | 51 | 52 | conv5x5sep = lambda i, o, s=1: conv5x5(i, o, s, groups=i) 53 | conv5x5g2 = lambda i, o, s=1: conv5x5(i, o, s, groups=2) 54 | conv5x5g4 = lambda i, o, s=1: conv5x5(i, o, s, groups=4) 55 | conv5x5g8 = lambda i, o, s=1: conv5x5(i, o, s, groups=8) 56 | conv5x5dw = lambda i, o, s=1: conv5x5(i, o, s, groups=i) 57 | 58 | 59 | conv5x5d2 = lambda i, o, s=1: conv5x5(i, o, s, dilation=2) 60 | conv5x5d3 = lambda i, o, s=1: conv5x5(i, o, s, dilation=3) 61 | conv5x5d4 = lambda i, o, s=1: conv5x5(i, o, s, dilation=4) 62 | 63 | conv7x7sep = lambda i, o, s=1: conv7x7(i, o, s, groups=i) 64 | conv7x7g2 = lambda i, o, s=1: conv7x7(i, o, s, groups=2) 65 | conv7x7g4 = lambda i, o, s=1: conv7x7(i, o, s, groups=4) 66 | conv7x7g8 = lambda i, o, s=1: conv7x7(i, o, s, groups=8) 67 | conv7x7dw = lambda i, o, s=1: conv7x7(i, o, s, groups=i) 68 | 69 | conv7x7d2 = lambda i, o, s=1: conv7x7(i, o, s, dilation=2) 70 | conv7x7d3 = lambda i, o, s=1: conv7x7(i, o, s, dilation=3) 71 | conv7x7d4 = lambda i, o, s=1: conv7x7(i, o, s, dilation=4) -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/balanced_positive_negative_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | class BalancedPositiveNegativeSampler(object): 6 | """ 7 | This class samples batches, ensuring that they contain a fixed proportion of positives 8 | """ 9 | 10 | def __init__(self, batch_size_per_image, positive_fraction): 11 | """ 12 | Arguments: 13 | batch_size_per_image (int): number of elements to be selected per image 14 | positive_fraction (float): percentage of positive elements per batch 15 | """ 16 | self.batch_size_per_image = batch_size_per_image 17 | self.positive_fraction = positive_fraction 18 | 19 | def __call__(self, matched_idxs): 20 | """ 21 | Arguments: 22 | matched_idxs: list of tensors containing -1, 0 or positive values. 23 | Each tensor corresponds to a specific image.
24 | -1 values are ignored, 0 are considered as negatives and > 0 as 25 | positives. 26 | 27 | Returns: 28 | pos_idx (list[tensor]) 29 | neg_idx (list[tensor]) 30 | 31 | Returns two lists of binary masks for each image. 32 | The first list contains the positive elements that were selected, 33 | and the second list contains the negative examples. 34 | """ 35 | pos_idx = [] 36 | neg_idx = [] 37 | for matched_idxs_per_image in matched_idxs: 38 | positive = torch.nonzero(matched_idxs_per_image >= 1).squeeze(1) 39 | negative = torch.nonzero(matched_idxs_per_image == 0).squeeze(1) 40 | 41 | num_pos = int(self.batch_size_per_image * self.positive_fraction) 42 | # protect against not enough positive examples 43 | num_pos = min(positive.numel(), num_pos) 44 | num_neg = self.batch_size_per_image - num_pos 45 | # protect against not enough negative examples 46 | num_neg = min(negative.numel(), num_neg) 47 | 48 | # randomly select positive and negative examples 49 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 50 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 51 | 52 | pos_idx_per_image = positive[perm1] 53 | neg_idx_per_image = negative[perm2] 54 | 55 | # create binary mask from indices 56 | pos_idx_per_image_mask = torch.zeros_like( 57 | matched_idxs_per_image, dtype=torch.bool 58 | ) 59 | neg_idx_per_image_mask = torch.zeros_like( 60 | matched_idxs_per_image, dtype=torch.bool 61 | ) 62 | pos_idx_per_image_mask[pos_idx_per_image] = 1 63 | neg_idx_per_image_mask[neg_idx_per_image] = 1 64 | 65 | pos_idx.append(pos_idx_per_image_mask) 66 | neg_idx.append(neg_idx_per_image_mask) 67 | 68 | return pos_idx, neg_idx 69 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/box_coder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import math 3 | 4 | import torch 5 | 6 | 7 | class BoxCoder(object): 8 | """ 9 | This class encodes and decodes a set of bounding boxes into 10 | the representation used for training the regressors. 11 | """ 12 | 13 | def __init__(self, weights, bbox_xform_clip=math.log(1000.
/ 16)): 14 | """ 15 | Arguments: 16 | weights (4-element tuple) 17 | bbox_xform_clip (float) 18 | """ 19 | self.weights = weights 20 | self.bbox_xform_clip = bbox_xform_clip 21 | 22 | def encode(self, reference_boxes, proposals): 23 | """ 24 | Encode a set of proposals with respect to some 25 | reference boxes 26 | 27 | Arguments: 28 | reference_boxes (Tensor): reference boxes 29 | proposals (Tensor): boxes to be encoded 30 | """ 31 | 32 | TO_REMOVE = 1 # TODO remove 33 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 34 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 35 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 36 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 37 | 38 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 39 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 40 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 41 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 42 | 43 | wx, wy, ww, wh = self.weights 44 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 45 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 46 | targets_dw = ww * torch.log(gt_widths / ex_widths) 47 | targets_dh = wh * torch.log(gt_heights / ex_heights) 48 | 49 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 50 | return targets 51 | 52 | def decode(self, rel_codes, boxes): 53 | """ 54 | From a set of original boxes and encoded relative box offsets, 55 | get the decoded boxes. 56 | 57 | Arguments: 58 | rel_codes (Tensor): encoded boxes 59 | boxes (Tensor): reference boxes. 60 | """ 61 | 62 | boxes = boxes.to(rel_codes.dtype) 63 | 64 | TO_REMOVE = 1 # TODO remove 65 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 66 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 67 | ctr_x = boxes[:, 0] + 0.5 * widths 68 | ctr_y = boxes[:, 1] + 0.5 * heights 69 | 70 | wx, wy, ww, wh = self.weights 71 | dx = rel_codes[:, 0::4] / wx 72 | dy = rel_codes[:, 1::4] / wy 73 | dw = rel_codes[:, 2::4] / ww 74 | dh = rel_codes[:, 3::4] / wh 75 | 76 | # Prevent sending too large values into torch.exp() 77 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 78 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 79 | 80 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 81 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 82 | pred_w = torch.exp(dw) * widths[:, None] 83 | pred_h = torch.exp(dh) * heights[:, None] 84 | 85 | pred_boxes = torch.zeros_like(rel_codes) 86 | # x1 87 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 88 | # y1 89 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 90 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 91 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 92 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 93 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 94 | 95 | return pred_boxes 96 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_rcnn import GeneralizedRCNN 2 | from .generalized_vl_rcnn import GeneralizedVLRCNN 3 | 4 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN, 5 | "GeneralizedVLRCNN": GeneralizedVLRCNN 6 | } 7 | 8 | 9 | def build_detection_model(cfg): 10 | meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] 11 | return meta_arch(cfg) 12 | 
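A note on `BoxCoder` in box_coder.py above: `encode` and `decode` are exact inverses under the "+1" width/height convention (the TO_REMOVE offset), so decoding the encoded targets against the same proposals reproduces the reference boxes. A minimal round-trip sketch (illustrative, not repository code; the box values are made up):

import torch
from maskrcnn_benchmark.modeling.box_coder import BoxCoder

coder = BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))
proposals = torch.tensor([[10., 10., 50., 60.]])   # proposal boxes (x1, y1, x2, y2)
gt = torch.tensor([[12., 14., 48., 58.]])          # reference (ground-truth) boxes

codes = coder.encode(gt, proposals)       # regression targets (dx, dy, dw, dh)
decoded = coder.decode(codes, proposals)  # applies the offsets back to the proposals
print(torch.allclose(decoded, gt))        # True: the round trip is exact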
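And for detector/__init__.py just above: `build_detection_model` dispatches on the `cfg.MODEL.META_ARCHITECTURE` string via the `_DETECTION_META_ARCHITECTURES` table. A minimal usage sketch (an illustrative example, not repository code; it assumes the yacs-style config API and the polyp config shipped under configs/):

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.modeling.detector import build_detection_model

cfg.merge_from_file("configs/glip_Swin_T_O365_GoldG_polyp_kvasir.yaml")
model = build_detection_model(cfg)  # returns e.g. GeneralizedVLRCNN when
                                    # cfg.MODEL.META_ARCHITECTURE == "GeneralizedVLRCNN"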
-------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/detector/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__pycache__/generalized_rcnn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/detector/__pycache__/generalized_rcnn.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/detector/__pycache__/generalized_vl_rcnn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/detector/__pycache__/generalized_vl_rcnn.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone as build_language_backbone 2 | from .build import build_tokenizer 3 | 4 | from .hfpt_tokenizer import HFPTTokenizer 5 | from .simple_tokenizer import SimpleTokenizer 6 | from .clip_model import CLIPTransformer 7 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/backbone.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/backbone.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/bert_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/bert_model.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/build.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/build.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/clip_model.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/clip_model.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/hfpt_tokenizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/hfpt_tokenizer.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/rnn_model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/rnn_model.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/simple_tokenizer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/simple_tokenizer.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/__pycache__/word_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/__pycache__/word_utils.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/backbone.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.modeling import registry 6 | from . import bert_model 7 | from . import rnn_model 8 | from . import clip_model 9 | from . 
import word_utils 10 | 11 | 12 | @registry.LANGUAGE_BACKBONES.register("bert-base-uncased") 13 | def build_bert_backbone(cfg): 14 | body = bert_model.BertEncoder(cfg) 15 | model = nn.Sequential(OrderedDict([("body", body)])) 16 | return model 17 | 18 | 19 | @registry.LANGUAGE_BACKBONES.register("roberta-base") 20 | def build_bert_backbone(cfg): 21 | body = bert_model.BertEncoder(cfg) 22 | model = nn.Sequential(OrderedDict([("body", body)])) 23 | return model 24 | 25 | 26 | @registry.LANGUAGE_BACKBONES.register("rnn") 27 | def build_rnn_backbone(cfg): 28 | body = rnn_model.RNNEnoder(cfg) 29 | model = nn.Sequential(OrderedDict([("body", body)])) 30 | return model 31 | 32 | 33 | @registry.LANGUAGE_BACKBONES.register("clip") 34 | def build_clip_backbone(cfg): 35 | body = clip_model.CLIPTransformer(cfg) 36 | model = nn.Sequential(OrderedDict([("body", body)])) 37 | return model 38 | 39 | 40 | def build_backbone(cfg): 41 | assert cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE in registry.LANGUAGE_BACKBONES, \ 42 | "cfg.MODEL.LANGUAGE_BACKBONE.TYPE: {} is not registered in registry".format( 43 | cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE 44 | ) 45 | return registry.LANGUAGE_BACKBONES[cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE](cfg) 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/bert_model.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | 6 | # from pytorch_pretrained_bert.modeling import BertModel 7 | from transformers import BertConfig, RobertaConfig, RobertaModel, BertModel 8 | 9 | 10 | class BertEncoder(nn.Module): 11 | def __init__(self, cfg): 12 | super(BertEncoder, self).__init__() 13 | self.cfg = cfg 14 | self.bert_name = cfg.MODEL.LANGUAGE_BACKBONE.MODEL_TYPE 15 | print("LANGUAGE BACKBONE USE GRADIENT CHECKPOINTING: ", self.cfg.MODEL.LANGUAGE_BACKBONE.USE_CHECKPOINT) 16 | 17 | if self.bert_name == "bert-base-uncased": 18 | config = BertConfig.from_pretrained(self.bert_name) 19 | config.gradient_checkpointing = self.cfg.MODEL.LANGUAGE_BACKBONE.USE_CHECKPOINT 20 | self.model = BertModel.from_pretrained(self.bert_name, add_pooling_layer=False, config=config) 21 | self.language_dim = 768 22 | elif self.bert_name == "roberta-base": 23 | config = RobertaConfig.from_pretrained(self.bert_name) 24 | config.gradient_checkpointing = self.cfg.MODEL.LANGUAGE_BACKBONE.USE_CHECKPOINT 25 | self.model = RobertaModel.from_pretrained(self.bert_name, add_pooling_layer=False, config=config) 26 | self.language_dim = 768 27 | else: 28 | raise NotImplementedError 29 | 30 | self.num_layers = cfg.MODEL.LANGUAGE_BACKBONE.N_LAYERS 31 | 32 | def forward(self, x): 33 | input = x["input_ids"] 34 | mask = x["attention_mask"] 35 | 36 | if self.cfg.MODEL.DYHEAD.FUSE_CONFIG.USE_DOT_PRODUCT_TOKEN_LOSS: 37 | # with padding, always 256 38 | outputs = self.model( 39 | input_ids=input, 40 | attention_mask=mask, 41 | output_hidden_states=True, 42 | ) 43 | # outputs has 13 layers, 1 input layer and 12 hidden layers 44 | encoded_layers = outputs.hidden_states[1:] 45 | features = None 46 | features = torch.stack(encoded_layers[-self.num_layers:], 1).mean(1) 47 | 48 | # language embedding has shape [len(phrase), seq_len, language_dim] 49 | features = features / self.num_layers 50 | 51 | embedded = features * mask.unsqueeze(-1).float() 52 | aggregate = embedded.sum(1) / (mask.sum(-1).unsqueeze(-1).float()) 53 | 54 | else: 55 | # 
without padding, only consider positive_tokens 56 | max_len = (input != 0).sum(1).max().item() 57 | outputs = self.model( 58 | input_ids=input[:, :max_len], 59 | attention_mask=mask[:, :max_len], 60 | output_hidden_states=True, 61 | ) 62 | # outputs has 13 layers, 1 input layer and 12 hidden layers 63 | encoded_layers = outputs.hidden_states[1:] 64 | 65 | features = None 66 | features = torch.stack(encoded_layers[-self.num_layers:], 1).mean(1) 67 | # language embedding has shape [len(phrase), seq_len, language_dim] 68 | features = features / self.num_layers 69 | 70 | embedded = features * mask[:, :max_len].unsqueeze(-1).float() 71 | aggregate = embedded.sum(1) / (mask.sum(-1).unsqueeze(-1).float()) 72 | 73 | ret = { 74 | "aggregate": aggregate, 75 | "embedded": embedded, 76 | "masks": mask, 77 | "hidden": encoded_layers[-1] 78 | } 79 | return ret 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/language_backbone/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/build.py: -------------------------------------------------------------------------------- 1 | from .simple_tokenizer import SimpleTokenizer 2 | 3 | 4 | def build_tokenizer(tokenizer_name): 5 | tokenizer = None 6 | if tokenizer_name == 'clip': 7 | tokenizer = SimpleTokenizer() 8 | elif 'hf_' in tokenizer_name: 9 | from .hfpt_tokenizer import HFPTTokenizer 10 | 11 | tokenizer = HFPTTokenizer(pt_name=tokenizer_name[3:]) 12 | elif 'hfc_' in tokenizer_name: 13 | from .hfpt_tokenizer import HFPTTokenizer 14 | tokenizer = HFPTTokenizer(pt_name=tokenizer_name[4:]) 15 | else: 16 | raise ValueError('Unknown tokenizer') 17 | 18 | return tokenizer 19 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/hfpt_tokenizer.py: -------------------------------------------------------------------------------- 1 | from typing import Union, List 2 | 3 | from transformers import AutoTokenizer 4 | import torch 5 | 6 | 7 | class HFPTTokenizer(object): 8 | def __init__(self, pt_name=None): 9 | 10 | self.pt_name = pt_name 11 | self.added_sep_token = 0 12 | self.added_cls_token = 0 13 | self.enable_add_tokens = False 14 | self.gpt_special_case = ((not self.enable_add_tokens) and ('gpt' in self.pt_name)) 15 | 16 | if (pt_name is None): 17 | self.tokenizer = AutoTokenizer.from_pretrained('bert-base-cased') 18 | else: 19 | self.tokenizer = AutoTokenizer.from_pretrained(pt_name) 20 | 21 | # Adding tokens to GPT causing NaN training loss. 22 | # Disable for now until further investigation. 
23 | if (self.enable_add_tokens): 24 | if (self.tokenizer.sep_token is None): 25 | self.tokenizer.add_special_tokens({'sep_token': '<SEP>'}) 26 | self.added_sep_token = 1 27 | 28 | if (self.tokenizer.cls_token is None): 29 | self.tokenizer.add_special_tokens({'cls_token': '<CLS>'}) 30 | self.added_cls_token = 1 31 | 32 | if (self.gpt_special_case): 33 | self.tokenizer.pad_token = self.tokenizer.eos_token 34 | self.tokenizer.sep_token = self.tokenizer.eos_token 35 | 36 | def get_eot_token(self): 37 | return self.tokenizer.encode(self.tokenizer.sep_token, add_special_tokens=False)[0] 38 | 39 | def get_sot_token(self): 40 | return self.tokenizer.encode(self.tokenizer.cls_token, add_special_tokens=False)[0] 41 | 42 | def get_eot_token_list(self): 43 | return self.tokenizer.encode(self.tokenizer.sep_token, add_special_tokens=False) 44 | 45 | def get_sot_token_list(self): 46 | return self.tokenizer.encode(self.tokenizer.cls_token, add_special_tokens=False) 47 | 48 | def get_tokenizer_obj(self): 49 | return self.tokenizer 50 | 51 | # Language model needs to know if new tokens 52 | # were added to the dictionary. 53 | def check_added_tokens(self): 54 | return self.added_sep_token + self.added_cls_token 55 | 56 | def tokenize(self, texts: Union[str, List[str]], context_length: int = 77): 57 | if isinstance(texts, str): 58 | texts = [texts] 59 | 60 | padding = 'max_length' 61 | 62 | seqstart = [] 63 | seqtok = [] 64 | seqend = [] 65 | 66 | max_length = context_length 67 | 68 | if (self.added_cls_token > 0): 69 | seqstart = self.get_sot_token_list() 70 | max_length = max_length - 1 71 | 72 | if (self.added_sep_token > 0): 73 | seqend = self.get_eot_token_list() 74 | max_length = max_length - 1 75 | 76 | tokens = self.tokenizer( 77 | texts, padding=padding, 78 | truncation=True, 79 | max_length=max_length 80 | )['input_ids'] 81 | 82 | for i in range(len(tokens)): 83 | tokens[i] = seqstart + tokens[i] + seqend 84 | 85 | if (self.gpt_special_case): 86 | for i in range(len(tokens)): 87 | tokens[i][-1] = self.get_eot_token() 88 | 89 | # print(str(tokens)) 90 | 91 | result = torch.Tensor(tokens).type(torch.LongTensor) 92 | 93 | return result 94 | 95 | def get_vocab_size(self): 96 | return self.tokenizer.vocab_size 97 | 98 | def __call__(self, texts: Union[str, List[str]], context_length: int = 77): 99 | return self.tokenize(texts, context_length) 100 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/test_clip_tokenizer.py: -------------------------------------------------------------------------------- 1 | from maskrcnn_benchmark.modeling.language_backbone import build_tokenizer 2 | 3 | if __name__ == '__main__': 4 | 5 | tokenizer2 = build_tokenizer("clip") 6 | tokenized2 = tokenizer2( 7 | ["Detectest : fishid. jellyfishioasod. penguinasd. puffin.asd shark. starfish. round stingray"]) 8 | print(tokenized2) 9 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/language_backbone/word_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Language-related data loading helper functions and class wrappers.
3 | """ 4 | 5 | import re 6 | import torch 7 | import codecs 8 | 9 | UNK_TOKEN = '<unk>' 10 | PAD_TOKEN = '<pad>' 11 | END_TOKEN = '<eos>' 12 | SENTENCE_SPLIT_REGEX = re.compile(r'(\W+)') 13 | 14 | 15 | class Dictionary(object): 16 | def __init__(self): 17 | self.word2idx = {} 18 | self.idx2word = [] 19 | 20 | def add_word(self, word): 21 | if word not in self.word2idx: 22 | self.idx2word.append(word) 23 | self.word2idx[word] = len(self.idx2word) - 1 24 | return self.word2idx[word] 25 | 26 | def __len__(self): 27 | return len(self.idx2word) 28 | 29 | def __getitem__(self, a): 30 | if isinstance(a, int): 31 | return self.idx2word[a] 32 | elif isinstance(a, list): 33 | return [self.idx2word[x] for x in a] 34 | elif isinstance(a, str): 35 | return self.word2idx[a] 36 | else: 37 | raise TypeError("Query word/index argument must be int, list, or str") 38 | 39 | def __contains__(self, word): 40 | return word in self.word2idx 41 | 42 | 43 | class Corpus(object): 44 | def __init__(self): 45 | self.dictionary = Dictionary() 46 | 47 | def set_max_len(self, value): 48 | self.max_len = value 49 | 50 | def load_file(self, filename): 51 | with codecs.open(filename, 'r', 'utf-8') as f: 52 | for line in f: 53 | line = line.strip() 54 | self.add_to_corpus(line) 55 | self.dictionary.add_word(UNK_TOKEN) 56 | self.dictionary.add_word(PAD_TOKEN) 57 | 58 | def add_to_corpus(self, line): 59 | """Tokenizes a text line.""" 60 | # Add words to the dictionary 61 | words = line.split() 62 | # tokens = len(words) 63 | for word in words: 64 | word = word.lower() 65 | self.dictionary.add_word(word) 66 | 67 | def tokenize(self, line, max_len=20): 68 | # Tokenize line contents 69 | words = SENTENCE_SPLIT_REGEX.split(line.strip()) 70 | # words = [w.lower() for w in words if len(w) > 0] 71 | words = [w.lower() for w in words if (len(w) > 0 and w != ' ')] ## do not include space as a token 72 | 73 | if words[-1] == '.': 74 | words = words[:-1] 75 | 76 | if max_len > 0: 77 | if len(words) > max_len: 78 | words = words[:max_len] 79 | elif len(words) < max_len: 80 | # words = [PAD_TOKEN] * (max_len - len(words)) + words 81 | words = words + [END_TOKEN] + [PAD_TOKEN] * (max_len - len(words) - 1) 82 | 83 | tokens = len(words) ## for end token 84 | ids = torch.LongTensor(tokens) 85 | token = 0 86 | for word in words: 87 | if word not in self.dictionary: 88 | word = UNK_TOKEN 89 | # print(word, type(word), word.encode('ascii','ignore').decode('ascii'), type(word.encode('ascii','ignore').decode('ascii'))) 90 | if type(word) != type('a'): 91 | print(word, type(word), word.encode('ascii', 'ignore').decode('ascii'), 92 | type(word.encode('ascii', 'ignore').decode('ascii'))) 93 | word = word.encode('ascii', 'ignore').decode('ascii') 94 | ids[token] = self.dictionary[word] 95 | token += 1 96 | # ids[token] = self.dictionary[END_TOKEN] 97 | return ids 98 | 99 | def __len__(self): 100 | return len(self.dictionary) 101 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | from torch import nn 8 | from torch.nn import functional as F 9 | from maskrcnn_benchmark.config import cfg 10 | from maskrcnn_benchmark.layers import Conv2d, DYReLU 11 | from maskrcnn_benchmark.modeling.poolers import Pooler 12 | 13 | 14 | def get_group_gn(dim, dim_per_gp, num_groups): 15 | """get number of groups used by GroupNorm, based on number of channels.""" 16 | assert dim_per_gp == -1 or num_groups == -1, \ 17 | "GroupNorm: can only specify G or C/G." 18 | 19 | if dim_per_gp > 0: 20 | assert dim % dim_per_gp == 0, \ 21 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 22 | group_gn = dim // dim_per_gp 23 | else: 24 | assert dim % num_groups == 0, \ 25 | "dim: {}, num_groups: {}".format(dim, num_groups) 26 | group_gn = num_groups 27 | 28 | return group_gn 29 | 30 | 31 | def group_norm(out_channels, affine=True, divisor=1): 32 | out_channels = out_channels // divisor 33 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 34 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 35 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 36 | return torch.nn.GroupNorm( 37 | get_group_gn(out_channels, dim_per_gp, num_groups), 38 | out_channels, 39 | eps, 40 | affine 41 | ) 42 | 43 | 44 | def make_conv3x3( 45 | in_channels, 46 | out_channels, 47 | dilation=1, 48 | stride=1, 49 | use_gn=False, 50 | use_relu=False, 51 | kaiming_init=True 52 | ): 53 | conv = Conv2d( 54 | in_channels, 55 | out_channels, 56 | kernel_size=3, 57 | stride=stride, 58 | padding=dilation, 59 | dilation=dilation, 60 | bias=False if use_gn else True 61 | ) 62 | if kaiming_init: 63 | nn.init.kaiming_normal_( 64 | conv.weight, mode="fan_out", nonlinearity="relu" 65 | ) 66 | else: 67 | torch.nn.init.normal_(conv.weight, std=0.01) 68 | if not use_gn: 69 | nn.init.constant_(conv.bias, 0) 70 | module = [conv,] 71 | if use_gn: 72 | module.append(group_norm(out_channels)) 73 | if use_relu: 74 | module.append(nn.ReLU(inplace=True)) 75 | if len(module) > 1: 76 | return nn.Sequential(*module) 77 | return conv 78 | 79 | 80 | def make_fc(dim_in, hidden_dim, use_gn=False): 81 | ''' 82 | Caffe2 implementation uses XavierFill, which in fact 83 | corresponds to kaiming_uniform_ in PyTorch 84 | ''' 85 | if use_gn: 86 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 87 | nn.init.kaiming_uniform_(fc.weight, a=1) 88 | return nn.Sequential(fc, group_norm(hidden_dim)) 89 | fc = nn.Linear(dim_in, hidden_dim) 90 | nn.init.kaiming_uniform_(fc.weight, a=1) 91 | nn.init.constant_(fc.bias, 0) 92 | return fc 93 | 94 | 95 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False, use_dyrelu=False): 96 | def make_conv( 97 | in_channels, out_channels, kernel_size, stride=1, dilation=1 98 | ): 99 | conv = Conv2d( 100 | in_channels, 101 | out_channels, 102 | kernel_size=kernel_size, 103 | stride=stride, 104 | padding=dilation * (kernel_size - 1) // 2, 105 | dilation=dilation, 106 | bias=False if use_gn else True 107 | ) 108 | # Caffe2 implementation uses XavierFill, which in fact 109 | # corresponds to kaiming_uniform_ in PyTorch 110 | nn.init.kaiming_uniform_(conv.weight, a=1) 111 | if not use_gn: 112 | nn.init.constant_(conv.bias, 0) 113 | module = [conv,] 114 | if use_gn: 115 | module.append(group_norm(out_channels)) 116 | if use_relu: 117 | module.append(nn.ReLU(inplace=True)) 118 | if use_dyrelu: 119 | module.append(DYReLU(out_channels, out_channels, use_spatial=True)) 120 | if len(module) > 1: 121 | return nn.Sequential(*module) 122 | return conv 123 | 124 | 
return make_conv 125 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from maskrcnn_benchmark.utils.registry import Registry 4 | 5 | BACKBONES = Registry() 6 | 7 | LANGUAGE_BACKBONES = Registry() 8 | 9 | ROI_BOX_FEATURE_EXTRACTORS = Registry() 10 | RPN_HEADS = Registry() 11 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | from .box_head.box_head import build_roi_box_head 5 | from .mask_head.mask_head import build_roi_mask_head 6 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 7 | 8 | 9 | class CombinedROIHeads(torch.nn.ModuleDict): 10 | """ 11 | Combines a set of individual heads (for box prediction or masks) into a single 12 | head. 13 | """ 14 | 15 | def __init__(self, cfg, heads): 16 | super(CombinedROIHeads, self).__init__(heads) 17 | self.cfg = cfg.clone() 18 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 19 | self.mask.feature_extractor = self.box.feature_extractor 20 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 21 | self.keypoint.feature_extractor = self.box.feature_extractor 22 | 23 | def forward(self, features, proposals, targets=None, language_dict_features=None, positive_map_label_to_token=None): 24 | losses = {} 25 | detections = proposals 26 | if self.cfg.MODEL.BOX_ON: 27 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 28 | x, detections, loss_box = self.box(features, proposals, targets) 29 | losses.update(loss_box) 30 | 31 | if self.cfg.MODEL.MASK_ON: 32 | mask_features = features 33 | # optimization: during training, if we share the feature extractor between 34 | # the box and the mask heads, then we can reuse the features already computed 35 | if ( 36 | self.training 37 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 38 | ): 39 | mask_features = x 40 | # During training, self.box() will return the unaltered proposals as "detections" 41 | # this makes the API consistent during training and testing 42 | x, detections, loss_mask = self.mask( 43 | mask_features, detections, targets, 44 | language_dict_features=language_dict_features, 45 | positive_map_label_to_token=positive_map_label_to_token) 46 | losses.update(loss_mask) 47 | 48 | if self.cfg.MODEL.KEYPOINT_ON: 49 | keypoint_features = features 50 | # optimization: during training, if we share the feature extractor between 51 | # the box and the mask heads, then we can reuse the features already computed 52 | if ( 53 | self.training 54 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 55 | ): 56 | keypoint_features = x 57 | # During training, self.box() will return the unaltered proposals as "detections" 58 | # this makes the API consistent during training and testing 59 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 60 | losses.update(loss_keypoint) 61 | return x, detections, losses 62 | 63 | 64 | def build_roi_heads(cfg): 65 | # individually create the heads, that will be combined together 66 | # afterwards 67 | # if 
cfg.MODEL.RPN_ONLY: 68 | # return None 69 | 70 | roi_heads = [] 71 | if cfg.MODEL.BOX_ON and not cfg.MODEL.RPN_ONLY: 72 | roi_heads.append(("box", build_roi_box_head(cfg))) 73 | if cfg.MODEL.MASK_ON: 74 | roi_heads.append(("mask", build_roi_mask_head(cfg))) 75 | if cfg.MODEL.KEYPOINT_ON: 76 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg))) 77 | 78 | # combine individual heads in a single module 79 | if roi_heads: 80 | roi_heads = CombinedROIHeads(cfg, roi_heads) 81 | else: 82 | roi_heads = None 83 | 84 | return roi_heads -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/box_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/box_head.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/roi_box_feature_extractors.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/roi_box_feature_extractors.cpython-38.pyc -------------------------------------------------------------------------------- 
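A note on `build_roi_heads` / `CombinedROIHeads` in roi_heads/__init__.py above: the heads are collected as ("name", module) pairs, and `CombinedROIHeads` subclasses `torch.nn.ModuleDict`, so each head becomes an attribute (`self.box`, `self.mask`, `self.keypoint`) that its `forward` relies on. A minimal sketch of that composition pattern (illustrative stand-in modules, not the real head constructors):

from torch import nn

# Stand-ins for build_roi_box_head(cfg) / build_roi_mask_head(cfg):
heads = [("box", nn.Linear(8, 8)), ("mask", nn.Linear(8, 8))]

combined = nn.ModuleDict(dict(heads))  # CombinedROIHeads passes `heads` straight to ModuleDict
print(list(combined.keys()))           # ['box', 'mask']
print(type(combined.box))              # <class 'torch.nn.modules.linear.Linear'>:
                                       # each key is registered as a named submodule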
/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/roi_box_predictors.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/box_head/__pycache__/roi_box_predictors.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 6 | from .roi_box_predictors import make_roi_box_predictor 7 | from .inference import make_roi_box_post_processor 8 | from .loss import make_roi_box_loss_evaluator 9 | from maskrcnn_benchmark.utils.amp import custom_fwd, custom_bwd 10 | 11 | class ROIBoxHead(torch.nn.Module): 12 | """ 13 | Generic Box Head class. 14 | """ 15 | 16 | def __init__(self, cfg): 17 | super(ROIBoxHead, self).__init__() 18 | self.feature_extractor = make_roi_box_feature_extractor(cfg) 19 | self.predictor = make_roi_box_predictor(cfg) 20 | self.post_processor = make_roi_box_post_processor(cfg) 21 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 22 | self.onnx = cfg.MODEL.ONNX 23 | 24 | @custom_fwd(cast_inputs=torch.float32) 25 | def forward(self, features, proposals, targets=None): 26 | """ 27 | Arguments: 28 | features (list[Tensor]): feature-maps from possibly several levels 29 | proposals (list[BoxList]): proposal boxes 30 | targets (list[BoxList], optional): the ground-truth targets. 31 | 32 | Returns: 33 | x (Tensor): the result of the feature extractor 34 | proposals (list[BoxList]): during training, the subsampled proposals 35 | are returned. During testing, the predicted boxlists are returned 36 | losses (dict[Tensor]): During training, returns the losses for the 37 | head. During testing, returns an empty dict. 38 | """ 39 | 40 | if self.training: 41 | # Faster R-CNN subsamples during training the proposals with a fixed 42 | # positive / negative ratio 43 | with torch.no_grad(): 44 | proposals = self.loss_evaluator.subsample(proposals, targets) 45 | 46 | # extract features that will be fed to the final classifier. The 47 | # feature_extractor generally corresponds to the pooler + heads 48 | x = self.feature_extractor(features, proposals) 49 | # final classifier that converts the features into predictions 50 | class_logits, box_regression = self.predictor(x) 51 | 52 | if self.onnx: 53 | return x, (class_logits, box_regression, [box.bbox for box in proposals]), {} 54 | 55 | if not self.training: 56 | result = self.post_processor((class_logits, box_regression), proposals) 57 | return x, result, {} 58 | 59 | loss_classifier, loss_box_reg = self.loss_evaluator( 60 | [class_logits], [box_regression] 61 | ) 62 | return ( 63 | x, 64 | proposals, 65 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 66 | ) 67 | 68 | 69 | def build_roi_box_head(cfg): 70 | """ 71 | Constructs a new box head. 
72 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 73 | and make it a parameter in the config 74 | """ 75 | return ROIBoxHead(cfg) 76 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | 4 | 5 | class FastRCNNPredictor(nn.Module): 6 | def __init__(self, config, pretrained=None): 7 | super(FastRCNNPredictor, self).__init__() 8 | 9 | stage_index = 4 10 | stage2_relative_factor = 2 ** (stage_index - 1) 11 | res2_out_channels = config.MODEL.RESNETS.RES2_OUT_CHANNELS 12 | num_inputs = res2_out_channels * stage2_relative_factor 13 | 14 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 15 | self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7) 16 | self.cls_score = nn.Linear(num_inputs, num_classes) 17 | self.bbox_pred = nn.Linear(num_inputs, num_classes * 4) 18 | 19 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 20 | nn.init.constant_(self.cls_score.bias, 0) 21 | 22 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 23 | nn.init.constant_(self.bbox_pred.bias, 0) 24 | 25 | def forward(self, x): 26 | x = self.avgpool(x) 27 | x = x.view(x.size(0), -1) 28 | cls_logit = self.cls_score(x) 29 | bbox_pred = self.bbox_pred(x) 30 | return cls_logit, bbox_pred 31 | 32 | 33 | class FPNPredictor(nn.Module): 34 | def __init__(self, cfg): 35 | super(FPNPredictor, self).__init__() 36 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 37 | representation_size = cfg.MODEL.ROI_BOX_HEAD.MLP_HEAD_DIM 38 | 39 | self.cls_score = nn.Linear(representation_size, num_classes) 40 | self.bbox_pred = nn.Linear(representation_size, num_classes * 4) 41 | 42 | nn.init.normal_(self.cls_score.weight, std=0.01) 43 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 44 | for l in [self.cls_score, self.bbox_pred]: 45 | nn.init.constant_(l.bias, 0) 46 | 47 | def forward(self, x): 48 | scores = self.cls_score(x) 49 | bbox_deltas = self.bbox_pred(x) 50 | 51 | return scores, bbox_deltas 52 | 53 | 54 | _ROI_BOX_PREDICTOR = { 55 | "FastRCNNPredictor": FastRCNNPredictor, 56 | "FPNPredictor": FPNPredictor, 57 | } 58 | 59 | 60 | def make_roi_box_predictor(cfg): 61 | func = _ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 62 | return func(cfg) 63 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/keypoint_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/keypoint_head.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/loss.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/roi_keypoint_feature_extractors.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/roi_keypoint_feature_extractors.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/roi_keypoint_predictors.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/keypoint_head/__pycache__/roi_keypoint_predictors.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 4 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 5 | from .inference import make_roi_keypoint_post_processor 6 | from .loss import make_roi_keypoint_loss_evaluator 7 | 8 | 9 | class ROIKeypointHead(torch.nn.Module): 10 | def __init__(self, cfg): 11 | super(ROIKeypointHead, self).__init__() 12 | self.cfg = cfg.clone() 13 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg) 14 | self.predictor = make_roi_keypoint_predictor(cfg) 15 | self.post_processor = make_roi_keypoint_post_processor(cfg) 16 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 17 | 18 | def forward(self, features, proposals, targets=None): 19 | """ 20 | Arguments: 21 | features (list[Tensor]): feature-maps from possibly several levels 22 | proposals (list[BoxList]): proposal boxes 23 | targets (list[BoxList], optional): the ground-truth targets. 24 | 25 | Returns: 26 | x (Tensor): the result of the feature extractor 27 | proposals (list[BoxList]): during training, the original proposals 28 | are returned. During testing, the predicted boxlists are returned 29 | with the `mask` field set 30 | losses (dict[Tensor]): During training, returns the losses for the 31 | head. During testing, returns an empty dict. 
32 | """ 33 | if self.training: 34 | with torch.no_grad(): 35 | proposals = self.loss_evaluator.subsample(proposals, targets) 36 | 37 | x = self.feature_extractor(features, proposals) 38 | kp_logits = self.predictor(x) 39 | 40 | if not self.training: 41 | result = self.post_processor(kp_logits, proposals) 42 | return x, result, {} 43 | 44 | loss_kp = self.loss_evaluator(proposals, kp_logits) 45 | 46 | return x, proposals, dict(loss_kp=loss_kp) 47 | 48 | 49 | def build_roi_keypoint_head(cfg): 50 | return ROIKeypointHead(cfg) -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark.modeling.poolers import Pooler 5 | 6 | from maskrcnn_benchmark.layers import Conv2d 7 | from maskrcnn_benchmark.layers import ConvTranspose2d 8 | 9 | 10 | class KeypointRCNNFeatureExtractor(nn.Module): 11 | def __init__(self, cfg): 12 | super(KeypointRCNNFeatureExtractor, self).__init__() 13 | 14 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 15 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 16 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 17 | pooler = Pooler( 18 | output_size=(resolution, resolution), 19 | scales=scales, 20 | sampling_ratio=sampling_ratio, 21 | ) 22 | self.pooler = pooler 23 | 24 | input_features = cfg.MODEL.BACKBONE.OUT_CHANNELS 25 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 26 | next_feature = input_features 27 | self.blocks = [] 28 | for layer_idx, layer_features in enumerate(layers, 1): 29 | layer_name = "conv_fcn{}".format(layer_idx) 30 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 31 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 32 | nn.init.constant_(module.bias, 0) 33 | self.add_module(layer_name, module) 34 | next_feature = layer_features 35 | self.blocks.append(layer_name) 36 | 37 | def forward(self, x, proposals): 38 | x = self.pooler(x, proposals) 39 | for layer_name in self.blocks: 40 | x = F.relu(getattr(self, layer_name)(x)) 41 | return x 42 | 43 | class KeypointRCNNFeature2XZoomExtractor(nn.Module): 44 | def __init__(self, cfg): 45 | super(KeypointRCNNFeature2XZoomExtractor, self).__init__() 46 | 47 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 48 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 49 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 50 | pooler = Pooler( 51 | output_size=(resolution, resolution), 52 | scales=scales, 53 | sampling_ratio=sampling_ratio, 54 | ) 55 | self.pooler = pooler 56 | 57 | input_features = cfg.MODEL.BACKBONE.OUT_CHANNELS 58 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 59 | next_feature = input_features 60 | self.blocks = [] 61 | for layer_idx, layer_features in enumerate(layers, 1): 62 | layer_name = "conv_fcn{}".format(layer_idx) 63 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 64 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 65 | nn.init.constant_(module.bias, 0) 66 | self.add_module(layer_name, module) 67 | if layer_idx==len(layers)//2: 68 | deconv_kernel = 4 69 | kps_upsacle = ConvTranspose2d(layer_features, layer_features, deconv_kernel, 70 | stride=2, padding=deconv_kernel//2-1) 71 | nn.init.kaiming_normal_(kps_upsacle.weight, mode="fan_out", 
nonlinearity="relu") 72 | nn.init.constant_(kps_upsacle.bias, 0) 73 | self.add_module("conv_fcn_upscale", kps_upsacle) 74 | self.blocks.append("conv_fcn_upscale") 75 | 76 | next_feature = layer_features 77 | self.blocks.append(layer_name) 78 | 79 | def forward(self, x, proposals): 80 | x = self.pooler(x, proposals) 81 | for layer_name in self.blocks: 82 | x = F.relu(getattr(self, layer_name)(x)) 83 | return x 84 | 85 | 86 | _ROI_KEYPOINT_FEATURE_EXTRACTORS = { 87 | "KeypointRCNNFeatureExtractor": KeypointRCNNFeatureExtractor, 88 | "KeypointRCNNFeature2XZoomExtractor": KeypointRCNNFeature2XZoomExtractor 89 | } 90 | 91 | 92 | def make_roi_keypoint_feature_extractor(cfg): 93 | func = _ROI_KEYPOINT_FEATURE_EXTRACTORS[ 94 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 95 | ] 96 | return func(cfg) -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from torch.nn import functional as F 3 | 4 | from maskrcnn_benchmark import layers 5 | 6 | 7 | class KeypointRCNNPredictor(nn.Module): 8 | def __init__(self, cfg): 9 | super(KeypointRCNNPredictor, self).__init__() 10 | input_features = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS[-1] 11 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 12 | deconv_kernel = 4 13 | self.kps_score_lowres = layers.ConvTranspose2d( 14 | input_features, 15 | num_keypoints, 16 | deconv_kernel, 17 | stride=2, 18 | padding=deconv_kernel // 2 - 1, 19 | ) 20 | nn.init.kaiming_normal_( 21 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 22 | ) 23 | nn.init.constant_(self.kps_score_lowres.bias, 0) 24 | self.up_scale = 2 25 | 26 | def forward(self, x): 27 | x = self.kps_score_lowres(x) 28 | x = layers.interpolate( 29 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 30 | ) 31 | return x 32 | 33 | 34 | _ROI_KEYPOINT_PREDICTOR = {"KeypointRCNNPredictor": KeypointRCNNPredictor} 35 | 36 | 37 | def make_roi_keypoint_predictor(cfg): 38 | func = _ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 39 | return func(cfg) -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/hourglass.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/hourglass.cpython-38.pyc -------------------------------------------------------------------------------- 
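On `KeypointRCNNPredictor` above: the transposed convolution (kernel 4, stride 2, padding `4 // 2 - 1 = 1`) doubles the spatial resolution, and the bilinear `interpolate` with `up_scale = 2` doubles it again, so each pooled RoI map is upsampled 4x overall. A quick shape check (illustrative sketch; the 512 channels, 17 keypoints, and 14x14 pooler resolution are assumptions standing in for the config values):

import torch
from torch import nn
import torch.nn.functional as F

deconv = nn.ConvTranspose2d(512, 17, kernel_size=4, stride=2, padding=1)
x = torch.randn(2, 512, 14, 14)  # pooled RoI features: (rois, channels, H, W)
y = deconv(x)                    # (2, 17, 28, 28): (14 - 1) * 2 - 2 * 1 + 4 = 28
z = F.interpolate(y, scale_factor=2, mode="bilinear", align_corners=False)
print(z.shape)                   # torch.Size([2, 17, 56, 56]), i.e. 4x the input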
/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/mask_head.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/mask_head.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/roi_mask_feature_extractors.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/roi_mask_feature_extractors.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/roi_mask_predictors.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/roi_heads/mask_head/__pycache__/roi_mask_predictors.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/hourglass.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from maskrcnn_benchmark.modeling.make_layers import make_conv3x3 4 | 5 | 6 | class Residual(nn.Module): 7 | def __init__(self, inp_dim, out_dim, use_gn=False): 8 | super(Residual, self).__init__() 9 | self.relu = nn.ReLU() 10 | # self.bn1 = nn.BatchNorm2d(inp_dim) 11 | self.conv1 = make_conv3x3(inp_dim, int(out_dim / 2), 1, use_relu=False, use_gn=use_gn) 12 | # self.bn2 = nn.BatchNorm2d(int(out_dim / 2)) 13 | self.conv2 = make_conv3x3(int(out_dim / 2), int(out_dim / 2), 3, use_relu=False, use_gn=use_gn) 14 | # self.bn3 = nn.BatchNorm2d(int(out_dim / 2)) 15 | self.conv3 = make_conv3x3(int(out_dim / 2), out_dim, 1, use_relu=False, use_gn=use_gn) 16 | if inp_dim == out_dim: 17 | self.need_skip = False 18 | else: 19 | self.need_skip = True 20 | self.skip_layer = make_conv3x3(inp_dim, out_dim, 1, use_relu=False, use_gn=False) 21 | 22 | def forward(self, x): 23 | if self.need_skip: 24 | residual = self.skip_layer(x) 25 | else: 26 | residual = x 27 | out = x 28 | # out = self.bn1(out) 29 | out = self.relu(out) 30 | out = self.conv1(out) 31 | # out = self.bn2(out) 32 | out = self.relu(out) 33 | out = self.conv2(out) 34 | # out = self.bn3(out) 35 | out = self.relu(out) 36 | out = self.conv3(out) 37 | 
out += residual 38 | return out 39 | 40 | 41 | class Hourglass(nn.Module): 42 | def __init__(self, n, f, gn=False, increase=0): 43 | super(Hourglass, self).__init__() 44 | nf = f + increase 45 | self.up1 = Residual(f, f) 46 | # Lower branch 47 | self.pool1 = nn.MaxPool2d(2, 2) 48 | self.low1 = Residual(f, nf) 49 | self.n = n 50 | # Recursive hourglass 51 | if self.n > 1: 52 | self.low2 = Hourglass(n-1, nf, gn=gn) 53 | else: 54 | self.low2 = Residual(nf, nf, gn) 55 | self.low3 = Residual(nf, f, gn) 56 | self.up2 = nn.Upsample(scale_factor=2, mode='nearest') 57 | 58 | def forward(self, x): 59 | up1 = self.up1(x) 60 | pool1 = self.pool1(x) 61 | low1 = self.low1(pool1) 62 | low2 = self.low2(low1) 63 | low3 = self.low3(low2) 64 | up2 = self.up2(low3) 65 | return up1 + up2 -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/roi_heads/mask_head/mask_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | from torch import nn 4 | 5 | from maskrcnn_benchmark.structures.bounding_box import BoxList 6 | 7 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor 8 | from .roi_mask_predictors import make_roi_mask_predictor 9 | from .inference import make_roi_mask_post_processor 10 | from .loss import make_roi_mask_loss_evaluator 11 | 12 | 13 | def keep_only_positive_boxes(boxes): 14 | """ 15 | Given a set of BoxList containing the `labels` field, 16 | return a set of BoxList for which `labels > 0`. 17 | 18 | Arguments: 19 | boxes (list of BoxList) 20 | """ 21 | assert isinstance(boxes, (list, tuple)) 22 | assert isinstance(boxes[0], BoxList) 23 | assert boxes[0].has_field("labels") 24 | positive_boxes = [] 25 | positive_inds = [] 26 | num_boxes = 0 27 | for boxes_per_image in boxes: 28 | labels = boxes_per_image.get_field("labels") 29 | inds_mask = labels > 0 30 | inds = inds_mask.nonzero().squeeze(1) 31 | positive_boxes.append(boxes_per_image[inds]) 32 | positive_inds.append(inds_mask) 33 | return positive_boxes, positive_inds 34 | 35 | 36 | class ROIMaskHead(torch.nn.Module): 37 | def __init__(self, cfg): 38 | super(ROIMaskHead, self).__init__() 39 | self.cfg = cfg.clone() 40 | self.feature_extractor = make_roi_mask_feature_extractor(cfg) 41 | self.predictor = make_roi_mask_predictor(cfg) 42 | self.post_processor = make_roi_mask_post_processor(cfg) 43 | self.loss_evaluator = make_roi_mask_loss_evaluator(cfg) 44 | 45 | def forward(self, features, proposals, targets=None, 46 | language_dict_features=None, 47 | positive_map_label_to_token=None 48 | ): 49 | """ 50 | Arguments: 51 | features (list[Tensor]): feature-maps from possibly several levels 52 | proposals (list[BoxList]): proposal boxes 53 | targets (list[BoxList], optional): the ground-truth targets. 54 | language_dict_features: language features: hidden, embedding, mask, ... 55 | 56 | Returns: 57 | x (Tensor): the result of the feature extractor 58 | proposals (list[BoxList]): during training, the original proposals 59 | are returned. During testing, the predicted boxlists are returned 60 | with the `mask` field set 61 | losses (dict[Tensor]): During training, returns the losses for the 62 | head. During testing, returns an empty dict. 
63 | """ 64 | if self.training: 65 | # during training, only focus on positive boxes 66 | all_proposals = proposals 67 | proposals, positive_inds = keep_only_positive_boxes(proposals) 68 | if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 69 | x = features 70 | x = x[torch.cat(positive_inds, dim=0)] 71 | else: 72 | x = self.feature_extractor(features, proposals) 73 | if self.cfg.MODEL.ROI_MASK_HEAD.PREDICTOR.startswith("VL"): 74 | mask_logits = self.predictor(x, language_dict_features) 75 | else: 76 | mask_logits = self.predictor(x) 77 | 78 | if not self.training: 79 | result = self.post_processor(mask_logits, proposals, positive_map_label_to_token) 80 | return x, result, {} 81 | 82 | loss_mask = self.loss_evaluator(proposals, mask_logits, targets) 83 | 84 | return x, all_proposals, dict(loss_mask=loss_mask) 85 | 86 | 87 | def build_roi_mask_head(cfg): 88 | return ROIMaskHead(cfg) 89 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | # from .rpn import build_rpn 3 | from .rpn import RPNModule 4 | from .retina import RetinaNetModule 5 | from .fcos import FCOSModule 6 | from .atss import ATSSModule 7 | from .dyhead import DyHeadModule 8 | from .vldyhead import VLDyHeadModule 9 | 10 | _RPN_META_ARCHITECTURES = {"RPN": RPNModule, 11 | "RETINA": RetinaNetModule, 12 | "FCOS": FCOSModule, 13 | "ATSS": ATSSModule, 14 | "DYHEAD": DyHeadModule, 15 | "VLDYHEAD": VLDyHeadModule 16 | } 17 | 18 | 19 | def build_rpn(cfg): 20 | """ 21 | Build and return the RPN/detection-head meta-architecture selected by cfg.MODEL.RPN_ARCHITECTURE. 22 | """ 23 | rpn_arch = _RPN_META_ARCHITECTURES[cfg.MODEL.RPN_ARCHITECTURE] 24 | return rpn_arch(cfg) 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/anchor_generator.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/anchor_generator.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/atss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/atss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/dyhead.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/dyhead.cpython-38.pyc --------------------------------------------------------------------------------
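`build_rpn` above is the dispatch point for every detection head in this codebase: the keys of `_RPN_META_ARCHITECTURES` are the only valid values of `cfg.MODEL.RPN_ARCHITECTURE`. A minimal sketch of the selection, assuming an unfrozen config (`VLDYHEAD` is the vision-language head that GLIP-style configs such as the one under `configs/` would select; the override here is illustrative):

```python
# Sketch only: config-driven selection of the RPN meta-architecture.
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.modeling.rpn import build_rpn

cfg.MODEL.RPN_ARCHITECTURE = "VLDYHEAD"  # any key of _RPN_META_ARCHITECTURES

rpn = build_rpn(cfg)  # resolves to VLDyHeadModule(cfg) for this key
```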
/maskrcnn_benchmark/modeling/rpn/__pycache__/fcos.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/fcos.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/inference.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/inference.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/modeling_bert.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/modeling_bert.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/retina.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/retina.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/rpn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/rpn.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/__pycache__/vldyhead.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/modeling/rpn/__pycache__/vldyhead.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/rpn/transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch import nn, Tensor 4 | 5 | import copy 6 | from typing import Optional, List 7 | 8 | 9 | def _get_clones(module, N): 10 | return nn.ModuleList([copy.deepcopy(module) for i in range(N)]) 11 | 12 | 13 | def _get_activation_fn(activation): 14 | """Return an activation function given a string""" 15 | if activation == "relu": 16 | return F.relu 17 | if activation == "gelu": 18 | return F.gelu 19 | if activation == "glu": 20 | return F.glu 21 | raise RuntimeError(F"activation should be relu/gelu/glu, not {activation}.") 22 | 23 | 24 | class TransformerEncoderLayer(nn.Module): 25 | def __init__(self, d_model, nhead, dim_feedforward=2048,
dropout=0.1, 26 | activation="relu", normalize_before=False): 27 | super(TransformerEncoderLayer, self).__init__() 28 | self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 29 | # Implementation of Feedforward model 30 | self.linear1 = nn.Linear(d_model, dim_feedforward) 31 | self.dropout = nn.Dropout(dropout) 32 | self.linear2 = nn.Linear(dim_feedforward, d_model) 33 | 34 | self.norm1 = nn.LayerNorm(d_model) 35 | self.norm2 = nn.LayerNorm(d_model) 36 | self.dropout1 = nn.Dropout(dropout) 37 | self.dropout2 = nn.Dropout(dropout) 38 | 39 | self.activation = _get_activation_fn(activation) 40 | self.normalize_before = normalize_before 41 | 42 | def forward(self, src, 43 | src_mask: Optional[Tensor] = None, 44 | src_key_padding_mask: Optional[Tensor] = None): 45 | src2 = self.self_attn(src, src, src, attn_mask=src_mask, 46 | key_padding_mask=src_key_padding_mask)[0] 47 | src = src + self.dropout1(src2) 48 | src = self.norm1(src) 49 | src2 = self.linear2(self.dropout(self.activation(self.linear1(src)))) 50 | src = src + self.dropout2(src2) 51 | src = self.norm2(src) 52 | return src 53 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/modeling/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | Miscellaneous utility functions 4 | """ 5 | 6 | import torch 7 | 8 | 9 | def cat(tensors, dim=0): 10 | """ 11 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 12 | """ 13 | assert isinstance(tensors, (list, tuple)) 14 | if len(tensors) == 1: 15 | return tensors[0] 16 | return torch.cat(tensors, dim) 17 | 18 | 19 | def permute_and_flatten(layer, N, A, C, H, W): 20 | layer = layer.view(N, -1, C, H, W) 21 | layer = layer.permute(0, 3, 4, 1, 2) 22 | layer = layer.reshape(N, -1, C) 23 | return layer 24 | 25 | 26 | def concat_box_prediction_layers(box_regression, box_cls=None, token_logits=None): 27 | box_regression_flattened = [] 28 | box_cls_flattened = [] 29 | token_logit_flattened = [] 30 | 31 | # for each feature level, permute the outputs to make them be in the 32 | # same format as the labels. 
Note that the labels are computed for 33 | # all feature levels concatenated, so we keep the same representation 34 | # for the objectness and the box_regression 35 | for box_cls_per_level, box_regression_per_level in zip( 36 | box_cls, box_regression 37 | ): 38 | N, AxC, H, W = box_cls_per_level.shape 39 | Ax4 = box_regression_per_level.shape[1] 40 | A = Ax4 // 4 41 | C = AxC // A 42 | box_cls_per_level = permute_and_flatten( 43 | box_cls_per_level, N, A, C, H, W 44 | ) 45 | box_cls_flattened.append(box_cls_per_level) 46 | 47 | box_regression_per_level = permute_and_flatten( 48 | box_regression_per_level, N, A, 4, H, W 49 | ) 50 | box_regression_flattened.append(box_regression_per_level) 51 | 52 | if token_logits is not None: 53 | for token_logit_per_level in token_logits: 54 | N, AXT, H, W = token_logit_per_level.shape 55 | T = AXT // A 56 | token_logit_per_level = permute_and_flatten( 57 | token_logit_per_level, N, A, T, H, W 58 | ) 59 | token_logit_flattened.append(token_logit_per_level) 60 | 61 | # concatenate on the first dimension (representing the feature levels), to 62 | # take into account the way the labels were generated (with all feature maps 63 | # being concatenated as well) 64 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 65 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 66 | 67 | token_logits_stacked = None 68 | if token_logits is not None: 69 | # stacked 70 | token_logits_stacked = cat(token_logit_flattened, dim=1) 71 | 72 | return box_regression, box_cls, token_logits_stacked 73 | 74 | 75 | def round_channels(channels, divisor=8): 76 | rounded_channels = max(int(channels + divisor / 2.0) // divisor * divisor, divisor) 77 | if float(rounded_channels) < 0.9 * channels: 78 | rounded_channels += divisor 79 | return rounded_channels 80 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | from .build import make_optimizer 3 | from .build import make_lr_scheduler 4 | from .lr_scheduler import WarmupMultiStepLR 5 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__pycache__/bounding_box.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__pycache__/bounding_box.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__pycache__/boxlist_ops.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__pycache__/boxlist_ops.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__pycache__/image_list.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__pycache__/image_list.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__pycache__/keypoint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__pycache__/keypoint.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/__pycache__/segmentation_mask.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/structures/__pycache__/segmentation_mask.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | import torch 5 | 6 | 7 | class ImageList(object): 8 | """ 9 | Structure that holds a list of images (of possibly 10 | varying sizes) as a single tensor. 
11 | This works by padding the images to the same size, 12 | and storing in a field the original sizes of each image 13 | """ 14 | 15 | def __init__(self, tensors, image_sizes): 16 | """ 17 | Arguments: 18 | tensors (tensor) 19 | image_sizes (list[tuple[int, int]]) 20 | """ 21 | self.tensors = tensors 22 | self.image_sizes = image_sizes 23 | 24 | def to(self, *args, **kwargs): 25 | cast_tensor = self.tensors.to(*args, **kwargs) 26 | return ImageList(cast_tensor, self.image_sizes) 27 | 28 | 29 | def to_image_list(tensors, size_divisible=0): 30 | """ 31 | tensors can be an ImageList, a torch.Tensor or 32 | an iterable of Tensors. It can't be a numpy array. 33 | When tensors is an iterable of Tensors, it pads 34 | the Tensors with zeros so that they have the same 35 | shape 36 | """ 37 | if isinstance(tensors, torch.Tensor) and size_divisible > 0: 38 | tensors = [tensors] 39 | 40 | if isinstance(tensors, ImageList): 41 | return tensors 42 | elif isinstance(tensors, torch.Tensor): 43 | # single tensor shape can be inferred 44 | assert tensors.dim() == 4 45 | image_sizes = [tensor.shape[-2:] for tensor in tensors] 46 | return ImageList(tensors, image_sizes) 47 | elif isinstance(tensors, (tuple, list)): 48 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 49 | 50 | # TODO Ideally, just remove this and let the model handle arbitrary 51 | # input sizes 52 | if size_divisible > 0: 53 | import math 54 | 55 | stride = size_divisible 56 | max_size = list(max_size) 57 | max_size[1] = int(math.ceil(max_size[1] / stride) * stride) 58 | max_size[2] = int(math.ceil(max_size[2] / stride) * stride) 59 | max_size = tuple(max_size) 60 | 61 | batch_shape = (len(tensors),) + max_size 62 | batched_imgs = tensors[0].new(*batch_shape).zero_() 63 | for img, pad_img in zip(tensors, batched_imgs): 64 | pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img) 65 | 66 | image_sizes = [im.shape[-2:] for im in tensors] 67 | 68 | return ImageList(batched_imgs, image_sizes) 69 | else: 70 | raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors))) 71 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system.
6 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__init__.py -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/amp.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/amp.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/big_model_loading.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/big_model_loading.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/c2_model_loading.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/c2_model_loading.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/checkpoint.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/checkpoint.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/collect_env.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/collect_env.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/comm.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/comm.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/dist.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/dist.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/env.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/env.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/fuse_helper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/fuse_helper.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/imports.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/imports.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/logger.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/logger.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/mdetr_dist.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/mdetr_dist.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/miscellaneous.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/miscellaneous.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/model_serialization.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/model_serialization.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/model_zoo.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/model_zoo.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/pretrain_model_loading.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/pretrain_model_loading.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/registry.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/registry.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/shallow_contrastive_loss_helper.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/shallow_contrastive_loss_helper.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/__pycache__/stats.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MembrAI/MIU-VL/80adbdb745dadabb0ca23785d567c10393b01388/maskrcnn_benchmark/utils/__pycache__/stats.cpython-38.pyc -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/amp.py: -------------------------------------------------------------------------------- 1 | from contextlib import contextmanager 2 | 3 | @contextmanager 4 | def nullcontext(enter_result=None, **kwargs): 5 | yield enter_result 6 | 7 | try: 8 | from torch.cuda.amp import autocast, GradScaler, custom_fwd, custom_bwd 9 | except ImportError: 10 | print('[Warning] Library for automatic mixed precision is not found, AMP is disabled!!') 11 | GradScaler = nullcontext 12 | autocast = nullcontext 13 | custom_fwd = nullcontext 14 | custom_bwd = nullcontext -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/big_model_loading.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from collections import OrderedDict 6 | 7 | 8 | def tf2th(conv_weights): 9 | """Possibly convert HWIO to OIHW.""" 10 | if conv_weights.ndim == 4: 11 | conv_weights = conv_weights.transpose([3, 2, 0, 1]) 12 | return torch.from_numpy(conv_weights) 13 | 14 | 15 | def _rename_conv_weights_for_deformable_conv_layers(state_dict, cfg): 16 | import re 17 | layer_keys = sorted(state_dict.keys()) 18 | for ix, stage_with_dcn in enumerate(cfg.MODEL.RESNETS.STAGE_WITH_DCN, 1): 19 | if not stage_with_dcn: 20 | continue 21 | for old_key in layer_keys: 22 | pattern = ".*block{}.*conv2.*".format(ix) 23 | r = re.match(pattern, old_key) 24 | if r is None: 25 | continue 26 | for param in ["weight", "bias"]: 27 | if old_key.find(param) == -1: 28 | continue 29 | if 'unit01' in old_key: 30 | continue 31 | new_key = old_key.replace( 32 | "conv2.{}".format(param), "conv2.conv.{}".format(param) 33 | ) 34 | print("pattern: {}, old_key: {}, new_key: {}".format( 35 | pattern, old_key, new_key 36 | )) 37 | # Calculate SD conv weight 38 | w = state_dict[old_key] 39 | v, m = torch.var_mean(w, dim=[1, 2, 3], keepdim=True, unbiased=False) 40 | w = (w - m) / torch.sqrt(v + 1e-10) 41 | 42 | state_dict[new_key] = w 43 | del state_dict[old_key] 44 | return state_dict 45 | 46 | 47 | def load_big_format(cfg, f): 48 | model = OrderedDict() 49 | weights = np.load(f) 50 | 51 | cmap = {'a':1, 'b':2, 'c':3} 52 | for key, val in weights.items(): 53 | old_key = key.replace('resnet/', '') 54 | if 'root_block' in old_key: 55 | new_key = 'root.conv.weight' 56 | elif '/proj/standardized_conv2d/kernel' in old_key: 57 | key_pattern = old_key.replace('/proj/standardized_conv2d/kernel',
'').replace('resnet/', '') 58 | bname, uname, cidx = key_pattern.split('/') 59 | new_key = '{}.downsample.{}.conv{}.weight'.format(bname,uname,cmap[cidx]) 60 | elif '/standardized_conv2d/kernel' in old_key: 61 | key_pattern = old_key.replace('/standardized_conv2d/kernel', '').replace('resnet/', '') 62 | bname, uname, cidx = key_pattern.split('/') 63 | new_key = '{}.{}.conv{}.weight'.format(bname,uname,cmap[cidx]) 64 | elif '/group_norm/gamma' in old_key: 65 | key_pattern = old_key.replace('/group_norm/gamma', '').replace('resnet/', '') 66 | bname, uname, cidx = key_pattern.split('/') 67 | new_key = '{}.{}.gn{}.weight'.format(bname,uname,cmap[cidx]) 68 | elif '/group_norm/beta' in old_key: 69 | key_pattern = old_key.replace('/group_norm/beta', '').replace('resnet/', '') 70 | bname, uname, cidx = key_pattern.split('/') 71 | new_key = '{}.{}.gn{}.bias'.format(bname,uname,cmap[cidx]) 72 | else: 73 | print('Unknown key {}'.format(old_key)) 74 | continue 75 | print('Map {} -> {}'.format(key, new_key)) 76 | model[new_key] = tf2th(val) 77 | 78 | model = _rename_conv_weights_for_deformable_conv_layers(model, cfg) 79 | 80 | return dict(model=model) 81 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import PIL 3 | 4 | from torch.utils.collect_env import get_pretty_env_info 5 | 6 | 7 | def get_pil_version(): 8 | return "\n Pillow ({})".format(PIL.__version__) 9 | 10 | 11 | def collect_env_info(): 12 | env_str = get_pretty_env_info() 13 | env_str += get_pil_version() 14 | return env_str 15 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/cv2_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Module for cv2 utility functions and maintaining version compatibility 3 | between 3.x and 4.x 4 | """ 5 | import cv2 6 | 7 | 8 | def findContours(*args, **kwargs): 9 | """ 10 | Wraps cv2.findContours to maintain compatibility between versions 11 | 3 and 4 12 | 13 | Returns: 14 | contours, hierarchy 15 | """ 16 | if cv2.__version__.startswith('4'): 17 | contours, hierarchy = cv2.findContours(*args, **kwargs) 18 | elif cv2.__version__.startswith('3'): 19 | _, contours, hierarchy = cv2.findContours(*args, **kwargs) 20 | else: 21 | raise AssertionError( 22 | 'cv2 must be either version 3 or 4 to call this method') 23 | 24 | return contours, hierarchy 25 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/ema.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from collections import OrderedDict 3 | import torch 4 | 5 | 6 | class ModelEma: 7 | def __init__(self, model, decay=0.9999, device=''): 8 | self.ema = deepcopy(model) 9 | self.ema.eval() 10 | self.decay = decay 11 | self.device = device 12 | if device: 13 | self.ema.to(device=device) 14 | self.ema_is_dp = hasattr(self.ema, 'module') 15 | for p in self.ema.parameters(): 16 | p.requires_grad_(False) 17 | 18 | def load_checkpoint(self, checkpoint): 19 | if isinstance(checkpoint, str): 20 | checkpoint = torch.load(checkpoint) 21 | 22 | assert isinstance(checkpoint, dict) 23 | if 'model_ema' in checkpoint: 24 | new_state_dict = OrderedDict() 25 | for k, v in checkpoint['model_ema'].items(): 26 | if
self.ema_is_dp: 27 | name = k if k.startswith('module') else 'module.' + k 28 | else: 29 | name = k.replace('module.', '') if k.startswith('module') else k 30 | new_state_dict[name] = v 31 | self.ema.load_state_dict(new_state_dict) 32 | 33 | def state_dict(self): 34 | return self.ema.state_dict() 35 | 36 | def update(self, model): 37 | pre_module = hasattr(model, 'module') and not self.ema_is_dp 38 | with torch.no_grad(): 39 | curr_msd = model.state_dict() 40 | for k, ema_v in self.ema.state_dict().items(): 41 | k = 'module.' + k if pre_module else k 42 | model_v = curr_msd[k].detach() 43 | if self.device: 44 | model_v = model_v.to(device=self.device) 45 | ema_v.copy_(ema_v * self.decay + (1. - self.decay) * model_v) 46 | 47 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | 4 | from maskrcnn_benchmark.utils.imports import import_file 5 | 6 | 7 | def setup_environment(): 8 | """Perform environment setup work. The default setup is a no-op, but this 9 | function allows the user to specify a Python source file that performs 10 | custom setup work that may be necessary to their computing environment. 11 | """ 12 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 13 | if custom_module_path: 14 | setup_custom_environment(custom_module_path) 15 | else: 16 | # The default setup is a no-op 17 | pass 18 | 19 | 20 | def setup_custom_environment(custom_module_path): 21 | """Load custom environment setup from a Python source file and run the setup 22 | function. 23 | """ 24 | module = import_file("maskrcnn_benchmark.utils.env.custom_module", custom_module_path) 25 | assert hasattr(module, "setup_environment") and callable( 26 | module.setup_environment 27 | ), ( 28 | "Custom environment module defined in {} does not have the " 29 | "required callable attribute 'setup_environment'." 30 | ).format( 31 | custom_module_path 32 | ) 33 | module.setup_environment() 34 | 35 | 36 | # Force environment setup when this module is imported 37 | setup_environment() 38 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/imports.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | if torch._six.PY37: 5 | import importlib 6 | import importlib.util 7 | import sys 8 | 9 | 10 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 11 | def import_file(module_name, file_path, make_importable=False): 12 | spec = importlib.util.spec_from_file_location(module_name, file_path) 13 | module = importlib.util.module_from_spec(spec) 14 | spec.loader.exec_module(module) 15 | if make_importable: 16 | sys.modules[module_name] = module 17 | return module 18 | else: 19 | import imp 20 | 21 | def import_file(module_name, file_path, make_importable=None): 22 | module = imp.load_source(module_name, file_path) 23 | return module 24 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved. 2 | import logging 3 | import os 4 | import sys 5 | 6 | 7 | def setup_logger(name, save_dir, distributed_rank): 8 | logger = logging.getLogger(name) 9 | logger.setLevel(logging.DEBUG) 10 | # don't log results for the non-master process 11 | if distributed_rank > 0: 12 | return logger 13 | ch = logging.StreamHandler(stream=sys.stdout) 14 | ch.setLevel(logging.ERROR) 15 | formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") 16 | ch.setFormatter(formatter) 17 | logger.addHandler(ch) 18 | 19 | if save_dir: 20 | fh = logging.FileHandler(os.path.join(save_dir, "log.txt")) 21 | fh.setLevel(logging.DEBUG) 22 | fh.setFormatter(formatter) 23 | logger.addHandler(fh) 24 | 25 | return logger 26 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from collections import defaultdict 3 | from collections import deque 4 | 5 | import torch 6 | import time 7 | from datetime import datetime 8 | from .comm import is_main_process 9 | 10 | 11 | class SmoothedValue(object): 12 | """Track a series of values and provide access to smoothed values over a 13 | window or the global series average. 14 | """ 15 | 16 | def __init__(self, window_size=20): 17 | self.deque = deque(maxlen=window_size) 18 | # self.series = [] 19 | self.total = 0.0 20 | self.count = 0 21 | 22 | def update(self, value): 23 | self.deque.append(value) 24 | # self.series.append(value) 25 | self.count += 1 26 | if value != value: 27 | value = 0 28 | self.total += value 29 | 30 | @property 31 | def median(self): 32 | d = torch.tensor(list(self.deque)) 33 | return d.median().item() 34 | 35 | @property 36 | def avg(self): 37 | d = torch.tensor(list(self.deque)) 38 | return d.mean().item() 39 | 40 | @property 41 | def global_avg(self): 42 | return self.total / self.count 43 | 44 | 45 | class AverageMeter(object): 46 | """Computes and stores the average and current value""" 47 | 48 | def __init__(self): 49 | self.reset() 50 | 51 | def reset(self): 52 | self.val = 0 53 | self.avg = 0 54 | self.sum = 0 55 | self.count = 0 56 | 57 | def update(self, val, n=1): 58 | self.val = val 59 | self.sum += val * n 60 | self.count += n 61 | self.avg = self.sum / self.count 62 | 63 | 64 | class MetricLogger(object): 65 | def __init__(self, delimiter="\t"): 66 | self.meters = defaultdict(SmoothedValue) 67 | self.delimiter = delimiter 68 | 69 | def update(self, **kwargs): 70 | for k, v in kwargs.items(): 71 | if isinstance(v, torch.Tensor): 72 | v = v.item() 73 | assert isinstance(v, (float, int)) 74 | self.meters[k].update(v) 75 | 76 | def __getattr__(self, attr): 77 | if attr in self.meters: 78 | return self.meters[attr] 79 | if attr in self.__dict__: 80 | return self.__dict__[attr] 81 | raise AttributeError("'{}' object has no attribute '{}'".format( 82 | type(self).__name__, attr)) 83 | 84 | def __str__(self): 85 | loss_str = [] 86 | for name, meter in self.meters.items(): 87 | loss_str.append( 88 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 89 | ) 90 | return self.delimiter.join(loss_str) 91 | 92 | 93 | # haotian added tensorboard support 94 | class TensorboardLogger(MetricLogger): 95 | def __init__(self, 96 | log_dir, 97 | start_iter=0, 98 | delimiter='\t' 99 | ): 100 | super(TensorboardLogger, self).__init__(delimiter) 101 | self.iteration = start_iter 102 | 
self.writer = self._get_tensorboard_writer(log_dir) 103 | 104 | @staticmethod 105 | def _get_tensorboard_writer(log_dir): 106 | try: 107 | from tensorboardX import SummaryWriter 108 | except ImportError: 109 | raise ImportError( 110 | 'To use tensorboard please install tensorboardX ' 111 | '[ pip install tensorflow tensorboardX ].' 112 | ) 113 | 114 | if is_main_process(): 115 | # timestamp = datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H:%M') 116 | tb_logger = SummaryWriter('{}'.format(log_dir)) 117 | return tb_logger 118 | else: 119 | return None 120 | 121 | def update(self, **kwargs): 122 | super(TensorboardLogger, self).update(**kwargs) 123 | if self.writer: 124 | for k, v in kwargs.items(): 125 | if isinstance(v, torch.Tensor): 126 | v = v.item() 127 | assert isinstance(v, (float, int)) 128 | self.writer.add_scalar(k, v, self.iteration) 129 | 130 | self.iteration += 1 131 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/miscellaneous.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import errno 3 | import os 4 | from .comm import is_main_process 5 | 6 | def mkdir(path): 7 | try: 8 | os.makedirs(path) 9 | except OSError as e: 10 | if e.errno != errno.EEXIST: 11 | raise 12 | 13 | 14 | def save_config(cfg, path): 15 | if is_main_process(): 16 | with open(path, 'w') as f: 17 | f.write(cfg.dump()) 18 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/model_zoo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import os 3 | import sys 4 | 5 | try: 6 | from torch.hub import _download_url_to_file 7 | from torch.hub import urlparse 8 | from torch.hub import HASH_REGEX 9 | except ImportError: 10 | from torch.utils.model_zoo import _download_url_to_file 11 | from torch.utils.model_zoo import urlparse 12 | from torch.utils.model_zoo import HASH_REGEX 13 | 14 | from maskrcnn_benchmark.utils.comm import is_main_process 15 | from maskrcnn_benchmark.utils.comm import synchronize 16 | 17 | 18 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py 19 | # but with a few improvements and modifications 20 | def cache_url(url, model_dir='model', progress=True): 21 | r"""Loads the Torch serialized object at the given URL. 22 | If the object is already present in `model_dir`, it's deserialized and 23 | returned. The filename part of the URL should follow the naming convention 24 | ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more 25 | digits of the SHA256 hash of the contents of the file. The hash is used to 26 | ensure unique names and to verify the contents of the file. 27 | If `model_dir` is None, it falls back to ``$TORCH_HOME/models`` where 28 | ``$TORCH_HOME`` defaults to ``~/.torch``; that fallback can be 29 | overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
30 | Args: 31 | url (string): URL of the object to download 32 | model_dir (string, optional): directory in which to save the object 33 | progress (bool, optional): whether or not to display a progress bar to stderr 34 | Example: 35 | >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') 36 | """ 37 | if model_dir is None: 38 | torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) 39 | model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) 40 | if not os.path.exists(model_dir): 41 | os.makedirs(model_dir, exist_ok=True) 42 | parts = urlparse(url) 43 | filename = os.path.basename(parts.path) 44 | if filename == "model_final.pkl": 45 | # workaround as pre-trained Caffe2 models from Detectron have all the same filename 46 | # so make the full path the filename by replacing / with _ 47 | filename = parts.path.replace("/", "_") 48 | cached_file = os.path.join(model_dir, filename) 49 | if not os.path.exists(cached_file): 50 | sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) 51 | hash_prefix = HASH_REGEX.search(filename) 52 | if hash_prefix is not None: 53 | hash_prefix = hash_prefix.group(1) 54 | # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, 55 | # which matches the hash PyTorch uses. So we skip the hash matching 56 | # if the hash_prefix is less than 6 characters 57 | if len(hash_prefix) < 6: 58 | hash_prefix = None 59 | _download_url_to_file(url, cached_file, hash_prefix, progress=progress) 60 | synchronize() 61 | return cached_file 62 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/pretrain_model_loading.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from collections import OrderedDict 6 | 7 | def _remove_bn_statics(state_dict): 8 | layer_keys = sorted(state_dict.keys()) 9 | remove_list = [] 10 | for key in layer_keys: 11 | if 'running_mean' in key or 'running_var' in key or 'num_batches_tracked' in key: 12 | remove_list.append(key) 13 | for key in remove_list: 14 | del state_dict[key] 15 | return state_dict 16 | 17 | def _rename_conv_weights_for_deformable_conv_layers(state_dict, cfg): 18 | import re 19 | layer_keys = sorted(state_dict.keys()) 20 | for ix, stage_with_dcn in enumerate(cfg.MODEL.RESNETS.STAGE_WITH_DCN, 1): 21 | if not stage_with_dcn: 22 | continue 23 | for old_key in layer_keys: 24 | pattern = ".*layer{}.*conv2.*".format(ix) 25 | r = re.match(pattern, old_key) 26 | if r is None: 27 | continue 28 | for param in ["weight", "bias"]: 29 | if old_key.find(param) == -1: 30 | continue 31 | if 'unit01' in old_key: 32 | continue 33 | new_key = old_key.replace( 34 | "conv2.{}".format(param), "conv2.conv.{}".format(param) 35 | ) 36 | print("pattern: {}, old_key: {}, new_key: {}".format( 37 | pattern, old_key, new_key 38 | )) 39 | state_dict[new_key] = state_dict[old_key] 40 | del state_dict[old_key] 41 | return state_dict 42 | 43 | 44 | def load_pretrain_format(cfg, f): 45 | model = torch.load(f) 46 | model = _remove_bn_statics(model) 47 | model = _rename_conv_weights_for_deformable_conv_layers(model, cfg) 48 | 49 | return dict(model=model) 50 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/registry.py: -------------------------------------------------------------------------------- 1 | #
Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | 4 | def _register_generic(module_dict, module_name, module): 5 | assert module_name not in module_dict 6 | module_dict[module_name] = module 7 | 8 | 9 | class Registry(dict): 10 | ''' 11 | A helper class for managing the registration of modules; it extends a dictionary 12 | and provides a register function. 13 | 14 | Eg. creating a registry: 15 | some_registry = Registry({"default": default_module}) 16 | 17 | There are two ways of registering new modules: 18 | 1): the normal way is just calling the register function: 19 | def foo(): 20 | ... 21 | some_registry.register("foo_module", foo) 22 | 2): use it as a decorator when declaring the module: 23 | @some_registry.register("foo_module") 24 | @some_registry.register("foo_module_nickname") 25 | def foo(): 26 | ... 27 | 28 | Accessing a module is just like using a dictionary, eg: 29 | f = some_registry["foo_module"] 30 | ''' 31 | def __init__(self, *args, **kwargs): 32 | super(Registry, self).__init__(*args, **kwargs) 33 | 34 | def register(self, module_name, module=None): 35 | # used as function call 36 | if module is not None: 37 | _register_generic(self, module_name, module) 38 | return 39 | 40 | # used as decorator 41 | def register_fn(fn): 42 | _register_generic(self, module_name, fn) 43 | return fn 44 | 45 | return register_fn 46 | -------------------------------------------------------------------------------- /maskrcnn_benchmark/utils/shallow_contrastive_loss_helper.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import maskrcnn_benchmark.utils.dist as dist 3 | 4 | 5 | def normalized_positive_map(positive_map): 6 | positive_map = positive_map.float() 7 | positive_map_num_pos = positive_map.sum(2) 8 | positive_map_num_pos[positive_map_num_pos == 0] = 1e-6 9 | positive_map = positive_map / positive_map_num_pos.unsqueeze(-1) 10 | return positive_map 11 | 12 | 13 | def pad_tensor_given_dim_length(tensor, dim, length, padding_value=0, batch_first=True): 14 | new_size = list(tensor.size()[:dim]) + [length] + list(tensor.size()[dim + 1:]) 15 | out_tensor = tensor.data.new(*new_size).fill_(padding_value) 16 | if batch_first: 17 | out_tensor[:, :tensor.size(1), ...] = tensor 18 | else: 19 | out_tensor[:tensor.size(0), ...] = tensor 20 | return out_tensor 21 | 22 | 23 | def pad_random_negative_tensor_given_length(positive_tensor, negative_padding_tensor, length=None): 24 | assert positive_tensor.shape[0] + negative_padding_tensor.shape[0] == length 25 | return torch.cat((positive_tensor, negative_padding_tensor), dim=0) 26 | 27 | 28 | def gather_tensors(tensor): 29 | """ 30 | Performs all_gather operation on the provided tensors. 31 | *** Warning ***: torch.distributed.all_gather has no gradient.
32 | """ 33 | if not dist.is_dist_avail_and_initialized(): 34 | return torch.stack([tensor], dim=0) 35 | 36 | total = dist.get_world_size() 37 | rank = torch.distributed.get_rank() 38 | # gathered_normalized_img_emb = [torch.zeros_like(normalized_img_emb) for _ in range(total)] 39 | # torch.distributed.all_gather(gathered_normalized_img_emb, normalized_img_emb) 40 | 41 | tensors_gather = [ 42 | torch.zeros_like(tensor) 43 | for _ in range(total) 44 | ] 45 | torch.distributed.all_gather(tensors_gather, tensor, async_op=False) 46 | 47 | # need to do this to restore propagation of the gradients 48 | tensors_gather[rank] = tensor 49 | output = torch.stack(tensors_gather, dim=0) 50 | return output 51 | 52 | 53 | def convert_to_roi_format(boxes): 54 | concat_boxes = boxes.bbox 55 | device, dtype = concat_boxes.device, concat_boxes.dtype 56 | ids = torch.full((len(boxes), 1), 0, dtype=dtype, device=device) 57 | rois = torch.cat([ids, concat_boxes], dim=1) 58 | return rois -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | certifi 2 | charset-normalizer==2.0.12 3 | click==8.1.3 4 | cycler==0.11.0 5 | docopt==0.6.2 6 | einops==0.4.1 7 | filelock==3.7.1 8 | fonttools==4.33.3 9 | fsspec==2023.5.0 10 | ftfy==6.1.1 11 | huggingface-hub==0.15.1 12 | idna==3.3 13 | inflect==5.6.0 14 | joblib==1.1.0 15 | kiwisolver==1.4.3 16 | matplotlib==3.5.2 17 | mkl-fft==1.3.1 18 | mkl-random 19 | mkl-service==2.4.0 20 | nltk==3.7 21 | numpy 22 | opencv-python==4.6.0.66 23 | packaging==21.3 24 | Pillow==9.0.1 25 | pipreqs==0.4.13 26 | prettytable==3.3.0 27 | protobuf==3.20.1 28 | pycocotools==2.0.4 29 | pymongo==4.1.1 30 | pyparsing==3.0.9 31 | python-dateutil==2.8.2 32 | PyYAML==6.0 33 | regex==2022.6.2 34 | requests==2.28.0 35 | sacremoses==0.0.53 36 | scipy==1.8.1 37 | Shapely==1.8.2 38 | six 39 | tensorboardX==2.5.1 40 | timm==0.5.4 41 | tokenizers==0.13.3 42 | torch==1.9.1 43 | torchaudio==0.9.0a0+a85b239 44 | torchvision==0.10.1 45 | tqdm==4.64.0 46 | transformers 47 | typing_extensions 48 | urllib3==1.26.9 49 | wcwidth==0.2.5 50 | yacs==0.1.8 51 | yarg==0.1.9 52 | --------------------------------------------------------------------------------
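As a closing usage note, the `Registry` helper in `maskrcnn_benchmark/utils/registry.py` above is dependency-free, so both registration styles its docstring describes can be exercised directly. A runnable sketch (the `BACKBONES` registry and the builder functions are hypothetical, for illustration only):

```python
from maskrcnn_benchmark.utils.registry import Registry

BACKBONES = Registry()  # hypothetical registry, mirroring the docstring example

# Style 1: explicit register() call.
def build_resnet(cfg):
    return "resnet backbone"

BACKBONES.register("resnet", build_resnet)

# Style 2: decorator; stacking registers the same builder under an alias too.
@BACKBONES.register("swin")
@BACKBONES.register("swin_nickname")
def build_swin(cfg):
    return "swin backbone"

assert BACKBONES["swin"] is build_swin  # access is plain dict lookup
```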