├── fsdet
    ├── model_zoo
    │   ├── configs
    │   └── __init__.py
    ├── utils
    │   ├── __init__.py
    │   ├── README.md
    │   ├── serialize.py
    │   ├── registry.py
    │   ├── collect_env.py
    │   ├── env.py
    │   ├── colormap.py
    │   └── video_visualizer.py
    ├── layers
    │   ├── csrc
    │   │   ├── README.md
    │   │   ├── cuda_version.cu
    │   │   ├── box_iou_rotated
    │   │   │   ├── box_iou_rotated.h
    │   │   │   ├── box_iou_rotated_cpu.cpp
    │   │   │   └── box_iou_rotated_cuda.cu
    │   │   ├── nms_rotated
    │   │   │   ├── nms_rotated.h
    │   │   │   ├── nms_rotated_cpu.cpp
    │   │   │   └── nms_rotated_cuda.cu
    │   │   ├── vision.cpp
    │   │   ├── ROIAlignRotated
    │   │   │   └── ROIAlignRotated.h
    │   │   └── ROIAlign
    │   │       └── ROIAlign.h
    │   ├── __init__.py
    │   ├── shape_spec.py
    │   ├── rotated_boxes.py
    │   ├── roi_align_rotated.py
    │   └── roi_align.py
    ├── __init__.py
    ├── modeling
    │   ├── proposal_generator
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   └── proposal_utils.py
    │   ├── roi_heads
    │   │   └── __init__.py
    │   ├── meta_arch
    │   │   ├── __init__.py
    │   │   └── build.py
    │   ├── backbone
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   └── backbone.py
    │   ├── utils.py
    │   ├── __init__.py
    │   ├── postprocessing.py
    │   └── sampling.py
    ├── data
    │   ├── transforms
    │   │   ├── __init__.py
    │   │   └── transform.py
    │   ├── datasets
    │   │   ├── README.md
    │   │   ├── __init__.py
    │   │   ├── pascal_voc.py
    │   │   └── rfs.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   └── grouped_batch_sampler.py
    │   ├── __init__.py
    │   └── common.py
    ├── solver
    │   ├── __init__.py
    │   ├── masked_sgd.py
    │   ├── build.py
    │   └── lr_scheduler.py
    ├── config
    │   ├── __init__.py
    │   └── config.py
    ├── structures
    │   ├── __init__.py
    │   └── image_list.py
    ├── checkpoint
    │   ├── __init__.py
    │   ├── detection_checkpoint.py
    │   └── catalog.py
    ├── engine
    │   ├── __init__.py
    │   └── launch.py
    └── evaluation
        ├── __init__.py
        └── testing.py
├── Commitment.pdf
├── scripts
    ├── train_base.sh
    ├── rand_weight.sh
    ├── remove.sh
    ├── create_config.sh
    └── test.sh
├── configs
    ├── RFS
    │   ├── base-training
    │   │   ├── R101_FPN_base_training_split2.yml
    │   │   ├── R101_FPN_base_training_split3.yml
    │   │   └── R101_FPN_base_training_split1.yml
    │   ├── split1
    │   │   ├── 2shot_GPB_PFB_proloss.yml
    │   │   ├── 3shot_GPB_PFB_proloss.yml
    │   │   ├── 5shot_GPB_PFB_proloss.yml
    │   │   ├── 10shot_GPB_PFB_proloss.yml
    │   │   └── 1shot_GPB_PFB_proloss.yml
    │   ├── split2
    │   │   ├── 2shot_GPB_PFB_proloss.yml
    │   │   ├── 1shot_GPB_PFB_proloss.yml
    │   │   ├── 3shot_GPB_PFB_proloss.yml
    │   │   ├── 10shot_GPB_PFB_proloss.yml
    │   │   └── 5shot_GPB_PFB_proloss.yml
    │   └── split3
    │       ├── 2shot_GPB_PFB_proloss.yml
    │       ├── 3shot_GPB_PFB_proloss.yml
    │       ├── 5shot_GPB_PFB_proloss.yml
    │       ├── 10shot_GPB_PFB_proloss.yml
    │       └── 1shot_GPB_PFB_proloss.yml
    ├── PASCAL_VOC
    │   ├── base-training
    │   │   ├── R101_FPN_base_training_split2.yml
    │   │   ├── R101_FPN_base_training_split3.yml
    │   │   └── R101_FPN_base_training_split1.yml
    │   ├── split2
    │   │   ├── 2shot_GPB_PFB_proloss.yml
    │   │   ├── 5shot_GPB_PFB_proloss.yml
    │   │   ├── 10shot_GPB_PFB_proloss.yml
    │   │   ├── 1shot_GPB_PFB_proloss.yml
    │   │   └── 3shot_GPB_PFB_proloss.yml
    │   ├── split3
    │   │   ├── 2shot_GPB_PFB_proloss.yml
    │   │   ├── 3shot_GPB_PFB_proloss.yml
    │   │   ├── 10shot_GPB_PFB_proloss.yml
    │   │   ├── 1shot_GPB_PFB_proloss.yml
    │   │   └── 5shot_GPB_PFB_proloss.yml
    │   └── split1
    │       ├── 2shot_GPB_PFB_proloss.yml
    │       ├── 3shot_GPB_PFB_proloss.yml
    │       ├── 5shot_GPB_PFB_proloss.yml
    │       ├── 10shot_GPB_PFB_proloss.yml
    │       └── 1shot_GPB_PFB_proloss.yml
    └── Base-RCNN-FPN.yaml
├── setup.cfg
├── run_rfs.sh
├── run_voc.sh
├── tools
    ├── visualize_json_results.py
    ├── visualize_data.py
    ├── train_net.py
    └── aggregate_seeds.py
├── .gitignore
├── setup.py
└── demo
    └── demo.py

/fsdet/model_zoo/configs:
-------------------------------------------------------------------------------- 1 | /home/wtb/2022ECCV/Ours/configs -------------------------------------------------------------------------------- /Commitment.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wytbwytb/WEN/HEAD/Commitment.pdf -------------------------------------------------------------------------------- /fsdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /scripts/train_base.sh: -------------------------------------------------------------------------------- 1 | python3 tools/train_net.py --num-gpus 3 \ 2 | --config-file configs/RFS/base-training/R101_FPN_base_training_split3.yml 3 | 4 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include <cuda_runtime_api.h> 2 | 3 | namespace fsdet { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace fsdet 8 | -------------------------------------------------------------------------------- /fsdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | 8 | __version__ = "0.1" 9 | -------------------------------------------------------------------------------- /fsdet/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /fsdet/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import PROPOSAL_GENERATOR_REGISTRY, build_proposal_generator 3 | from .rpn import RPN_HEAD_REGISTRY, build_rpn_head 4 | -------------------------------------------------------------------------------- /fsdet/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .transform import * 3 | from fvcore.transforms.transform import * 4 | from .transform_gen import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /fsdet/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /scripts/rand_weight.sh: -------------------------------------------------------------------------------- 1 | python3 tools/ckpt_surgery.py \ 2 | --src1 /media/datasets/gpu17_models/FSCE/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_final.pth \ 3 | --method randinit \ 4 | --save-dir /media/datasets/gpu17_models/FSCE/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3 5 | -------------------------------------------------------------------------------- /scripts/remove.sh: -------------------------------------------------------------------------------- 1 | python3 -m tools.ckpt_surgery \ 2 | --src1 /media/datasets/gpu17_models/TFA/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_final.pth \ 3 | --method remove \ 4 | --save-dir /media/datasets/gpu17_models/TFA/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_all2 -------------------------------------------------------------------------------- /fsdet/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .box_head import ROI_BOX_HEAD_REGISTRY, build_box_head 3 | from .roi_heads import ( 4 | ROI_HEADS_REGISTRY, 5 | ROIHeads, 6 | StandardROIHeads, 7 | build_roi_heads, 8 | select_foreground_proposals, 9 | ) 10 | -------------------------------------------------------------------------------- /fsdet/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from .build import META_ARCH_REGISTRY, build_model # isort:skip 5 | 6 | # import all the meta_arch, so they will be registered 7 | from .rcnn import GeneralizedRCNN, ProposalNetwork 8 | from .retinanet import RetinaNet 9 | -------------------------------------------------------------------------------- /scripts/create_config.sh: -------------------------------------------------------------------------------- 1 | for split in 2 2 | do 3 | for seed in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 4 | do 5 | for shot in 1 2 3 5 10 6 | do 7 | python3 tools/create_config.py --dataset voc --config_root configs/RFS-detection \ 8 | --shot ${shot} --seed ${seed} --split ${split} 9 | done 10 | done 11 | done -------------------------------------------------------------------------------- /fsdet/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .compat import downgrade_config, upgrade_config 3 | from .config import CfgNode, get_cfg, global_cfg, set_global_cfg 4 | 5 | __all__ = [ 6 | "CfgNode", 7 | "get_cfg", 8 | "global_cfg", 9 | "set_global_cfg", 10 | "downgrade_config", 11 | "upgrade_config", 12 | ] 13 | -------------------------------------------------------------------------------- /fsdet/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from .build import build_backbone, BACKBONE_REGISTRY # noqa F401 isort:skip 3 | 4 | from .backbone import Backbone 5 | from .fpn import FPN 6 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 7 | 8 | # TODO can expose more resnet blocks after careful consideration 9 | -------------------------------------------------------------------------------- /fsdet/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .rotated_boxes import RotatedBoxes 6 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /fsdet/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### Common Datasets 4 | 5 | The datasets implemented here do not need to load the data into the final format. 6 | They should provide only the minimal data structure needed to use the dataset, so loading can be very efficient. 7 | 8 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 9 | Let the downstream code decide how to read them. 10 | -------------------------------------------------------------------------------- /fsdet/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | ] 11 | -------------------------------------------------------------------------------- /fsdet/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .coco import load_coco_json 3 | from .lvis import load_lvis_json, register_lvis_instances 4 | from .register_coco import register_coco_instances 5 | from . import builtin # ensure the builtin datasets are registered 6 | 7 | 8 | __all__ = [k for k in globals().keys() if "builtin" not in k and not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /fsdet/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED # register the handler 7 | from .detection_checkpoint import DetectionCheckpointer 8 | from fvcore.common.checkpoint import Checkpointer, PeriodicCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /fsdet/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
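The convention described in fsdet/data/datasets/README.md above can be made concrete with a minimal sketch: a loader returns lightweight dicts of paths and labels, and never reads pixels itself. The load_image_dicts helper and its field names are illustrative assumptions, not functions of this repo:

import os

def load_image_dicts(image_root, labels):
    """Return lightweight dicts; the images themselves are NOT read here."""
    dicts = []
    for file_name, label in labels.items():
        dicts.append({
            "file_name": os.path.join(image_root, file_name),  # path only
            "label": label,  # e.g. a class id; boxes would go in an "annotations" list
        })
    return dicts  # a downstream mapper decides how (and whether) to read each image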
2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | 9 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 10 | # but still make them available here 11 | from .hooks import * 12 | from .defaults import * 13 | -------------------------------------------------------------------------------- /scripts/test.sh: -------------------------------------------------------------------------------- 1 | python tools/test_net.py --num-gpus 1 \ 2 | --config-file configs/PASCAL_VOC/split2/5shot_GPB_PFB_proloss.yml \ 3 | --eval-only \ 4 | MODEL.WEIGHTS /media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_5shot_GPB_PFB_proloss/428.pth \ 5 | MODEL.ROI_HEADS.NOVEL_MODULE.INIT_FEATURE_WEIGHT /media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_5shot_GPB_PFB_proloss/428.pkl 6 | -------------------------------------------------------------------------------- /fsdet/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Model Zoo API for FsDet: a collection of functions to create common model architectures and 4 | optionally load pre-trained weights as released in 5 | `MODEL_ZOO.md `_. 6 | """ 7 | from .model_zoo import get, get_config_file, get_checkpoint_url 8 | 9 | __all__ = ["get_checkpoint_url", "get", "get_config_file"] 10 | -------------------------------------------------------------------------------- /fsdet/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .coco_evaluation import COCOEvaluator 3 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 4 | from .lvis_evaluation import LVISEvaluator 5 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 6 | from .rfs_evaluation import RFSDetectionEvaluator 7 | from .testing import print_csv_format, verify_results 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /fsdet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deform_conv import DeformConv, ModulatedDeformConv 4 | from .nms import batched_nms, batched_nms_rotated, nms, nms_rotated 5 | from .roi_align import ROIAlign, roi_align 6 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 7 | from .shape_spec import ShapeSpec 8 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /fsdet/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . 
import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_detection_test_loader, 6 | build_detection_train_loader, 7 | get_detection_dataset_dicts, 8 | print_instances_class_histogram, 9 | ) 10 | from .catalog import DatasetCatalog, MetadataCatalog 11 | from .common import DatasetFromList, MapDataset 12 | from .dataset_mapper import DatasetMapper 13 | 14 | # ensure the builtin datasets are registered 15 | from . import datasets, samplers # isort:skip 16 | 17 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 18 | -------------------------------------------------------------------------------- /fsdet/modeling/meta_arch/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from fsdet.utils.registry import Registry 3 | 4 | META_ARCH_REGISTRY = Registry("META_ARCH") # noqa F401 isort:skip 5 | META_ARCH_REGISTRY.__doc__ = """ 6 | Registry for meta-architectures, i.e. the whole model. 7 | 8 | The registered object will be called with `obj(cfg)` 9 | and expected to return a `nn.Module` object. 10 | """ 11 | 12 | 13 | def build_model(cfg): 14 | """ 15 | Build the whole model, defined by `cfg.MODEL.META_ARCHITECTURE`. 16 | """ 17 | meta_arch = cfg.MODEL.META_ARCHITECTURE 18 | return META_ARCH_REGISTRY.get(meta_arch)(cfg) 19 | -------------------------------------------------------------------------------- /configs/RFS/base-training/R101_FPN_base_training_split2.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 15 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('rfs_trainval_base2',) 14 | TEST: ('rfs_test_base2',) 15 | SOLVER: 16 | IMS_PER_BATCH: 12 17 | STEPS: (16000, 21000) 18 | MAX_ITER: 24000 # 17.4 epochs 19 | WARMUP_ITERS: 100 20 | OUTPUT_DIR: "/media/datasets/gpu17_models/FSCE/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2" -------------------------------------------------------------------------------- /configs/RFS/base-training/R101_FPN_base_training_split3.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 15 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('rfs_trainval_base3',) 14 | TEST: ('rfs_test_base3',) 15 | SOLVER: 16 | IMS_PER_BATCH: 12 17 | STEPS: (16000, 21000) 18 | MAX_ITER: 24000 # 17.4 epochs 19 | WARMUP_ITERS: 100 20 | OUTPUT_DIR: "/media/datasets/gpu17_models/FSCE/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3" 21 | -------------------------------------------------------------------------------- /configs/RFS/base-training/R101_FPN_base_training_split1.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | ROI_HEADS: 9 | NUM_CLASSES: 15 10 | INPUT: 11 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608,
640, 672, 704, 736, 768, 800) 12 | MIN_SIZE_TEST: 800 13 | DATASETS: 14 | TRAIN: ('rfs_trainval_base1',) 15 | TEST: ('rfs_test_base1',) 16 | SOLVER: 17 | IMS_PER_BATCH: 12 18 | STEPS: (16000, 21000) 19 | MAX_ITER: 24000 # 17.4 epochs 20 | WARMUP_ITERS: 100 21 | OUTPUT_DIR: "/media/datasets/gpu17_models/FSCE/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base1" 22 | -------------------------------------------------------------------------------- /fsdet/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to obtain the shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /fsdet/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | # import torch 5 | from fsdet import _C 6 | 7 | 8 | def pairwise_iou_rotated(boxes1, boxes2): 9 | """ 10 | Return intersection-over-union (Jaccard index) of boxes. 11 | 12 | Both sets of boxes are expected to be in 13 | (x_center, y_center, width, height, angle) format. 
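For concreteness, a minimal usage sketch of the wrapper above. This is hedged: importing it requires the compiled fsdet._C extension, and the box values here are arbitrary:

import torch
from fsdet.layers.rotated_boxes import pairwise_iou_rotated

# (x_center, y_center, width, height, angle in degrees)
boxes1 = torch.tensor([[50.0, 50.0, 100.0, 20.0, 0.0]])
boxes2 = torch.tensor([[50.0, 50.0, 100.0, 20.0, 45.0],
                       [50.0, 50.0, 100.0, 20.0, 0.0]])
iou = pairwise_iou_rotated(boxes1, boxes2)  # Tensor of shape [1, 2]
# iou[0, 1] == 1.0, since the second box in boxes2 is identical to boxes1[0]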
14 | 15 | Arguments: 16 | boxes1 (Tensor[N, 5]) 17 | boxes2 (Tensor[M, 5]) 18 | 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | 24 | return _C.box_iou_rotated(boxes1, boxes2) 25 | -------------------------------------------------------------------------------- /configs/PASCAL_VOC/base-training/R101_FPN_base_training_split2.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 15 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval_base2', 'voc_2012_trainval_base2') 14 | TEST: ('voc_2007_test_base2',) 15 | SOLVER: 16 | IMS_PER_BATCH: 12 17 | STEPS: (24000, 32000) 18 | MAX_ITER: 36000 # 17.4 epochs 19 | WARMUP_ITERS: 100 20 | TEST: 21 | AUG: 22 | ENABLED: True 23 | EVAL_PERIOD: 5000 24 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base2" 25 | -------------------------------------------------------------------------------- /configs/PASCAL_VOC/base-training/R101_FPN_base_training_split3.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 15 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval_base3', 'voc_2012_trainval_base3') 14 | TEST: ('voc_2007_test_base3',) 15 | SOLVER: 16 | IMS_PER_BATCH: 12 17 | STEPS: (24000, 32000) 18 | MAX_ITER: 36000 # 17.4 epochs 19 | WARMUP_ITERS: 100 20 | TEST: 21 | AUG: 22 | ENABLED: True 23 | EVAL_PERIOD: 5000 24 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base3" 25 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length=100 3 | multi_line_output=4 4 | known_standard_library=numpy,setuptools 5 | known_myself=fsdet 6 | known_third_party=fvcore,matplotlib,cv2,torch,torchvision,PIL,pycocotools,yacs,termcolor,cityscapesscripts,tabulate,tqdm,scipy,lvis,psutil 7 | no_lines_before=STDLIB,THIRDPARTY 8 | sections=FUTURE,STDLIB,THIRDPARTY,myself,FIRSTPARTY,LOCALFOLDER 9 | default_section=FIRSTPARTY 10 | 11 | [mypy] 12 | python_version=3.6 13 | ignore_missing_imports = True 14 | warn_unused_configs = True 15 | disallow_untyped_defs = True 16 | check_untyped_defs = True 17 | warn_unused_ignores = True 18 | warn_redundant_casts = True 19 | show_column_numbers = True 20 | follow_imports = silent 21 | allow_redefinition = True 22 | ; Require all functions to be annotated 23 | disallow_incomplete_defs = True 24 | -------------------------------------------------------------------------------- /configs/PASCAL_VOC/base-training/R101_FPN_base_training_split1.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 
6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 15 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval_base1', 'voc_2012_trainval_base1') 14 | TEST: ('voc_2007_test_base1',) 15 | SOLVER: 16 | IMS_PER_BATCH: 12 17 | STEPS: (24000, 32000) 18 | MAX_ITER: 36000 # 17.4 epochs 19 | WARMUP_ITERS: 100 20 | TEST: 21 | AUG: 22 | ENABLED: True 23 | EVAL_PERIOD: 5000 24 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base1" 25 | -------------------------------------------------------------------------------- /fsdet/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from fsdet.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ 6 | Registry for proposal generators, which produce object proposals from feature maps. 7 | 8 | The registered object will be called with `obj(cfg, input_shape)`. 9 | The call should return a `nn.Module` object. 10 | """ 11 | 12 | from . import rpn # noqa F401 isort:skip 13 | 14 | 15 | def build_proposal_generator(cfg, input_shape): 16 | """ 17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 18 | The name can be "PrecomputedProposals" to use no proposal generator. 19 | """ 20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 21 | if name == "PrecomputedProposals": 22 | return None 23 | 24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 25 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace fsdet { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1, boxes2); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1, boxes2); 33 | } 34 | 35 | } // namespace fsdet 36 | -------------------------------------------------------------------------------- /fsdet/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable. Note that it uses 8 | heavyweight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable).
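A minimal sketch of that closure case (the scale variable and the lambda are made up for illustration; plain pickle would reject the lambda, while the wrapper routes it through cloudpickle):

import pickle

from fsdet.utils.serialize import PicklableWrapper

scale = 2
mapper = PicklableWrapper(lambda x: x * scale)  # a closure over `scale`
restored = pickle.loads(pickle.dumps(mapper))   # __reduce__ defers to cloudpickle
assert restored(3) == 6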
10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace fsdet { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda(dets, scores, iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_rotated_cpu(dets, scores, iou_threshold); 36 | } 37 | 38 | } // namespace fsdet 39 | -------------------------------------------------------------------------------- /fsdet/modeling/backbone/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from fsdet.layers import ShapeSpec 3 | from fsdet.utils.registry import Registry 4 | 5 | from .backbone import Backbone 6 | 7 | BACKBONE_REGISTRY = Registry("BACKBONE") 8 | BACKBONE_REGISTRY.__doc__ = """ 9 | Registry for backbones, which extract feature maps from images. 10 | 11 | The registered object must be a callable that accepts two arguments: 12 | 13 | 1. A :class:`fsdet.config.CfgNode` 14 | 2. A :class:`fsdet.layers.ShapeSpec`, which contains the input shape specification. 15 | 16 | It must return an instance of :class:`Backbone`. 17 | """ 18 | 19 | 20 | def build_backbone(cfg, input_shape=None): 21 | """ 22 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
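To make the registry contract above concrete, a minimal sketch of registering a custom backbone. TinyBackbone is a made-up example, assuming the fvcore-style register() decorator and the Backbone interface (a forward returning a dict of feature maps, plus output_shape):

import torch.nn as nn

from fsdet.layers import ShapeSpec
from fsdet.modeling.backbone import BACKBONE_REGISTRY, Backbone

@BACKBONE_REGISTRY.register()
class TinyBackbone(Backbone):  # hypothetical, for illustration only
    def __init__(self, cfg, input_shape: ShapeSpec):
        super().__init__()
        self.conv = nn.Conv2d(input_shape.channels, 64, 3, stride=2, padding=1)

    def forward(self, x):
        return {"tiny": self.conv(x)}

    def output_shape(self):
        return {"tiny": ShapeSpec(channels=64, stride=2)}

# build_backbone(cfg) would pick this class when cfg.MODEL.BACKBONE.NAME == "TinyBackbone"

The same pattern applies to META_ARCH_REGISTRY (whose entries are called with cfg alone) and PROPOSAL_GENERATOR_REGISTRY (called with cfg and input_shape).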
23 | 24 | Returns: 25 | an instance of :class:`Backbone` 26 | """ 27 | if input_shape is None: 28 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 29 | 30 | backbone_name = cfg.MODEL.BACKBONE.NAME 31 | backbone = BACKBONE_REGISTRY.get(backbone_name)(cfg, input_shape) 32 | assert isinstance(backbone, Backbone) 33 | return backbone 34 | -------------------------------------------------------------------------------- /fsdet/modeling/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | @torch.no_grad() 6 | def concat_all_gathered(tensor): 7 | """gather and concat tensor from all GPUs""" 8 | gathered = [torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())] 9 | torch.distributed.all_gather(gathered, tensor) 10 | output = torch.cat(gathered, dim=0) 11 | return output 12 | 13 | @torch.no_grad() 14 | def select_all_gather(tensor, idx): 15 | """ 16 | args: 17 | idx (LongTensor), 0s and 1s. 18 | Performs all_gather operation on the provided tensors sliced by idx. 19 | """ 20 | world_size = torch.distributed.get_world_size() 21 | 22 | tensors_gather = [torch.ones_like(tensor) for _ in range(world_size)] 23 | torch.distributed.all_gather(tensors_gather, tensor, async_op=False) 24 | output = torch.cat(tensors_gather, dim=0) 25 | 26 | idx_gather = [torch.ones_like(idx) for _ in range(world_size)] 27 | torch.distributed.all_gather(idx_gather, idx, async_op=False) 28 | idx_gather = torch.cat(idx_gather , dim=0) 29 | keep = torch.where(idx_gather) 30 | return output[keep] -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | VERSION: 2 2 | MODEL: 3 | META_ARCHITECTURE: "GeneralizedRCNN" 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | ANCHOR_GENERATOR: 11 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 12 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 13 | RPN: 14 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 15 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 16 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 17 | # Detectron1 uses 2000 proposals per-batch, 18 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 19 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 20 | POST_NMS_TOPK_TRAIN: 1000 21 | POST_NMS_TOPK_TEST: 1000 22 | ROI_HEADS: 23 | NAME: "StandardROIHeads" 24 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | SOLVER: 30 | IMS_PER_BATCH: 8 31 | BASE_LR: 0.02 32 | STEPS: (60000, 80000) 33 | MAX_ITER: 90000 34 | INPUT: 35 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 36 | -------------------------------------------------------------------------------- /fsdet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import torch 3 | 4 | from fsdet.layers import ShapeSpec 5 | 6 | from .anchor_generator import build_anchor_generator, ANCHOR_GENERATOR_REGISTRY 7 | from .backbone import ( 8 | BACKBONE_REGISTRY, 9 | FPN, 10 | Backbone, 11 | ResNet, 12 | ResNetBlockBase, 13 | build_backbone, 14 | build_resnet_backbone, 15 | make_stage, 16 | ) 17 | from .meta_arch import ( 18 | META_ARCH_REGISTRY, 19 | GeneralizedRCNN, 20 | ProposalNetwork, 21 | RetinaNet, 22 | build_model, 23 | ) 24 | from .postprocessing import detector_postprocess 25 | from .proposal_generator import ( 26 | PROPOSAL_GENERATOR_REGISTRY, 27 | build_proposal_generator, 28 | RPN_HEAD_REGISTRY, 29 | build_rpn_head, 30 | ) 31 | from .roi_heads import ( 32 | ROI_BOX_HEAD_REGISTRY, 33 | ROI_HEADS_REGISTRY, 34 | ROIHeads, 35 | StandardROIHeads, 36 | build_box_head, 37 | build_roi_heads, 38 | ) 39 | from .utils import ( 40 | concat_all_gathered, 41 | ) 42 | from .contrastive_loss import ( 43 | ContrastiveHead, 44 | SupConLoss, 45 | ) 46 | 47 | _EXCLUDE = {"torch", "ShapeSpec"} 48 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 49 | 50 | assert ( 51 | torch.Tensor([1]) == torch.Tensor([2]) 52 | ).dtype == torch.bool, "Your Pytorch is too old. Please update to contain https://github.com/pytorch/pytorch/pull/21113" 53 | -------------------------------------------------------------------------------- /configs/RFS/split1/2shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 2 41 | TRAIN: ('rfs_trainval_all1_2shot_seed0',) 42 | TEST: ('rfs_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (8000,12000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split1_2shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split2/2shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 
20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 2 41 | TRAIN: ('rfs_trainval_all2_2shot_seed0',) 42 | TEST: ('rfs_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (8000,12000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split2_2shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split3/2shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 2 41 | TRAIN: ('rfs_trainval_all3_2shot_seed0',) 42 | TEST: ('rfs_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (8000,12000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split3_2shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split3/3shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: 
(480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 3 41 | TRAIN: ('rfs_trainval_all3_3shot_seed0',) 42 | TEST: ('rfs_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split3_3shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split3/5shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 5 41 | TRAIN: ('rfs_trainval_all3_5shot_seed0',) 42 | TEST: ('rfs_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split3_5shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split1/3shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 3 41 | TRAIN: ('rfs_trainval_all1_3shot_seed0',) 42 | TEST: ('rfs_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 
| ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split1_3shot_GPB_PFB_proloss_ablation" -------------------------------------------------------------------------------- /configs/RFS/split1/5shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 5 41 | TRAIN: ('rfs_trainval_all1_5shot_seed0',) 42 | TEST: ('rfs_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split1_5shot_GPB_PFB_proloss_ablation" -------------------------------------------------------------------------------- /configs/RFS/split2/1shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 1 41 | TRAIN: ('rfs_trainval_all2_1shot_seed0',) 42 | TEST: ('rfs_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (4000,6000) 49 | MAX_ITER: 10000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | 54 | TEST: 55 | AUG: 56 | ENABLED: True 57 | EVAL_PERIOD: 1000 58 | 59 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split2_1shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split2/3shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: 
"../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 3 41 | TRAIN: ('rfs_trainval_all2_3shot_seed0',) 42 | TEST: ('rfs_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 500 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 500 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split2_3shot_GPB_PFB_proloss_new" 59 | -------------------------------------------------------------------------------- /configs/RFS/split3/10shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 10 41 | TRAIN: ('rfs_trainval_all3_10shot_seed0',) 42 | TEST: ('rfs_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (12000,16000) 49 | MAX_ITER: 20000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split3_10shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split3/1shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 
| NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 1 41 | TRAIN: ('rfs_trainval_all3_1shot_seed0',) 42 | TEST: ('rfs_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (4000,6000) 49 | MAX_ITER: 10000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | 54 | TEST: 55 | AUG: 56 | ENABLED: True 57 | EVAL_PERIOD: 1000 58 | 59 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split3_1shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/RFS/split1/10shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.0 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 10 41 | TRAIN: ('rfs_trainval_all1_10shot_seed0',) 42 | TEST: ('rfs_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (12000,16000) 49 | MAX_ITER: 20000 50 | CHECKPOINT_PERIOD: 500 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 500 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split1_10shot_GPB_PFB_proloss_ablation" -------------------------------------------------------------------------------- /configs/RFS/split1/1shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | 
ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 1 41 | TRAIN: ('rfs_trainval_all1_1shot_seed0',) 42 | TEST: ('rfs_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (4000,6000) 49 | MAX_ITER: 10000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | 54 | TEST: 55 | AUG: 56 | ENABLED: True 57 | EVAL_PERIOD: 1000 58 | 59 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split1_1shot_GPB_PFB_proloss_ablation" -------------------------------------------------------------------------------- /configs/RFS/split2/10shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 10 41 | TRAIN: ('rfs_trainval_all2_10shot_seed0',) 42 | TEST: ('rfs_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (12000,16000) 49 | MAX_ITER: 20000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split2_10shot_GPB_PFB_proloss_new" 59 | -------------------------------------------------------------------------------- /configs/RFS/split2/5shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.08 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 5 41 | TRAIN: ('rfs_trainval_all2_5shot_seed0',) 42 | TEST: ('rfs_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 
1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/rfs/faster_rcnn/split2_5shot_GPB_PFB_proloss_new" 59 | -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split2/2shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 2 41 | TRAIN: ('voc_2007_trainval_all2_2shot_seed0',) 42 | TEST: ('voc_2007_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (8000,12000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_2shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split2/5shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 5 41 | TRAIN: ('voc_2007_trainval_all2_5shot_seed0',) 42 | TEST: ('voc_2007_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_5shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split3/2shot_GPB_PFB_proloss.yml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 2 41 | TRAIN: ('voc_2007_trainval_all3_2shot_seed0',) 42 | TEST: ('voc_2007_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (8000,12000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split3_2shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split3/3shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.08 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 3 41 | TRAIN: ('voc_2007_trainval_all3_3shot_seed0',) 42 | TEST: ('voc_2007_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split3_3shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split2/10shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 
12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 10 41 | TRAIN: ('voc_2007_trainval_all2_10shot_seed0',) 42 | TEST: ('voc_2007_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (12000,16000) 49 | MAX_ITER: 20000 50 | CHECKPOINT_PERIOD: 500 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 500 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_10shot_GPB_PFB_proloss" 59 | -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split2/1shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 1 41 | TRAIN: ('voc_2007_trainval_all2_1shot_seed0',) 42 | TEST: ('voc_2007_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (4000,6000) 49 | MAX_ITER: 10000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | 54 | TEST: 55 | AUG: 56 | ENABLED: True 57 | EVAL_PERIOD: 1000 58 | 59 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_1shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split2/3shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base2/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | 
PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 3 41 | TRAIN: ('voc_2007_trainval_all2_3shot_seed0',) 42 | TEST: ('voc_2007_test_all2',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 500 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 500 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split2_3shot_GPB_PFB_proloss" 59 | -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split3/10shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 10 41 | TRAIN: ('voc_2007_trainval_all3_10shot_seed0',) 42 | TEST: ('voc_2007_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (12000,16000) 49 | MAX_ITER: 20000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split3_10shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split3/1shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 1 41 | TRAIN: ('voc_2007_trainval_all3_1shot_seed0',) 42 | TEST: 
('voc_2007_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (4000,6000) 49 | MAX_ITER: 10000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | 54 | TEST: 55 | AUG: 56 | ENABLED: True 57 | EVAL_PERIOD: 1000 58 | 59 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc/faster_rcnn/split3_1shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split3/5shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base3/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 5 41 | TRAIN: ('voc_2007_trainval_all3_5shot_seed0',) 42 | TEST: ('voc_2007_test_all3',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/split3_5shot_GPB_PFB_proloss" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split1/2shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.08 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 2 41 | TRAIN: ('voc_2007_trainval_all1_2shot_seed0',) 42 | TEST: ('voc_2007_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (8000,12000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: 
"/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/split1_2shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split1/3shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.08 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 3 41 | TRAIN: ('voc_2007_trainval_all1_3shot_seed0',) 42 | TEST: ('voc_2007_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/split1_3shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split1/5shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 5 41 | TRAIN: ('voc_2007_trainval_all1_5shot_seed0',) 42 | TEST: ('voc_2007_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (10000,14000) 49 | MAX_ITER: 16000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/split1_5shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split1/10shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | 
_BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.3 31 | FUSE_ALPHA: 0.1 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 10 41 | TRAIN: ('voc_2007_trainval_all1_10shot_seed0',) 42 | TEST: ('voc_2007_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (12000,16000) 49 | MAX_ITER: 20000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | TEST: 54 | AUG: 55 | ENABLED: True 56 | EVAL_PERIOD: 1000 57 | 58 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/split1_10shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /configs/PASCAL_VOC/split1/1shot_GPB_PFB_proloss.yml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../Base-RCNN-FPN.yaml" 2 | 3 | MODEL: 4 | WEIGHTS: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/faster_rcnn_R_101_FPN_base1/model_reset_surgery.pth" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 101 8 | 9 | BACKBONE: 10 | FREEZE: False 11 | FREEZE_AT: 5 12 | 13 | PROPOSAL_GENERATOR: 14 | FREEZE: False 15 | RPN: 16 | POST_NMS_TOPK_TRAIN: 2000 17 | 18 | ROI_HEADS: 19 | NAME: "NovelROIHeads" 20 | NUM_CLASSES: 20 21 | OUTPUT_LAYER: "CosineSimOutputLayers" 22 | FREEZE_FEAT: False 23 | BATCH_SIZE_PER_IMAGE: 256 24 | IOU_THRESHOLDS: [0.4] 25 | NOVEL_MODULE: 26 | COPY_OPTION: True 27 | IOU_THRESH: 0.7 28 | PROTOTYPES_FUSE_ALPHA: 0.2 29 | PROTOTYPES_FEATURE_FUSE_ALPHA: 0.2 30 | PROLOSS_FACTOR: 0.1 31 | FUSE_ALPHA: 0.0 32 | 33 | INPUT: 34 | CROP: 35 | ENABLED: TRUE 36 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 37 | MIN_SIZE_TEST: 800 38 | 39 | DATASETS: 40 | SHOTS: 1 41 | TRAIN: ('voc_2007_trainval_all1_1shot_seed0',) 42 | TEST: ('voc_2007_test_all1',) 43 | 44 | SOLVER: 45 | IMS_PER_BATCH: 6 46 | BASE_LR: 0.001 47 | GAMMA: 0.5 48 | STEPS: (4000,6000) 49 | MAX_ITER: 10000 50 | CHECKPOINT_PERIOD: 1000 51 | WARMUP_ITERS: 200 52 | 53 | 54 | TEST: 55 | AUG: 56 | ENABLED: True 57 | EVAL_PERIOD: 1000 58 | 59 | OUTPUT_DIR: "/media/datasets/gpu17_models/Ours/checkpoints/voc_outline/faster_rcnn/split1_1shot_GPB_PFB_proloss_new" -------------------------------------------------------------------------------- /fsdet/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace fsdet { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto widths1 = boxes1.select(1, 2).contiguous(); 13 | auto heights1 = boxes1.select(1, 3).contiguous(); 14 | auto widths2 = boxes2.select(1, 2).contiguous(); 15 | auto heights2 = boxes2.select(1, 3).contiguous(); 16 | 17 | at::Tensor areas1 = widths1 * heights1; 18 | at::Tensor areas2 = widths2 * heights2; 19 | 20 | auto num_boxes1 = boxes1.size(0); 21 | auto num_boxes2 = boxes2.size(0); 22 | 23 | for (int i = 0; i < num_boxes1; i++) { 24 | for (int j = 0; j < num_boxes2; j++) { 25 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 26 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 27 | } 28 | } 29 | } 30 | 31 | at::Tensor box_iou_rotated_cpu( 32 | const at::Tensor& boxes1, 33 | const at::Tensor& boxes2) { 34 | auto num_boxes1 = boxes1.size(0); 35 | auto num_boxes2 = boxes2.size(0); 36 | at::Tensor ious = 37 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 38 | 39 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 40 | 41 | // reshape from 1d array to 2d array 42 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 43 | return ious.reshape(shape); 44 | } 45 | 46 | } // namespace fsdet 47 | -------------------------------------------------------------------------------- /fsdet/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | from fsdet.structures import Instances 5 | 6 | 7 | def detector_postprocess(results, output_height, output_width): 8 | """ 9 | Resize the output instances. 10 | The input images are often resized when entering an object detector. 11 | As a result, we often need the outputs of the detector in a different 12 | resolution from its inputs. 13 | 14 | This function will resize the raw outputs of an R-CNN detector 15 | to produce outputs according to the desired output resolution. 16 | 17 | Args: 18 | results (Instances): the raw outputs from the detector. 19 | `results.image_size` contains the input image resolution the detector sees. 20 | This object might be modified in-place. 21 | output_height, output_width: the desired output resolution. 22 | 23 | Returns: 24 | Instances: the resized output from the model, based on the output resolution 25 | """ 26 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 27 | results = Instances((output_height, output_width), **results.get_fields()) 28 | 29 | if results.has("pred_boxes"): 30 | output_boxes = results.pred_boxes 31 | elif results.has("proposal_boxes"): 32 | output_boxes = results.proposal_boxes 33 | 34 | output_boxes.scale(scale_x, scale_y) 35 | output_boxes.clip(results.image_size) 36 | 37 | results = results[output_boxes.nonempty()] 38 | 39 | return results 40 | -------------------------------------------------------------------------------- /fsdet/solver/masked_sgd.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.optim import SGD 3 | 4 | 5 | class MaskedSGD(SGD): 6 | r""" 7 | SGD variant that masks out some gradient updates 8 | """ 9 | def step(self, closure=None): 10 | """Performs a single optimization step.
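Gradient rows listed in a parameter group's ``filter_indices`` are zeroed before the update, so the corresponding weights keep their current values.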
11 | 12 | Arguments: 13 | closure (callable, optional): A closure that reevaluates the model 14 | and returns the loss. 15 | """ 16 | loss = None 17 | if closure is not None: 18 | loss = closure() 19 | 20 | for group in self.param_groups: 21 | weight_decay = group['weight_decay'] 22 | momentum = group['momentum'] 23 | dampening = group['dampening'] 24 | nesterov = group['nesterov'] 25 | 26 | for p, ind in zip(group['params'], group['filter_indices']): 27 | if p.grad is None: 28 | continue 29 | d_p = p.grad.data 30 | if weight_decay != 0: 31 | d_p.add_(weight_decay, p.data) 32 | if momentum != 0: 33 | param_state = self.state[p] 34 | if 'momentum_buffer' not in param_state: 35 | buf = param_state['momentum_buffer'] = torch.zeros_like(p.data) 36 | buf.mul_(momentum).add_(d_p) 37 | else: 38 | buf = param_state['momentum_buffer'] 39 | buf.mul_(momentum).add_(1 - dampening, d_p) 40 | if nesterov: 41 | d_p = d_p.add(momentum, buf) 42 | else: 43 | d_p = buf 44 | if ind: 45 | d_p[ind, :] = 0 46 | 47 | p.data.add_(-group['lr'], d_p) 48 | 49 | return loss 50 | -------------------------------------------------------------------------------- /fsdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | class Registry(object): 3 | """ 4 | The registry that provides name -> object mapping, to support third-party users' custom modules. 5 | 6 | To create a registry (inside FsDet): 7 | 8 | .. code-block:: python 9 | 10 | BACKBONE_REGISTRY = Registry('BACKBONE') 11 | 12 | To register an object: 13 | 14 | .. code-block:: python 15 | 16 | @BACKBONE_REGISTRY.register() 17 | class MyBackbone(): 18 | ... 19 | 20 | Or: 21 | 22 | .. code-block:: python 23 | 24 | BACKBONE_REGISTRY.register(MyBackbone) 25 | """ 26 | 27 | def __init__(self, name): 28 | """ 29 | Args: 30 | name (str): the name of this registry 31 | """ 32 | self._name = name 33 | 34 | self._obj_map = {} 35 | 36 | def _do_register(self, name, obj): 37 | assert ( 38 | name not in self._obj_map 39 | ), "An object named '{}' was already registered in '{}' registry!".format(name, self._name) 40 | self._obj_map[name] = obj 41 | 42 | def register(self, obj=None): 43 | """ 44 | Register the given object under the name `obj.__name__`. 45 | Can be used as either a decorator or not. See docstring of this class for usage.
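Note that the plain function-call form returns ``None``, while the decorator form returns the registered class or function unchanged.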
46 | """ 47 | if obj is None: 48 | # used as a decorator 49 | def deco(func_or_class): 50 | name = func_or_class.__name__ 51 | self._do_register(name, func_or_class) 52 | return func_or_class 53 | 54 | return deco 55 | 56 | # used as a function call 57 | name = obj.__name__ 58 | self._do_register(name, obj) 59 | 60 | def get(self, name): 61 | ret = self._obj_map.get(name) 62 | if ret is None: 63 | raise KeyError("No object named '{}' found in '{}' registry!".format(name, self._name)) 64 | return ret 65 | -------------------------------------------------------------------------------- /run_rfs.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ROOT=/media/datasets/gpu17_models/WEN/checkpoints/rfs/faster_rcnn #<- Change this yourself 4 | 5 | #------------------------------ Base-training ------------------------------- # 6 | for split in 1 2 3 7 | do 8 | python tools/train_net.py --num-gpus 3 \ 9 | --config-file configs/RFS/base-training/R101_FPN_base_training_split${split}.yml \ 10 | --opts OUTPUT_DIR ${ROOT}/faster_rcnn_R_101_FPN_base${split} 11 | done 12 | 13 | #------------------------------ Random initialize ------------------------------- # 14 | for split in 1 2 3 15 | do 16 | python tools/ckpt_surgery.py \ 17 | --src1 ${ROOT}/faster_rcnn_R_101_FPN_base${split}/model_final.pth \ 18 | --method randinit \ 19 | --save-dir ${ROOT}/faster_rcnn_R_101_FPN_base${split} 20 | done 21 | 22 | #------------------------------ Fine-tuning ------------------------------- # 23 | for split in 1 2 3 24 | do 25 | for shot in 1 2 3 5 10 26 | do 27 | echo split:$split shot:$shot 28 | CONFIG_PATH=configs/RFS/split${split}/${shot}shot_GPB_PFB_proloss.yml 29 | OUT_DIR=${ROOT}/split${split}_${shot}shot_GPB_PFB_proloss 30 | python3 -m tools.train_net --num-gpus 3 \ 31 | --config-file ${CONFIG_PATH} \ 32 | --opts OUTPUT_DIR ${OUT_DIR} 33 | rm ${OUT_DIR}/last_checkpoint 34 | done 35 | done 36 | 37 | #------------------------------ Evaluating ------------------------------- # 38 | for split in 1 2 3 39 | do 40 | for shot in 1 2 3 5 10 41 | do 42 | echo split:$split shot:$shot 43 | CONFIG_PATH=configs/RFS/split${split}/${shot}shot_GPB_PFB_proloss.yml 44 | OUT_DIR=${ROOT}/split${split}_${shot}shot_GPB_PFB_proloss 45 | python3 -m tools.train_net --num-gpus 3 \ 46 | --config-file ${CONFIG_PATH} --eval-only \ 47 | --opts MODEL.WEIGHTS ${ROOT}/model_final.pth \ 48 | MODEL.MODEL.ROI_HEADS.NOVEL_MODULE.INIT_FEATURE_WEIGHT ${ROOT}/prototypes_feature_final.pkl 49 | done 50 | done 51 | 52 | -------------------------------------------------------------------------------- /run_voc.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | ROOT=/media/datasets/gpu17_models/WEN/checkpoints/voc/faster_rcnn #<- Change this yourself 4 | 5 | #------------------------------ Base-training ------------------------------- # 6 | for split in 1 2 3 7 | do 8 | python tools/train_net.py --num-gpus 3 \ 9 | --config-file configs/PASCAL_VOC/base-training/R101_FPN_base_training_split${split}.yml \ 10 | --opts OUTPUT_DIR ${ROOT}/faster_rcnn_R_101_FPN_base${split} 11 | done 12 | 13 | #------------------------------ Random initialize ------------------------------- # 14 | for split in 1 2 3 15 | do 16 | python tools/ckpt_surgery.py \ 17 | --src1 ${ROOT}/faster_rcnn_R_101_FPN_base${split}/model_final.pth \ 18 | --method randinit \ 19 | --save-dir ${ROOT}/faster_rcnn_R_101_FPN_base${split} 20 | done 21 | 22 | 
#------------------------------ Fine-tuning ------------------------------- # 23 | for split in 1 2 3 24 | do 25 | for shot in 1 2 3 5 10 26 | do 27 | echo split:$split shot:$shot 28 | CONFIG_PATH=configs/PASCAL_VOC/split${split}/${shot}shot_GPB_PFB_proloss.yml 29 | OUT_DIR=${ROOT}/split${split}_${shot}shot_GPB_PFB_proloss 30 | python3 -m tools.train_net --num-gpus 3 \ 31 | --config-file ${CONFIG_PATH} \ 32 | --opts OUTPUT_DIR ${OUT_DIR} 33 | rm ${OUT_DIR}/last_checkpoint 34 | done 35 | done 36 | 37 | #------------------------------ Evaluating ------------------------------- # 38 | for split in 1 2 3 39 | do 40 | for shot in 1 2 3 5 10 41 | do 42 | echo split:$split shot:$shot 43 | CONFIG_PATH=configs/PASCAL_VOC/split${split}/${shot}shot_GPB_PFB_proloss.yml 44 | OUT_DIR=${ROOT}/split${split}_${shot}shot_GPB_PFB_proloss 45 | python3 -m tools.train_net --num-gpus 3 \ 46 | --config-file ${CONFIG_PATH} --eval-only \ 47 | --opts MODEL.WEIGHTS ${OUT_DIR}/model_final.pth \ 48 | MODEL.ROI_HEADS.NOVEL_MODULE.INIT_FEATURE_WEIGHT ${OUT_DIR}/prototypes_feature_final.pkl 49 | done 50 | done 51 | 52 | -------------------------------------------------------------------------------- /fsdet/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that the batch only contain elements from the same group. 10 | It also tries to provide mini-batches which follow an ordering that is 11 | as close as possible to the ordering from the original sampler. 12 | """ 13 | 14 | def __init__(self, sampler, group_ids, batch_size): 15 | """ 16 | Args: 17 | sampler (Sampler): Base sampler. 18 | group_ids (list[int]): If the sampler produces indices in range [0, N), 19 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 20 | The group ids must be a set of integers in the range [0, num_groups). 21 | batch_size (int): Size of mini-batch. 22 | """ 23 | if not isinstance(sampler, Sampler): 24 | raise ValueError( 25 | "sampler should be an instance of " 26 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 27 | ) 28 | self.sampler = sampler 29 | self.group_ids = np.asarray(group_ids) 30 | assert self.group_ids.ndim == 1 31 | self.batch_size = batch_size 32 | groups = np.unique(self.group_ids).tolist() 33 | 34 | # buffer the indices of each group until batch size is reached 35 | self.buffer_per_group = {k: [] for k in groups} 36 | 37 | def __iter__(self): 38 | for idx in self.sampler: 39 | group_id = self.group_ids[idx] 40 | group_buffer = self.buffer_per_group[group_id] 41 | group_buffer.append(idx) 42 | if len(group_buffer) == self.batch_size: 43 | yield group_buffer[:] # yield a copy of the list 44 | del group_buffer[:] 45 | 46 | def __len__(self): 47 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 48 | -------------------------------------------------------------------------------- /fsdet/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from abc import ABCMeta, abstractmethod 3 | import torch.nn as nn 4 | 5 | from fsdet.layers import ShapeSpec 6 | 7 | __all__ = ["Backbone"] 8 | 9 | 10 | class Backbone(nn.Module, metaclass=ABCMeta): 11 | """ 12 | Abstract base class for network backbones. 13 | """ 14 | 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str: Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self): 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec( 50 | channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] 51 | ) 52 | for name in self._out_features 53 | } 54 | 55 | # the properties below are not used any more 56 | 57 | @property 58 | def out_features(self): 59 | """deprecated""" 60 | return self._out_features 61 | 62 | @property 63 | def out_feature_strides(self): 64 | """deprecated""" 65 | return {f: self._out_feature_strides[f] for f in self._out_features} 66 | 67 | @property 68 | def out_feature_channels(self): 69 | """deprecated""" 70 | return {f: self._out_feature_channels[f] for f in self._out_features} 71 | -------------------------------------------------------------------------------- /fsdet/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | __all__ = ["subsample_labels"] 5 | 6 | 7 | def subsample_labels(labels, num_samples, positive_fraction, bg_label): 8 | """ 9 | Return `num_samples` random samples from `labels`, with a fraction of 10 | positives no larger than `positive_fraction`. 11 | 12 | Args: 13 | labels (Tensor): (N, ) label vector with values: 14 | * -1: ignore 15 | * bg_label: background ("negative") class 16 | * otherwise: one or more foreground ("positive") classes 17 | num_samples (int): The total number of labels with value >= 0 to return. 18 | Values that are not sampled will be filled with -1 (ignore). 19 | positive_fraction (float): The number of subsampled labels with values > 0 20 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 21 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 22 | In other words, if there are not enough positives, the sample is filled with 23 | negatives. If there are also not enough negatives, then as many elements are 24 | sampled as is possible. 25 | bg_label (int): label index of background ("negative") class. 26 | 27 | Returns: 28 | pos_idx, neg_idx (Tensor): 29 | 1D indices. The total number of indices is `num_samples` if possible. 30 | The fraction of positive indices is `positive_fraction` if possible.
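For example, with ``num_samples=256`` and ``positive_fraction=0.25``, at most ``int(256 * 0.25) = 64`` positive indices are returned, and any shortfall is filled with negatives.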
31 | """ 32 | positive = torch.nonzero((labels != -1) & (labels != bg_label)).squeeze(1) 33 | negative = torch.nonzero(labels == bg_label).squeeze(1) 34 | 35 | num_pos = int(num_samples * positive_fraction) 36 | # protect against not enough positive examples 37 | num_pos = min(positive.numel(), num_pos) 38 | num_neg = num_samples - num_pos 39 | # protect against not enough negative examples 40 | num_neg = min(negative.numel(), num_neg) 41 | 42 | # randomly select positive and negative examples 43 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 44 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 45 | 46 | pos_idx = positive[perm1] 47 | neg_idx = negative[perm2] 48 | return pos_idx, neg_idx 49 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace fsdet { 6 | 7 | template 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.type() == scores.type(), "dets should have the same type as scores"); 20 | 21 | if (dets.numel() == 0) { 22 | return at::empty({0}, dets.options().dtype(at::kLong)); 23 | } 24 | 25 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 26 | 27 | auto ndets = dets.size(0); 28 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 29 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 30 | 31 | auto suppressed = suppressed_t.data_ptr(); 32 | auto keep = keep_t.data_ptr(); 33 | auto order = order_t.data_ptr(); 34 | 35 | int64_t num_to_keep = 0; 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) { 40 | continue; 41 | } 42 | 43 | keep[num_to_keep++] = i; 44 | 45 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 46 | auto j = order[_j]; 47 | if (suppressed[j] == 1) { 48 | continue; 49 | } 50 | 51 | auto ovr = single_box_iou_rotated( 52 | dets[i].data_ptr(), dets[j].data_ptr()); 53 | if (ovr >= iou_threshold) { 54 | suppressed[j] = 1; 55 | } 56 | } 57 | } 58 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 59 | } 60 | 61 | at::Tensor nms_rotated_cpu( 62 | const at::Tensor& dets, 63 | const at::Tensor& scores, 64 | const float iou_threshold) { 65 | auto result = at::empty({0}, dets.options()); 66 | 67 | AT_DISPATCH_FLOATING_TYPES(dets.type(), "nms_rotated", [&] { 68 | result = nms_rotated_cpu_kernel(dets, scores, iou_threshold); 69 | }); 70 | return result; 71 | } 72 | 73 | } // namespace fsdet 74 | -------------------------------------------------------------------------------- /fsdet/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved 2 | import pickle 3 | from fvcore.common.checkpoint import Checkpointer 4 | from fvcore.common.file_io import PathManager 5 | 6 | import fsdet.utils.comm as comm 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & detectron2 14 | model zoo, and apply conversions for legacy models. 15 | """ 16 | 17 | def __init__(self, model, save_dir="", *, save_to_disk=None, **checkpointables): 18 | is_main_process = comm.is_main_process() 19 | super().__init__( 20 | model, 21 | save_dir, 22 | save_to_disk=is_main_process if save_to_disk is None else save_to_disk, 23 | **checkpointables, 24 | ) 25 | 26 | def _load_file(self, filename): 27 | if filename.endswith(".pkl"): 28 | with PathManager.open(filename, "rb") as f: 29 | data = pickle.load(f, encoding="latin1") 30 | if "model" in data and "__author__" in data: 31 | # file is in Detectron2 model zoo format 32 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 33 | return data 34 | else: 35 | # assume file is from Caffe2 / Detectron1 model zoo 36 | if "blobs" in data: 37 | # Detection models have "blobs", but ImageNet models don't 38 | data = data["blobs"] 39 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 40 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 41 | 42 | loaded = super()._load_file(filename) # load native pth checkpoint 43 | if "model" not in loaded: 44 | loaded = {"model": loaded} 45 | return loaded 46 | 47 | def _load_model(self, checkpoint): 48 | if checkpoint.get("matching_heuristics", False): 49 | self._convert_ndarray_to_tensor(checkpoint["model"]) 50 | # convert weights by name-matching heuristics 51 | model_state_dict = self.model.state_dict() 52 | align_and_update_state_dicts( 53 | model_state_dict, 54 | checkpoint["model"], 55 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 56 | ) 57 | checkpoint["model"] = model_state_dict 58 | # for non-caffe2 models, use standard ways to load it 59 | super()._load_model(checkpoint) 60 | 61 | -------------------------------------------------------------------------------- /fsdet/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import OrderedDict 7 | from collections.abc import Mapping 8 | 9 | def print_csv_format(results): 10 | """ 11 | Print main metrics in a format similar to Detectron, 12 | so that they are easy to copypaste into a spreadsheet. 13 | 14 | Args: 15 | results (OrderedDict[dict]): task_name -> {metric -> score} 16 | """ 17 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 18 | logger = logging.getLogger(__name__) 19 | for task, res in results.items(): 20 | # Don't print "AP-category" metrics since they are usually not tracked.
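# (e.g. keeps headline metrics such as "AP" or "AP50" and drops per-category entries like "AP-aeroplane")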
21 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 22 | logger.info("copypaste: Task: {}".format(task)) 23 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 24 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 25 | 26 | 27 | def verify_results(cfg, results): 28 | """ 29 | Args: 30 | results (OrderedDict[dict]): task_name -> {metric -> score} 31 | 32 | Returns: 33 | bool: whether the verification succeeds or not 34 | """ 35 | expected_results = cfg.TEST.EXPECTED_RESULTS 36 | if not len(expected_results): 37 | return True 38 | 39 | ok = True 40 | for task, metric, expected, tolerance in expected_results: 41 | actual = results[task][metric] 42 | if not np.isfinite(actual): 43 | ok = False 44 | diff = abs(actual - expected) 45 | if diff > tolerance: 46 | ok = False 47 | 48 | logger = logging.getLogger(__name__) 49 | if not ok: 50 | logger.error("Result verification failed!") 51 | logger.error("Expected Results: " + str(expected_results)) 52 | logger.error("Actual Results: " + pprint.pformat(results)) 53 | 54 | sys.exit(1) 55 | else: 56 | logger.info("Results verification passed.") 57 | return ok 58 | 59 | 60 | def flatten_results_dict(results): 61 | """ 62 | Expand a hierarchical dict of scalars into a flat dict of scalars. 63 | If results[k1][k2][k3] = v, the returned dict will have the entry 64 | {"k1/k2/k3": v}. 65 | 66 | Args: 67 | results (dict): 68 | """ 69 | r = {} 70 | for k, v in results.items(): 71 | if isinstance(v, Mapping): 72 | v = flatten_results_dict(v) 73 | for kk, vv in v.items(): 74 | r[k + "/" + kk] = vv 75 | else: 76 | r[k] = v 77 | return r 78 | -------------------------------------------------------------------------------- /fsdet/data/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | import logging 4 | import random 5 | import torch.utils.data as data 6 | 7 | from fsdet.utils.serialize import PicklableWrapper 8 | 9 | __all__ = ["MapDataset", "DatasetFromList"] 10 | 11 | 12 | class MapDataset(data.Dataset): 13 | """ 14 | Map a function over the elements in a dataset. 15 | 16 | Args: 17 | dataset: a dataset to which the map function is applied. 18 | map_func: a callable which maps the element in dataset. map_func is 19 | responsible for error handling; when an error happens, it needs to 20 | return None so the MapDataset will randomly use other 21 | elements from the dataset.
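A minimal usage sketch (``dataset`` and ``mapper`` are placeholder names): ``MapDataset(dataset, mapper)[i]`` returns ``mapper(dataset[i])``, falling back to a random other index whenever ``mapper`` returns None.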
22 | """ 23 | 24 | def __init__(self, dataset, map_func): 25 | self._dataset = dataset 26 | self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work 27 | 28 | self._rng = random.Random(42) 29 | self._fallback_candidates = set(range(len(dataset))) 30 | 31 | def __len__(self): 32 | return len(self._dataset) 33 | 34 | def __getitem__(self, idx): 35 | retry_count = 0 36 | cur_idx = int(idx) 37 | 38 | while True: 39 | data = self._map_func(self._dataset[cur_idx]) 40 | if data is not None: 41 | self._fallback_candidates.add(cur_idx) 42 | return data 43 | 44 | # _map_func fails for this idx, use a random new index from the pool 45 | retry_count += 1 46 | self._fallback_candidates.discard(cur_idx) 47 | cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] 48 | 49 | if retry_count >= 3: 50 | logger = logging.getLogger(__name__) 51 | logger.warning( 52 | "Failed to apply `_map_func` for idx: {}, retry count: {}".format( 53 | idx, retry_count 54 | ) 55 | ) 56 | 57 | 58 | class DatasetFromList(data.Dataset): 59 | """ 60 | Wrap a list to a torch Dataset. It produces elements of the list as data. 61 | """ 62 | 63 | def __init__(self, lst: list, copy: bool = True): 64 | """ 65 | Args: 66 | lst (list): a list which contains elements to produce. 67 | copy (bool): whether to deepcopy the element when producing it, 68 | so that the result can be modified in place without affecting the 69 | source in the list. 70 | """ 71 | self._lst = lst 72 | self._copy = copy 73 | 74 | def __len__(self): 75 | return len(self._lst) 76 | 77 | def __getitem__(self, idx): 78 | if self._copy: 79 | return copy.deepcopy(self._lst[idx]) 80 | else: 81 | return self._lst[idx] 82 | -------------------------------------------------------------------------------- /fsdet/modeling/proposal_generator/proposal_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | import torch 4 | 5 | from fsdet.structures import Instances 6 | 7 | 8 | def add_ground_truth_to_proposals(gt_boxes, proposals): 9 | """Augment proposals with ground-truth boxes. 10 | In the case of learned proposals (e.g., RPN), when training starts 11 | the proposals will be low quality due to random initialization. 12 | It's possible that none of these initial 13 | proposals have high enough overlap with the gt objects to be used 14 | as positive examples for the second stage components (box head, 15 | cls head). Adding the gt boxes to the set of proposals 16 | ensures that the second stage components will have some positive 17 | examples from the start of training. For RPN, this augmentation improves 18 | convergence and empirically improves box AP on COCO by about 0.5 19 | points (under one tested configuration). 20 | 21 | Call `add_ground_truth_to_proposals_single_image` for all images. 22 | 23 | Args: 24 | gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes 25 | representing the gound-truth for image i. 26 | proposals (list[Instances]): list of N elements. Element i is a Instances 27 | representing the proposals for image i. 28 | 29 | Returns: 30 | list[Instances]: list of N Instances. Each is the proposals for the image, 31 | with field "proposal_boxes" and "objectness_logits". 
32 | """ 33 | assert gt_boxes is not None 34 | 35 | assert len(proposals) == len(gt_boxes) 36 | if len(proposals) == 0: 37 | return proposals 38 | 39 | return [ 40 | add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) 41 | for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) 42 | ] 43 | 44 | 45 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): 46 | """ 47 | Augment `proposals` with ground-truth boxes from `gt_boxes`. 48 | 49 | Args: 50 | Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals 51 | per image. 52 | 53 | Returns: 54 | Same as `add_ground_truth_to_proposals`, but for only one image. 55 | """ 56 | device = proposals.objectness_logits.device 57 | # Concatenating gt_boxes with proposals requires them to have the same fields 58 | # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1. 59 | gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) 60 | 61 | gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) 62 | gt_proposal = Instances(proposals.image_size) 63 | 64 | gt_proposal.proposal_boxes = gt_boxes 65 | gt_proposal.objectness_logits = gt_logits 66 | new_proposals = Instances.cat([proposals, gt_proposal]) 67 | 68 | return new_proposals 69 | -------------------------------------------------------------------------------- /fsdet/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from typing import Any, Dict, List 3 | import torch 4 | 5 | from fsdet.config import CfgNode 6 | 7 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 8 | from .masked_sgd import MaskedSGD 9 | 10 | 11 | def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: 12 | """ 13 | Build an optimizer from config. 14 | """ 15 | params: List[Dict[str, Any]] = [] 16 | 17 | # determine the masked parameter in the config file 18 | masked_param = cfg.SOLVER.MASKED_PARAMS 19 | masked_param_inds = cfg.SOLVER.MASKED_PARAMS_INDS 20 | 21 | for key, value in model.named_parameters(): 22 | if not value.requires_grad: 23 | continue 24 | lr = cfg.SOLVER.BASE_LR 25 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 26 | if key.endswith("norm.weight") or key.endswith("norm.bias"): 27 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM 28 | elif key.endswith(".bias"): 29 | # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 30 | # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer 31 | # hyperparameters are by default exactly the same as for regular 32 | # weights. 33 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 34 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 35 | 36 | if key in masked_param: 37 | filter_indices = masked_param_inds[masked_param.index(key)] 38 | else: 39 | filter_indices = [] 40 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay, 41 | "filter_indices": [filter_indices]}] 42 | 43 | if cfg.SOLVER.NAME == 'MaskedSGD': 44 | optimizer = MaskedSGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 45 | else: 46 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 47 | return optimizer 48 | 49 | 50 | def build_lr_scheduler( 51 | cfg: CfgNode, optimizer: torch.optim.Optimizer 52 | ) -> torch.optim.lr_scheduler._LRScheduler: 53 | """ 54 | Build a LR scheduler from config. 
55 | """ 56 | name = cfg.SOLVER.LR_SCHEDULER_NAME 57 | if name == "WarmupMultiStepLR": 58 | return WarmupMultiStepLR( 59 | optimizer, 60 | cfg.SOLVER.STEPS, 61 | cfg.SOLVER.GAMMA, 62 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 63 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 64 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 65 | ) 66 | elif name == "WarmupCosineLR": 67 | return WarmupCosineLR( 68 | optimizer, 69 | cfg.SOLVER.MAX_ITER, 70 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 71 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 72 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 73 | ) 74 | else: 75 | raise ValueError("Unknown LR scheduler: {}".format(name)) 76 | -------------------------------------------------------------------------------- /fsdet/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | from fvcore.common.file_io import PathManager 5 | import os 6 | import numpy as np 7 | import xml.etree.ElementTree as ET 8 | 9 | from fsdet.structures import BoxMode 10 | from fsdet.data import DatasetCatalog, MetadataCatalog 11 | 12 | 13 | __all__ = ["register_pascal_voc"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = [ 18 | "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", 19 | "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", 20 | "pottedplant", "sheep", "sofa", "train", "tvmonitor", 21 | ] 22 | # fmt: on 23 | 24 | 25 | def load_voc_instances(dirname: str, split: str): 26 | """ 27 | Load Pascal VOC detection annotations to Detectron2 format. 28 | 29 | Args: 30 | dirname: Contain "Annotations", "ImageSets", "JPEGImages" 31 | split (str): one of "train", "test", "val", "trainval" 32 | """ 33 | with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 34 | fileids = np.loadtxt(f, dtype=np.str) 35 | 36 | dicts = [] 37 | for fileid in fileids: 38 | anno_file = os.path.join(dirname, "Annotations", fileid + ".xml") 39 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 40 | 41 | tree = ET.parse(anno_file) 42 | 43 | r = { 44 | "file_name": jpeg_file, 45 | "image_id": fileid, 46 | "height": int(tree.findall("./size/height")[0].text), 47 | "width": int(tree.findall("./size/width")[0].text), 48 | } 49 | instances = [] 50 | 51 | for obj in tree.findall("object"): 52 | cls = obj.find("name").text 53 | # We include "difficult" samples in training. 54 | # Based on limited experiments, they don't hurt accuracy. 55 | # difficult = int(obj.find("difficult").text) 56 | # if difficult == 1: 57 | # continue 58 | bbox = obj.find("bndbox") 59 | bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]] 60 | # Original annotations are integers in the range [1, W or H] 61 | # Assuming they mean 1-based pixel indices (inclusive), 62 | # a box with annotation (xmin=1, xmax=W) covers the whole image. 
-------------------------------------------------------------------------------- /fsdet/data/datasets/pascal_voc.py: --------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | 
4 | from fvcore.common.file_io import PathManager
5 | import os
6 | import numpy as np
7 | import xml.etree.ElementTree as ET
8 | 
9 | from fsdet.structures import BoxMode
10 | from fsdet.data import DatasetCatalog, MetadataCatalog
11 | 
12 | 
13 | __all__ = ["register_pascal_voc"]
14 | 
15 | 
16 | # fmt: off
17 | CLASS_NAMES = [
18 |     "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
19 |     "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
20 |     "pottedplant", "sheep", "sofa", "train", "tvmonitor",
21 | ]
22 | # fmt: on
23 | 
24 | 
25 | def load_voc_instances(dirname: str, split: str):
26 |     """
27 |     Load Pascal VOC detection annotations to Detectron2 format.
28 | 
29 |     Args:
30 |         dirname: directory that contains "Annotations", "ImageSets", "JPEGImages"
31 |         split (str): one of "train", "test", "val", "trainval"
32 |     """
33 |     with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
34 |         fileids = np.loadtxt(f, dtype=str)  # np.str is a removed NumPy alias; use the builtin str
35 | 
36 |     dicts = []
37 |     for fileid in fileids:
38 |         anno_file = os.path.join(dirname, "Annotations", fileid + ".xml")
39 |         jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")
40 | 
41 |         tree = ET.parse(anno_file)
42 | 
43 |         r = {
44 |             "file_name": jpeg_file,
45 |             "image_id": fileid,
46 |             "height": int(tree.findall("./size/height")[0].text),
47 |             "width": int(tree.findall("./size/width")[0].text),
48 |         }
49 |         instances = []
50 | 
51 |         for obj in tree.findall("object"):
52 |             cls = obj.find("name").text
53 |             # We include "difficult" samples in training.
54 |             # Based on limited experiments, they don't hurt accuracy.
55 |             # difficult = int(obj.find("difficult").text)
56 |             # if difficult == 1:
57 |             #     continue
58 |             bbox = obj.find("bndbox")
59 |             bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
60 |             # Original annotations are integers in the range [1, W or H]
61 |             # Assuming they mean 1-based pixel indices (inclusive),
62 |             # a box with annotation (xmin=1, xmax=W) covers the whole image.
63 |             # In coordinate space this is represented by (xmin=0, xmax=W)
64 |             bbox[0] -= 1.0
65 |             bbox[1] -= 1.0
66 |             instances.append(
67 |                 {"category_id": CLASS_NAMES.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
68 |             )
69 |         r["annotations"] = instances
70 |         dicts.append(r)
71 |     return dicts
72 | 
73 | 
74 | def register_pascal_voc(name, dirname, split, year):
75 |     DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split))
76 |     MetadataCatalog.get(name).set(
77 |         thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split
78 |     )
79 | 
80 | # Example usage (dataset name and path are illustrative):
81 | #   register_pascal_voc("voc_2007_trainval", "datasets/VOC2007", "trainval", 2007)
82 | #   dicts = DatasetCatalog.get("voc_2007_trainval")
-------------------------------------------------------------------------------- /fsdet/layers/csrc/vision.cpp: --------------------------------------------------------------------------------
1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | 
3 | #include <torch/extension.h>
4 | #include "ROIAlign/ROIAlign.h"
5 | #include "ROIAlignRotated/ROIAlignRotated.h"
6 | #include "box_iou_rotated/box_iou_rotated.h"
7 | #include "deformable/deform_conv.h"
8 | #include "nms_rotated/nms_rotated.h"
9 | 
10 | namespace fsdet {
11 | 
12 | #ifdef WITH_CUDA
13 | extern int get_cudart_version();
14 | #endif
15 | 
16 | std::string get_cuda_version() {
17 | #ifdef WITH_CUDA
18 |   std::ostringstream oss;
19 | 
20 |   // copied from
21 |   // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
22 |   auto printCudaStyleVersion = [&](int v) {
23 |     oss << (v / 1000) << "." << (v / 10 % 100);
24 |     if (v % 10 != 0) {
25 |       oss << "." << (v % 10);
26 |     }
27 |   };
28 |   printCudaStyleVersion(get_cudart_version());
29 |   return oss.str();
30 | #else
31 |   return std::string("not available");
32 | #endif
33 | }
34 | 
35 | // similar to
36 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp
37 | std::string get_compiler_version() {
38 |   std::ostringstream ss;
39 | #if defined(__GNUC__)
40 | #ifndef __clang__
41 |   { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; }
42 | #endif
43 | #endif
44 | 
45 | #if defined(__clang_major__)
46 |   {
47 |     ss << "clang " << __clang_major__ << "." << __clang_minor__ << "."
48 | << __clang_patchlevel__; 49 | } 50 | #endif 51 | 52 | #if defined(_MSC_VER) 53 | { ss << "MSVC " << _MSC_FULL_VER; } 54 | #endif 55 | return ss.str(); 56 | } 57 | 58 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 59 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 60 | m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); 61 | 62 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 63 | 64 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 65 | m.def( 66 | "deform_conv_backward_input", 67 | &deform_conv_backward_input, 68 | "deform_conv_backward_input"); 69 | m.def( 70 | "deform_conv_backward_filter", 71 | &deform_conv_backward_filter, 72 | "deform_conv_backward_filter"); 73 | m.def( 74 | "modulated_deform_conv_forward", 75 | &modulated_deform_conv_forward, 76 | "modulated_deform_conv_forward"); 77 | m.def( 78 | "modulated_deform_conv_backward", 79 | &modulated_deform_conv_backward, 80 | "modulated_deform_conv_backward"); 81 | 82 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 83 | 84 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 85 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 86 | 87 | m.def( 88 | "roi_align_rotated_forward", 89 | &ROIAlignRotated_forward, 90 | "Forward pass for Rotated ROI-Align Operator"); 91 | m.def( 92 | "roi_align_rotated_backward", 93 | &ROIAlignRotated_backward, 94 | "Backward pass for Rotated ROI-Align Operator"); 95 | } 96 | 97 | } // namespace fsdet 98 | -------------------------------------------------------------------------------- /fsdet/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import numpy as np 3 | import os 4 | import subprocess 5 | import sys 6 | from collections import defaultdict 7 | import PIL 8 | import torch 9 | import torchvision 10 | from tabulate import tabulate 11 | 12 | __all__ = ["collect_env_info"] 13 | 14 | 15 | def collect_torch_env(): 16 | try: 17 | import torch.__config__ 18 | 19 | return torch.__config__.show() 20 | except ImportError: 21 | # compatible with older versions of pytorch 22 | from torch.utils.collect_env import get_pretty_env_info 23 | 24 | return get_pretty_env_info() 25 | 26 | 27 | def get_env_module(): 28 | var_name = "FSDET_ENV_MODULE" 29 | return var_name, os.environ.get(var_name, "") 30 | 31 | 32 | def collect_env_info(): 33 | data = [] 34 | data.append(("sys.platform", sys.platform)) 35 | data.append(("Python", sys.version.replace("\n", ""))) 36 | data.append(("Numpy", np.__version__)) 37 | try: 38 | from fsdet import _C 39 | except ImportError: 40 | data.append(("fsdet._C", "failed to import")) 41 | else: 42 | data.append(("FsDet Compiler", _C.get_compiler_version())) 43 | data.append(("FsDet CUDA Compiler", _C.get_cuda_version())) 44 | 45 | data.append(get_env_module()) 46 | data.append(("PyTorch", torch.__version__)) 47 | data.append(("PyTorch Debug Build", torch.version.debug)) 48 | try: 49 | data.append(("torchvision", torchvision.__version__)) 50 | except AttributeError: 51 | data.append(("torchvision", "unknown")) 52 | 53 | has_cuda = torch.cuda.is_available() 54 | data.append(("CUDA available", has_cuda)) 55 | if has_cuda: 56 | devices = defaultdict(list) 57 | for k in range(torch.cuda.device_count()): 58 | devices[torch.cuda.get_device_name(k)].append(str(k)) 59 | for name, devids in devices.items(): 60 | data.append(("GPU " + ",".join(devids), name)) 61 | 62 | from torch.utils.cpp_extension import CUDA_HOME 63 | 64 | data.append(("CUDA_HOME", str(CUDA_HOME))) 65 | 66 | if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): 67 | try: 68 | nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") 69 | nvcc = subprocess.check_output("'{}' -V | tail -n1".format(nvcc), shell=True) 70 | nvcc = nvcc.decode("utf-8").strip() 71 | except subprocess.SubprocessError: 72 | nvcc = "Not Available" 73 | data.append(("NVCC", nvcc)) 74 | 75 | cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) 76 | if cuda_arch_list: 77 | data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) 78 | data.append(("Pillow", PIL.__version__)) 79 | 80 | try: 81 | import cv2 82 | 83 | data.append(("cv2", cv2.__version__)) 84 | except ImportError: 85 | pass 86 | env_str = tabulate(data) + "\n" 87 | env_str += collect_torch_env() 88 | return env_str 89 | 90 | 91 | if __name__ == "__main__": 92 | print(collect_env_info()) 93 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace fsdet { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #ifdef WITH_CUDA 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.type().is_cuda()) { 86 | #ifdef WITH_CUDA 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace fsdet 116 | -------------------------------------------------------------------------------- /fsdet/engine/launch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import torch 4 | import torch.distributed as dist 5 | import torch.multiprocessing as mp 6 | 7 | from fsdet.utils import comm 8 | 9 | __all__ = ["launch"] 10 | 11 | 12 | def _find_free_port(): 13 | import socket 14 | 15 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 16 | # Binding to port 0 will cause the OS to find an available port for us 17 | sock.bind(("", 0)) 18 | port = sock.getsockname()[1] 19 | sock.close() 20 | # NOTE: there is still a chance the port could be taken by other processes. 21 | return port 22 | 23 | 24 | def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()): 25 | """ 26 | Args: 27 | main_func: a function that will be called by `main_func(*args)` 28 | num_machines (int): the total number of machines 29 | machine_rank (int): the rank of this machine (one per machine) 30 | dist_url (str): url to connect to for distributed training, including protocol 31 | e.g. "tcp://127.0.0.1:8686". 32 | Can be set to auto to automatically select a free port on localhost 33 | args (tuple): arguments passed to main_func 34 | """ 35 | world_size = num_machines * num_gpus_per_machine 36 | if world_size > 1: 37 | # https://github.com/pytorch/pytorch/pull/14391 38 | # TODO prctl in spawned processes 39 | 40 | if dist_url == "auto": 41 | assert num_machines == 1, "dist_url=auto cannot work with distributed training." 42 | port = _find_free_port() 43 | dist_url = f"tcp://127.0.0.1:{port}" 44 | 45 | mp.spawn( 46 | _distributed_worker, 47 | nprocs=num_gpus_per_machine, 48 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args), 49 | daemon=False, 50 | ) 51 | else: 52 | main_func(*args) 53 | 54 | 55 | def _distributed_worker( 56 | local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args 57 | ): 58 | assert torch.cuda.is_available(), "cuda is not available. Please check your installation." 59 | global_rank = machine_rank * num_gpus_per_machine + local_rank 60 | try: 61 | dist.init_process_group( 62 | backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank 63 | ) 64 | except Exception as e: 65 | logger = logging.getLogger(__name__) 66 | logger.error("Process group URL: {}".format(dist_url)) 67 | raise e 68 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 69 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 70 | comm.synchronize() 71 | 72 | assert num_gpus_per_machine <= torch.cuda.device_count() 73 | torch.cuda.set_device(local_rank) 74 | 75 | # Setup the local process group (which contains ranks within the same machine) 76 | assert comm._LOCAL_PROCESS_GROUP is None 77 | num_machines = world_size // num_gpus_per_machine 78 | for i in range(num_machines): 79 | ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 80 | pg = dist.new_group(ranks_on_i) 81 | if i == machine_rank: 82 | comm._LOCAL_PROCESS_GROUP = pg 83 | 84 | main_func(*args) 85 | -------------------------------------------------------------------------------- /fsdet/layers/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from fsdet import _C 8 | 9 | 10 | class _ROIAlignRotated(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | output = _C.roi_align_rotated_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 20 | ) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_rotated_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None, None 44 | 45 | 46 | roi_align_rotated = _ROIAlignRotated.apply 47 | 48 | 49 | class ROIAlignRotated(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | """ 52 | Args: 53 | output_size (tuple): h, w 54 | spatial_scale (float): scale the input boxes by this number 55 | sampling_ratio (int): number of inputs samples to take for each output 56 | sample. 0 to take samples densely. 57 | 58 | Note: 59 | ROIAlignRotated supports continuous coordinate by default: 60 | Given a continuous coordinate c, its two neighboring pixel indices (in our 61 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 62 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 63 | from the underlying signal at continuous coordinates 0.5 and 1.5). 64 | """ 65 | super(ROIAlignRotated, self).__init__() 66 | self.output_size = output_size 67 | self.spatial_scale = spatial_scale 68 | self.sampling_ratio = sampling_ratio 69 | 70 | def forward(self, input, rois): 71 | """ 72 | Args: 73 | input: NCHW images 74 | rois: Bx6 boxes. First column is the index into N. 75 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees). 76 | """ 77 | assert rois.dim() == 2 and rois.size(1) == 6 78 | return roi_align_rotated( 79 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 80 | ) 81 | 82 | def __repr__(self): 83 | tmpstr = self.__class__.__name__ + "(" 84 | tmpstr += "output_size=" + str(self.output_size) 85 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 86 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 87 | tmpstr += ")" 88 | return tmpstr 89 | -------------------------------------------------------------------------------- /tools/visualize_json_results.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import argparse 5 | import json 6 | import numpy as np 7 | import os 8 | from collections import defaultdict 9 | import cv2 10 | import tqdm 11 | from fvcore.common.file_io import PathManager 12 | 13 | from fsdet.data import DatasetCatalog, MetadataCatalog 14 | from fsdet.structures import Boxes, BoxMode, Instances 15 | from fsdet.utils.logger import setup_logger 16 | from fsdet.utils.visualizer import Visualizer 17 | 18 | 19 | def create_instances(predictions, image_size): 20 | ret = Instances(image_size) 21 | 22 | score = np.asarray([x["score"] for x in predictions]) 23 | chosen = (score > args.conf_threshold).nonzero()[0] 24 | score = score[chosen] 25 | bbox = np.asarray([predictions[i]["bbox"] for i in chosen]) 26 | bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) 27 | 28 | labels = np.asarray([dataset_id_map(predictions[i]["category_id"]) for i in chosen]) 29 | 30 | ret.scores = score 31 | ret.pred_boxes = Boxes(bbox) 32 | ret.pred_classes = labels 33 | 34 | try: 35 | ret.pred_masks = [predictions[i]["segmentation"] for i in chosen] 36 | except KeyError: 37 | pass 38 | return ret 39 | 40 | 41 | if __name__ == "__main__": 42 | parser = argparse.ArgumentParser( 43 | description="A script that visualizes the json predictions from COCO or LVIS dataset." 44 | ) 45 | parser.add_argument("--input", required=True, help="JSON file produced by the model") 46 | parser.add_argument("--output", required=True, help="output directory") 47 | parser.add_argument("--dataset", help="name of the dataset", default="coco_2017_val") 48 | parser.add_argument("--conf-threshold", default=0.5, type=float, help="confidence threshold") 49 | args = parser.parse_args() 50 | 51 | logger = setup_logger() 52 | 53 | with PathManager.open(args.input, "r") as f: 54 | predictions = json.load(f) 55 | 56 | pred_by_image = defaultdict(list) 57 | for p in predictions: 58 | pred_by_image[p["image_id"]].append(p) 59 | 60 | dicts = list(DatasetCatalog.get(args.dataset)) 61 | metadata = MetadataCatalog.get(args.dataset) 62 | if hasattr(metadata, "thing_dataset_id_to_contiguous_id"): 63 | 64 | def dataset_id_map(ds_id): 65 | return metadata.thing_dataset_id_to_contiguous_id[ds_id] 66 | 67 | elif "lvis" in args.dataset: 68 | # LVIS results are in the same format as COCO results, but have a different 69 | # mapping from dataset category id to contiguous category id in [0, #categories - 1] 70 | def dataset_id_map(ds_id): 71 | return ds_id - 1 72 | 73 | else: 74 | raise ValueError("Unsupported dataset: {}".format(args.dataset)) 75 | 76 | os.makedirs(args.output, exist_ok=True) 77 | 78 | for dic in tqdm.tqdm(dicts): 79 | img = cv2.imread(dic["file_name"], cv2.IMREAD_COLOR)[:, :, ::-1] 80 | basename = os.path.basename(dic["file_name"]) 81 | 82 | predictions = create_instances(pred_by_image[dic["image_id"]], img.shape[:2]) 83 | vis = Visualizer(img, metadata) 84 | vis_pred = vis.draw_instance_predictions(predictions).get_image() 85 | 86 | vis = Visualizer(img, metadata) 87 | vis_gt = vis.draw_dataset_dict(dic).get_image() 88 | 89 | concat = np.concatenate((vis_pred, vis_gt), axis=1) 90 | cv2.imwrite(os.path.join(args.output, basename), concat[:, :, ::-1]) 91 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/ROIAlign/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace fsdet { 6 | 7 | at::Tensor ROIAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor ROIAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor ROIAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor ROIAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor ROIAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return ROIAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return ROIAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor ROIAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return ROIAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return ROIAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace fsdet 131 | -------------------------------------------------------------------------------- /fsdet/config/config.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import logging 5 | from fvcore.common.config import CfgNode as _CfgNode 6 | 7 | 8 | class CfgNode(_CfgNode): 9 | """ 10 | The same as `fvcore.common.config.CfgNode`, but different in: 11 | 12 | 1. Use unsafe yaml loading by default. 
13 | Note that this may lead to arbitrary code execution: you must not 14 | load a config file from untrusted sources before manually inspecting 15 | the content of the file. 16 | 2. Support config versioning. 17 | When attempting to merge an old config, it will convert the old config automatically. 18 | 19 | """ 20 | 21 | # Note that the default value of allow_unsafe is changed to True 22 | def merge_from_file(self, cfg_filename: str, allow_unsafe: bool = True) -> None: 23 | loaded_cfg = _CfgNode.load_yaml_with_base(cfg_filename, allow_unsafe=allow_unsafe) 24 | loaded_cfg = type(self)(loaded_cfg) 25 | 26 | # defaults.py needs to import CfgNode 27 | from .defaults import _C 28 | 29 | latest_ver = _C.VERSION 30 | assert ( 31 | latest_ver == self.VERSION 32 | ), "CfgNode.merge_from_file is only allowed on a config of latest version!" 33 | 34 | logger = logging.getLogger(__name__) 35 | 36 | loaded_ver = loaded_cfg.get("VERSION", None) 37 | if loaded_ver is None: 38 | from .compat import guess_version 39 | 40 | loaded_ver = guess_version(loaded_cfg, cfg_filename) 41 | assert loaded_ver <= self.VERSION, "Cannot merge a v{} config into a v{} config.".format( 42 | loaded_ver, self.VERSION 43 | ) 44 | 45 | if loaded_ver == self.VERSION: 46 | self.merge_from_other_cfg(loaded_cfg) 47 | else: 48 | # compat.py needs to import CfgNode 49 | from .compat import upgrade_config, downgrade_config 50 | 51 | logger.warning( 52 | "Loading an old v{} config file '{}' by automatically upgrading to v{}. " 53 | "See docs/CHANGELOG.md for instructions to update your files.".format( 54 | loaded_ver, cfg_filename, self.VERSION 55 | ) 56 | ) 57 | # To convert, first obtain a full config at an old version 58 | old_self = downgrade_config(self, to_version=loaded_ver) 59 | old_self.merge_from_other_cfg(loaded_cfg) 60 | new_config = upgrade_config(old_self) 61 | self.clear() 62 | self.update(new_config) 63 | 64 | 65 | global_cfg = CfgNode() 66 | 67 | 68 | def get_cfg() -> CfgNode: 69 | """ 70 | Get a copy of the default config. 71 | 72 | Returns: 73 | a fsdet CfgNode instance. 74 | """ 75 | from .defaults import _C 76 | 77 | return _C.clone() 78 | 79 | 80 | def set_global_cfg(cfg: CfgNode) -> None: 81 | """ 82 | Let the global config point to the given cfg. 83 | 84 | Assume that the given "cfg" has the key "KEY", after calling 85 | `set_global_cfg(cfg)`, the key can be accessed by: 86 | 87 | .. code-block:: python 88 | 89 | from fsdet.config import global_cfg 90 | print(global_cfg.KEY) 91 | 92 | By using a hacky global config, you can access these configs anywhere, 93 | without having to pass the config object or the values deep into the code. 94 | This is a hacky feature introduced for quick prototyping / research exploration. 95 | """ 96 | global global_cfg 97 | global_cfg.clear() 98 | global_cfg.update(cfg) 99 | -------------------------------------------------------------------------------- /fsdet/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import importlib 3 | import importlib.util 4 | import logging 5 | import numpy as np 6 | import os 7 | import random 8 | import sys 9 | from datetime import datetime 10 | import torch 11 | 12 | __all__ = ["seed_all_rng"] 13 | 14 | 15 | def seed_all_rng(seed=None): 16 | """ 17 | Set the random seed for the RNG in torch, numpy and python. 18 | 19 | Args: 20 | seed (int): if None, will use a strong random seed. 
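A minimal sketch of the intended config workflow; the YAML path points at the base config that ships in this repo:

    from fsdet.config import get_cfg, set_global_cfg

    cfg = get_cfg()                                    # fresh clone of the defaults
    cfg.merge_from_file("configs/Base-RCNN-FPN.yaml")  # version-checked, unsafe-yaml merge
    cfg.merge_from_list(["SOLVER.BASE_LR", "0.001"])   # command-line style overrides
    cfg.freeze()
    set_global_cfg(cfg)  # optional: makes it reachable as fsdet.config.global_cfg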
21 | """ 22 | if seed is None: 23 | seed = ( 24 | os.getpid() 25 | + int(datetime.now().strftime("%S%f")) 26 | + int.from_bytes(os.urandom(2), "big") 27 | ) 28 | logger = logging.getLogger(__name__) 29 | logger.info("Using a generated random seed {}".format(seed)) 30 | np.random.seed(seed) 31 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 32 | random.seed(seed) 33 | 34 | 35 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path 36 | def _import_file(module_name, file_path, make_importable=False): 37 | spec = importlib.util.spec_from_file_location(module_name, file_path) 38 | module = importlib.util.module_from_spec(spec) 39 | spec.loader.exec_module(module) 40 | if make_importable: 41 | sys.modules[module_name] = module 42 | return module 43 | 44 | 45 | def _configure_libraries(): 46 | """ 47 | Configurations for some libraries. 48 | """ 49 | # An environment option to disable `import cv2` globally, 50 | # in case it leads to negative performance impact 51 | disable_cv2 = int(os.environ.get("FSDET_DISABLE_CV2", False)) 52 | if disable_cv2: 53 | sys.modules["cv2"] = None 54 | else: 55 | # Disable opencl in opencv since its interaction with cuda often has negative effects 56 | # This envvar is supported after OpenCV 3.4.0 57 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 58 | try: 59 | import cv2 60 | 61 | if int(cv2.__version__.split(".")[0]) >= 3: 62 | cv2.ocl.setUseOpenCL(False) 63 | except ImportError: 64 | pass 65 | 66 | 67 | _ENV_SETUP_DONE = False 68 | 69 | 70 | def setup_environment(): 71 | """Perform environment setup work. The default setup is a no-op, but this 72 | function allows the user to specify a Python source file or a module in 73 | the $FSDET_ENV_MODULE environment variable, that performs 74 | custom setup work that may be necessary to their computing environment. 75 | """ 76 | global _ENV_SETUP_DONE 77 | if _ENV_SETUP_DONE: 78 | return 79 | _ENV_SETUP_DONE = True 80 | 81 | _configure_libraries() 82 | 83 | custom_module_path = os.environ.get("FSDET_ENV_MODULE") 84 | 85 | if custom_module_path: 86 | setup_custom_environment(custom_module_path) 87 | else: 88 | # The default setup is a no-op 89 | pass 90 | 91 | 92 | def setup_custom_environment(custom_module): 93 | """ 94 | Load custom environment setup by importing a Python source file or a 95 | module, and run the setup function. 96 | """ 97 | if custom_module.endswith(".py"): 98 | module = _import_file("fsdet.utils.env.custom_module", custom_module) 99 | else: 100 | module = importlib.import_module(custom_module) 101 | assert hasattr(module, "setup_environment") and callable(module.setup_environment), ( 102 | "Custom environment module defined in {} does not have the " 103 | "required callable attribute 'setup_environment'." 
104 | ).format(custom_module) 105 | module.setup_environment() 106 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Images 2 | *.jpg 3 | *.png 4 | *.txt 5 | *.json 6 | *.diff 7 | 8 | # compilation and distribution 9 | __pycache__/ 10 | _ext 11 | *.py[cod] 12 | *$py.class 13 | *.so 14 | detectron2.egg-info/ 15 | build/ 16 | dist/ 17 | wheels/ 18 | 19 | # pytorch/python/numpy formats 20 | *.pth 21 | *.pkl 22 | *.npy 23 | 24 | # ipython/jupyter notebooks 25 | *.ipynb 26 | **/.ipynb_checkpoints/ 27 | 28 | # Editor temporaries 29 | *.swn 30 | *.swo 31 | *.swp 32 | *~ 33 | 34 | # editor settings 35 | .idea 36 | .vscode 37 | 38 | # Byte-compiled / optimized / DLL files 39 | output/ 40 | log/ 41 | checkpoints/ 42 | hrnetv2_pretrained/ 43 | __pycache__/ 44 | *.py[cod] 45 | *$py.class 46 | 47 | # C extensions 48 | *.so 49 | 50 | # Distribution / packaging 51 | .Python 52 | build/ 53 | develop-eggs/ 54 | dist/ 55 | downloads/ 56 | eggs/ 57 | .eggs/ 58 | parts/ 59 | sdist/ 60 | var/ 61 | wheels/ 62 | share/python-wheels/ 63 | *.egg-info/ 64 | .installed.cfg 65 | *.egg 66 | MANIFEST 67 | 68 | # PyInstaller 69 | # Usually these files are written by a python script from a template 70 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 71 | *.manifest 72 | *.spec 73 | 74 | # Installer logs 75 | pip-log.txt 76 | pip-delete-this-directory.txt 77 | 78 | # Unit test / coverage reports 79 | htmlcov/ 80 | .tox/ 81 | .nox/ 82 | .coverage 83 | .coverage.* 84 | .cache 85 | nosetests.xml 86 | coverage.xml 87 | *.cover 88 | *.py,cover 89 | .hypothesis/ 90 | .pytest_cache/ 91 | cover/ 92 | 93 | # Translations 94 | *.mo 95 | *.pot 96 | 97 | # Django stuff: 98 | *.log 99 | local_settings.py 100 | db.sqlite3 101 | db.sqlite3-journal 102 | 103 | # Flask stuff: 104 | instance/ 105 | .webassets-cache 106 | 107 | # Scrapy stuff: 108 | .scrapy 109 | 110 | # Sphinx documentation 111 | docs/_build/ 112 | 113 | # PyBuilder 114 | .pybuilder/ 115 | target/ 116 | 117 | # Jupyter Notebook 118 | .ipynb_checkpoints 119 | 120 | # IPython 121 | profile_default/ 122 | ipython_config.py 123 | 124 | # pyenv 125 | # For a library or package, you might want to ignore these files since the code is 126 | # intended to run in multiple environments; otherwise, check them in: 127 | # .python-version 128 | 129 | # pipenv 130 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 131 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 132 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 133 | # install all needed dependencies. 134 | #Pipfile.lock 135 | 136 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 137 | __pypackages__/ 138 | 139 | # Celery stuff 140 | celerybeat-schedule 141 | celerybeat.pid 142 | 143 | # SageMath parsed files 144 | *.sage.py 145 | 146 | # Environments 147 | .env 148 | .venv 149 | env/ 150 | venv/ 151 | ENV/ 152 | env.bak/ 153 | venv.bak/ 154 | 155 | # Spyder project settings 156 | .spyderproject 157 | .spyproject 158 | 159 | # Rope project settings 160 | .ropeproject 161 | 162 | # mkdocs documentation 163 | /site 164 | 165 | # mypy 166 | .mypy_cache/ 167 | .dmypy.json 168 | dmypy.json 169 | 170 | # Pyre type checker 171 | .pyre/ 172 | 173 | # pytype static type analyzer 174 | .pytype/ 175 | 176 | # Cython debug symbols 177 | cython_debug/ 178 | 179 | 180 | # General 181 | .DS_Store 182 | .AppleDouble 183 | .LSOverride 184 | 185 | # Icon must end with two \r 186 | Icon 187 | 188 | # Thumbnails 189 | ._* 190 | 191 | # Files that might appear in the root of a volume 192 | .DocumentRevisions-V100 193 | .fseventsd 194 | .Spotlight-V100 195 | .TemporaryItems 196 | .Trashes 197 | .VolumeIcon.icns 198 | .com.apple.timemachine.donotpresent 199 | 200 | # Directories potentially created on remote AFP share 201 | .AppleDB 202 | .AppleDesktop 203 | Network Trash Folder 204 | Temporary Items 205 | .apdisk 206 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/box_iou_rotated/box_iou_rotated_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "box_iou_rotated_utils.h" 7 | 8 | namespace fsdet { 9 | 10 | // 2D block with 32 * 16 = 512 threads per block 11 | const int BLOCK_DIM_X = 32; 12 | const int BLOCK_DIM_Y = 16; 13 | 14 | template 15 | __global__ void box_iou_rotated_cuda_kernel( 16 | const int n_boxes1, 17 | const int n_boxes2, 18 | const T* dev_boxes1, 19 | const T* dev_boxes2, 20 | T* dev_ious) { 21 | const int row_start = blockIdx.x * blockDim.x; 22 | const int col_start = blockIdx.y * blockDim.y; 23 | 24 | const int row_size = min(n_boxes1 - row_start, blockDim.x); 25 | const int col_size = min(n_boxes2 - col_start, blockDim.y); 26 | 27 | __shared__ float block_boxes1[BLOCK_DIM_X * 5]; 28 | __shared__ float block_boxes2[BLOCK_DIM_Y * 5]; 29 | 30 | // It's safe to copy using threadIdx.x since BLOCK_DIM_X >= BLOCK_DIM_Y 31 | if (threadIdx.x < row_size && threadIdx.y == 0) { 32 | block_boxes1[threadIdx.x * 5 + 0] = 33 | dev_boxes1[(row_start + threadIdx.x) * 5 + 0]; 34 | block_boxes1[threadIdx.x * 5 + 1] = 35 | dev_boxes1[(row_start + threadIdx.x) * 5 + 1]; 36 | block_boxes1[threadIdx.x * 5 + 2] = 37 | dev_boxes1[(row_start + threadIdx.x) * 5 + 2]; 38 | block_boxes1[threadIdx.x * 5 + 3] = 39 | dev_boxes1[(row_start + threadIdx.x) * 5 + 3]; 40 | block_boxes1[threadIdx.x * 5 + 4] = 41 | dev_boxes1[(row_start + threadIdx.x) * 5 + 4]; 42 | } 43 | 44 | if (threadIdx.x < col_size && threadIdx.y == 0) { 45 | block_boxes2[threadIdx.x * 5 + 0] = 46 | dev_boxes2[(col_start + threadIdx.x) * 5 + 0]; 47 | block_boxes2[threadIdx.x * 5 + 1] = 48 | dev_boxes2[(col_start + threadIdx.x) * 5 + 1]; 49 | block_boxes2[threadIdx.x * 5 + 2] = 50 | dev_boxes2[(col_start + threadIdx.x) * 5 + 2]; 51 | block_boxes2[threadIdx.x * 5 + 3] = 52 | dev_boxes2[(col_start + threadIdx.x) * 5 + 3]; 53 | block_boxes2[threadIdx.x * 5 + 4] = 54 | dev_boxes2[(col_start + threadIdx.x) * 5 + 4]; 55 | } 56 | __syncthreads(); 57 | 58 | if (threadIdx.x < row_size 
&& threadIdx.y < col_size) { 59 | int offset = (row_start + threadIdx.x) * n_boxes2 + col_start + threadIdx.y; 60 | dev_ious[offset] = single_box_iou_rotated( 61 | block_boxes1 + threadIdx.x * 5, block_boxes2 + threadIdx.y * 5); 62 | } 63 | } 64 | 65 | at::Tensor box_iou_rotated_cuda( 66 | const at::Tensor& boxes1, 67 | const at::Tensor& boxes2) { 68 | using scalar_t = float; 69 | AT_ASSERTM(boxes1.type().is_cuda(), "boxes1 must be a CUDA tensor"); 70 | AT_ASSERTM(boxes2.type().is_cuda(), "boxes2 must be a CUDA tensor"); 71 | at::cuda::CUDAGuard device_guard(boxes1.device()); 72 | 73 | int num_boxes1 = boxes1.size(0); 74 | int num_boxes2 = boxes2.size(0); 75 | 76 | at::Tensor ious = 77 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 78 | 79 | if (num_boxes1 > 0 && num_boxes2 > 0) { 80 | const int blocks_x = at::cuda::ATenCeilDiv(num_boxes1, BLOCK_DIM_X); 81 | const int blocks_y = at::cuda::ATenCeilDiv(num_boxes2, BLOCK_DIM_Y); 82 | 83 | dim3 blocks(blocks_x, blocks_y); 84 | dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); 85 | 86 | cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 87 | 88 | box_iou_rotated_cuda_kernel<<>>( 89 | num_boxes1, 90 | num_boxes2, 91 | boxes1.data_ptr(), 92 | boxes2.data_ptr(), 93 | (scalar_t*)ious.data_ptr()); 94 | 95 | AT_CUDA_CHECK(cudaGetLastError()); 96 | } 97 | 98 | // reshape from 1d array to 2d array 99 | auto shape = std::vector{num_boxes1, num_boxes2}; 100 | return ious.reshape(shape); 101 | } 102 | 103 | } // namespace fsdet 104 | -------------------------------------------------------------------------------- /fsdet/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | from typing import Any, List, Sequence, Tuple, Union 4 | import torch 5 | from torch.nn import functional as F 6 | 7 | 8 | class ImageList(object): 9 | """ 10 | Structure that holds a list of images (of possibly 11 | varying sizes) as a single tensor. 12 | This works by padding the images to the same size, 13 | and storing in a field the original sizes of each image 14 | 15 | Attributes: 16 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 17 | """ 18 | 19 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): 20 | """ 21 | Arguments: 22 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 23 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). 24 | """ 25 | self.tensor = tensor 26 | self.image_sizes = image_sizes 27 | 28 | def __len__(self) -> int: 29 | return len(self.image_sizes) 30 | 31 | def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor: 32 | """ 33 | Access the individual image in its original size. 34 | 35 | Returns: 36 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 37 | """ 38 | size = self.image_sizes[idx] 39 | return self.tensor[idx, ..., : size[0], : size[1]] # type: ignore 40 | 41 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 42 | cast_tensor = self.tensor.to(*args, **kwargs) 43 | return ImageList(cast_tensor, self.image_sizes) 44 | 45 | @staticmethod 46 | def from_tensors( 47 | tensors: Sequence[torch.Tensor], size_divisibility: int = 0, pad_value: float = 0.0 48 | ) -> "ImageList": 49 | """ 50 | Args: 51 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 52 | (C_1, ..., C_K, Hi, Wi) where K >= 1. 
The Tensors will be padded with `pad_value` 53 | so that they will have the same shape. 54 | size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure 55 | the common height and width is divisible by `size_divisibility` 56 | pad_value (float): value to pad 57 | 58 | Returns: 59 | an `ImageList`. 60 | """ 61 | assert len(tensors) > 0 62 | assert isinstance(tensors, (tuple, list)) 63 | for t in tensors: 64 | assert isinstance(t, torch.Tensor), type(t) 65 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 66 | # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors 67 | max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors])) 68 | 69 | if size_divisibility > 0: 70 | import math 71 | 72 | stride = size_divisibility 73 | max_size = list(max_size) # type: ignore 74 | max_size[-2] = int(math.ceil(max_size[-2] / stride) * stride) # type: ignore 75 | max_size[-1] = int(math.ceil(max_size[-1] / stride) * stride) # type: ignore 76 | max_size = tuple(max_size) 77 | 78 | image_sizes = [im.shape[-2:] for im in tensors] 79 | 80 | if len(tensors) == 1: 81 | # This seems slightly (2%) faster. 82 | # TODO: check whether it's faster for multiple images as well 83 | image_size = image_sizes[0] 84 | padded = F.pad( 85 | tensors[0], 86 | [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]], 87 | value=pad_value, 88 | ) 89 | batched_imgs = padded.unsqueeze_(0) 90 | else: 91 | batch_shape = (len(tensors),) + max_size 92 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 93 | for img, pad_img in zip(tensors, batched_imgs): 94 | pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img) 95 | 96 | return ImageList(batched_imgs.contiguous(), image_sizes) 97 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import glob 5 | import os 6 | import shutil 7 | from setuptools import find_packages, setup 8 | from typing import List 9 | import torch 10 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 11 | 12 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 13 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 14 | 15 | 16 | def get_extensions(): 17 | this_dir = os.path.dirname(os.path.abspath(__file__)) 18 | extensions_dir = os.path.join(this_dir, "fsdet", "layers", "csrc") 19 | 20 | main_source = os.path.join(extensions_dir, "vision.cpp") 21 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 22 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 23 | os.path.join(extensions_dir, "*.cu") 24 | ) 25 | 26 | sources = [main_source] + sources 27 | extension = CppExtension 28 | 29 | extra_compile_args = {"cxx": []} 30 | define_macros = [] 31 | 32 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 33 | extension = CUDAExtension 34 | sources += source_cuda 35 | define_macros += [("WITH_CUDA", None)] 36 | extra_compile_args["nvcc"] = [ 37 | "-DCUDA_HAS_FP16=1", 38 | "-D__CUDA_NO_HALF_OPERATORS__", 39 | "-D__CUDA_NO_HALF_CONVERSIONS__", 40 | "-D__CUDA_NO_HALF2_OPERATORS__", 41 | ] 42 | 43 | # It's better if pytorch can do this by default .. 
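A worked example of the `ImageList.from_tensors` padding logic above, with shapes checked by hand:

    import torch
    from fsdet.structures import ImageList

    imgs = [torch.rand(3, 480, 640), torch.rand(3, 500, 600)]
    # Per-dimension max is (500, 640); with size_divisibility=32 each side is
    # rounded up, giving a (512, 640) padded grid (the typical FPN stride).
    batched = ImageList.from_tensors(imgs, size_divisibility=32)
    print(batched.tensor.shape)  # torch.Size([2, 3, 512, 640])
    print(batched.image_sizes)   # [(480, 640), (500, 600)] (as torch.Size)
    print(batched[1].shape)      # torch.Size([3, 500, 600]): original, un-padded view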
44 | CC = os.environ.get("CC", None) 45 | if CC is not None: 46 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 47 | 48 | include_dirs = [extensions_dir] 49 | 50 | ext_modules = [ 51 | extension( 52 | "fsdet._C", 53 | sources, 54 | include_dirs=include_dirs, 55 | define_macros=define_macros, 56 | extra_compile_args=extra_compile_args, 57 | ) 58 | ] 59 | 60 | return ext_modules 61 | 62 | 63 | def get_model_zoo_configs() -> List[str]: 64 | """ 65 | Return a list of configs to include in package for model zoo. Copy over these configs inside 66 | fsdet/model_zoo. 67 | """ 68 | 69 | # Use absolute paths while symlinking. 70 | source_configs_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs") 71 | destination = os.path.join( 72 | os.path.dirname(os.path.realpath(__file__)), "fsdet", "model_zoo", "configs" 73 | ) 74 | # Symlink the config directory inside package to have a cleaner pip install. 75 | if os.path.exists(destination): 76 | # Remove stale symlink/directory from a previous build. 77 | if os.path.islink(destination): 78 | os.unlink(destination) 79 | else: 80 | shutil.rmtree(destination) 81 | 82 | try: 83 | os.symlink(source_configs_dir, destination) 84 | except OSError: 85 | # Fall back to copying if symlink fails: ex. on Windows. 86 | shutil.copytree(source_configs_dir, destination) 87 | 88 | config_paths = glob.glob("configs/**/*.yaml", recursive=True) 89 | return config_paths 90 | 91 | 92 | setup( 93 | name="FsDet", 94 | version="0.1", 95 | author="BDD", 96 | url="https://github.com/ucbdrive/few-shot-object-detection", 97 | description="A repository for few-shot object detection.", 98 | packages=find_packages(exclude=("configs", "tests")), 99 | package_data={"fsdet.model_zoo": get_model_zoo_configs()}, 100 | python_requires=">=3.6", 101 | install_requires=[ 102 | "termcolor>=1.1", 103 | "Pillow>=6.0", 104 | "yacs>=0.1.6", 105 | "tabulate", 106 | "cloudpickle", 107 | "matplotlib", 108 | "tqdm>4.29.0", 109 | "tensorboard", 110 | "imagesize", 111 | ], 112 | extras_require={"all": ["shapely", "psutil"]}, 113 | ext_modules=get_extensions(), 114 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 115 | ) 116 | -------------------------------------------------------------------------------- /fsdet/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | """ 4 | An awesome colormap for really neat visualizations. 5 | Copied from Detectron, and removed gray colors. 
6 | """ 7 | 8 | import numpy as np 9 | 10 | __all__ = ["colormap", "random_color"] 11 | 12 | # fmt: off 13 | # RGB: 14 | _COLORS = np.array( 15 | [ 16 | 0.000, 0.447, 0.741, 17 | 0.850, 0.325, 0.098, 18 | 0.929, 0.694, 0.125, 19 | 0.494, 0.184, 0.556, 20 | 0.466, 0.674, 0.188, 21 | 0.301, 0.745, 0.933, 22 | 0.635, 0.078, 0.184, 23 | 0.300, 0.300, 0.300, 24 | 0.600, 0.600, 0.600, 25 | 1.000, 0.000, 0.000, 26 | 1.000, 0.500, 0.000, 27 | 0.749, 0.749, 0.000, 28 | 0.000, 1.000, 0.000, 29 | 0.000, 0.000, 1.000, 30 | 0.667, 0.000, 1.000, 31 | 0.333, 0.333, 0.000, 32 | 0.333, 0.667, 0.000, 33 | 0.333, 1.000, 0.000, 34 | 0.667, 0.333, 0.000, 35 | 0.667, 0.667, 0.000, 36 | 0.667, 1.000, 0.000, 37 | 1.000, 0.333, 0.000, 38 | 1.000, 0.667, 0.000, 39 | 1.000, 1.000, 0.000, 40 | 0.000, 0.333, 0.500, 41 | 0.000, 0.667, 0.500, 42 | 0.000, 1.000, 0.500, 43 | 0.333, 0.000, 0.500, 44 | 0.333, 0.333, 0.500, 45 | 0.333, 0.667, 0.500, 46 | 0.333, 1.000, 0.500, 47 | 0.667, 0.000, 0.500, 48 | 0.667, 0.333, 0.500, 49 | 0.667, 0.667, 0.500, 50 | 0.667, 1.000, 0.500, 51 | 1.000, 0.000, 0.500, 52 | 1.000, 0.333, 0.500, 53 | 1.000, 0.667, 0.500, 54 | 1.000, 1.000, 0.500, 55 | 0.000, 0.333, 1.000, 56 | 0.000, 0.667, 1.000, 57 | 0.000, 1.000, 1.000, 58 | 0.333, 0.000, 1.000, 59 | 0.333, 0.333, 1.000, 60 | 0.333, 0.667, 1.000, 61 | 0.333, 1.000, 1.000, 62 | 0.667, 0.000, 1.000, 63 | 0.667, 0.333, 1.000, 64 | 0.667, 0.667, 1.000, 65 | 0.667, 1.000, 1.000, 66 | 1.000, 0.000, 1.000, 67 | 1.000, 0.333, 1.000, 68 | 1.000, 0.667, 1.000, 69 | 0.333, 0.000, 0.000, 70 | 0.500, 0.000, 0.000, 71 | 0.667, 0.000, 0.000, 72 | 0.833, 0.000, 0.000, 73 | 1.000, 0.000, 0.000, 74 | 0.000, 0.167, 0.000, 75 | 0.000, 0.333, 0.000, 76 | 0.000, 0.500, 0.000, 77 | 0.000, 0.667, 0.000, 78 | 0.000, 0.833, 0.000, 79 | 0.000, 1.000, 0.000, 80 | 0.000, 0.000, 0.167, 81 | 0.000, 0.000, 0.333, 82 | 0.000, 0.000, 0.500, 83 | 0.000, 0.000, 0.667, 84 | 0.000, 0.000, 0.833, 85 | 0.000, 0.000, 1.000, 86 | 0.000, 0.000, 0.000, 87 | 0.143, 0.143, 0.143, 88 | 0.857, 0.857, 0.857, 89 | 1.000, 1.000, 1.000 90 | ] 91 | ).astype(np.float32).reshape(-1, 3) 92 | # fmt: on 93 | 94 | 95 | def colormap(rgb=False, maximum=255): 96 | """ 97 | Args: 98 | rgb (bool): whether to return RGB colors or BGR colors. 99 | maximum (int): either 255 or 1 100 | 101 | Returns: 102 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 103 | """ 104 | assert maximum in [255, 1], maximum 105 | c = _COLORS * maximum 106 | if not rgb: 107 | c = c[:, ::-1] 108 | return c 109 | 110 | 111 | def random_color(rgb=False, maximum=255): 112 | """ 113 | Args: 114 | rgb (bool): whether to return RGB colors or BGR colors. 
115 | maximum (int): either 255 or 1 116 | 117 | Returns: 118 | ndarray: a vector of 3 numbers 119 | """ 120 | idx = np.random.randint(0, len(_COLORS)) 121 | ret = _COLORS[idx] * maximum 122 | if not rgb: 123 | ret = ret[::-1] 124 | return ret 125 | 126 | 127 | if __name__ == "__main__": 128 | import cv2 129 | 130 | size = 100 131 | H, W = 10, 10 132 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 133 | for h in range(H): 134 | for w in range(W): 135 | idx = h * W + w 136 | if idx >= len(_COLORS): 137 | break 138 | canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx] 139 | cv2.imshow("a", canvas) 140 | cv2.waitKey(0) 141 | -------------------------------------------------------------------------------- /fsdet/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from fsdet import _C 8 | 9 | 10 | class _ROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | ctx.aligned = aligned 19 | output = _C.roi_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.roi_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ctx.aligned, 44 | ) 45 | return grad_input, None, None, None, None, None 46 | 47 | 48 | roi_align = _ROIAlign.apply 49 | 50 | 51 | class ROIAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 53 | """ 54 | Args: 55 | output_size (tuple): h, w 56 | spatial_scale (float): scale the input boxes by this number 57 | sampling_ratio (int): number of inputs samples to take for each output 58 | sample. 0 to take samples densely. 59 | aligned (bool): if False, use the legacy implementation in 60 | Detectron. If True, align the results more perfectly. 61 | 62 | Note: 63 | The meaning of aligned=True: 64 | 65 | Given a continuous coordinate c, its two neighboring pixel indices (in our 66 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 67 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 68 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original 69 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 70 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 71 | (relative to our pixel model) when performing bilinear interpolation. 72 | 73 | With `aligned=True`, 74 | we first appropriately scale the ROI and then shift it by -0.5 75 | prior to calling roi_align. This produces the correct neighbors. 
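A numeric check of the pixel-model note above, for continuous coordinate c = 1.3:

    import math

    c = 1.3
    lo = math.floor(c - 0.5)  # 0: left neighbor index
    hi = lo + 1               # 1: right neighbor index
    # The samples are taken at pixel centers 0.5 and 1.5, with bilinear weights:
    w_hi = (c - 0.5) - lo     # 0.8
    w_lo = 1.0 - w_hi         # 0.2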
76 | 77 |         The difference does not make a difference to the model's performance if 78 |         ROIAlign is used together with conv layers. 79 |         """ 80 |         super(ROIAlign, self).__init__() 81 |         self.output_size = output_size 82 |         self.spatial_scale = spatial_scale 83 |         self.sampling_ratio = sampling_ratio 84 |         self.aligned = aligned 85 | 86 |     def forward(self, input, rois): 87 |         """ 88 |         Args: 89 |             input: NCHW images 90 |             rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 91 |         """ 92 |         assert rois.dim() == 2 and rois.size(1) == 5 93 |         return roi_align( 94 |             input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 95 |         ) 96 | 97 |     def __repr__(self): 98 |         tmpstr = self.__class__.__name__ + "(" 99 |         tmpstr += "output_size=" + str(self.output_size) 100 |         tmpstr += ", spatial_scale=" + str(self.spatial_scale) 101 |         tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 102 |         tmpstr += ", aligned=" + str(self.aligned) 103 |         tmpstr += ")" 104 |         return tmpstr 105 | -------------------------------------------------------------------------------- /tools/visualize_data.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import argparse 3 | import numpy as np 4 | import os 5 | from itertools import chain 6 | import cv2 7 | from PIL import Image 8 | 9 | from fsdet.config import get_cfg 10 | from fsdet.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader 11 | from fsdet.data import detection_utils as utils 12 | from fsdet.utils.logger import setup_logger 13 | from fsdet.utils.visualizer import Visualizer 14 | 15 | from fsdet.data.dataset_mapper import AlbumentationMapper 16 | 17 | def setup(args): 18 |     cfg = get_cfg() 19 |     if args.config_file: 20 |         cfg.merge_from_file(args.config_file) 21 |     cfg.merge_from_list(args.opts) 22 |     cfg.freeze() 23 |     return cfg 24 | 25 | 26 | def parse_args(in_args=None): 27 |     parser = argparse.ArgumentParser(description="Visualize ground-truth data") 28 |     parser.add_argument( 29 |         "--source", 30 |         choices=["annotation", "dataloader"], 31 |         required=True, 32 |         help="visualize the annotations or the data loader (with pre-processing)", 33 |     ) 34 |     parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") 35 |     parser.add_argument("--output-dir", default="./", help="path to output directory") 36 |     parser.add_argument("--show", action="store_true", help="show output in a window") 37 |     parser.add_argument( 38 |         "--opts", 39 |         help="Modify config options using the command-line", 40 |         default=None, 41 |         nargs=argparse.REMAINDER, 42 |     ) 43 |     return parser.parse_args(in_args) 44 | 45 | 46 | if __name__ == "__main__": 47 |     args = parse_args() 48 |     logger = setup_logger() 49 |     logger.info("Arguments: " + str(args)) 50 |     cfg = setup(args) 51 | 52 |     dirname = args.output_dir 53 |     os.makedirs(dirname, exist_ok=True) 54 |     metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) 55 | 56 |     def output(vis, fname): 57 |         if args.show: 58 |             print(fname) 59 |             cv2.imshow("window", vis.get_image()[:, :, ::-1]) 60 |             cv2.waitKey() 61 |         else: 62 |             filepath = os.path.join(dirname, fname) 63 |             # add a random suffix for a duplicated image name; fsdet appears to use horizontal flip and scale augmentation (to be verified) 64 |             if os.path.exists(filepath): 65 |                 filepath = filepath[:-4] + '_dup_' + str(np.random.randint(0, 1000)) + '.jpg' 66 |             print("Saving to {} ...".format(filepath)) 67 |             vis.save(filepath) 68 | 69 |     scale = 2.0 if 
args.show else 1.0 70 |     if args.source == "dataloader": 71 |         mapper = None 72 |         if cfg.INPUT.USE_ALBUMENTATIONS: 73 |             mapper = AlbumentationMapper(cfg, is_train=True) 74 |         train_data_loader = build_detection_train_loader(cfg, mapper=mapper) 75 |         for batch in train_data_loader: 76 |             for per_image in batch: 77 |                 # type(per_image), per_image.keys() 78 |                 # <class 'dict'>, dict_keys(['file_name', 'image_id', 'height', 'width', 'image', 'instances']) 79 | 80 |                 # Pytorch tensor is in (C, H, W) format 81 |                 img = per_image["image"].permute(1, 2, 0) 82 |                 if cfg.INPUT.FORMAT == "BGR": 83 |                     img = img[:, :, [2, 1, 0]] 84 |                 else: 85 |                     img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 86 | 87 |                 visualizer = Visualizer(img, metadata=metadata, scale=scale) 88 |                 # target_fields.keys() => dict_keys(['gt_boxes', 'gt_classes']) 89 |                 target_fields = per_image["instances"].get_fields() 90 |                 labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] 91 |                 vis = visualizer.overlay_instances( 92 |                     labels=labels, 93 |                     boxes=target_fields.get("gt_boxes", None), 94 |                 ) 95 | 96 |                 # modified: prefix the filename with the instance count (for VOC, I=1 in any case) 97 |                 num_instances = len(per_image['instances']) 98 |                 output(vis, "I{}_".format(num_instances) + str(per_image["image_id"]) + ".jpg") 99 |     else: 100 |         dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) 101 |         for dic in dicts: 102 |             img = utils.read_image(dic["file_name"], "RGB") 103 |             visualizer = Visualizer(img, metadata=metadata, scale=scale) 104 |             vis = visualizer.draw_dataset_dict(dic) 105 |             output(vis, os.path.basename(dic["file_name"])) 106 | -------------------------------------------------------------------------------- /fsdet/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | from bisect import bisect_right 4 | from typing import List 5 | import torch 6 | 7 | # NOTE: PyTorch's LR scheduler interface uses names that assume the LR changes 8 | # only on epoch boundaries. We typically use iteration based schedules instead. 9 | # As a result, "epoch" (e.g., as in self.last_epoch) should be understood to mean 10 | # "iteration" instead. 11 | 12 | # FIXME: ideally this would be achieved with a CombinedLRScheduler, separating 13 | # MultiStepLR with WarmupLR but the current LRScheduler design doesn't allow it. 14 | 15 | 16 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 17 |     def __init__( 18 |         self, 19 |         optimizer: torch.optim.Optimizer, 20 |         milestones: List[int], 21 |         gamma: float = 0.1, 22 |         warmup_factor: float = 0.001, 23 |         warmup_iters: int = 1000, 24 |         warmup_method: str = "linear", 25 |         last_epoch: int = -1, 26 |     ): 27 |         if not list(milestones) == sorted(milestones): 28 |             raise ValueError( 29 |                 "Milestones should be a list of increasing integers. 
Got {}".format(milestones) 30 |             ) 31 |         self.milestones = milestones 32 |         self.gamma = gamma 33 |         self.warmup_factor = warmup_factor 34 |         self.warmup_iters = warmup_iters 35 |         self.warmup_method = warmup_method 36 |         super().__init__(optimizer, last_epoch) 37 | 38 |     def get_lr(self) -> List[float]: 39 |         warmup_factor = _get_warmup_factor_at_iter( 40 |             self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 41 |         ) 42 |         return [ 43 |             base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch) 44 |             for base_lr in self.base_lrs 45 |         ] 46 | 47 |     def _compute_values(self) -> List[float]: 48 |         # The new interface 49 |         return self.get_lr() 50 | 51 | 52 | class WarmupCosineLR(torch.optim.lr_scheduler._LRScheduler): 53 |     def __init__( 54 |         self, 55 |         optimizer: torch.optim.Optimizer, 56 |         max_iters: int, 57 |         warmup_factor: float = 0.001, 58 |         warmup_iters: int = 1000, 59 |         warmup_method: str = "linear", 60 |         last_epoch: int = -1, 61 |     ): 62 |         self.max_iters = max_iters 63 |         self.warmup_factor = warmup_factor 64 |         self.warmup_iters = warmup_iters 65 |         self.warmup_method = warmup_method 66 |         super().__init__(optimizer, last_epoch) 67 | 68 |     def get_lr(self) -> List[float]: 69 |         warmup_factor = _get_warmup_factor_at_iter( 70 |             self.warmup_method, self.last_epoch, self.warmup_iters, self.warmup_factor 71 |         ) 72 |         # Different definitions of half-cosine with warmup are possible. For 73 |         # simplicity we multiply the standard half-cosine schedule by the warmup 74 |         # factor. An alternative is to start the period of the cosine at warmup_iters 75 |         # instead of at 0. In the case that warmup_iters << max_iters the two are 76 |         # very close to each other. 77 |         return [ 78 |             base_lr 79 |             * warmup_factor 80 |             * 0.5 81 |             * (1.0 + math.cos(math.pi * self.last_epoch / self.max_iters)) 82 |             for base_lr in self.base_lrs 83 |         ] 84 | 85 |     def _compute_values(self) -> List[float]: 86 |         # The new interface 87 |         return self.get_lr() 88 | 89 | 90 | def _get_warmup_factor_at_iter( 91 |     method: str, iter: int, warmup_iters: int, warmup_factor: float 92 | ) -> float: 93 |     """ 94 |     Return the learning rate warmup factor at a specific iteration. 95 |     See https://arxiv.org/abs/1706.02677 for more details. 96 | 97 |     Args: 98 |         method (str): warmup method; either "constant" or "linear". 99 |         iter (int): iteration at which to calculate the warmup factor. 100 |         warmup_iters (int): the number of warmup iterations. 101 |         warmup_factor (float): the base warmup factor (the meaning changes according 102 |             to the method used). 103 | 104 |     Returns: 105 |         float: the effective warmup factor at the given iteration. 106 |     """ 107 |     if iter >= warmup_iters: 108 |         return 1.0 109 | 110 |     if method == "constant": 111 |         return warmup_factor 112 |     elif method == "linear": 113 |         alpha = iter / warmup_iters 114 |         return warmup_factor * (1 - alpha) + alpha 115 |     else: 116 |         raise ValueError("Unknown warmup method: {}".format(method)) 117 | -------------------------------------------------------------------------------- /fsdet/data/datasets/rfs.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | from fvcore.common.file_io import PathManager 5 | import os 6 | import numpy as np 7 | import xml.etree.ElementTree as ET 8 | import cv2 9 | from fsdet.structures import BoxMode 10 | from fsdet.data import DatasetCatalog, MetadataCatalog 11 | 12 | 13 | __all__ = ["register_rfs"] 14 | 15 | 16 | # fmt: off 17 | CLASS_NAMES = [ 18 |     'laptop', 19 |     'lighter', 20 |     'portable_charger_2', 21 |     'iron_shoe', 22 |     'straight_knife', 23 |     'folding_knife', 24 |     'scissor', 25 |     'multi-tool_knife', 26 |     'umbrella', 27 |     'glass_bottle', 28 |     'battery', 29 |     'metal_cup', 30 |     'nail_clippers', 31 |     'pressure_tank', 32 |     'spray_alcohol', 33 |     'portable_charger_1', 34 |     'utility_knife', 35 |     'mobile_phone', 36 |     'metal_can', 37 |     'drink_bottle' 38 | ] 39 | # fmt: on 40 | 41 | def parse_rec(filename): 42 |     """ Parse an RFS .txt annotation file (the given .xml path is mapped to its .txt counterpart) """ 43 |     # tree = ET.parse(filename) 44 |     # filename = filename[:-3] + 'txt' 45 | 46 |     filename = filename.replace('.xml', '.txt') 47 |     imagename0 = filename.replace('annotation', 'image') 48 |     # imagename1 = imagename0.replace('.txt', '.TIFF') # jpg form 49 |     # imagename2 = imagename0.replace('.txt', '.tiff') 50 |     imagename3 = imagename0.replace('.txt', '.jpg') 51 |     objects = [] 52 |     #print(filename,imagename0,imagename3) 53 |     # img = cv2.imread(imagename1) 54 |     # if img is None: 55 |     #     img = cv2.imread(imagename2) 56 |     #     if img is None: 57 |     img = cv2.imread(imagename3) 58 | 59 |     height, width, channels = img.shape 60 |     with open(filename, "r", encoding='utf-8') as f1: 61 |         dataread = f1.readlines() 62 |     #print(dataread) 63 |     for annotation in dataread: 64 |         obj_struct = {} 65 |         temp = annotation.split() 66 |         name = temp[1].strip() 67 |         rate = float(temp[-1].strip()) 68 |         # if name != 'Portable_Charger_1' and name != 'Portable_Charger_2'and name != 'Mobile_Phone'and name != 'Cosmetic'and name != 'Nonmetallic_Lighter'and name != 'Water'and name != 'Tablet'and name != 'Laptop': 69 |         #     continue 70 |         xmin = int(temp[2]) 71 | 72 |         if int(xmin) > width: 73 |             continue 74 |         if xmin < 0: 75 |             xmin = 1 76 |         ymin = int(temp[3]) 77 |         if ymin < 0: 78 |             ymin = 1 79 |         xmax = int(temp[4]) 80 |         if xmax > width: 81 |             xmax = width - 1 82 |         ymax = int(temp[5]) 83 |         if ymax > height: 84 |             ymax = height - 1 85 |         # name 86 |         obj_struct['name'] = name 87 |         obj_struct['pose'] = 'Unspecified' 88 |         obj_struct['truncated'] = 0 89 |         obj_struct['difficult'] = 0 90 |         obj_struct['bbox'] = [float(xmin) - 1, 91 |                               float(ymin) - 1, 92 |                               float(xmax) - 1, 93 |                               float(ymax) - 1] 94 |         obj_struct['rate'] = rate 95 |         objects.append(obj_struct) 96 | 97 |     return objects, height, width 98 | 99 | def load_rfs_instances(dirname: str, split: str): 100 |     """ 101 |     Load RFS detection annotations (PASCAL VOC style, stored as .txt files) to Detectron2 format. 
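    Each line of an annotation .txt file is assumed (from parse_rec above) to look like "<id> <class_name> <xmin> <ymin> <xmax> <ymax> ... <rate>"; only the class name, the four box coordinates, and the trailing rate field are read. Boxes are clipped to the image and converted to 0-indexed coordinates.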
102 | 103 |     Args: 104 |         dirname: contains the "annotations" and "images" folders and a <split>.txt file listing image ids 105 |         split (str): one of "train", "test", "val", "trainval" 106 |     """ 107 |     with PathManager.open(os.path.join(dirname, split + ".txt")) as f: 108 |         fileids = np.loadtxt(f, dtype=str) 109 | 110 |     dicts = [] 111 |     for fileid in fileids: 112 |         anno_file = os.path.join(dirname, "annotations", fileid + ".txt") 113 |         jpeg_file = os.path.join(dirname, "images", fileid + ".jpg") 114 | 115 |         objs, height, width = parse_rec(anno_file) 116 | 117 |         r = { 118 |             "file_name": jpeg_file, 119 |             "image_id": fileid, 120 |             "height": height, 121 |             "width": width, 122 |         } 123 | 124 |         instances = [] 125 | 126 |         for obj in objs: 127 |             cls = obj["name"] 128 | 129 |             bbox = obj['bbox'] 130 | 131 |             instances.append({ 132 |                 "category_id": CLASS_NAMES.index(cls), 133 |                 "bbox": bbox, 134 |                 "bbox_mode": BoxMode.XYXY_ABS, 135 |             }) 136 | 137 |         r["annotations"] = instances 138 |         dicts.append(r) 139 |     return dicts 140 | 141 | 142 | def register_rfs(name, dirname, split, year): 143 |     DatasetCatalog.register(name, lambda: load_rfs_instances(dirname, split)) 144 |     MetadataCatalog.get(name).set( 145 |         thing_classes=CLASS_NAMES, dirname=dirname, year=year, split=split 146 |     ) 147 | -------------------------------------------------------------------------------- /fsdet/data/transforms/transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: transform.py 4 | 5 | import numpy as np 6 | from fvcore.transforms.transform import HFlipTransform, NoOpTransform, Transform 7 | from PIL import Image 8 | 9 | __all__ = ["ExtentTransform", "ResizeTransform"] 10 | 11 | 12 | class ExtentTransform(Transform): 13 |     """ 14 |     Extracts a subregion from the source image and scales it to the output size. 15 | 16 |     The fill color is used to map pixels from the source rect that fall outside 17 |     the source image. 18 | 19 |     See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform 20 |     """ 21 | 22 |     def __init__(self, src_rect, output_size, interp=Image.BILINEAR, fill=0): 23 |         """ 24 |         Args: 25 |             src_rect (x0, y0, x1, y1): src coordinates 26 |             output_size (h, w): dst image size 27 |             interp: PIL interpolation methods 28 |             fill: Fill color used when src_rect extends outside image 29 |         """ 30 |         super().__init__() 31 |         self._set_attributes(locals()) 32 | 33 |     def apply_image(self, img, interp=None): 34 |         h, w = self.output_size 35 |         ret = Image.fromarray(img).transform( 36 |             size=(w, h), 37 |             method=Image.EXTENT, 38 |             data=self.src_rect, 39 |             resample=interp if interp else self.interp, 40 |             fill=self.fill, 41 |         ) 42 |         return np.asarray(ret) 43 | 44 |     def apply_coords(self, coords): 45 |         # Transform image center from source coordinates into output coordinates 46 |         # and then map the new origin to the corner of the output image. 47 |         h, w = self.output_size 48 |         x0, y0, x1, y1 = self.src_rect 49 |         new_coords = coords.astype(np.float32) 50 |         new_coords[:, 0] -= 0.5 * (x0 + x1) 51 |         new_coords[:, 1] -= 0.5 * (y0 + y1) 52 |         new_coords[:, 0] *= w / (x1 - x0) 53 |         new_coords[:, 1] *= h / (y1 - y0) 54 |         new_coords[:, 0] += 0.5 * w 55 |         new_coords[:, 1] += 0.5 * h 56 |         return new_coords 57 | 58 | 59 | class ResizeTransform(Transform): 60 |     """ 61 |     Resize the image to a target size. 
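    Coordinates are rescaled by new_w / w and new_h / h along x and y respectively (see apply_coords below).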
62 | """ 63 | 64 | def __init__(self, h, w, new_h, new_w, interp): 65 | """ 66 | Args: 67 | h, w (int): original image size 68 | new_h, new_w (int): new image size 69 | interp: PIL interpolation methods 70 | """ 71 | # TODO decide on PIL vs opencv 72 | super().__init__() 73 | self._set_attributes(locals()) 74 | 75 | def apply_image(self, img, interp=None): 76 | assert img.shape[:2] == (self.h, self.w) 77 | pil_image = Image.fromarray(img) 78 | interp_method = interp if interp is not None else self.interp 79 | pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) 80 | ret = np.asarray(pil_image) 81 | return ret 82 | 83 | def apply_coords(self, coords): 84 | coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) 85 | coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) 86 | return coords 87 | 88 | 89 | def HFlip_rotated_box(transform, rotated_boxes): 90 | """ 91 | Apply the horizontal flip transform on rotated boxes. 92 | 93 | Args: 94 | rotated_boxes (ndarray): Nx5 floating point array of 95 | (x_center, y_center, width, height, angle_degrees) format 96 | in absolute coordinates. 97 | """ 98 | # Transform x_center 99 | rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] 100 | # Transform angle 101 | rotated_boxes[:, 4] = -rotated_boxes[:, 4] 102 | return rotated_boxes 103 | 104 | 105 | def Resize_rotated_box(transform, rotated_boxes): 106 | """ 107 | Apply the resizing transform on rotated boxes. For details of how these (approximation) 108 | formulas are derived, please refer to :meth:`RotatedBoxes.scale`. 109 | 110 | Args: 111 | rotated_boxes (ndarray): Nx5 floating point array of 112 | (x_center, y_center, width, height, angle_degrees) format 113 | in absolute coordinates. 114 | """ 115 | scale_factor_x = transform.new_w * 1.0 / transform.w 116 | scale_factor_y = transform.new_h * 1.0 / transform.h 117 | rotated_boxes[:, 0] *= scale_factor_x 118 | rotated_boxes[:, 1] *= scale_factor_y 119 | theta = rotated_boxes[:, 4] * np.pi / 180.0 120 | c = np.cos(theta) 121 | s = np.sin(theta) 122 | rotated_boxes[:, 2] *= np.sqrt(np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) 123 | rotated_boxes[:, 3] *= np.sqrt(np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) 124 | rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, scale_factor_y * c) * 180 / np.pi 125 | 126 | return rotated_boxes 127 | 128 | 129 | HFlipTransform.register_type("rotated_box", HFlip_rotated_box) 130 | NoOpTransform.register_type("rotated_box", lambda t, x: x) 131 | ResizeTransform.register_type("rotated_box", Resize_rotated_box) 132 | -------------------------------------------------------------------------------- /fsdet/layers/csrc/nms_rotated/nms_rotated_cuda.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #include <ATen/ATen.h> 3 | #include <ATen/cuda/CUDAContext.h> 4 | #include <c10/cuda/CUDAGuard.h> 5 | #include <ATen/cuda/CUDAApplyUtils.cuh> 6 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 7 | 8 | using namespace fsdet; 9 | 10 | namespace { 11 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 12 | } 13 | 14 | template <typename T> 15 | __global__ void nms_rotated_cuda_kernel( 16 |     const int n_boxes, 17 |     const float iou_threshold, 18 |     const T* dev_boxes, 19 |     unsigned long long* dev_mask) { 20 |   // nms_rotated_cuda_kernel is modified from torchvision's nms_cuda_kernel 21 | 22 |   const int row_start = blockIdx.y; 23 |   const int col_start = blockIdx.x; 24 | 25 |   // if (row_start > col_start) return; 26 | 27 |   const int row_size = 28 |       min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 29 |   const int col_size = 30 |       min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 31 | 32 |   // Compared to nms_cuda_kernel, where each box is represented with 4 values 33 |   // (x1, y1, x2, y2), each rotated box is represented with 5 values 34 |   // (x_center, y_center, width, height, angle_degrees) here. 35 |   __shared__ T block_boxes[threadsPerBlock * 5]; 36 |   if (threadIdx.x < col_size) { 37 |     block_boxes[threadIdx.x * 5 + 0] = 38 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 39 |     block_boxes[threadIdx.x * 5 + 1] = 40 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 41 |     block_boxes[threadIdx.x * 5 + 2] = 42 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 43 |     block_boxes[threadIdx.x * 5 + 3] = 44 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 45 |     block_boxes[threadIdx.x * 5 + 4] = 46 |         dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 47 |   } 48 |   __syncthreads(); 49 | 50 |   if (threadIdx.x < row_size) { 51 |     const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 52 |     const T* cur_box = dev_boxes + cur_box_idx * 5; 53 |     int i = 0; 54 |     unsigned long long t = 0; 55 |     int start = 0; 56 |     if (row_start == col_start) { 57 |       start = threadIdx.x + 1; 58 |     } 59 |     for (i = start; i < col_size; i++) { 60 |       // Instead of devIoU used by original horizontal nms, here 61 |       // we use the single_box_iou_rotated function from box_iou_rotated_utils.h 62 |       if (single_box_iou_rotated<T>(cur_box, block_boxes + i * 5) > 63 |           iou_threshold) { 64 |         t |= 1ULL << i; 65 |       } 66 |     } 67 |     const int col_blocks = at::cuda::ATenCeilDiv(n_boxes, threadsPerBlock); 68 |     dev_mask[cur_box_idx * col_blocks + col_start] = t; 69 |   } 70 | } 71 | 72 | namespace fsdet { 73 | 74 | at::Tensor nms_rotated_cuda( 75 |     const at::Tensor& dets, 76 |     const at::Tensor& scores, 77 |     float iou_threshold) { 78 |   // using scalar_t = float; 79 |   AT_ASSERTM(dets.type().is_cuda(), "dets must be a CUDA tensor"); 80 |   AT_ASSERTM(scores.type().is_cuda(), "scores must be a CUDA tensor"); 81 |   at::cuda::CUDAGuard device_guard(dets.device()); 82 | 83 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 84 |   auto dets_sorted = dets.index_select(0, order_t); 85 | 86 |   int dets_num = dets.size(0); 87 | 88 |   const int col_blocks = at::cuda::ATenCeilDiv(dets_num, threadsPerBlock); 89 | 90 |   at::Tensor mask = 91 |       at::empty({dets_num * col_blocks}, dets.options().dtype(at::kLong)); 92 | 93 |   dim3 blocks(col_blocks, col_blocks); 94 |   dim3 threads(threadsPerBlock); 95 |   cudaStream_t stream = at::cuda::getCurrentCUDAStream(); 96 | 97 |   AT_DISPATCH_FLOATING_TYPES_AND_HALF( 98 |       dets_sorted.type(), "nms_rotated_kernel_cuda", [&] { 99 |         nms_rotated_cuda_kernel<scalar_t><<<blocks, threads, 0, stream>>>( 100 |             dets_num, 101 |             iou_threshold, 
dets_sorted.data<scalar_t>(), 103 |             (unsigned long long*)mask.data<int64_t>()); 104 |       }); 105 | 106 |   at::Tensor mask_cpu = mask.to(at::kCPU); 107 |   unsigned long long* mask_host = (unsigned long long*)mask_cpu.data<int64_t>(); 108 | 109 |   std::vector<unsigned long long> remv(col_blocks); 110 |   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 111 | 112 |   at::Tensor keep = 113 |       at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU)); 114 |   int64_t* keep_out = keep.data<int64_t>(); 115 | 116 |   int num_to_keep = 0; 117 |   for (int i = 0; i < dets_num; i++) { 118 |     int nblock = i / threadsPerBlock; 119 |     int inblock = i % threadsPerBlock; 120 | 121 |     if (!(remv[nblock] & (1ULL << inblock))) { 122 |       keep_out[num_to_keep++] = i; 123 |       unsigned long long* p = mask_host + i * col_blocks; 124 |       for (int j = nblock; j < col_blocks; j++) { 125 |         remv[j] |= p[j]; 126 |       } 127 |     } 128 |   } 129 | 130 |   AT_CUDA_CHECK(cudaGetLastError()); 131 |   return order_t.index( 132 |       {keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep) 133 |            .to(order_t.device(), keep.scalar_type())}); 134 | } 135 | 136 | } // namespace fsdet 137 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Detection Training Script. 4 | 5 | This script reads a given config file and runs the training or evaluation. 6 | It is an entry point that is made to train standard models in FsDet. 7 | 8 | In order to let one script support training of many models, 9 | this script contains logic that is specific to these built-in models and 10 | therefore may not be suitable for your own project. 11 | For example, your research project perhaps only needs a single "evaluator". 12 | 13 | Therefore, we recommend using FsDet as a library and taking 14 | this file as an example of how to use the library. 15 | You may want to write your own script with your datasets and other customizations. 16 | """ 17 | 18 | import os 19 | from fsdet.evaluation.rfs_evaluation import RFSDetectionEvaluator 20 | 21 | import fsdet.utils.comm as comm 22 | from fsdet.checkpoint import DetectionCheckpointer 23 | from fsdet.config import get_cfg, set_global_cfg 24 | from fsdet.data import MetadataCatalog, build_detection_train_loader 25 | from fsdet.engine import ( 26 |     DefaultTrainer, 27 |     default_argument_parser, 28 |     default_setup, 29 |     launch, 30 | ) 31 | from fsdet.evaluation import ( 32 |     COCOEvaluator, 33 |     DatasetEvaluators, 34 |     LVISEvaluator, 35 |     PascalVOCDetectionEvaluator, 36 |     verify_results, 37 | ) 38 | 39 | # from fsdet.data.dataset_mapper import AlbumentationMapper 40 | 41 | 42 | class Trainer(DefaultTrainer): 43 |     """ 44 |     We use the "DefaultTrainer" which contains a number of pre-defined logic components for 45 |     the standard training workflow. They may not work for you, especially if you 46 |     are working on a new research project. In that case you can use the cleaner 47 |     "SimpleTrainer", or write your own training loop. 48 |     """ 49 | 50 |     @classmethod 51 |     def build_evaluator(cls, cfg, dataset_name, output_folder=None): 52 |         """ 53 |         Create evaluator(s) for a given dataset. 54 |         This uses the special metadata "evaluator_type" associated with each builtin dataset. 55 |         For your own dataset, you can simply create an evaluator manually in your 56 |         script and do not have to worry about the hacky if-else logic here. 
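        For example, a custom script can simply construct evaluator = PascalVOCDetectionEvaluator(dataset_name) (imported above) for a VOC-style dataset.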
57 | """ 58 | if output_folder is None: 59 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 60 | evaluator_list = [] 61 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 62 | if evaluator_type == "coco": 63 | evaluator_list.append(COCOEvaluator(dataset_name, cfg, True, output_folder)) 64 | if evaluator_type == "pascal_voc": 65 | return PascalVOCDetectionEvaluator(dataset_name) 66 | if evaluator_type == "lvis": 67 | return LVISEvaluator(dataset_name, cfg, True, output_folder) 68 | if evaluator_type == "rfs": 69 | return RFSDetectionEvaluator(dataset_name) 70 | if len(evaluator_list) == 0: 71 | raise NotImplementedError( 72 | "no Evaluator for the dataset {} with the type {}".format( 73 | dataset_name, evaluator_type 74 | ) 75 | ) 76 | if len(evaluator_list) == 1: 77 | return evaluator_list[0] 78 | return DatasetEvaluators(evaluator_list) 79 | 80 | @classmethod 81 | def build_train_loader(cls, cfg): 82 | mapper = None 83 | # if cfg.INPUT.USE_ALBUMENTATIONS: 84 | # mapper = AlbumentationMapper(cfg, is_train=True) 85 | return build_detection_train_loader(cfg, mapper=mapper) 86 | 87 | def setup(args): 88 | """ 89 | Create configs and perform basic setups. 90 | """ 91 | cfg = get_cfg() 92 | cfg.merge_from_file(args.config_file) 93 | cfg.merge_from_list(args.opts) 94 | cfg.freeze() 95 | set_global_cfg(cfg) 96 | default_setup(cfg, args) 97 | return cfg 98 | 99 | 100 | def main(args): 101 | cfg = setup(args) 102 | 103 | if args.eval_only: 104 | model = Trainer.build_model(cfg) 105 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 106 | cfg.MODEL.WEIGHTS, resume=args.resume 107 | ) 108 | res = Trainer.test(cfg, model) 109 | if comm.is_main_process(): 110 | verify_results(cfg, res) 111 | return res 112 | 113 | """ 114 | If you'd like to do anything fancier than the standard training logic, 115 | consider writing your own training loop or subclassing the trainer. 116 | """ 117 | 118 | trainer = Trainer(cfg) 119 | trainer.resume_or_load(resume=args.resume) 120 | return trainer.train() 121 | 122 | 123 | if __name__ == "__main__": 124 | args = default_argument_parser().parse_args() 125 | 126 | # args.config_file = 'configs/RFS/split1/5shot_GPB_PFB_proloss.yml' 127 | # args.num_gpus = 3 128 | # args.opts = ['MODEL.ROI_HEADS.NOVEL_MODULE.PROTOTYPES_FUSE_ALPHA', 0.1] 129 | 130 | print("Command Line Args:", args) 131 | launch( 132 | main, 133 | args.num_gpus, 134 | num_machines=args.num_machines, 135 | machine_rank=args.machine_rank, 136 | dist_url=args.dist_url, 137 | args=(args,), 138 | ) 139 | -------------------------------------------------------------------------------- /fsdet/utils/video_visualizer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | import pycocotools.mask as mask_util 4 | 5 | from fsdet.utils.visualizer import ( 6 | ColorMode, 7 | Visualizer, 8 | _create_text_labels, 9 | ) 10 | 11 | from .colormap import random_color 12 | 13 | 14 | class _DetectedInstance: 15 | """ 16 | Used to store data about detected objects in video frame, 17 | in order to transfer color to objects in the future frames. 18 | 19 | Attributes: 20 | label (int): 21 | bbox (tuple[float]): 22 | color (tuple[float]): RGB colors in range (0, 1) 23 | ttl (int): time-to-live for the instance. For example, if ttl=2, 24 | the instance color can be transferred to objects in the next two frames. 
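            ttl is decremented on every frame in which the instance is not matched (see _assign_colors below), and the instance is dropped once it reaches 0.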
25 | """ 26 | 27 | __slots__ = ["label", "bbox", "color", "ttl"] 28 | 29 | def __init__(self, label, bbox, color, ttl): 30 | self.label = label 31 | self.bbox = bbox 32 | self.color = color 33 | self.ttl = ttl 34 | 35 | 36 | class VideoVisualizer: 37 | def __init__(self, metadata, instance_mode=ColorMode.IMAGE): 38 | """ 39 | Args: 40 | metadata (MetadataCatalog): image metadata. 41 | """ 42 | self.metadata = metadata 43 | self._old_instances = [] 44 | assert instance_mode in [ 45 | ColorMode.IMAGE, 46 | ColorMode.IMAGE_BW, 47 | ], "Other mode not supported yet." 48 | self._instance_mode = instance_mode 49 | 50 | def draw_instance_predictions(self, frame, predictions): 51 | """ 52 | Draw instance-level prediction results on an image. 53 | 54 | Args: 55 | frame (ndarray): an RGB image of shape (H, W, C), in the range [0, 255]. 56 | predictions (Instances): the output of an instance detection 57 | model. Following fields will be used to draw: 58 | "pred_boxes", "pred_classes", "scores". 59 | 60 | Returns: 61 | output (VisImage): image object with visualizations. 62 | """ 63 | frame_visualizer = Visualizer(frame, self.metadata) 64 | num_instances = len(predictions) 65 | if num_instances == 0: 66 | return frame_visualizer.output 67 | 68 | boxes = predictions.pred_boxes.tensor.numpy() if predictions.has("pred_boxes") else None 69 | scores = predictions.scores if predictions.has("scores") else None 70 | classes = predictions.pred_classes.numpy() if predictions.has("pred_classes") else None 71 | 72 | detected = [ 73 | _DetectedInstance(classes[i], boxes[i], color=None, ttl=8) 74 | for i in range(num_instances) 75 | ] 76 | colors = self._assign_colors(detected) 77 | 78 | labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) 79 | 80 | if self._instance_mode == ColorMode.IMAGE_BW: 81 | # any() returns uint8 tensor 82 | frame_visualizer.output.img = frame_visualizer._create_grayscale_image() 83 | alpha = 0.3 84 | else: 85 | alpha = 0.5 86 | 87 | frame_visualizer.overlay_instances( 88 | boxes=boxes, # boxes are a bit distracting 89 | labels=labels, 90 | assigned_colors=colors, 91 | alpha=alpha, 92 | ) 93 | 94 | return frame_visualizer.output 95 | 96 | def _assign_colors(self, instances): 97 | """ 98 | Naive tracking heuristics to assign same color to the same instance, 99 | will update the internal state of tracked instances. 100 | 101 | Returns: 102 | list[tuple[float]]: list of colors. 
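                (one entry per element of `instances`, in the same order)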
103 |         """ 104 | 105 |         # Compute iou with boxes: 106 |         is_crowd = np.zeros((len(instances),), dtype=bool) 107 |         boxes_old = [x.bbox for x in self._old_instances] 108 |         boxes_new = [x.bbox for x in instances] 109 |         ious = mask_util.iou(boxes_old, boxes_new, is_crowd) 110 |         threshold = 0.6 111 |         if len(ious) == 0: 112 |             ious = np.zeros((len(self._old_instances), len(instances)), dtype="float32") 113 | 114 |         # Only allow matching instances of the same label: 115 |         for old_idx, old in enumerate(self._old_instances): 116 |             for new_idx, new in enumerate(instances): 117 |                 if old.label != new.label: 118 |                     ious[old_idx, new_idx] = 0 119 | 120 |         matched_new_per_old = np.asarray(ious).argmax(axis=1) 121 |         max_iou_per_old = np.asarray(ious).max(axis=1) 122 | 123 |         # Try to find match for each old instance: 124 |         extra_instances = [] 125 |         for idx, inst in enumerate(self._old_instances): 126 |             if max_iou_per_old[idx] > threshold: 127 |                 newidx = matched_new_per_old[idx] 128 |                 if instances[newidx].color is None: 129 |                     instances[newidx].color = inst.color 130 |                 continue 131 |             # If an old instance does not match any new instances, 132 |             # keep it for the next frame in case it is just missed by the detector 133 |             inst.ttl -= 1 134 |             if inst.ttl > 0: 135 |                 extra_instances.append(inst) 136 | 137 |         # Assign random color to newly-detected instances: 138 |         for inst in instances: 139 |             if inst.color is None: 140 |                 inst.color = random_color(rgb=True, maximum=1) 141 |         self._old_instances = instances[:] + extra_instances 142 |         return [d.color for d in instances] 143 | -------------------------------------------------------------------------------- /fsdet/checkpoint/catalog.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | from fvcore.common.file_io import PathHandler, PathManager 4 | 5 | 6 | class ModelCatalog(object): 7 |     """ 8 |     Store mappings from names to third-party models. 9 |     """ 10 | 11 |     S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" 12 | 13 |     # MSRA models have STRIDE_IN_1X1=True. False otherwise. 14 |     # NOTE: all BN models here have fused BN into an affine layer. 15 |     # As a result, you should only load them to a model with "FrozenBN". 16 |     # Loading them to a model with regular BN or SyncBN is wrong. 17 |     # Even when loaded to FrozenBN, it is still different from affine by an epsilon, 18 |     # which should be negligible for training. 
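    # (Fusing computes scale = gamma / sqrt(running_var + eps) and bias = beta - running_mean * scale, which is where that epsilon enters.)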
19 |     # NOTE: all models here use PIXEL_STD=[1,1,1] 20 |     C2_IMAGENET_MODELS = { 21 |         "MSRA/R-50": "ImageNetPretrained/MSRA/R-50.pkl", 22 |         "MSRA/R-101": "ImageNetPretrained/MSRA/R-101.pkl", 23 |         "FAIR/R-50-GN": "ImageNetPretrained/47261647/R-50-GN.pkl", 24 |         "FAIR/R-101-GN": "ImageNetPretrained/47592356/R-101-GN.pkl", 25 |         "FAIR/X-101-32x8d": "ImageNetPretrained/20171220/X-101-32x8d.pkl", 26 |         "FAIR/X-101-64x4d": "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", 27 |         "FAIR/X-152-32x8d-IN5k": "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", 28 |     } 29 | 30 |     C2_DETECTRON_PATH_FORMAT = ( 31 |         "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" 32 |     )  # noqa B950 33 | 34 |     C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" 35 | 36 |     # format: {model_name} -> part of the url 37 |     C2_DETECTRON_MODELS = { 38 |         "35857197/e2e_faster_rcnn_R-50-C4_1x": "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW",  # noqa B950 39 |         "35857345/e2e_faster_rcnn_R-50-FPN_1x": "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I",  # noqa B950 40 |         "35857890/e2e_faster_rcnn_R-101-FPN_1x": "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7",  # noqa B950 41 |         "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ",  # noqa B950 42 |     } 43 | 44 |     @staticmethod 45 |     def get(name): 46 |         if name.startswith("Caffe2Detectron/COCO"): 47 |             return ModelCatalog._get_c2_detectron_baseline(name) 48 |         if name.startswith("ImageNetPretrained/"): 49 |             return ModelCatalog._get_c2_imagenet_pretrained(name) 50 |         raise RuntimeError("model not present in the catalog: {}".format(name)) 51 | 52 |     @staticmethod 53 |     def _get_c2_imagenet_pretrained(name): 54 |         prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX 55 |         name = name[len("ImageNetPretrained/") :] 56 |         name = ModelCatalog.C2_IMAGENET_MODELS[name] 57 |         url = "/".join([prefix, name]) 58 |         return url 59 | 60 |     @staticmethod 61 |     def _get_c2_detectron_baseline(name): 62 |         name = name[len("Caffe2Detectron/COCO/") :] 63 |         url = ModelCatalog.C2_DETECTRON_MODELS[name] 64 |         dataset = ModelCatalog.C2_DATASET_COCO 65 |         type = "generalized_rcnn" 66 | 67 |         # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. 68 |         url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( 69 |             prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, url=url, type=type, dataset=dataset 70 |         ) 71 |         return url 72 | 73 | 74 | class ModelCatalogHandler(PathHandler): 75 |     """ 76 |     Resolve URL like catalog://. 77 |     """ 78 | 79 |     PREFIX = "catalog://" 80 | 81 |     def _get_supported_prefixes(self): 82 |         return [self.PREFIX] 83 | 84 |     def _get_local_path(self, path): 85 |         logger = logging.getLogger(__name__) 86 |         catalog_path = ModelCatalog.get(path[len(self.PREFIX) :]) 87 |         logger.info("Catalog entry {} points to {}".format(path, catalog_path)) 88 |         return PathManager.get_local_path(catalog_path) 89 | 90 |     def _open(self, path, mode="r", **kwargs): 91 |         return PathManager.open(self._get_local_path(path), mode, **kwargs) 92 | 93 | 94 | class Detectron2Handler(PathHandler): 95 |     """ 96 |     Resolve anything that's in Detectron2 model zoo. 
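    For example, "detectron2://ImageNetPretrained/MSRA/R-50.pkl" resolves to S3_DETECTRON2_PREFIX + "ImageNetPretrained/MSRA/R-50.pkl".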
97 | """ 98 | 99 | PREFIX = "detectron2://" 100 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 101 | 102 | def _get_supported_prefixes(self): 103 | return [self.PREFIX] 104 | 105 | def _get_local_path(self, path): 106 | name = path[len(self.PREFIX) :] 107 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 108 | 109 | def _open(self, path, mode="r", **kwargs): 110 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 111 | 112 | 113 | class FsDetHandler(PathHandler): 114 | """ 115 | Resolve anything that's in FsDet model zoo. 116 | """ 117 | 118 | PREFIX = "fsdet://" 119 | URL_PREFIX = "http://dl.yf.io/fs-det/models/" 120 | 121 | def _get_supported_prefixes(self): 122 | return [self.PREFIX] 123 | 124 | def _get_local_path(self, path): 125 | name = path[len(self.PREFIX) :] 126 | return PathManager.get_local_path(self.URL_PREFIX + name) 127 | 128 | def _open(self, path, mode="r", **kwargs): 129 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 130 | 131 | 132 | PathManager.register_handler(ModelCatalogHandler()) 133 | PathManager.register_handler(Detectron2Handler()) 134 | PathManager.register_handler(FsDetHandler()) 135 | -------------------------------------------------------------------------------- /demo/demo.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import argparse 3 | import glob 4 | import multiprocessing as mp 5 | import os 6 | import time 7 | import cv2 8 | import tqdm 9 | 10 | from fsdet.config import get_cfg 11 | from fsdet.data.detection_utils import read_image 12 | from fsdet.utils.logger import setup_logger 13 | 14 | from predictor import VisualizationDemo 15 | 16 | # constants 17 | WINDOW_NAME = "COCO detections" 18 | 19 | 20 | def setup_cfg(args): 21 | # load config from file and command-line arguments 22 | cfg = get_cfg() 23 | cfg.merge_from_file(args.config_file) 24 | cfg.merge_from_list(args.opts) 25 | # Set score_threshold for builtin models 26 | cfg.MODEL.RETINANET.SCORE_THRESH_TEST = args.confidence_threshold 27 | cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = args.confidence_threshold 28 | cfg.freeze() 29 | return cfg 30 | 31 | 32 | def get_parser(): 33 | parser = argparse.ArgumentParser(description="FsDet demo for builtin models") 34 | parser.add_argument( 35 | "--config-file", 36 | default="configs/COCO-detection/faster_rcnn_R_101_FPN_ft_all_1shot.yaml", 37 | metavar="FILE", 38 | help="path to config file", 39 | ) 40 | parser.add_argument("--webcam", action="store_true", help="Take inputs from webcam.") 41 | parser.add_argument("--video-input", help="Path to video file.") 42 | parser.add_argument( 43 | "--input", 44 | nargs="+", 45 | help="A list of space separated input images; " 46 | "or a single glob pattern such as 'directory/*.jpg'", 47 | ) 48 | parser.add_argument( 49 | "--output", 50 | help="A file or directory to save output visualizations. 
" 51 | "If not given, will show output in an OpenCV window.", 52 | ) 53 | 54 | parser.add_argument( 55 | "--confidence-threshold", 56 | type=float, 57 | default=0.5, 58 | help="Minimum score for instance predictions to be shown", 59 | ) 60 | parser.add_argument( 61 | "--opts", 62 | help="Modify config options using the command-line 'KEY VALUE' pairs", 63 | default=[], 64 | nargs=argparse.REMAINDER, 65 | ) 66 | return parser 67 | 68 | 69 | if __name__ == "__main__": 70 | mp.set_start_method("spawn", force=True) 71 | args = get_parser().parse_args() 72 | setup_logger(name="fvcore") 73 | logger = setup_logger() 74 | logger.info("Arguments: " + str(args)) 75 | 76 | cfg = setup_cfg(args) 77 | 78 | demo = VisualizationDemo(cfg) 79 | 80 | if args.input: 81 | if len(args.input) == 1: 82 | args.input = glob.glob(os.path.expanduser(args.input[0])) 83 | assert args.input, "The input path(s) was not found" 84 | for path in tqdm.tqdm(args.input, disable=not args.output): 85 | # use PIL, to be consistent with evaluation 86 | img = read_image(path, format="BGR") 87 | start_time = time.time() 88 | predictions, visualized_output = demo.run_on_image(img) 89 | logger.info( 90 | "{}: {} in {:.2f}s".format( 91 | path, 92 | "detected {} instances".format(len(predictions["instances"])) 93 | if "instances" in predictions 94 | else "finished", 95 | time.time() - start_time, 96 | ) 97 | ) 98 | 99 | if args.output: 100 | if os.path.isdir(args.output): 101 | assert os.path.isdir(args.output), args.output 102 | out_filename = os.path.join(args.output, os.path.basename(path)) 103 | else: 104 | assert len(args.input) == 1, "Please specify a directory with args.output" 105 | out_filename = args.output 106 | visualized_output.save(out_filename) 107 | else: 108 | cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) 109 | cv2.imshow(WINDOW_NAME, visualized_output.get_image()[:, :, ::-1]) 110 | if cv2.waitKey(0) == 27: 111 | break # esc to quit 112 | elif args.webcam: 113 | assert args.input is None, "Cannot have both --input and --webcam!" 114 | cam = cv2.VideoCapture(0) 115 | for vis in tqdm.tqdm(demo.run_on_video(cam)): 116 | cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL) 117 | cv2.imshow(WINDOW_NAME, vis) 118 | if cv2.waitKey(1) == 27: 119 | break # esc to quit 120 | cv2.destroyAllWindows() 121 | elif args.video_input: 122 | video = cv2.VideoCapture(args.video_input) 123 | width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) 124 | height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) 125 | frames_per_second = video.get(cv2.CAP_PROP_FPS) 126 | num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT)) 127 | basename = os.path.basename(args.video_input) 128 | 129 | if args.output: 130 | if os.path.isdir(args.output): 131 | output_fname = os.path.join(args.output, basename) 132 | output_fname = os.path.splitext(output_fname)[0] + ".mkv" 133 | else: 134 | output_fname = args.output 135 | assert not os.path.isfile(output_fname), output_fname 136 | output_file = cv2.VideoWriter( 137 | filename=output_fname, 138 | # some installation of opencv may not support x264 (due to its license), 139 | # you can try other format (e.g. 
MPEG) 140 | fourcc=cv2.VideoWriter_fourcc(*"x264"), 141 | fps=float(frames_per_second), 142 | frameSize=(width, height), 143 | isColor=True, 144 | ) 145 | assert os.path.isfile(args.video_input) 146 | for vis_frame in tqdm.tqdm(demo.run_on_video(video), total=num_frames): 147 | if args.output: 148 | output_file.write(vis_frame) 149 | else: 150 | cv2.namedWindow(basename, cv2.WINDOW_NORMAL) 151 | cv2.imshow(basename, vis_frame) 152 | if cv2.waitKey(1) == 27: 153 | break # esc to quit 154 | video.release() 155 | if args.output: 156 | output_file.release() 157 | else: 158 | cv2.destroyAllWindows() 159 | -------------------------------------------------------------------------------- /tools/aggregate_seeds.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import math 4 | import os 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | 8 | 9 | def parse_args(): 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument('--shots', type=int, default=1, 12 | help='Shots to aggregate over') 13 | parser.add_argument('--seeds', type=int, default=30, 14 | help='Seeds to aggregate over') 15 | # Model 16 | parser.add_argument('--fc', action='store_true', 17 | help='Model uses FC instead of cosine') 18 | parser.add_argument('--unfreeze', action='store_true', 19 | help='Unfreeze feature extractor') 20 | parser.add_argument('--suffix', type=str, default='', help='Suffix of path') 21 | # Output arguments 22 | parser.add_argument('--print', action='store_true', help='Clean output') 23 | parser.add_argument('--plot', action='store_true', help='Plot results') 24 | parser.add_argument('--save-dir', type=str, default='.', 25 | help='Save dir for generated plots') 26 | # PASCAL arguments 27 | parser.add_argument('--split', type=int, default=1, help='Data split') 28 | # COCO arguments 29 | parser.add_argument('--coco', action='store_true', help='Use COCO dataset') 30 | 31 | args = parser.parse_args() 32 | return args 33 | 34 | 35 | def main(args): 36 | metrics = {} 37 | num_ckpts = 0 38 | dataset = 'coco' if args.coco else 'voc' 39 | if args.fc: 40 | fc = '_fc' 41 | else: 42 | fc = '_normalized' if not args.coco else '' 43 | if args.unfreeze: 44 | unfreeze = '_unfreeze' 45 | else: 46 | unfreeze = '_randnovel' if not args.coco else '' 47 | for i in range(args.seeds): 48 | seed = 'seed{}/'.format(i) if i != 0 else '' 49 | prefix = 'checkpoints/{}/faster_rcnn/{}'.format(dataset, seed) 50 | prefix += 'faster_rcnn_R_101_FPN_ft{}_all'.format(fc) 51 | if args.coco: 52 | ckpt = prefix + '_{}shot{}'.format(args.shots, unfreeze) 53 | else: 54 | ckpt = prefix + '{}_{}shot{}{}'.format( 55 | args.split, args.shots, unfreeze, args.suffix) 56 | if os.path.exists(ckpt): 57 | if os.path.exists(os.path.join(ckpt, 'inference/all_res.json')): 58 | ckpt_ = os.path.join(ckpt, 'inference/all_res.json') 59 | res = json.load(open(ckpt_, 'r')) 60 | res = res[os.path.join(ckpt, 'model_final.pth')]['bbox'] 61 | elif os.path.exists(os.path.join(ckpt, 'inference/res_final.json')): 62 | ckpt = os.path.join(ckpt, 'inference/res_final.json') 63 | res = json.load(open(ckpt, 'r'))['bbox'] 64 | else: 65 | print('Missing: {}'.format(ckpt)) 66 | continue 67 | 68 | for metric in res: 69 | if metric in metrics: 70 | metrics[metric].append(res[metric]) 71 | else: 72 | metrics[metric] = [res[metric]] 73 | num_ckpts += 1 74 | else: 75 | print('Missing: {}'.format(ckpt)) 76 | print('Num ckpts: {}'.format(num_ckpts)) 77 | print('') 78 | 79 | # Output results 80 | if 
args.print: 81 | # Clean output for copy and pasting 82 | out_str = '' 83 | for metric in metrics: 84 | out_str += '{0:.1f} '.format(np.mean(metrics[metric])) 85 | print(out_str) 86 | out_str = '' 87 | for metric in metrics: 88 | out_str += '{0:.1f} '.format( 89 | 1.96*np.std(metrics[metric]) / math.sqrt(len(metrics[metric])) 90 | ) 91 | print(out_str) 92 | out_str = '' 93 | for metric in metrics: 94 | out_str += '{0:.1f} '.format(np.std(metrics[metric])) 95 | print(out_str) 96 | out_str = '' 97 | for metric in metrics: 98 | out_str += '{0:.1f} '.format(np.percentile(metrics[metric], 25)) 99 | print(out_str) 100 | out_str = '' 101 | for metric in metrics: 102 | out_str += '{0:.1f} '.format(np.percentile(metrics[metric], 50)) 103 | print(out_str) 104 | out_str = '' 105 | for metric in metrics: 106 | out_str += '{0:.1f} '.format(np.percentile(metrics[metric], 75)) 107 | print(out_str) 108 | else: 109 | # Verbose output 110 | for metric in metrics: 111 | print(metric) 112 | print('Mean \t {0:.4f}'.format(np.mean(metrics[metric]))) 113 | print('Std \t {0:.4f}'.format(np.std(metrics[metric]))) 114 | print('Q1 \t {0:.4f}'.format(np.percentile(metrics[metric], 25))) 115 | print('Median \t {0:.4f}'.format(np.percentile(metrics[metric], 50))) 116 | print('Q3 \t {0:.4f}'.format(np.percentile(metrics[metric], 75))) 117 | print('') 118 | 119 | # Plot results 120 | if args.plot: 121 | os.makedirs(args.save_dir, exist_ok=True) 122 | 123 | for met in ['avg', 'stdev', 'ci']: 124 | for metric, c in zip(['nAP', 'nAP50', 'nAP75'], 125 | ['bo-', 'ro-', 'go-']): 126 | if met == 'avg': 127 | res = [np.mean(metrics[metric][:i+1]) \ 128 | for i in range(len(metrics[metric]))] 129 | elif met == 'stdev': 130 | res = [np.std(metrics[metric][:i]) \ 131 | for i in range(1, len(metrics[metric])+1)] 132 | elif met == 'ci': 133 | res = [1.96*np.std(metrics[metric][:i+1]) / \ 134 | math.sqrt(len(metrics[metric][:i+1])) \ 135 | for i in range(len(metrics[metric]))] 136 | plt.plot(range(1, len(metrics[metric])+1), res, c) 137 | plt.legend(['nAP', 'nAP50', 'nAP75']) 138 | plt.title('Split {}, {} Shots - Cumulative {} over {} Seeds'.format( 139 | args.split, args.shots, met.upper(), args.seeds)) 140 | plt.xlabel('Number of seeds') 141 | plt.ylabel('Cumulative {}'.format(met.upper())) 142 | plt.savefig(os.path.join( 143 | args.save_dir, 144 | 'split{}_{}shots_{}_vs_{}seeds.png'.format( 145 | args.split, args.shots, met, args.seeds), 146 | )) 147 | plt.clf() 148 | 149 | 150 | if __name__ == '__main__': 151 | args = parse_args() 152 | main(args) 153 | --------------------------------------------------------------------------------