├── .gitignore ├── INSTALL.md ├── README.md ├── __init__.py ├── checkpoint ├── __init__.py └── detection_checkpoint.py ├── configs ├── Base-RCNN-C4.yaml ├── COCO │ ├── COCO-RCNN-50-C4-split1-ft.yaml │ ├── COCO-RCNN-50-C4-split1-note.yaml │ ├── COCO-RCNN-50-C4-split1-segm-ft.yaml │ ├── COCO-RCNN-50-C4-split1-segm.yaml │ ├── COCO-RCNN-50-C4-split1.yaml │ └── COCO-VGG-CNN-F-split1-dock.yaml ├── VOC │ ├── FT │ │ ├── 10_shot │ │ │ ├── VOC-RCNN-101-C4-split1-ft.yaml │ │ │ ├── VOC-RCNN-101-C4-split2-ft.yaml │ │ │ └── VOC-RCNN-101-C4-split3-ft.yaml │ │ ├── 1_shot │ │ │ ├── VOC-RCNN-101-C4-split1-ft.yaml │ │ │ ├── VOC-RCNN-101-C4-split2-ft.yaml │ │ │ └── VOC-RCNN-101-C4-split3-ft.yaml │ │ ├── 2_shot │ │ │ ├── VOC-RCNN-101-C4-split1-ft.yaml │ │ │ ├── VOC-RCNN-101-C4-split2-ft.yaml │ │ │ └── VOC-RCNN-101-C4-split3-ft.yaml │ │ ├── 3_shot │ │ │ ├── VOC-RCNN-101-C4-split1-ft.yaml │ │ │ ├── VOC-RCNN-101-C4-split2-ft.yaml │ │ │ └── VOC-RCNN-101-C4-split3-ft.yaml │ │ └── 5_shot │ │ │ ├── VOC-RCNN-101-C4-split1-ft.yaml │ │ │ ├── VOC-RCNN-101-C4-split2-ft.yaml │ │ │ └── VOC-RCNN-101-C4-split3-ft.yaml │ ├── VOC-RCNN-101-C4-split1.yaml │ ├── VOC-RCNN-101-C4-split2.yaml │ └── VOC-RCNN-101-C4-split3.yaml ├── __init__.py └── default_config.py ├── data ├── __init__.py ├── build.py ├── common.py ├── data_utils │ ├── __init__.py │ ├── cfg.py │ ├── convert_coco_proposals_to_detectron.py │ ├── convert_proposals_to_detectron.py │ ├── dataset.py │ ├── image.py │ ├── utils.py │ └── voc_label.py ├── dataset_mapper.py ├── datasets │ ├── __init__.py │ ├── coco │ │ ├── base_training.py │ │ ├── fine_tuning.py │ │ └── register_coco.py │ ├── coco_dock │ │ ├── base_training.py │ │ └── register_coco_dock.py │ ├── coco_note │ │ ├── base_training.py │ │ ├── fine_tuning.py │ │ └── register_coco_note.py │ └── voc │ │ ├── base_training.py │ │ ├── fine_tuning.py │ │ └── register_voc.py ├── embeddings │ └── glove_mean ├── evaluators.py ├── pipelines_adaptor │ ├── coco │ │ ├── base_training.py │ │ ├── config_base_training.yaml │ │ ├── config_fine_tuning.yaml │ │ └── fine_tuning.py │ └── voc │ │ ├── base_training.py │ │ ├── config_base_training.yaml │ │ ├── config_fine_tuning.yaml │ │ └── fine_tuning.py ├── prepare_coco.py ├── prepare_voc.py └── utils.py ├── engine ├── __init__.py └── defaults.py ├── evalutation ├── __init__.py └── evaulator.py ├── modeling ├── __init__.py ├── backbone │ ├── __init__.py │ ├── backbone.py │ ├── inception_resnet_v2.py │ └── vgg_cnn_f.py ├── matcher.py ├── meta_arch │ ├── __init__.py │ └── rcnn.py ├── proposal_generator │ ├── __init__.py │ └── rpn.py └── roi_heads │ ├── __init__.py │ ├── box_head.py │ ├── fast_rcnn.py │ ├── mask_head.py │ ├── pcl_loss.py │ ├── roi_heads.py │ ├── visual_attention_head.py │ └── weak_detector_fast_rcnn.py ├── models └── MODELS.md ├── scripts ├── finetune_VOC.py └── train_VOC.py └── solver ├── __init__.py ├── build.py └── lr_scheduler.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | **/*.pyc 4 | *.py[cod] 5 | *$py.class 6 | # C extensions 7 | *.so 8 | # Distribution / packaging 9 | .Python 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | downloads/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | share/python-wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to 
inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .nox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *.cover 45 | *.py,cover 46 | .hypothesis/ 47 | .pytest_cache/ 48 | cover/ 49 | # Translations 50 | *.mo 51 | *.pot 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | db.sqlite3 56 | db.sqlite3-journal 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | # Scrapy stuff: 61 | .scrapy 62 | # Sphinx documentation 63 | docs/_build/ 64 | # PyBuilder 65 | .pybuilder/ 66 | target/ 67 | # Jupyter Notebook 68 | .ipynb_checkpoints 69 | # IPython 70 | profile_default/ 71 | ipython_config.py 72 | # pyenv 73 | # For a library or package, you might want to ignore these files since the code is 74 | # intended to run in multiple environments; otherwise, check them in: 75 | # .python-version 76 | # pipenv 77 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 78 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 79 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 80 | # install all needed dependencies. 81 | #Pipfile.lock 82 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 83 | __pypackages__/ 84 | # Celery stuff 85 | celerybeat-schedule 86 | celerybeat.pid 87 | # SageMath parsed files 88 | *.sage.py 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | # Spyder project settings 98 | .spyderproject 99 | .spyproject 100 | # Rope project settings 101 | .ropeproject 102 | # mkdocs documentation 103 | /site 104 | # mypy 105 | .mypy_cache/ 106 | .dmypy.json 107 | dmypy.json 108 | # Pyre type checker 109 | .pyre/ 110 | # pytype static type analyzer 111 | .pytype/ 112 | # Cython debug symbols 113 | cython_debug/ 114 | -------------------------------------------------------------------------------- /INSTALL.md: -------------------------------------------------------------------------------- 1 | ## Requirements 2 | In order to run the code, the following libraries are required: 3 | - python >= 3.7 4 | - PyTorch >= 1.6 5 | - detectron2 >= 0.2.1 6 | - cv2 >= 4.4.0 7 | - scikit-learn >= 0.23.2 8 | - imantics 9 | - easydict 10 | 11 | **Note**: Please make sure the detectron2 version correctly corresponds to the pytorch and cuda versions installed. Please check [this page](https://detectron2.readthedocs.io/en/latest/tutorials/install.html) for installation instructions and common issues. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UniT: Unified Knowledge Transfer for Any-shot Object Detection and Segmentation 2 | 3 | This repository contains the code for the CVPR 2021 paper titled [**"UniT: Unified Knowledge Transfer for Any-shot Object Detection and Segmentation"**](https://arxiv.org/pdf/2006.07502.pdf). 4 | 5 | ## Requirements 6 | To setup the environment with all the required dependencies, follow the steps detailed in [INSTALL.md](https://github.com/ubc-vision/UniT/blob/main/INSTALL.md). 
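For quick reference, the setup typically reduces to something like the sketch below; the version pins and the detectron2 wheel URL are illustrative (they assume the default PyTorch 1.6 / CUDA 10.2 wheels), so adjust them to your CUDA driver following the instructions in INSTALL.md.

```bash
# Illustrative environment setup -- match the detectron2 wheel to your PyTorch/CUDA combination
conda create -n unit python=3.7 -y
conda activate unit
pip install torch==1.6.0 torchvision==0.7.0
pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.6/index.html
pip install "opencv-python>=4.4.0" "scikit-learn>=0.23.2" imantics easydict
```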
7 | 8 | ## Prepare Dataset 9 | - To obtain the data and curate the splits for PASCAL VOC: 10 | ```bash 11 | python data/prepare_voc.py --DATA-ROOT "Path/to/Save/Location/" 12 | 13 | ``` 14 | **Note**: These splits are the same as the ones released by the authors of [Few-shot Object Detection via Feature Reweighting](https://github.com/bingykang/Fewshot_Detection). 15 | 16 | 17 | ## Model Training 18 | Download the ImageNet-pretrained models from [this Google drive](https://drive.google.com/drive/folders/1plLDI55qKvwPa5OuT_DcGobdnAPqBfq1?usp=sharing) and place them in the `models/` folder in the root directory. The directory structure should look like this: 19 | ``` 20 | UniT 21 | └── models 22 | └── resnet_50_MSRA_C4.pth 23 | └── resnet_101_MSRA_C4.pth 24 | ... 25 | └── modeling 26 | └── scripts 27 | └── solver 28 | ... 29 | ``` 30 | 31 | ### Base Training 32 | - Training on VOC: 33 | ```bash 34 | python scripts/train_VOC.py --config-file "configs/VOC/VOC-RCNN-101-C4-split{num}.yaml" --num-gpus 4 --resume SOLVER.IMS_PER_BATCH 8 TEST.AUG.ENABLED False SOLVER.BASE_LR 0.02 35 | 36 | ``` 37 | 38 | ### Fine Tuning 39 | - Fine-tuning on VOC: 40 | ```bash 41 | python scripts/finetune_VOC.py --config-file "configs/VOC/FT/{num}_shot/VOC-RCNN-101-C4-split{num}-ft.yaml" --num-gpus 4 --resume OUTPUT_DIR "Path/for/Checkpointing" MODEL.WEIGHTS "Path/to/Base/Training/Model/Weights" 42 | ``` 43 | 44 | ### Evaluation 45 | - Evaluation on VOC 46 | Evaluation can be done at any stage using the `--eval-only` flag. For example, the model obtained after fine-tuning can be evaluated as follows: 47 | ```bash 48 | python scripts/finetune_VOC.py --config-file "configs/VOC/VOC-RCNN-101-C4-split{num}.yaml" --num-gpus 4 --eval-only --resume OUTPUT_DIR "Path/for/Checkpointing" MODEL.WEIGHTS "Path/to/Fine/Tune/Model/Weights" 49 | ``` 50 | 51 | **Note**: The default training/testing setup assumes 4 GPUs. It can be adapted to other GPU configurations, but this requires changing the learning rate and batch size accordingly; a single-GPU sketch is given at the end of this README. Please look at the `SOLVER.REFERENCE_WORLD_SIZE` parameter in the [detectron2 configurations](https://detectron2.readthedocs.io/en/latest/modules/config.html#config-references) for details on how this can be done automatically. 52 | 53 | This repository is still being updated. Instructions on how to run the code on MS-COCO will be provided shortly.
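As a concrete example of that automatic scaling, the sketch below runs base training on a single GPU. This assumes the training scripts go through detectron2's `DefaultTrainer.auto_scale_workers` logic: with `SOLVER.REFERENCE_WORLD_SIZE 4`, detectron2 treats the config values as tuned for 4 GPUs and rescales the batch size, learning rate, and iteration schedule for the actual world size.

```bash
# Sketch: single-GPU base training. With REFERENCE_WORLD_SIZE set, detectron2
# rescales IMS_PER_BATCH, BASE_LR, MAX_ITER, STEPS, etc. from the 4-GPU values.
python scripts/train_VOC.py --config-file "configs/VOC/VOC-RCNN-101-C4-split1.yaml" \
    --num-gpus 1 --resume SOLVER.IMS_PER_BATCH 8 TEST.AUG.ENABLED False \
    SOLVER.BASE_LR 0.02 SOLVER.REFERENCE_WORLD_SIZE 4
```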
54 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .modeling import * -------------------------------------------------------------------------------- /checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | from .detection_checkpoint import * -------------------------------------------------------------------------------- /checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import torch 4 | from detectron2.utils import comm 5 | from detectron2.engine import hooks, HookBase 6 | import logging 7 | 8 | class PeriodicCheckpointerWithEval(HookBase): # combines periodic checkpointing with periodic evaluation, tracking the best-AP50 model 9 | def __init__(self, eval_period, eval_function, checkpointer, checkpoint_period, max_to_keep=5): 10 | self.eval = hooks.EvalHook(eval_period, eval_function) 11 | self.checkpointer = hooks.PeriodicCheckpointer(checkpointer, checkpoint_period, max_to_keep=max_to_keep) 12 | self._logger = logging.getLogger("detectron2") 13 | best_model_path = os.path.join(checkpointer.save_dir, 'best_model_final.pth.pth') # Checkpointer.save() appends ".pth" to the name used below, hence the doubled extension 14 | if os.path.isfile(best_model_path): # resume the best AP50 recorded by a previous run, if any 15 | best_model = torch.load(best_model_path, map_location=torch.device('cpu')) 16 | self.best_ap = best_model['AP50'] 17 | del best_model 18 | else: 19 | self.best_ap = 0.0 20 | 21 | def before_train(self): 22 | self.max_iter = self.trainer.max_iter 23 | self.checkpointer.max_iter = self.trainer.max_iter 24 | 25 | def _do_eval(self): 26 | results = self.eval._func() 27 | comm.synchronize() 28 | return results 29 | 30 | def after_step(self): 31 | next_iter = self.trainer.iter + 1 32 | is_final = next_iter == self.trainer.max_iter 33 | if is_final or (self.eval._period > 0 and next_iter % self.eval._period == 0): 34 | results = self._do_eval() 35 | if comm.is_main_process(): 36 | current_ap = results['bbox']['AP50'] 37 | if current_ap > self.best_ap: # save a separate "best" checkpoint whenever AP50 improves 38 | self.best_ap = current_ap 39 | additional_state = {"iteration": self.trainer.iter, "AP50": self.best_ap} 40 | self.checkpointer.checkpointer.save( 41 | "best_model_final.pth", **additional_state 42 | ) 43 | self._logger.info("Best AP: {}".format(self.best_ap)) 44 | self._logger.info("Current AP: {}".format(current_ap)) 45 | if comm.is_main_process(): 46 | self.checkpointer.step(self.trainer.iter) 47 | comm.synchronize() 48 | 49 | def after_train(self): 50 | # _func is likely a closure that holds a reference to the trainer, 51 | # so we delete it here to avoid a circular reference at the end of training 52 | del self.eval._func -------------------------------------------------------------------------------- /configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 -------------------------------------------------------------------------------- /configs/COCO/COCO-RCNN-50-C4-split1-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS:
"/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1201195/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'weak_box_head'] 8 | META_ARCH: ['backbone'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 50 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 80 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('coco_fine_tuning_query_train',) 42 | TEST: ('coco_fine_tuning_query_val',) 43 | CLASSIFIER_TRAIN: ('coco_train_2014',) 44 | FEWSHOT: 45 | TYPE: 'COCO' 46 | NUM_SHOTS: 10 47 | SPLIT_ID: 1 48 | BASE_CLASSES_ID: [7, 9, 10, 11, 12, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] 49 | NOVEL_CLASSES_ID: [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (800,) 54 | MAX_ITER: 1000 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 500 58 | BASE_LR: 0.001 59 | TEST: 60 | EVAL_PERIOD: 1000 61 | AUG: 62 | ENABLED: False -------------------------------------------------------------------------------- /configs/COCO/COCO-RCNN-50-C4-split1-note.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | PIXEL_MEAN: (0.5, 0.5, 0.5) #RGB 4 | PIXEL_STD: (0.5, 0.5, 0.5) #RGB 5 | WEIGHTS: "" 6 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 7 | MASK_ON: False 8 | BACKBONE: 9 | NAME: 'InceptionResNetV2' 10 | FREEZE_CONVS: 2 11 | RESNETS: 12 | DEPTH: 50 13 | ROI_HEADS: 14 | NAME: "WSROIHeadNoMeta" 15 | MULTI_BOX_HEAD: False 16 | NUM_CLASSES: 80 17 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 18 | IN_FEATURES: ['block17'] 19 | BATCH_SIZE_PER_IMAGE: 300 20 | VISUAL_ATTENTION_HEAD: 21 | IN_FEATURES: ['block17'] 22 | POOLER_RESOLUTION: 17 23 | FAST_RCNN: 24 | NAME: "SupervisedDetectorOutputsBase" 25 | WEAK_DETECTOR: 26 | DETECTOR_TEMP: 2.0 27 | REGRESSION_BRANCH: False 28 | RPN: 29 | IN_FEATURES: ['block17'] 30 | ROI_BOX_HEAD: 31 | NAME: 'InceptionResNetHead' 32 | POOLER_RESOLUTION: 17 33 | POOLER_TYPE: "ROIAlignV2" 34 | PROPOSAL_GENERATOR: 35 | NAME: "WSRPN" 36 | INPUT: 37 | FORMAT: "RGB" 38 | NORMALIZE_IMAGES: True 39 | # MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 40 | MIN_SIZE_TRAIN: (600,) 41 | MIN_SIZE_TEST: 600 42 | DATASETS: 43 | TRAIN: ('coco_note_base_training_query_train',) 44 | TEST: ('coco_note_base_training_query_val',) 45 | CLASSIFIER_TRAIN: 
('coco_train_2017',) 46 | FEWSHOT: 47 | TYPE: 'COCO_NOTE' 48 | NUM_SHOTS: 10 49 | SPLIT_ID: 1 50 | BASE_CLASSES_ID: [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 51 | NOVEL_CLASSES_ID: [7, 9, 10, 11, 12, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] 52 | DATALOADER: 53 | NUM_WORKERS: 2 54 | SOLVER: 55 | STEPS: (210000, 250000) 56 | MAX_ITER: 270000 # 17.4 epochs 57 | IMS_PER_BATCH: 8 58 | CHECKPOINT_PERIOD: 500 59 | BASE_LR: 0.02 60 | REFINEMENT_LR_FACTOR: 1.0 61 | WARMUP_ITERS: 100 62 | TEST: 63 | EVAL_PERIOD: 20000 64 | AUG: 65 | ENABLED: False -------------------------------------------------------------------------------- /configs/COCO/COCO-RCNN-50-C4-split1-segm-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1201467/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: True 6 | BACKBONE: 7 | FREEZE_AT: 2 8 | NAME: 'build_resnet_backbone' 9 | FREEZE_LAYERS: 10 | ROI_HEADS: ['box_pooler', 'weak_box_head'] 11 | META_ARCH: ['backbone'] 12 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 13 | MASK_HEAD: ['deconv', 'deconv_relu', 'predictor'] 14 | RESNETS: 15 | DEPTH: 50 16 | ROI_HEADS: 17 | NAME: "WSROIHeadWithMaskFineTune" 18 | MULTI_BOX_HEAD: True 19 | NUM_CLASSES: 80 20 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 21 | FAST_RCNN: 22 | NAME: "SupervisedDetectorOutputsFineTune" 23 | WEAK_DETECTOR: 24 | DETECTOR_TEMP: 2.0 25 | REGRESSION_BRANCH: False 26 | FINETUNE_TERMS: 27 | CLASSIFIER: ['lingual', 'visual'] 28 | BBOX: ['lingual', 'visual'] 29 | MASK: ['lingual', 'visual'] 30 | VISUAL_ATTENTION_HEAD: 31 | VISUAL_SIMILARITY_THRESHOLD: 0.04 32 | ROI_BOX_HEAD: 33 | NAME: 'Res5BoxHeadWithMask' 34 | POOLER_RESOLUTION: 14 35 | POOLER_TYPE: "ROIAlignV2" 36 | ROI_MASK_HEAD: 37 | POOLER_TYPE: "None" 38 | NAME: "MaskRCNNConvUpsampleHeadWithFineTune" 39 | PROPOSAL_GENERATOR: 40 | NAME: "WSRPN" 41 | INPUT: 42 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 43 | MIN_SIZE_TEST: 800 44 | DATASETS: 45 | TRAIN: ('coco_fine_tuning_query_train',) 46 | TEST: ('coco_fine_tuning_query_val',) 47 | CLASSIFIER_TRAIN: ('coco_train_2014',) 48 | FEWSHOT: 49 | TYPE: 'COCO' 50 | NUM_SHOTS: 5 51 | SPLIT_ID: 1 52 | BASE_CLASSES_ID: [7, 9, 10, 11, 12, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] 53 | NOVEL_CLASSES_ID: [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 54 | DATALOADER: 55 | NUM_WORKERS: 2 56 | SOLVER: 57 | STEPS: (800,) 58 | MAX_ITER: 1000 # 17.4 epochs 59 | WARMUP_ITERS: 0 60 | IMS_PER_BATCH: 8 61 | CHECKPOINT_PERIOD: 500 62 | BASE_LR: 0.001 63 | TEST: 64 | EVAL_PERIOD: 1000 65 | AUG: 66 | ENABLED: False -------------------------------------------------------------------------------- /configs/COCO/COCO-RCNN-50-C4-split1-segm.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "../models/resnet_50_MSRA_C4.pth" 4 | 
META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: True 6 | BACKBONE: 7 | FREEZE_AT: 2 8 | NAME: 'build_resnet_backbone' 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_HEADS: 12 | NAME: "WSROIHeadNoMetaWithMask" 13 | MULTI_BOX_HEAD: False 14 | NUM_CLASSES: 80 15 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 16 | FAST_RCNN: 17 | NAME: "SupervisedDetectorOutputsBase" 18 | WEAK_DETECTOR: 19 | DETECTOR_TEMP: 2.0 20 | REGRESSION_BRANCH: False 21 | ROI_BOX_HEAD: 22 | NAME: 'Res5BoxHeadWithMask' 23 | POOLER_RESOLUTION: 14 24 | POOLER_TYPE: "ROIAlignV2" 25 | ROI_MASK_HEAD: 26 | POOLER_TYPE: "None" 27 | NAME: "MaskRCNNConvUpsampleHeadWithSimilarity" 28 | PROPOSAL_GENERATOR: 29 | NAME: "WSRPN" 30 | INPUT: 31 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 32 | MIN_SIZE_TEST: 800 33 | DATASETS: 34 | TRAIN: ('coco_base_training_query_train',) 35 | TEST: ('coco_base_training_query_val',) 36 | CLASSIFIER_TRAIN: ('coco_train_2014',) 37 | FEWSHOT: 38 | TYPE: 'COCO' 39 | NUM_SHOTS: 10 40 | SPLIT_ID: 1 41 | BASE_CLASSES_ID: [7, 9, 10, 11, 12, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] 42 | NOVEL_CLASSES_ID: [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 43 | DATALOADER: 44 | NUM_WORKERS: 2 45 | SOLVER: 46 | STEPS: (210000, 250000) 47 | MAX_ITER: 270000 # 17.4 epochs 48 | IMS_PER_BATCH: 8 49 | CHECKPOINT_PERIOD: 500 50 | BASE_LR: 0.02 51 | REFINEMENT_LR_FACTOR: 1.0 52 | WARMUP_ITERS: 100 53 | TEST: 54 | EVAL_PERIOD: 20000 55 | AUG: 56 | ENABLED: False -------------------------------------------------------------------------------- /configs/COCO/COCO-RCNN-50-C4-split1.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "../models/resnet_50_MSRA_C4.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | BACKBONE: 7 | FREEZE_AT: 2 8 | NAME: 'build_resnet_backbone' 9 | RESNETS: 10 | DEPTH: 50 11 | ROI_HEADS: 12 | NAME: "WSROIHeadNoMeta" 13 | MULTI_BOX_HEAD: False 14 | NUM_CLASSES: 80 15 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 16 | FAST_RCNN: 17 | NAME: "SupervisedDetectorOutputsBase" 18 | WEAK_DETECTOR: 19 | DETECTOR_TEMP: 2.0 20 | REGRESSION_BRANCH: False 21 | ROI_BOX_HEAD: 22 | NAME: 'Res5BoxHead' 23 | POOLER_RESOLUTION: 14 24 | POOLER_TYPE: "ROIAlignV2" 25 | PROPOSAL_GENERATOR: 26 | NAME: "WSRPN" 27 | INPUT: 28 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 29 | MIN_SIZE_TEST: 800 30 | DATASETS: 31 | TRAIN: ('coco_base_training_query_train',) 32 | TEST: ('coco_base_training_query_val',) 33 | CLASSIFIER_TRAIN: ('coco_train_2014',) 34 | FEWSHOT: 35 | TYPE: 'COCO' 36 | NUM_SHOTS: 10 37 | SPLIT_ID: 1 38 | BASE_CLASSES_ID: [7, 9, 10, 11, 12, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] 39 | NOVEL_CLASSES_ID: [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 40 | DATALOADER: 41 | NUM_WORKERS: 2 42 | SOLVER: 43 | STEPS: (210000, 250000) 44 | MAX_ITER: 270000 # 17.4 epochs 45 | IMS_PER_BATCH: 8 46 | CHECKPOINT_PERIOD: 500 47 | BASE_LR: 0.02 48 | REFINEMENT_LR_FACTOR: 1.0 49 | 
WARMUP_ITERS: 100 50 | TEST: 51 | EVAL_PERIOD: 20000 52 | AUG: 53 | ENABLED: False -------------------------------------------------------------------------------- /configs/COCO/COCO-VGG-CNN-F-split1-dock.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "" 4 | PIXEL_MEAN: (102.716995, 115.77262, 123.50935) #BGR 5 | PIXEL_STD: (1.0, 1.0, 1.0) #BGR 6 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 7 | MASK_ON: False 8 | LOAD_PROPOSALS: True 9 | BACKBONE: 10 | NAME: 'VGG_CNN_F' 11 | FREEZE_CONVS: 0 12 | RESNETS: 13 | DEPTH: 50 14 | ROI_HEADS: 15 | NAME: "WSROIHeadNoMeta" 16 | MULTI_BOX_HEAD: False 17 | NUM_CLASSES: 80 18 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 19 | IN_FEATURES: ['vgg_conv'] 20 | VISUAL_ATTENTION_HEAD: 21 | IN_FEATURES: ['vgg_conv'] 22 | POOLER_RESOLUTION: 6 23 | FAST_RCNN: 24 | NAME: "SupervisedDetectorOutputsBase" 25 | WEAK_DETECTOR: 26 | DETECTOR_TEMP: 2.0 27 | REGRESSION_BRANCH: False 28 | RPN: 29 | IN_FEATURES: ['vgg_conv'] 30 | ROI_BOX_HEAD: 31 | NAME: 'VGGCNNFBoxHead' 32 | POOLER_RESOLUTION: 6 33 | POOLER_TYPE: "ROIPool" 34 | PROPOSAL_GENERATOR: 35 | NAME: "PrecomputedProposals" 36 | INPUT: 37 | FORMAT: "BGR" 38 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1200) 39 | MAX_SIZE_TRAIN: 2000 40 | MIN_SIZE_TRAIN_SAMPLING: "choice" 41 | MIN_SIZE_TEST: 800 42 | DATASETS: 43 | TRAIN: ('coco_dock_base_training_query_train',) 44 | TEST: ('coco_dock_base_training_query_val',) 45 | CLASSIFIER_TRAIN: ('coco_train_2014',) 46 | PROPOSAL_FILES_TRAIN: ('/h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_proposals/train2014_coco_processed.pkl',) 47 | PROPOSAL_FILES_CLASSIFIER_TRAIN: ('/h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_proposals/train2014_coco_processed.pkl',) 48 | PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 5000 49 | PROPOSAL_FILES_TEST: ('/h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_proposals/val2014_coco_processed.pkl',) 50 | PRECOMPUTED_PROPOSAL_TOPK_TEST: 5000 51 | FEWSHOT: 52 | TYPE: 'COCO_DOCK' 53 | NUM_SHOTS: 10 54 | SPLIT_ID: 1 55 | BASE_CLASSES_ID: [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 56 | NOVEL_CLASSES_ID: [7, 9, 10, 11, 12, 13, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 59, 61, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79] 57 | DATALOADER: 58 | NUM_WORKERS: 1 59 | SOLVER: 60 | BASE_LR: 1e-4 61 | REFINEMENT_LR_FACTOR: 10.0 62 | MOMENTUM: 0.9 63 | LR_SCHEDULER_NAME: "WarmupMultiStepLR" 64 | STEPS: (210000, 250000) 65 | MAX_ITER: 270000 # 17.4 epochs 66 | IMS_PER_BATCH: 8 67 | CHECKPOINT_PERIOD: 500 68 | TEST: 69 | EVAL_PERIOD: 20000 70 | AUG: 71 | ENABLED: False 72 | MIN_SIZES: (480, 576, 688, 864, 1200) 73 | MAX_SIZE: 2000 74 | FLIP: True -------------------------------------------------------------------------------- /configs/VOC/FT/10_shot/VOC-RCNN-101-C4-split1-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198063/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone'] 9 | FAST_RCNN: ['weak_detector_head',
'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 10 47 | SPLIT_ID: 1 48 | BASE_CLASSES_ID: [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [2, 5, 9, 13, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (500,) 54 | MAX_ITER: 500 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 500 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/10_shot/VOC-RCNN-101-C4-split2-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198012/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 10 47 | SPLIT_ID: 2 48 | BASE_CLASSES_ID: [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [0, 4, 9, 12, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (500,) 54 | MAX_ITER: 500 # 17.4 epochs 
55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 500 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/10_shot/VOC-RCNN-101-C4-split3-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198019/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 10 47 | SPLIT_ID: 3 48 | BASE_CLASSES_ID: [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19] 49 | NOVEL_CLASSES_ID: [3, 7, 13, 16, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (500,) 54 | MAX_ITER: 500 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 500 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/1_shot/VOC-RCNN-101-C4-split1-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198063/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: 
['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 1 47 | SPLIT_ID: 1 48 | BASE_CLASSES_ID: [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [2, 5, 9, 13, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (50,) 54 | MAX_ITER: 50 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 50 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/1_shot/VOC-RCNN-101-C4-split2-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198012/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 1 47 | SPLIT_ID: 2 48 | BASE_CLASSES_ID: [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [0, 4, 9, 12, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (50,) 54 | MAX_ITER: 50 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 50 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/1_shot/VOC-RCNN-101-C4-split3-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198019/best_model_final_weights.pth" 4 
| META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 1 47 | SPLIT_ID: 3 48 | BASE_CLASSES_ID: [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19] 49 | NOVEL_CLASSES_ID: [3, 7, 13, 16, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (50,) 54 | MAX_ITER: 50 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 50 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/2_shot/VOC-RCNN-101-C4-split1-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198063/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | 
NUM_SHOTS: 2 47 | SPLIT_ID: 1 48 | BASE_CLASSES_ID: [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [2, 5, 9, 13, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (100,) 54 | MAX_ITER: 100 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 100 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/2_shot/VOC-RCNN-101-C4-split2-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198012/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 2 47 | SPLIT_ID: 2 48 | BASE_CLASSES_ID: [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [0, 4, 9, 12, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (100,) 54 | MAX_ITER: 100 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 100 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/2_shot/VOC-RCNN-101-C4-split3-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198019/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: 
"/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 2 47 | SPLIT_ID: 3 48 | BASE_CLASSES_ID: [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19] 49 | NOVEL_CLASSES_ID: [3, 7, 13, 16, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (100,) 54 | MAX_ITER: 100 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 100 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/3_shot/VOC-RCNN-101-C4-split1-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198063/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 3 47 | SPLIT_ID: 1 48 | BASE_CLASSES_ID: [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [2, 5, 9, 13, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (150,) 54 | MAX_ITER: 150 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 150 62 | AUG: 63 | ENABLED: False 
-------------------------------------------------------------------------------- /configs/VOC/FT/3_shot/VOC-RCNN-101-C4-split2-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198012/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 3 47 | SPLIT_ID: 2 48 | BASE_CLASSES_ID: [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [0, 4, 9, 12, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (150,) 54 | MAX_ITER: 150 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 150 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/3_shot/VOC-RCNN-101-C4-split3-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198019/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: 
"ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 3 47 | SPLIT_ID: 3 48 | BASE_CLASSES_ID: [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19] 49 | NOVEL_CLASSES_ID: [3, 7, 13, 16, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (150,) 54 | MAX_ITER: 150 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 150 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/5_shot/VOC-RCNN-101-C4-split1-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198063/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 5 47 | SPLIT_ID: 1 48 | BASE_CLASSES_ID: [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [2, 5, 9, 13, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (250,) 54 | MAX_ITER: 250 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 250 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/5_shot/VOC-RCNN-101-C4-split2-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198012/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: 
['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 5 47 | SPLIT_ID: 2 48 | BASE_CLASSES_ID: [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 18, 19] 49 | NOVEL_CLASSES_ID: [0, 4, 9, 12, 17] 50 | DATALOADER: 51 | NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (250,) 54 | MAX_ITER: 250 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 250 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/FT/5_shot/VOC-RCNN-101-C4-split3-ft.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../../../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "/scratch/hdd001/home/skhandel/FewshotDetection/WSASOD/models/1198019/best_model_final_weights.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | FREEZE_LAYERS: 7 | ROI_HEADS: ['box_pooler', 'box_head', 'weak_box_head'] 8 | META_ARCH: ['backbone', 'proposal_generator'] 9 | FAST_RCNN: ['weak_detector_head', 'cls_score_delta', 'bbox_pred_delta', 'embeddings'] 10 | BACKBONE: 11 | FREEZE_AT: 2 12 | NAME: 'build_resnet_backbone' 13 | RESNETS: 14 | DEPTH: 101 15 | ROI_HEADS: 16 | NAME: "WSROIHeadFineTune" 17 | MULTI_BOX_HEAD: True 18 | NUM_CLASSES: 20 19 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 20 | FAST_RCNN: 21 | NAME: "SupervisedDetectorOutputsFineTune" 22 | WEAK_DETECTOR: 23 | DETECTOR_TEMP: 2.0 24 | REGRESSION_BRANCH: False 25 | FINETUNE_TERMS: 26 | CLASSIFIER: ['lingual', 'visual'] 27 | BBOX: ['lingual', 'visual'] 28 | MASK: ['lingual', 'visual'] 29 | VISUAL_ATTENTION_HEAD: 30 | VISUAL_SIMILARITY_THRESHOLD: 0.02 31 | ROI_BOX_HEAD: 32 | NAME: 'Res5BoxHead' 33 | POOLER_RESOLUTION: 14 34 | POOLER_TYPE: "ROIAlignV2" 35 | PROPOSAL_GENERATOR: 36 | NAME: "WSRPN" 37 | INPUT: 38 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 39 | MIN_SIZE_TEST: 800 40 | DATASETS: 41 | TRAIN: ('voc_fine_tuning_query_train',) 42 | TEST: ('voc_fine_tuning_query_val', ) 43 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 44 | FEWSHOT: 45 | TYPE: 'VOC' 46 | NUM_SHOTS: 5 47 | SPLIT_ID: 3 48 | BASE_CLASSES_ID: [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19] 49 | NOVEL_CLASSES_ID: [3, 7, 13, 16, 17] 50 | DATALOADER: 51 
| NUM_WORKERS: 2 52 | SOLVER: 53 | STEPS: (250,) 54 | MAX_ITER: 250 # 17.4 epochs 55 | WARMUP_ITERS: 0 56 | IMS_PER_BATCH: 8 57 | CHECKPOINT_PERIOD: 50 58 | REFINEMENT_LR_FACTOR: 1.0 59 | BASE_LR: 0.001 60 | TEST: 61 | EVAL_PERIOD: 250 62 | AUG: 63 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/VOC-RCNN-101-C4-split1.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "../models/resnet_101_MSRA_C4.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | BACKBONE: 7 | FREEZE_AT: 2 8 | NAME: 'build_resnet_backbone' 9 | RESNETS: 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NAME: "WSROIHeadNoMeta" 13 | MULTI_BOX_HEAD: True 14 | NUM_CLASSES: 20 15 | EMBEDDING_PATH: "../data/embeddings/glove_mean" 16 | FAST_RCNN: 17 | NAME: "SupervisedDetectorOutputsBase" 18 | WEAK_DETECTOR: 19 | DETECTOR_TEMP: 2.0 20 | REGRESSION_BRANCH: False 21 | ROI_BOX_HEAD: 22 | NAME: 'Res5BoxHead' 23 | POOLER_RESOLUTION: 14 24 | POOLER_TYPE: "ROIAlignV2" 25 | PROPOSAL_GENERATOR: 26 | NAME: "WSRPN" 27 | INPUT: 28 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 29 | MIN_SIZE_TEST: 800 30 | DATASETS: 31 | TRAIN: ('voc_base_training_query_train',) 32 | TEST: ('voc_base_training_query_val', ) 33 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 34 | FEWSHOT: 35 | TYPE: 'VOC' 36 | NUM_SHOTS: 5 37 | SPLIT_ID: 1 38 | BASE_CLASSES_ID: [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 39 | NOVEL_CLASSES_ID: [2, 5, 9, 13, 17] 40 | DATALOADER: 41 | NUM_WORKERS: 2 42 | SOLVER: 43 | STEPS: (12000, 24000) 44 | MAX_ITER: 30000 # 17.4 epochs 45 | IMS_PER_BATCH: 8 46 | CHECKPOINT_PERIOD: 500 47 | BASE_LR: 0.02 48 | REFINEMENT_LR_FACTOR: 1.0 49 | WARMUP_ITERS: 100 50 | TEST: 51 | EVAL_PERIOD: 2000 52 | AUG: 53 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/VOC-RCNN-101-C4-split2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "../models/resnet_101_MSRA_C4.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | BACKBONE: 7 | FREEZE_AT: 2 8 | NAME: 'build_resnet_backbone' 9 | RESNETS: 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NAME: "WSROIHeadNoMeta" 13 | MULTI_BOX_HEAD: True 14 | NUM_CLASSES: 20 15 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 16 | FAST_RCNN: 17 | NAME: "SupervisedDetectorOutputsBase" 18 | WEAK_DETECTOR: 19 | DETECTOR_TEMP: 2.0 20 | REGRESSION_BRANCH: False 21 | ROI_BOX_HEAD: 22 | NAME: 'Res5BoxHead' 23 | POOLER_RESOLUTION: 14 24 | POOLER_TYPE: "ROIAlignV2" 25 | PROPOSAL_GENERATOR: 26 | NAME: "WSRPN" 27 | INPUT: 28 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 29 | MIN_SIZE_TEST: 800 30 | DATASETS: 31 | TRAIN: ('voc_base_training_query_train',) 32 | TEST: ('voc_base_training_query_val', ) 33 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 34 | FEWSHOT: 35 | TYPE: 'VOC' 36 | NUM_SHOTS: 5 37 | SPLIT_ID: 2 38 | BASE_CLASSES_ID: [1, 2, 3, 5, 6, 7, 8, 10, 11, 13, 14, 15, 16, 18, 19] 39 | NOVEL_CLASSES_ID: [0, 4, 9, 12, 17] 40 | DATALOADER: 41 | NUM_WORKERS: 2 42 | SOLVER: 43 | STEPS: (12000, 24000) 44 | MAX_ITER: 30000 # 17.4 epochs 45 | IMS_PER_BATCH: 8 46 | CHECKPOINT_PERIOD: 500 47 | BASE_LR: 0.02 48 | REFINEMENT_LR_FACTOR: 1.0 49 | WARMUP_ITERS: 
100 50 | TEST: 51 | EVAL_PERIOD: 2000 52 | AUG: 53 | ENABLED: False -------------------------------------------------------------------------------- /configs/VOC/VOC-RCNN-101-C4-split3.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "../models/resnet_101_MSRA_C4.pth" 4 | META_ARCHITECTURE: "WeaklySupervisedRCNNNoMeta" 5 | MASK_ON: False 6 | BACKBONE: 7 | FREEZE_AT: 2 8 | NAME: 'build_resnet_backbone' 9 | RESNETS: 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NAME: "WSROIHeadNoMeta" 13 | MULTI_BOX_HEAD: True 14 | NUM_CLASSES: 20 15 | EMBEDDING_PATH: "/h/skhandel/FewshotDetection/WSASOD/data/embeddings/glove_mean" 16 | FAST_RCNN: 17 | NAME: "SupervisedDetectorOutputsBase" 18 | WEAK_DETECTOR: 19 | DETECTOR_TEMP: 2.0 20 | REGRESSION_BRANCH: False 21 | ROI_BOX_HEAD: 22 | NAME: 'Res5BoxHead' 23 | POOLER_RESOLUTION: 14 24 | POOLER_TYPE: "ROIAlignV2" 25 | PROPOSAL_GENERATOR: 26 | NAME: "WSRPN" 27 | INPUT: 28 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 29 | MIN_SIZE_TEST: 800 30 | DATASETS: 31 | TRAIN: ('voc_base_training_query_train',) 32 | TEST: ('voc_base_training_query_val', ) 33 | CLASSIFIER_TRAIN: ('pascal_trainval_2007', 'pascal_trainval_2012',) 34 | FEWSHOT: 35 | TYPE: 'VOC' 36 | NUM_SHOTS: 5 37 | SPLIT_ID: 3 38 | BASE_CLASSES_ID: [0, 1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 14, 15, 18, 19] 39 | NOVEL_CLASSES_ID: [3, 7, 13, 16, 17] 40 | DATALOADER: 41 | NUM_WORKERS: 2 42 | SOLVER: 43 | STEPS: (12000, 24000) 44 | MAX_ITER: 30000 # 17.4 epochs 45 | IMS_PER_BATCH: 8 46 | CHECKPOINT_PERIOD: 500 47 | BASE_LR: 0.02 48 | REFINEMENT_LR_FACTOR: 1.0 49 | WARMUP_ITERS: 100 50 | TEST: 51 | EVAL_PERIOD: 2000 52 | AUG: 53 | ENABLED: False -------------------------------------------------------------------------------- /configs/__init__.py: -------------------------------------------------------------------------------- 1 | from .default_config import * -------------------------------------------------------------------------------- /configs/default_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | from detectron2.config import CfgNode as CN 3 | 4 | def add_config(cfg): 5 | _C = cfg 6 | 7 | _C.MODEL.BACKBONE.DILATED = False 8 | _C.MODEL.BACKBONE.FREEZE_CONVS = 0 9 | 10 | _C.MODEL.FREEZE_LAYERS = CN() 11 | _C.MODEL.FREEZE_LAYERS.ROI_HEADS = [] 12 | _C.MODEL.FREEZE_LAYERS.META_ARCH = [] 13 | _C.MODEL.FREEZE_LAYERS.FAST_RCNN = [] 14 | _C.MODEL.FREEZE_LAYERS.BOX_HEAD = [] 15 | _C.MODEL.FREEZE_LAYERS.MASK_HEAD = [] 16 | # _C.MODEL.ROI_HEADS.OTHER_CLASSES = True 17 | 18 | _C.MODEL.ROI_HEADS.EMBEDDING_PATH = '' 19 | 20 | _C.MODEL.ROI_HEADS.FINETUNE_TERMS = CN() 21 | _C.MODEL.ROI_HEADS.FINETUNE_TERMS.CLASSIFIER = ['lingual', 'visual'] 22 | _C.MODEL.ROI_HEADS.FINETUNE_TERMS.BBOX = ['lingual', 'visual'] 23 | _C.MODEL.ROI_HEADS.FINETUNE_TERMS.MASK = ['lingual', 'visual'] 24 | _C.MODEL.ROI_HEADS.WEAK_CLASSIFIER_PROPOSAL_DIVISOR = 1 25 | _C.MODEL.ROI_HEADS.MULTI_BOX_HEAD = False 26 | 27 | _C.MODEL.PROPOSAL_GENERATOR.WEAK_RPN_SCORE_TRESHOLD = 0.99 28 | _C.MODEL.ROI_HEADS.TRAIN_USING_WEAK = False 29 | _C.MODEL.ROI_HEADS.TRAIN_PROPOSAL_REGRESSOR = True 30 | _C.MODEL.ROI_HEADS.WEAK_PROPOSAL_DIVISOR = 1.0 31 | 32 | _C.MODEL.ROI_HEADS.FAST_RCNN = CN() 33 | _C.MODEL.ROI_HEADS.FAST_RCNN.NAME = 'SupervisedDetectorOutputsBase' 34 | _C.MODEL.ROI_HEADS.FAST_RCNN.MODE = 'Pre_Softmax' 35 | 36 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR = CN() 37 | 
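# The WEAK_DETECTOR node below configures the weakly-supervised branch
# (OICR-style refinement iterations, MIL multiplier, softmax temperatures,
# and foreground/background IoU thresholds).
#
# A minimal usage sketch for add_config as a whole -- assuming, as the entry
# points under scripts/ are expected to do, that the custom keys are
# registered before a YAML from configs/ is merged:
#
#     from detectron2.config import get_cfg
#     from configs import add_config
#
#     cfg = get_cfg()   # detectron2 defaults
#     add_config(cfg)   # register the custom keys defined in this file
#     cfg.merge_from_file("configs/VOC/VOC-RCNN-101-C4-split1.yaml")
#     cfg.freeze()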
_C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.NAME = 'WeakDetectorOutputsBase' 38 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.NUM_KMEANS_CLUSTER = 3 39 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.GRAPH_IOU_THRESHOLD = 0.4 40 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.MAX_PC_NUM = 5 41 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.WEAK_LOSS_MULTIPLIER = 1.0 42 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.OICR_ITER = 3 43 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.FG_THRESHOLD = 0.5 44 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.BG_THRESHOLD = 0.1 45 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.MIL_MULTIPLIER = 1.0 46 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.DETECTOR_TEMP = 1.0 47 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.CLASSIFIER_TEMP = 1.0 48 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.REGRESSION_BRANCH = False 49 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.TYPE = 'OICR' 50 | _C.MODEL.ROI_HEADS.FAST_RCNN.WEAK_DETECTOR.OICR_REGRESSION_BRANCH = False 51 | 52 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD = CN() 53 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.NAME = 'MeanSimilarity' 54 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.IN_FEATURES = ['res4'] 55 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.POOLER_RESOLUTION = 14 56 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.POOLER_SAMPLING_RATIO = 0 57 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.POOLER_TYPE = "ROIAlignV2" 58 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.VISUAL_SIMILARITY_THRESHOLD = 0.02 59 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.SIMILARITY_COMBINATION = 'Sum' 60 | _C.MODEL.ROI_HEADS.VISUAL_ATTENTION_HEAD.TOPK = 5 61 | 62 | _C.DATASETS.META_TRAIN = '' 63 | _C.DATASETS.META_VAL = '' 64 | _C.DATASETS.META_SHOTS = [] 65 | _C.DATASETS.META_VAL_SHOTS = 1 66 | _C.DATASETS.BASE_META = '' 67 | _C.DATASETS.BASE_META_SHOTS = 50 68 | _C.DATASETS.MODE = 'base' 69 | _C.DATASETS.CLASSIFIER_DATAROOT = '/h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/VOCdevkit/' 70 | _C.DATASETS.CLASSIFIER_TRAIN = () 71 | _C.DATASETS.ONLY_NOVEL_CLASSIFIER_DATA = False 72 | _C.INPUT.META_MIN_SIZE = 224 73 | _C.INPUT.META_MAX_SIZE = 480 74 | _C.INPUT.RESIZE_META = True 75 | _C.INPUT.NORMALIZE_IMAGES = False 76 | 77 | _C.DATASETS.FEWSHOT = CN() 78 | _C.DATASETS.FEWSHOT.TYPE = 'VOC' 79 | _C.DATASETS.FEWSHOT.NUM_SHOTS = 5 80 | _C.DATASETS.FEWSHOT.IS_ZERO_SHOT = False 81 | _C.DATASETS.FEWSHOT.SPLIT_ID = 1 82 | _C.DATASETS.FEWSHOT.BASE_CLASSES_ID = [0, 1, 3, 4, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 19] 83 | _C.DATASETS.FEWSHOT.NOVEL_CLASSES_ID = [2, 5, 9, 13, 17] 84 | _C.DATASETS.WEAK_CLASSIFIER_MUTLIPLIER = 1.0 85 | _C.DATASETS.WEAK_CLASSIFIER_SAMPLE_NUM = -1 86 | _C.DATASETS.NUM_SAMPLES = 120 87 | _C.DATASETS.BASE_MULTIPLIER = -1.0 88 | _C.DATASETS.NOVEL_MULTIPLER = 0.0 89 | _C.DATASETS.SAMPLE_MULTIPLIER = 3 90 | _C.DATASETS.OVER_SAMPLE = False 91 | _C.DATASETS.SAMPLE_WITH_REPLACEMENT = False 92 | _C.DATASETS.SAMPLE_SEED = 0 93 | 94 | _C.DATASETS.PROPOSAL_FILES_CLASSIFIER_TRAIN = () 95 | 96 | _C.TEST.MIN_EVAL_PERIOD=0 97 | _C.TEST.AUG = CN() 98 | _C.TEST.AUG.ENABLED = True 99 | _C.TEST.AUG.MIN_SIZES = (480, 576, 688, 864, 1200) 100 | _C.TEST.AUG.MAX_SIZE = 2000 101 | _C.TEST.AUG.FLIP = True 102 | 103 | _C.SOLVER.REFINEMENT_LR_FACTOR = 1.0 104 | _C.SOLVER.DELTA_LR_FACTOR = 1.0 105 | _C.SOLVER.MIL_LR_FACTOR = 1.0 106 | _C.SOLVER.TRAIN_ONLY_WEAK = -1 -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from .utils import 
register_datasets, get_evaluator 2 | from .build import build_detection_support_loader, build_classification_train_loader 3 | from .dataset_mapper import * -------------------------------------------------------------------------------- /data/common.py: -------------------------------------------------------------------------------- 1 | import math 2 | import random 3 | import torch.utils.data as data 4 | 5 | from detectron2.utils.serialize import PicklableWrapper 6 | 7 | 8 | __all__ = ["SupportExamplesSampler", "MapSupportDataset"] 9 | 10 | 11 | class SupportExamplesSampler(data.Dataset): 12 | """ 13 | flag_eval [boolean]: if True, sample deterministically 14 | """ 15 | def __init__(self, dataset, num_shots, flag_eval=False): 16 | self._dataset = dataset 17 | self._num_shots = num_shots 18 | self._flag_eval = flag_eval 19 | 20 | if self._flag_eval: 21 | assert isinstance(self._num_shots, int) 22 | 23 | def __len__(self): 24 | if not self._flag_eval: 25 | return 999999 26 | else: 27 | min_num_images_across_classes = min([len(v) for k, v in self._dataset.items()]) 28 | return max( 29 | math.floor(min_num_images_across_classes / self._num_shots), 30 | 1 31 | ) 32 | 33 | def __getitem__(self, idx): 34 | """ 35 | - num_shot is sampled from the provided list 36 | """ 37 | _dataset_sampled = {} 38 | if not self._flag_eval: 39 | num_shots = random.choices(self._num_shots, k=1)[0] 40 | for id_class, annotations_class in self._dataset.items(): 41 | _dataset_sampled[id_class] = random.choices(annotations_class, k=num_shots) 42 | else: 43 | """ 44 | - idx used here to make val dataset's sampling deterministic 45 | """ 46 | for id_class, annotations_class in self._dataset.items(): 47 | _dataset_sampled[id_class] = [] 48 | 49 | idx_start = idx * self._num_shots 50 | if idx_start < len(annotations_class): 51 | idx_end = min(len(annotations_class), ((idx + 1) * self._num_shots)) 52 | _dataset_sampled[id_class] = annotations_class[ 53 | idx_start: idx_end 54 | ] 55 | 56 | return _dataset_sampled 57 | 58 | 59 | class MapSupportDataset(data.Dataset): 60 | """ 61 | Map a function over the elements in a dataset. 62 | Args: 63 | dataset: a dataset over which the map function is applied. 64 | map_func: a callable which maps each element in the dataset. Unlike 65 | detectron2's MapDataset, this wrapper applies map_func to every 66 | element directly, with no random-fallback handling, so map_func 67 | must succeed on each element rather than returning None.
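    Example (an illustrative sketch only -- ``support_dict`` stands in for an
    assumed {class_id: [annotation dicts]} mapping such as those built by the
    dataset registration code, ``cfg`` for a fully built config, and
    DatasetMapperSupport comes from data/dataset_mapper.py; the helpers in
    data/build.py are expected to do the equivalent wiring):

        sampler = SupportExamplesSampler(support_dict, num_shots=[1, 5])
        support_data = MapSupportDataset(sampler, DatasetMapperSupport(cfg))
        batch = support_data[0]   # {class_id: [mapped record dicts]}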
68 | """ 69 | 70 | def __init__(self, dataset, map_func): 71 | self._dataset = dataset 72 | self._map_func = PicklableWrapper(map_func) # wrap so that a lambda will work 73 | 74 | self._rng = random.Random(42) 75 | 76 | def __len__(self): 77 | return len(self._dataset) 78 | 79 | def __getitem__(self, idx): 80 | _dataset_sampled = self._dataset[idx] 81 | _dataset_dict_loaded = {} 82 | for id_class, annotations_class in _dataset_sampled.items(): 83 | _dataset_dict_loaded[id_class] = [self._map_func(e) for e in annotations_class] 84 | return _dataset_dict_loaded -------------------------------------------------------------------------------- /data/data_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import * 2 | from .cfg import * 3 | -------------------------------------------------------------------------------- /data/data_utils/convert_coco_proposals_to_detectron.py: -------------------------------------------------------------------------------- 1 | import os 2 | import glob 3 | import pickle 4 | import numpy as np 5 | 6 | from tqdm import tqdm 7 | from scipy.io import loadmat 8 | 9 | 10 | PATH_PROPOSALS_ROOT = "/h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_proposals/" 11 | 12 | PATH_MCG_TRAIN = os.path.join(PATH_PROPOSALS_ROOT, 'MCG-COCO-train2014-boxes') 13 | PATH_MCG_VAL = os.path.join(PATH_PROPOSALS_ROOT, 'MCG-COCO-val2014-boxes') 14 | 15 | assert(os.path.isdir(PATH_MCG_TRAIN)) 16 | assert(os.path.isdir(PATH_MCG_VAL)) 17 | 18 | 19 | def swap_axes(boxes): 20 | """Swaps x and y axes.""" 21 | boxes = boxes.copy() 22 | boxes = np.stack((boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]), axis=1) 23 | return boxes 24 | 25 | 26 | class CocoMCG: 27 | def __init__(self, path_mat_files): 28 | self.ids, self.boxes, self.scores = self.load_data(path_mat_files) 29 | 30 | def load_data(self, path_data): 31 | ids, boxes, scores = [], [], [] 32 | 33 | mat_files = sorted(glob.glob(os.path.join(path_data, '*'))) 34 | 35 | for mat_file in tqdm(mat_files): 36 | loaded_mat_ = loadmat(mat_file) 37 | 38 | boxes_ = loaded_mat_['boxes'] 39 | boxes_ = self._preprocess_boxes(boxes_) 40 | 41 | scores_ = loaded_mat_['scores'] 42 | scores_ = np.squeeze(scores_) 43 | 44 | ids.append(str(int(os.path.splitext(os.path.split(mat_file)[1])[0].split("_")[-1]))) 45 | boxes.append(boxes_) 46 | scores.append(scores_) 47 | 48 | return ids, boxes, scores 49 | 50 | def _preprocess_boxes(self, boxes): 51 | # (box_count, 4) 52 | # dtype: float32 53 | 54 | # box format: (y_min, x_min, y_max, x_max) 55 | boxes = boxes.astype(np.float32) - 1 56 | 57 | # box format: (x_min, y_min, x_max, y_max) 58 | boxes = swap_axes(boxes) 59 | 60 | return boxes 61 | 62 | def write_into_det2_format(self, path): 63 | dict_data = {} 64 | dict_data['ids'] = self.ids 65 | dict_data['boxes'] = self.boxes 66 | dict_data['scores'] = self.scores 67 | 68 | assert not os.path.isfile(path) 69 | 70 | with open(path, 'wb') as fp: 71 | pickle.dump(dict_data, fp) 72 | 73 | 74 | if __name__ == "__main__": 75 | mcg_instance_val = CocoMCG(PATH_MCG_VAL) 76 | mcg_instance_val.write_into_det2_format(os.path.join(PATH_PROPOSALS_ROOT, "val2014_coco_processed.pkl")) 77 | 78 | mcg_instance_train = CocoMCG(PATH_MCG_TRAIN) 79 | mcg_instance_train.write_into_det2_format(os.path.join(PATH_PROPOSALS_ROOT, "train2014_coco_processed.pkl")) -------------------------------------------------------------------------------- /data/data_utils/convert_proposals_to_detectron.py: 
-------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | 4 | for file_name in ['voc_2007_test.pkl', 'voc_2007_trainval.pkl', 'voc_2012_trainval.pkl']: 5 | dir_name = '/h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/voc_proposals/' 6 | input_file_path = dir_name + file_name 7 | output_file_path = dir_name + file_name.split(".")[0] + "_detectron.pkl" 8 | 9 | input_data = pickle.load(open(input_file_path, 'rb')) 10 | output_data = {} 11 | if file_name != 'voc_2012_trainval.pkl': 12 | output_data['ids'] = [("%06d" % i) for i in input_data['indexes']] 13 | else: 14 | output_data['ids'] = [str(i)[:4] + "_" + str(i)[4:] for i in input_data['indexes']] 15 | output_data['boxes'] = [x.astype(np.float32) for x in input_data['boxes']] 16 | output_data['objectness_logits'] = input_data['scores'] 17 | with open(output_file_path, 'wb') as f: 18 | pickle.dump(output_data, f, -1) -------------------------------------------------------------------------------- /data/data_utils/image.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # encoding: utf-8 3 | import random 4 | import os 5 | import pdb 6 | import numpy as np 7 | from PIL import Image 8 | from PIL import ImageFile 9 | from .cfg import cfg 10 | ImageFile.LOAD_TRUNCATED_IMAGES = True 11 | 12 | 13 | def scale_image_channel(im, c, v): 14 | cs = list(im.split()) 15 | cs[c] = cs[c].point(lambda i: i * v) 16 | out = Image.merge(im.mode, tuple(cs)) 17 | return out 18 | 19 | def distort_image(im, hue, sat, val): 20 | im = im.convert('HSV') 21 | cs = list(im.split()) 22 | cs[1] = cs[1].point(lambda i: i * sat) 23 | cs[2] = cs[2].point(lambda i: i * val) 24 | 25 | def change_hue(x): 26 | x += hue*255 27 | if x > 255: 28 | x -= 255 29 | if x < 0: 30 | x += 255 31 | return x 32 | cs[0] = cs[0].point(change_hue) 33 | im = Image.merge(im.mode, tuple(cs)) 34 | 35 | im = im.convert('RGB') 36 | #constrain_image(im) 37 | return im 38 | 39 | def rand_scale(s): 40 | scale = random.uniform(1, s) 41 | if(random.randint(1,10000)%2): 42 | return scale 43 | return 1./scale 44 | 45 | def random_distort_image(im, hue, saturation, exposure): 46 | dhue = random.uniform(-hue, hue) 47 | dsat = rand_scale(saturation) 48 | dexp = rand_scale(exposure) 49 | res = distort_image(im, dhue, dsat, dexp) 50 | return res 51 | 52 | def data_augmentation(img, shape, jitter, hue, saturation, exposure, flag=True): 53 | oh = img.height 54 | ow = img.width 55 | 56 | dw =int(ow*jitter) 57 | dh =int(oh*jitter) 58 | 59 | if flag: 60 | pleft = random.randint(-dw, dw) 61 | pright = random.randint(-dw, dw) 62 | ptop = random.randint(-dh, dh) 63 | pbot = random.randint(-dh, dh) 64 | flip = random.randint(1,10000)%2 65 | 66 | swidth = ow - pleft - pright 67 | sheight = oh - ptop - pbot 68 | 69 | sx = float(swidth) / ow 70 | sy = float(sheight) / oh 71 | 72 | cropped = img.crop( (pleft, ptop, pleft + swidth - 1, ptop + sheight - 1)) 73 | 74 | dx = (float(pleft)/ow)/sx 75 | dy = (float(ptop) /oh)/sy 76 | 77 | sized = cropped.resize(shape) 78 | 79 | if flip: 80 | sized = sized.transpose(Image.FLIP_LEFT_RIGHT) 81 | img = random_distort_image(sized, hue, saturation, exposure) 82 | else: 83 | # pleft, pright, ptop, pbot, flip = 0, 0, 0, 0, 0 84 | flip, dx, dy, sx, sy = 0, 0, 0, 1, 1 85 | img = img.resize(shape) 86 | 87 | return img, flip, dx,dy,sx,sy 88 | 89 | 90 | def fill_truth_detection(labpath, w, h, flip, dx, dy, sx, sy): 91 | max_boxes = cfg.max_boxes 92 | label = 
np.zeros((max_boxes,5)) 93 | 94 | if os.path.exists(labpath) and os.path.getsize(labpath): 95 | bs = np.loadtxt(labpath) 96 | if bs is None: 97 | return label 98 | bs = np.reshape(bs, (-1, 5)) 99 | cc = 0 100 | for i in range(bs.shape[0]): 101 | # Filter out bboxes not in base classes 102 | imgid = labpath.split('/')[-1].split('.')[0] 103 | clsid = int(bs[i][0]) 104 | # if clsid not in cfg.base_ids: 105 | # continue 106 | if clsid in cfg.base_ids: 107 | keepit = True 108 | elif cfg.yolo_joint and imgid in cfg.metaids: 109 | keepit = True 110 | else: 111 | keepit = False 112 | if not keepit: 113 | continue 114 | 115 | x1 = bs[i][1] - bs[i][3]/2 116 | y1 = bs[i][2] - bs[i][4]/2 117 | x2 = bs[i][1] + bs[i][3]/2 118 | y2 = bs[i][2] + bs[i][4]/2 119 | 120 | x1 = min(0.999, max(0, x1 * sx - dx)) 121 | y1 = min(0.999, max(0, y1 * sy - dy)) 122 | x2 = min(0.999, max(0, x2 * sx - dx)) 123 | y2 = min(0.999, max(0, y2 * sy - dy)) 124 | 125 | bs[i][1] = (x1 + x2)/2 126 | bs[i][2] = (y1 + y2)/2 127 | bs[i][3] = (x2 - x1) 128 | bs[i][4] = (y2 - y1) 129 | 130 | if flip: 131 | bs[i][1] = 0.999 - bs[i][1] 132 | 133 | if bs[i][3] < 0.001 or bs[i][4] < 0.001: 134 | continue 135 | label[cc] = bs[i] 136 | cc += 1 137 | if cc >= 50: 138 | break 139 | 140 | label = np.reshape(label, (-1)) 141 | return label 142 | 143 | 144 | def fill_truth_detection_meta(labpath, w, h, flip, dx, dy, sx, sy): 145 | max_boxes = cfg.max_boxes 146 | n_cls = len(cfg.base_classes) 147 | label = np.zeros((n_cls, max_boxes, 5)) 148 | 149 | if os.path.exists(labpath) and os.path.getsize(labpath): 150 | bs = np.loadtxt(labpath) 151 | if bs is None: 152 | return label 153 | bs = np.reshape(bs, (-1, 5)) 154 | ccs = [0] * n_cls 155 | for i in range(bs.shape[0]): 156 | # Filter out bboxes not in base classes 157 | clsid = int(bs[i][0]) 158 | if clsid not in cfg.base_ids: 159 | continue 160 | x1 = bs[i][1] - bs[i][3]/2 161 | y1 = bs[i][2] - bs[i][4]/2 162 | x2 = bs[i][1] + bs[i][3]/2 163 | y2 = bs[i][2] + bs[i][4]/2 164 | 165 | x1 = min(0.999, max(0, x1 * sx - dx)) 166 | y1 = min(0.999, max(0, y1 * sy - dy)) 167 | x2 = min(0.999, max(0, x2 * sx - dx)) 168 | y2 = min(0.999, max(0, y2 * sy - dy)) 169 | 170 | bs[i][1] = (x1 + x2)/2 171 | bs[i][2] = (y1 + y2)/2 172 | bs[i][3] = (x2 - x1) 173 | bs[i][4] = (y2 - y1) 174 | 175 | if flip: 176 | bs[i][1] = 0.999 - bs[i][1] 177 | 178 | if bs[i][3] < 0.001 or bs[i][4] < 0.001: 179 | continue 180 | 181 | # Copy bbox info for building target 182 | ind = cfg.base_ids.index(clsid) 183 | if ind >= n_cls or ccs[ind]>= cfg.max_boxes: 184 | pdb.set_trace() 185 | label[ind][ccs[ind]] = bs[i] 186 | label[ind][ccs[ind]][0] = ind 187 | ccs[ind] += 1 188 | if sum(ccs) >= 50: 189 | break 190 | 191 | label = np.reshape(label, (n_cls, -1)) 192 | return label 193 | 194 | 195 | def load_label(labpath, w, h, flip, dx, dy, sx, sy): 196 | label = [] 197 | # if os.path.exists(labpath) and os.path.getsize(labpath): 198 | if os.path.getsize(labpath): 199 | bs = np.loadtxt(labpath) 200 | if bs is None: 201 | return label 202 | bs = np.reshape(bs, (-1, 5)) 203 | cc = 0 204 | for i in range(bs.shape[0]): 205 | x1 = bs[i][1] - bs[i][3]/2 206 | y1 = bs[i][2] - bs[i][4]/2 207 | x2 = bs[i][1] + bs[i][3]/2 208 | y2 = bs[i][2] + bs[i][4]/2 209 | 210 | x1 = min(0.999, max(0, x1 * sx - dx)) 211 | y1 = min(0.999, max(0, y1 * sy - dy)) 212 | x2 = min(0.999, max(0, x2 * sx - dx)) 213 | y2 = min(0.999, max(0, y2 * sy - dy)) 214 | 215 | bs[i][1] = (x1 + x2)/2 216 | bs[i][2] = (y1 + y2)/2 217 | bs[i][3] = (x2 - x1) 218 | bs[i][4] = (y2 - y1) 
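# At this point bs[i] holds (class_id, cx, cy, w, h), with the box terms
# normalized to [0, 1] (the label layout written by voc_label.py) and the
# crop/scale parameters dx, dy, sx, sy from data_augmentation already
# folded in above.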
219 | 220 | if flip: 221 | bs[i][1] = 0.999 - bs[i][1] 222 | 223 | if bs[i][3] < 0.001 or bs[i][4] < 0.001: 224 | continue 225 | # label[cc] = bs[i] 226 | label.append(bs[i, 1:]) 227 | cc += 1 228 | if cc >= 50: 229 | break 230 | 231 | return label 232 | 233 | 234 | def load_data_detection(imgpath, labpath, shape, jitter, hue, saturation, exposure, data_aug=True): 235 | # labpath = imgpath.replace('images', 'labels').replace('JPEGImages', 'labels').replace('.jpg', '.txt').replace('.png','.txt') 236 | # labpath = imgpath.replace('images', 'labels_1c/aeroplane').replace('JPEGImages', 'labels_1c/aeroplane').replace('.jpg', '.txt').replace('.png','.txt') 237 | 238 | ## data augmentation 239 | img = Image.open(imgpath).convert('RGB') 240 | img,flip,dx,dy,sx,sy = data_augmentation(img, shape, jitter, hue, saturation, exposure, flag=data_aug) 241 | if cfg.metayolo: 242 | label = fill_truth_detection_meta(labpath, img.width, img.height, flip, dx, dy, 1./sx, 1./sy) 243 | else: 244 | label = fill_truth_detection(labpath, img.width, img.height, flip, dx, dy, 1./sx, 1./sy) 245 | return img,label 246 | 247 | def load_data_with_label(imgpath, labpath, shape, jitter, hue, saturation, exposure, data_aug=True): 248 | ## data augmentation 249 | img = Image.open(imgpath).convert('RGB') 250 | img,flip,dx,dy,sx,sy = data_augmentation(img, shape, jitter, hue, saturation, exposure, flag=data_aug) 251 | label = load_label(labpath, img.width, img.height, flip, dx, dy, 1./sx, 1./sy) 252 | return img, label 253 | -------------------------------------------------------------------------------- /data/data_utils/voc_label.py: -------------------------------------------------------------------------------- 1 | import xml.etree.ElementTree as ET 2 | import pickle 3 | import os 4 | from os import listdir, getcwd 5 | from os.path import join 6 | 7 | sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')] 8 | 9 | classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] 10 | 11 | 12 | def convert(size, box): 13 | dw = 1./size[0] 14 | dh = 1./size[1] 15 | x = (box[0] + box[1])/2.0 16 | y = (box[2] + box[3])/2.0 17 | w = box[1] - box[0] 18 | h = box[3] - box[2] 19 | x = x*dw 20 | w = w*dw 21 | y = y*dh 22 | h = h*dh 23 | return (x,y,w,h) 24 | 25 | def convert_annotation(year, image_id): 26 | in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id)) 27 | out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w') 28 | tree=ET.parse(in_file) 29 | root = tree.getroot() 30 | size = root.find('size') 31 | w = int(size.find('width').text) 32 | h = int(size.find('height').text) 33 | 34 | for obj in root.iter('object'): 35 | difficult = obj.find('difficult').text 36 | cls = obj.find('name').text 37 | if cls not in classes or int(difficult) == 1: 38 | continue 39 | cls_id = classes.index(cls) 40 | xmlbox = obj.find('bndbox') 41 | b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) 42 | bb = convert((w,h), b) 43 | out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') 44 | 45 | wd = getcwd() 46 | 47 | for year, image_set in sets: 48 | if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)): 49 | os.makedirs('VOCdevkit/VOC%s/labels/'%(year)) 50 | image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, 
image_set)).read().strip().split() 51 | list_file = open('%s_%s.txt'%(year, image_set), 'w') 52 | for image_id in image_ids: 53 | list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id)) 54 | convert_annotation(year, image_id) 55 | list_file.close() 56 | 57 | -------------------------------------------------------------------------------- /data/dataset_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import torch 4 | from fvcore.common.file_io import PathManager 5 | from PIL import Image 6 | 7 | from detectron2.data import detection_utils as utils 8 | from detectron2.data import transforms as T 9 | from detectron2.structures.instances import Instances 10 | from detectron2.data.dataset_mapper import DatasetMapper 11 | 12 | class MetaDatasetMapper(DatasetMapper): 13 | @classmethod 14 | def from_config(cls, cfg, is_train): 15 | ret = super().from_config(cfg, is_train) 16 | del ret['augmentations'] 17 | if cfg.INPUT.RESIZE_META: 18 | augmentations = [T.Resize((cfg.INPUT.META_MIN_SIZE, cfg.INPUT.META_MIN_SIZE))] 19 | else: 20 | augmentations = [T.ResizeShortestEdge(cfg.INPUT.META_MIN_SIZE, cfg.INPUT.META_MAX_SIZE, "choice")] 21 | if is_train and cfg.INPUT.RANDOM_FLIP != "none": 22 | augmentations.append( 23 | T.RandomFlip( 24 | horizontal=cfg.INPUT.RANDOM_FLIP == "horizontal", 25 | vertical=cfg.INPUT.RANDOM_FLIP == "vertical", 26 | ) 27 | ) 28 | ret['augmentations'] = augmentations 29 | ret['is_train'] = True 30 | return ret 31 | 32 | class DatasetMapperSupport(DatasetMapper): 33 | def __init__(self, cfg): 34 | super().__init__(cfg, is_train=False) 35 | 36 | def __call__(self, dataset_dict): 37 | """ 38 | Args: 39 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 40 | Returns: 41 | dict: a format that builtin models in detectron2 accept 42 | """ 43 | dataset_dict = copy.deepcopy(dataset_dict) # it will be modified by code below 44 | # USER: Write your own image loading if it's not from a file 45 | image = utils.read_image(dataset_dict["file_name"], format=self.img_format) 46 | utils.check_image_size(dataset_dict, image) 47 | 48 | if "annotations" not in dataset_dict: 49 | image, transforms = T.apply_transform_gens( 50 | ([self.crop_gen] if self.crop_gen else []) + self.tfm_gens, image 51 | ) 52 | else: 53 | # Crop around an instance if there are instances in the image. 54 | # USER: Remove if you don't use cropping 55 | if self.crop_gen: 56 | crop_tfm = utils.gen_crop_transform_with_instance( 57 | self.crop_gen.get_crop_size(image.shape[:2]), 58 | image.shape[:2], 59 | np.random.choice(dataset_dict["annotations"]), 60 | ) 61 | image = crop_tfm.apply_image(image) 62 | image, transforms = T.apply_transform_gens(self.tfm_gens, image) 63 | if self.crop_gen: 64 | transforms = crop_tfm + transforms 65 | 66 | image_shape = image.shape[:2] # h, w 67 | 68 | # Pytorch's dataloader is efficient on torch.Tensor due to shared-memory, 69 | # but not efficient on large generic data structures due to the use of pickle & mp.Queue. 70 | # Therefore it's important to use torch.Tensor. 71 | dataset_dict["image"] = torch.as_tensor( 72 | image.transpose(2, 0, 1).astype("float32") 73 | ).contiguous() 74 | # Can use uint8 if it turns out to be slow some day 75 | 76 | # USER: Remove if you don't use pre-computed proposals. 
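# The pickled proposal files are assumed to follow detectron2's expected
# layout -- a dict with 'ids', 'boxes' and 'objectness_logits' keys -- as
# produced for VOC by data/data_utils/convert_proposals_to_detectron.py.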
77 | if self.load_proposals: 78 | utils.transform_proposals( 79 | dataset_dict, image_shape, transforms, self.min_box_side_len, self.proposal_topk 80 | ) 81 | 82 | # !!!!!!!!!!!!!!! CHANGED HERE !!!!!!!!!!!!!!!!!!! 83 | """ 84 | if not self.is_train: 85 | dataset_dict.pop("annotations", None) 86 | dataset_dict.pop("sem_seg_file_name", None) 87 | return dataset_dict 88 | """ 89 | 90 | if "annotations" in dataset_dict: 91 | # USER: Modify this if you want to keep them for some reason. 92 | for anno in dataset_dict["annotations"]: 93 | if not self.mask_on: 94 | anno.pop("segmentation", None) 95 | if not self.keypoint_on: 96 | anno.pop("keypoints", None) 97 | 98 | # USER: Implement additional transformations if you have other types of data 99 | annos = [ 100 | utils.transform_instance_annotations( 101 | obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices 102 | ) 103 | for obj in dataset_dict.pop("annotations") 104 | if obj.get("iscrowd", 0) == 0 105 | ] 106 | instances = utils.annotations_to_instances( 107 | annos, image_shape, mask_format=self.mask_format 108 | ) 109 | # Create a tight bounding box from masks, useful when image is cropped 110 | if self.crop_gen and instances.has("gt_masks"): 111 | instances.gt_boxes = instances.gt_masks.get_bounding_boxes() 112 | dataset_dict["instances"] = utils.filter_empty_instances(instances) 113 | 114 | 115 | # USER: Remove if you don't do semantic/panoptic segmentation. 116 | if "sem_seg_file_name" in dataset_dict: 117 | with PathManager.open(dataset_dict.pop("sem_seg_file_name"), "rb") as f: 118 | sem_seg_gt = Image.open(f) 119 | sem_seg_gt = np.asarray(sem_seg_gt, dtype="uint8") 120 | sem_seg_gt = transforms.apply_segmentation(sem_seg_gt) 121 | sem_seg_gt = torch.as_tensor(sem_seg_gt.astype("long")) 122 | dataset_dict["sem_seg"] = sem_seg_gt 123 | return dataset_dict -------------------------------------------------------------------------------- /data/datasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ubc-vision/UniT/87ace8710ea7e84f104163a9adc3d3fed24fdcda/data/datasets/__init__.py -------------------------------------------------------------------------------- /data/datasets/coco/base_training.py: -------------------------------------------------------------------------------- 1 | """ 2 | Register COCO base-training routine 3 | """ 4 | 5 | import os 6 | import copy 7 | import pickle 8 | 9 | from detectron2.data import DatasetCatalog, MetadataCatalog 10 | from detectron2.data.datasets.register_coco import register_coco_instances 11 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 12 | from detectron2.data.datasets.coco import load_coco_json 13 | 14 | from ...pipelines_adaptor.coco.base_training import BaseTrainFSODAdaptor 15 | 16 | PATH_CONFIG_BASE_TRAINING = '/h/skhandel/FewshotDetection/WSASOD/data/pipelines_adaptor/coco/config_base_training.yaml' 17 | # assert(os.path.isfile(PATH_CONFIG_BASE_TRAINING)) 18 | 19 | 20 | class BaseTrainData: 21 | """ 22 | Registers data for base-training phase 23 | """ 24 | def __init__(self, novel_id=3): 25 | self.novel_id = novel_id 26 | 27 | self.base_train_adaptor_instance = self._fetch_the_copy_of_instance_if_tmp_stored() 28 | 29 | self.base_train_adaptor_instance = copy.deepcopy(self.base_train_adaptor_instance) 30 | 31 | self._register_coco_2017_if_not_present() 32 | self._fetch_coco_2017_data() 33 | 34 | # combine 2017 train val and index it with id 35 | 
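# Keys are image ids recovered from file names, e.g.
# '.../images/000000391895.jpg' -> '000000391895'; the query/support helpers
# below extract the same id via split('_')[-1], which also covers 2014-style
# names like 'COCO_train2014_000000391895.jpg'.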
self.data_coco_2017_trainval = self.data_train_coco_2017 + self.data_val_coco_2017 36 | 37 | self.dict_coco2017_id_item = {} 38 | for e in self.data_coco_2017_trainval: 39 | e_id = os.path.basename(e['file_name']).split('.')[0] 40 | self.dict_coco2017_id_item[e_id] = e 41 | 42 | def _fetch_the_copy_of_instance_if_tmp_stored(self, ): 43 | """takes a long time otherwise""" 44 | if os.path.isfile('tmp/coco_base_pipe_instance.pkl'): 45 | with open('tmp/coco_base_pipe_instance.pkl', 'rb') as f: 46 | base_train_adaptor_instance = pickle.load(f) 47 | else: 48 | os.makedirs('tmp', exist_ok=True) 49 | base_train_adaptor_instance = BaseTrainFSODAdaptor( 50 | novel_id=self.novel_id, path_config=PATH_CONFIG_BASE_TRAINING) 51 | with open('tmp/coco_base_pipe_instance.pkl', 'wb') as f: 52 | pickle.dump(base_train_adaptor_instance, f) 53 | 54 | return base_train_adaptor_instance 55 | 56 | def _register_coco_2017_if_not_present(self, ): 57 | if 'coco_2017_train_copy' not in DatasetCatalog.keys(): 58 | register_coco_instances( 59 | 'coco_2017_train_copy', 60 | _get_builtin_metadata('coco'), 61 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/annotations/instances_train2017.json", 62 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/" 63 | ) 64 | if 'coco_2017_val_copy' not in DatasetCatalog.keys(): 65 | register_coco_instances( 66 | 'coco_2017_val_copy', 67 | _get_builtin_metadata('coco'), 68 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/annotations/instances_val2017.json", 69 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/" 70 | ) 71 | 72 | def _fetch_coco_2017_data(self, ): 73 | self.data_train_coco_2017 = DatasetCatalog.get('coco_2017_train_copy') 74 | self.data_val_coco_2017 = DatasetCatalog.get('coco_2017_val_copy') 75 | 76 | self.meta_coco_2017 = MetadataCatalog.get('coco_2017_train_copy') 77 | 78 | def _load_datasets(self, ): 79 | # Query: Train 80 | self.base_training_query_train = self.get_query_set_base_training_train( 81 | self.base_train_adaptor_instance) 82 | 83 | # Support: Train + Validation 84 | self.base_training_support = self.get_support_set_base_training( 85 | self.base_train_adaptor_instance) 86 | 87 | # Query: Validation 88 | self.base_training_query_val = self.get_query_set_base_training_val( 89 | self.base_train_adaptor_instance) 90 | 91 | def _register_datasets(self, ): 92 | # Query: Train 93 | DatasetCatalog.register( 94 | "coco_base_training_query_train", 95 | lambda: self.get_query_set_base_training_train(self.base_train_adaptor_instance) 96 | ) 97 | MetadataCatalog.get("coco_base_training_query_train").set( 98 | thing_classes=self.base_train_adaptor_instance.cfg.coco_classes, 99 | evaluator_type='coco') 100 | 101 | # Support: Train + Validation 102 | DatasetCatalog.register( 103 | "coco_base_training_support", 104 | lambda: self.get_support_set_base_training(self.base_train_adaptor_instance) 105 | ) 106 | MetadataCatalog.get("coco_base_training_support").set( 107 | thing_classes=self.base_train_adaptor_instance.cfg.coco_classes, 108 | evaluator_type='coco') 109 | 110 | # Query: Validation 111 | DatasetCatalog.register( 112 | "coco_base_training_query_val", 113 | lambda: load_coco_json( 114 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/assets/json_base_val.json", 115 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/", 116 | 
"coco_base_training_query_val" 117 | ) 118 | ) 119 | MetadataCatalog.get("coco_base_training_query_val").set( 120 | evaluator_type='coco', 121 | json_file="/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/assets/json_base_val.json" 122 | ) 123 | 124 | def get_query_set_base_training_train(self, base_pipe_instance): 125 | """ 126 | Query: Train 127 | """ 128 | dataset_dicts = [] 129 | 130 | for path_img in base_pipe_instance.query_dataset_train.lines: 131 | path_img = path_img.strip() 132 | img_id = os.path.basename(path_img).split('_')[-1].split('.')[0] 133 | 134 | record = copy.deepcopy(self.dict_coco2017_id_item[img_id]) 135 | record['file_name'] = path_img 136 | 137 | # filter annotations 138 | annotations_filtered = [] 139 | for ann in record['annotations']: 140 | if ann['category_id'] in base_pipe_instance.cfg.base_ids: 141 | annotations_filtered.append(ann) 142 | 143 | record['annotations'] = annotations_filtered 144 | 145 | if len(annotations_filtered) > 0: 146 | dataset_dicts.append(record) 147 | 148 | return dataset_dicts 149 | 150 | def get_query_set_base_training_val(self, base_pipe_instance): 151 | """ 152 | Query: Validation 153 | !! Similar to the Train counterpart. Might refactor later. 154 | """ 155 | dataset_dicts = [] 156 | 157 | for path_img in base_pipe_instance.query_dataset_val.lines: 158 | path_img = path_img.strip() 159 | img_id = os.path.basename(path_img).split('_')[-1].split('.')[0] 160 | 161 | record = copy.deepcopy(self.dict_coco2017_id_item[img_id]) 162 | record['file_name'] = path_img 163 | 164 | # filter annotations 165 | annotations_filtered = [] 166 | for ann in record['annotations']: 167 | if ann['category_id'] in base_pipe_instance.cfg.base_ids: 168 | annotations_filtered.append(ann) 169 | 170 | record['annotations'] = annotations_filtered 171 | 172 | if len(annotations_filtered) > 0: 173 | dataset_dicts.append(record) 174 | 175 | return dataset_dicts 176 | 177 | def get_support_set_base_training(self, base_pipe_instance): 178 | """ 179 | Support: Train + Validation 180 | All samples in the class-based bins have at least one of their respective class's bbox 181 | """ 182 | dataset_dicts_label_wise = {e: [] for e in base_pipe_instance.cfg.base_ids} 183 | 184 | assert(len(base_pipe_instance.support_set.metalines) == len(base_pipe_instance.cfg.base_ids)) 185 | 186 | for i, class_id in enumerate(base_pipe_instance.cfg.base_ids): 187 | 188 | for path_img in base_pipe_instance.support_set.metalines[i]: 189 | path_img = path_img.strip() 190 | 191 | img_id = os.path.basename(path_img).split('_')[-1].split('.')[0] 192 | record = copy.deepcopy(self.dict_coco2017_id_item[img_id]) 193 | record['file_name'] = path_img 194 | 195 | # filter annotations 196 | annotations_filtered = [] 197 | for ann in record['annotations']: 198 | if ann['category_id'] in [class_id] and ann['iscrowd'] == 0: 199 | annotations_filtered.append(ann) 200 | 201 | record['annotations'] = annotations_filtered 202 | 203 | if len(annotations_filtered) > 0: 204 | dataset_dicts_label_wise[class_id].append(record) 205 | 206 | return dataset_dicts_label_wise 207 | 208 | def get_label2int_from_list(self, labels_list): 209 | lab2int = {e: i for i, e in enumerate(labels_list)} 210 | return lab2int -------------------------------------------------------------------------------- /data/datasets/coco/fine_tuning.py: -------------------------------------------------------------------------------- 1 | """ 2 | Register COCO fine-tuning routine 3 | """ 4 | 5 | import os 6 | 
import copy 7 | import pickle 8 | 9 | from detectron2.data import DatasetCatalog, MetadataCatalog 10 | from detectron2.data.datasets.register_coco import register_coco_instances 11 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 12 | 13 | from ...pipelines_adaptor.coco.fine_tuning import FineTuneFSODAdaptor 14 | from detectron2.data.datasets.coco import load_coco_json 15 | 16 | PATH_CONFIG = '/h/skhandel/FewshotDetection/WSASOD/data/pipelines_adaptor/coco/config_fine_tuning.yaml' 17 | # assert(os.path.isfile(PATH_CONFIG)) 18 | 19 | 20 | class FineTuneData: 21 | """ 22 | Registers data for fine-tuning phase 23 | """ 24 | def __init__(self, novel_id=3, num_shots=10): 25 | self.novel_id = novel_id 26 | self.num_shots = num_shots 27 | 28 | self.fine_tune_adaptor_instance = self._fetch_the_copy_of_instance_if_tmp_stored() 29 | 30 | self.fine_tune_adaptor_instance = copy.deepcopy(self.fine_tune_adaptor_instance) 31 | 32 | self._register_coco_2017_if_not_present() 33 | self._fetch_coco_2017_data() 34 | 35 | # combine 2017 train val and index it with id 36 | self.data_coco_2017_trainval = self.data_train_coco_2017 + self.data_val_coco_2017 37 | 38 | self.dict_coco2017_id_item = {} 39 | for e in self.data_coco_2017_trainval: 40 | e_id = os.path.basename(e['file_name']).split('.')[0] 41 | self.dict_coco2017_id_item[e_id] = e 42 | 43 | def _fetch_the_copy_of_instance_if_tmp_stored(self, ): 44 | """takes a long time otherwise""" 45 | if self.num_shots == 10 or self.num_shots==30: 46 | file_name = 'tmp/coco_fine_tune_instance_shots_{}.pkl'.format(self.num_shots) 47 | elif self.num_shots == 5: 48 | file_name = 'tmp/coco_fine_tune_instance_shots_10.pkl' 49 | else: 50 | file_name = 'tmp/coco_fine_tune_instance_shots_30.pkl' 51 | if os.path.isfile(file_name): 52 | with open(file_name, 'rb') as f: 53 | fine_tune_adaptor_instance = pickle.load(f) 54 | else: 55 | os.makedirs('tmp', exist_ok=True) 56 | fine_tune_adaptor_instance = FineTuneFSODAdaptor( 57 | novel_id=self.novel_id, num_shots=self.num_shots, 58 | path_config=PATH_CONFIG 59 | ) 60 | with open(file_name, 'wb') as f: 61 | pickle.dump(fine_tune_adaptor_instance, f) 62 | 63 | return fine_tune_adaptor_instance 64 | 65 | def _register_coco_2017_if_not_present(self, ): 66 | if 'coco_2017_train_copy' not in DatasetCatalog.keys(): 67 | register_coco_instances( 68 | 'coco_2017_train_copy', 69 | _get_builtin_metadata('coco'), 70 | "/scratch/ssd001/home/skhandel/FewshotDetection/locatron/fsod_utils/data/MSCOCO/annotations/instances_train2017.json", 71 | "/scratch/ssd001/home/skhandel/FewshotDetection/locatron/fsod_utils/data/MSCOCO/images/" 72 | ) 73 | if 'coco_2017_val_copy' not in DatasetCatalog.keys(): 74 | register_coco_instances( 75 | 'coco_2017_val_copy', 76 | _get_builtin_metadata('coco'), 77 | "/scratch/ssd001/home/skhandel/FewshotDetection/locatron/fsod_utils/data/MSCOCO/annotations/instances_val2017.json", 78 | "/scratch/ssd001/home/skhandel/FewshotDetection/locatron/fsod_utils/data/MSCOCO/images/" 79 | ) 80 | 81 | def _fetch_coco_2017_data(self, ): 82 | self.data_train_coco_2017 = DatasetCatalog.get('coco_2017_train_copy') 83 | self.data_val_coco_2017 = DatasetCatalog.get('coco_2017_val_copy') 84 | 85 | self.meta_coco_2017 = MetadataCatalog.get('coco_2017_train_copy') 86 | 87 | def _load_datasets(self, ): 88 | # Query: Train 89 | self.fine_tuning_query_train = self.get_query_set_fine_tuning_train( 90 | self.fine_tune_adaptor_instance) 91 | 92 | # Support: Train + Validation 93 | self.fine_tuning_support = 
self.get_support_set_fine_tuning( 94 | self.fine_tune_adaptor_instance) 95 | 96 | # Query: Validation 97 | self.fine_tuning_query_val = self.get_query_set_fine_tuning_val( 98 | self.fine_tune_adaptor_instance) 99 | 100 | def _register_datasets(self, ): 101 | # Query: Train 102 | DatasetCatalog.register( 103 | "coco_fine_tuning_query_train", 104 | lambda: self.get_query_set_fine_tuning_train(self.fine_tune_adaptor_instance) 105 | ) 106 | MetadataCatalog.get("coco_fine_tuning_query_train").set( 107 | # thing_classes=self.fine_tune_adaptor_instance.cfg.coco_classes, 108 | thing_classes=MetadataCatalog.get("coco_2017_train").thing_classes, 109 | evaluator_type='coco') 110 | 111 | # Support: Train + Validation 112 | DatasetCatalog.register( 113 | "coco_fine_tuning_support", 114 | lambda: self.get_support_set_fine_tuning(self.fine_tune_adaptor_instance) 115 | ) 116 | MetadataCatalog.get("coco_fine_tuning_support").set( 117 | # thing_classes=self.fine_tune_adaptor_instance.cfg.coco_classes, 118 | thing_classes=MetadataCatalog.get("coco_2017_train").thing_classes, 119 | evaluator_type='coco') 120 | 121 | # Query: Validation 122 | DatasetCatalog.register( 123 | "coco_fine_tuning_query_val", 124 | lambda: load_coco_json( 125 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/assets/json_fine_val.json", 126 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/", 127 | "coco_fine_tuning_query_val" 128 | ) 129 | ) 130 | MetadataCatalog.get("coco_fine_tuning_query_val").set( 131 | # thing_classes=self.fine_tune_adaptor_instance.cfg.coco_classes, 132 | evaluator_type='coco', 133 | json_file="/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/assets/json_fine_val.json" 134 | ) 135 | 136 | def get_query_set_fine_tuning_train(self, base_pipe_instance): 137 | """ 138 | Query: Train 139 | """ 140 | dataset_dicts = [] 141 | category_counter = {} 142 | for path_img in base_pipe_instance.query_dataset_train.lines: 143 | path_img = path_img.strip() 144 | img_id = os.path.basename(path_img).split('_')[-1].split('.')[0] 145 | 146 | record = copy.deepcopy(self.dict_coco2017_id_item[img_id]) 147 | record['file_name'] = path_img 148 | # filter annotations 149 | annotations_filtered = [] 150 | for ann in record['annotations']: 151 | if ann['category_id'] in base_pipe_instance.cfg.base_ids: 152 | if self.num_shots == 5 or self.num_shots == 20: 153 | if category_counter.get(ann['category_id'], 0) >= self.num_shots: 154 | continue 155 | annotations_filtered.append(ann) 156 | category_counter[ann['category_id']] = category_counter.get(ann['category_id'], 0) + 1 157 | 158 | record['annotations'] = annotations_filtered 159 | 160 | if len(annotations_filtered) > 0: 161 | dataset_dicts.append(record) 162 | 163 | return dataset_dicts 164 | 165 | def get_query_set_fine_tuning_val(self, base_pipe_instance): 166 | """ 167 | Query: Validation 168 | !! Similar to the Train counterpart. Might refactor later. 
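        Unlike the train counterpart, no per-category shot capping is
        applied here.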
169 | """ 170 | dataset_dicts = [] 171 | 172 | for path_img in base_pipe_instance.query_dataset_val.lines: 173 | path_img = path_img.strip() 174 | img_id = os.path.basename(path_img).split('_')[-1].split('.')[0] 175 | 176 | record = copy.deepcopy(self.dict_coco2017_id_item[img_id]) 177 | record['file_name'] = path_img 178 | 179 | # filter annotations 180 | annotations_filtered = [] 181 | for ann in record['annotations']: 182 | if ann['category_id'] in base_pipe_instance.cfg.base_ids: 183 | annotations_filtered.append(ann) 184 | 185 | record['annotations'] = annotations_filtered 186 | 187 | if len(annotations_filtered) > 0: 188 | dataset_dicts.append(record) 189 | 190 | return dataset_dicts 191 | 192 | def get_support_set_fine_tuning(self, base_pipe_instance): 193 | """ 194 | Support: Train + Validation 195 | All samples in the class-based bins have at least one of their respective class's bbox 196 | """ 197 | dataset_dicts_label_wise = {e: [] for e in base_pipe_instance.cfg.base_ids} 198 | 199 | assert(len(base_pipe_instance.support_set.metalines) == len(base_pipe_instance.cfg.base_ids)) 200 | 201 | for i, class_id in enumerate(base_pipe_instance.cfg.base_ids): 202 | 203 | for path_img in base_pipe_instance.support_set.metalines[i]: 204 | path_img = path_img.strip() 205 | 206 | img_id = os.path.basename(path_img).split('_')[-1].split('.')[0] 207 | record = copy.deepcopy(self.dict_coco2017_id_item[img_id]) 208 | record['file_name'] = path_img 209 | 210 | # filter annotations 211 | annotations_filtered = [] 212 | for ann in record['annotations']: 213 | if ann['category_id'] in [class_id] and ann['iscrowd'] == 0: 214 | annotations_filtered.append(ann) 215 | 216 | record['annotations'] = annotations_filtered 217 | 218 | if len(annotations_filtered) > 0: 219 | dataset_dicts_label_wise[class_id].append(record) 220 | 221 | return dataset_dicts_label_wise 222 | 223 | def get_label2int_from_list(self, labels_list): 224 | lab2int = {e: i for i, e in enumerate(labels_list)} 225 | return lab2int -------------------------------------------------------------------------------- /data/datasets/coco/register_coco.py: -------------------------------------------------------------------------------- 1 | from .base_training import BaseTrainData 2 | from .fine_tuning import FineTuneData 3 | 4 | 5 | class RegisterCOCO: 6 | def __init__(self, split_id, num_shots): 7 | self.split_id = split_id 8 | self.num_shots = num_shots 9 | 10 | def _register_datasets(self, ): 11 | self.base_train_data_instance = BaseTrainData( 12 | novel_id=self.split_id - 1 13 | ) 14 | self.fine_tune_data_instance = FineTuneData( 15 | novel_id=self.split_id - 1, 16 | num_shots=self.num_shots 17 | ) 18 | 19 | self.base_train_data_instance._register_datasets() 20 | self.fine_tune_data_instance._register_datasets() -------------------------------------------------------------------------------- /data/datasets/coco_dock/base_training.py: -------------------------------------------------------------------------------- 1 | """ 2 | Register COCO base-training routine 3 | """ 4 | import copy 5 | import os 6 | import pickle 7 | from detectron2.data import DatasetCatalog, MetadataCatalog 8 | from detectron2.data.datasets.coco import load_coco_json 9 | from detectron2.data.datasets.register_coco import register_coco_instances 10 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 11 | 12 | IDS_BASE_CLASSES = [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 13 | IDS_NOVEL_CLASSES = [e for e in range(80) if e 
not in IDS_BASE_CLASSES] 14 | 15 | 16 | class BaseTrainData: 17 | """ 18 | Registers data for base-training phase 19 | """ 20 | def __init__(self, ): 21 | self._register_coco_2017_if_not_present() 22 | self._fetch_coco_2017_data() 23 | 24 | def _register_coco_2017_if_not_present(self, ): 25 | if 'coco_2014_train_copy' not in DatasetCatalog.keys(): 26 | register_coco_instances( 27 | 'coco_2014_train_copy', 28 | _get_builtin_metadata('coco'), 29 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/annotations/instances_train2014.json", 30 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/train2014/" 31 | ) 32 | if 'coco_2014_val_copy' not in DatasetCatalog.keys(): 33 | register_coco_instances( 34 | 'coco_2014_val_copy', 35 | _get_builtin_metadata('coco'), 36 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/annotations/instances_val2014.json", 37 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/val2014/" 38 | ) 39 | 40 | def _fetch_coco_2017_data(self, ): 41 | self.data_train_coco_2017 = DatasetCatalog.get('coco_2014_train_copy') 42 | self.data_val_coco_2017 = DatasetCatalog.get('coco_2014_val_copy') 43 | self.meta_coco_2017 = MetadataCatalog.get('coco_2014_train_copy') 44 | 45 | def _load_datasets(self, ): 46 | # Query: Train 47 | self.base_training_query_train = self.get_query_set_base_training_train() 48 | 49 | # Support 50 | self.base_training_support = self.get_support_set_base_training() 51 | 52 | # Query: Validation 53 | self.base_training_query_val = self.get_query_set_base_training_val() 54 | 55 | def _register_datasets(self, ): 56 | # Query: Train 57 | DatasetCatalog.register( 58 | "coco_dock_base_training_query_train", 59 | lambda: self.get_query_set_base_training_train() 60 | ) 61 | MetadataCatalog.get("coco_dock_base_training_query_train").set( 62 | thing_classes=MetadataCatalog.get("coco_2014_train_copy").thing_classes, 63 | evaluator_type='coco') 64 | 65 | # Support: Train + Validation 66 | DatasetCatalog.register( 67 | "coco_dock_base_training_support", 68 | lambda: self.get_support_set_base_training() 69 | ) 70 | MetadataCatalog.get("coco_dock_base_training_support").set( 71 | thing_classes=MetadataCatalog.get("coco_2014_train_copy").thing_classes, 72 | evaluator_type='coco') 73 | 74 | # Query: Validation 75 | DatasetCatalog.register( 76 | "coco_dock_base_training_query_val", 77 | # lambda: self.get_query_set_base_training_val(self.base_train_adaptor_instance) 78 | lambda: load_coco_json( 79 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/annotations/instances_val2014.json", 80 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/images/val2014/", 81 | "coco_dock_base_training_query_val" 82 | ) 83 | ) 84 | MetadataCatalog.get("coco_dock_base_training_query_val").set( 85 | # thing_classes=self.base_train_adaptor_instance.cfg.coco_classes, 86 | evaluator_type='coco', 87 | json_file="/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/annotations/instances_val2014.json" 88 | ) 89 | 90 | # DatasetCatalog.register( 91 | # "coco_note_base_training_query_val", 92 | # lambda: self.get_query_set_base_training_val() 93 | # ) 94 | # MetadataCatalog.get("coco_note_base_training_query_val").set( 95 | # thing_classes=MetadataCatalog.get("coco_2017_val_copy").thing_classes, 96 | # evaluator_type='coco') 97 | 98 | 99 | def 
get_query_set_base_training_train(self, ): 100 | """ 101 | Query: Train 102 | """ 103 | str_keywords = "query_set_base_training_train_dock" 104 | dataset_dicts = fetch_object_if_tmp_stored(str_keywords) 105 | 106 | if dataset_dicts is not None: 107 | return dataset_dicts 108 | else: 109 | dataset_dicts = [] 110 | 111 | for e in self.data_train_coco_2017: 112 | record = copy.deepcopy(e) 113 | # filter annotations 114 | annotations_filtered = [] 115 | for ann in record['annotations']: 116 | if ann['category_id'] in IDS_BASE_CLASSES: 117 | annotations_filtered.append(ann) 118 | 119 | record['annotations'] = annotations_filtered 120 | 121 | if len(annotations_filtered) > 0: 122 | dataset_dicts.append(record) 123 | 124 | store_object_in_tmp(str_keywords, dataset_dicts) 125 | return dataset_dicts 126 | 127 | def get_query_set_base_training_val(self, ): 128 | """ 129 | Query: Val 130 | """ 131 | str_keywords = "query_set_base_training_val_dock" 132 | dataset_dicts = fetch_object_if_tmp_stored(str_keywords) 133 | 134 | if dataset_dicts is not None: 135 | return dataset_dicts 136 | else: 137 | dataset_dicts = [] 138 | 139 | for e in self.data_val_coco_2017: 140 | record = copy.deepcopy(e) 141 | # filter annotations 142 | annotations_filtered = [] 143 | for ann in record['annotations']: 144 | if ann['category_id'] in IDS_BASE_CLASSES: 145 | annotations_filtered.append(ann) 146 | 147 | record['annotations'] = annotations_filtered 148 | 149 | if len(annotations_filtered) > 0: 150 | dataset_dicts.append(record) 151 | 152 | store_object_in_tmp(str_keywords, dataset_dicts) 153 | return dataset_dicts 154 | 155 | def get_support_set_base_training(self, ): 156 | """ 157 | All samples in the class-based bins have at least one of their respective class's bbox 158 | """ 159 | str_keywords = "support_set_base_training" 160 | dataset_dicts_label_wise = fetch_object_if_tmp_stored(str_keywords) 161 | 162 | if dataset_dicts_label_wise is not None: 163 | return dataset_dicts_label_wise 164 | else: 165 | dataset_dicts_label_wise = {e: [] for e in IDS_BASE_CLASSES} 166 | 167 | for e in self.data_train_coco_2017: 168 | for ann in e['annotations']: 169 | if ann['iscrowd'] != 0: 170 | continue 171 | if ann['category_id'] not in IDS_BASE_CLASSES: 172 | continue 173 | 174 | record = copy.deepcopy(e) 175 | 176 | # filter annotations 177 | annotations_filtered = [ann] 178 | record['annotations'] = annotations_filtered 179 | 180 | dataset_dicts_label_wise[ann['category_id']].append(record) 181 | 182 | store_object_in_tmp(str_keywords, dataset_dicts_label_wise) 183 | return dataset_dicts_label_wise 184 | 185 | 186 | def fetch_object_if_tmp_stored(str_keywords): 187 | str_file_name = "tmp/{}.pkl".format(str_keywords) 188 | 189 | object_loaded = None 190 | if os.path.isfile(str_file_name): 191 | with open(str_file_name, 'rb') as f: 192 | object_loaded = pickle.load(f) 193 | return object_loaded 194 | 195 | 196 | def store_object_in_tmp(str_keywords, object_to_store): 197 | str_file_name = "tmp/{}.pkl".format(str_keywords) 198 | 199 | os.makedirs('tmp', exist_ok=True) 200 | with open(str_file_name, 'wb') as f: 201 | pickle.dump(object_to_store, f) 202 | -------------------------------------------------------------------------------- /data/datasets/coco_dock/register_coco_dock.py: -------------------------------------------------------------------------------- 1 | from .base_training import BaseTrainData 2 | 3 | 4 | class RegisterCOCODOCK: 5 | def __init__(self, num_shots): 6 | self.num_shots = num_shots 7 | 8 | def 
_register_datasets(self, ): 9 | self.base_train_data_instance = BaseTrainData() 10 | # self.fine_tune_data_instance = FineTuneData(num_shots=self.num_shots) 11 | 12 | self.base_train_data_instance._register_datasets() 13 | # self.fine_tune_data_instance._register_datasets() -------------------------------------------------------------------------------- /data/datasets/coco_note/base_training.py: -------------------------------------------------------------------------------- 1 | """ 2 | Register COCO base-training routine 3 | """ 4 | import copy 5 | import os 6 | import pickle 7 | from detectron2.data import DatasetCatalog, MetadataCatalog 8 | from detectron2.data.datasets.coco import load_coco_json 9 | from detectron2.data.datasets.register_coco import register_coco_instances 10 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 11 | 12 | IDS_BASE_CLASSES = [0, 1, 2, 3, 4, 5, 6, 8, 14, 15, 16, 17, 18, 19, 39, 56, 57, 58, 60, 62] 13 | IDS_NOVEL_CLASSES = [e for e in range(80) if e not in IDS_BASE_CLASSES] 14 | 15 | 16 | class BaseTrainData: 17 | """ 18 | Registers data for base-training phase 19 | """ 20 | def __init__(self, ): 21 | self._register_coco_2017_if_not_present() 22 | self._fetch_coco_2017_data() 23 | 24 | def _register_coco_2017_if_not_present(self, ): 25 | if 'coco_2017_train_copy' not in DatasetCatalog.keys(): 26 | register_coco_instances( 27 | 'coco_2017_train_copy', 28 | _get_builtin_metadata('coco'), 29 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/annotations/instances_train2017.json", 30 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/images/train2017/" 31 | ) 32 | if 'coco_2017_val_copy' not in DatasetCatalog.keys(): 33 | register_coco_instances( 34 | 'coco_2017_val_copy', 35 | _get_builtin_metadata('coco'), 36 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/annotations/instances_val2017.json", 37 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/images/val2017/" 38 | ) 39 | 40 | def _fetch_coco_2017_data(self, ): 41 | self.data_train_coco_2017 = DatasetCatalog.get('coco_2017_train_copy') 42 | self.data_val_coco_2017 = DatasetCatalog.get('coco_2017_val_copy') 43 | self.meta_coco_2017 = MetadataCatalog.get('coco_2017_train_copy') 44 | 45 | def _load_datasets(self, ): 46 | # Query: Train 47 | self.base_training_query_train = self.get_query_set_base_training_train() 48 | 49 | # Support 50 | self.base_training_support = self.get_support_set_base_training() 51 | 52 | # Query: Validation 53 | self.base_training_query_val = self.get_query_set_base_training_val() 54 | 55 | def _register_datasets(self, ): 56 | # Query: Train 57 | DatasetCatalog.register( 58 | "coco_note_base_training_query_train", 59 | lambda: self.get_query_set_base_training_train() 60 | ) 61 | MetadataCatalog.get("coco_note_base_training_query_train").set( 62 | thing_classes=MetadataCatalog.get("coco_2017_train_copy").thing_classes, 63 | evaluator_type='coco') 64 | 65 | # Support: Train + Validation 66 | DatasetCatalog.register( 67 | "coco_note_base_training_support", 68 | lambda: self.get_support_set_base_training() 69 | ) 70 | MetadataCatalog.get("coco_note_base_training_support").set( 71 | thing_classes=MetadataCatalog.get("coco_2017_train_copy").thing_classes, 72 | evaluator_type='coco') 73 | 74 | # Query: Validation 75 | DatasetCatalog.register( 76 | "coco_note_base_training_query_val", 77 | # lambda: 
self.get_query_set_base_training_val(self.base_train_adaptor_instance) 78 | lambda: load_coco_json( 79 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/annotations/instances_val2017.json", 80 | "/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/images/val2017/", 81 | "coco_note_base_training_query_val" 82 | ) 83 | ) 84 | MetadataCatalog.get("coco_note_base_training_query_val").set( 85 | # thing_classes=self.base_train_adaptor_instance.cfg.coco_classes, 86 | evaluator_type='coco', 87 | json_file="/scratch/ssd001/home/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO2017/annotations/instances_val2017.json" 88 | ) 89 | 90 | # DatasetCatalog.register( 91 | # "coco_note_base_training_query_val", 92 | # lambda: self.get_query_set_base_training_val() 93 | # ) 94 | # MetadataCatalog.get("coco_note_base_training_query_val").set( 95 | # thing_classes=MetadataCatalog.get("coco_2017_val_copy").thing_classes, 96 | # evaluator_type='coco') 97 | 98 | 99 | def get_query_set_base_training_train(self, ): 100 | """ 101 | Query: Train 102 | """ 103 | str_keywords = "query_set_base_training_train" 104 | dataset_dicts = fetch_object_if_tmp_stored(str_keywords) 105 | 106 | if dataset_dicts is not None: 107 | return dataset_dicts 108 | else: 109 | dataset_dicts = [] 110 | 111 | for e in self.data_train_coco_2017: 112 | record = copy.deepcopy(e) 113 | # filter annotations 114 | annotations_filtered = [] 115 | for ann in record['annotations']: 116 | if ann['category_id'] in IDS_BASE_CLASSES: 117 | annotations_filtered.append(ann) 118 | 119 | record['annotations'] = annotations_filtered 120 | 121 | if len(annotations_filtered) > 0: 122 | dataset_dicts.append(record) 123 | 124 | store_object_in_tmp(str_keywords, dataset_dicts) 125 | return dataset_dicts 126 | 127 | def get_query_set_base_training_val(self, ): 128 | """ 129 | Query: Val 130 | """ 131 | str_keywords = "query_set_base_training_val" 132 | dataset_dicts = fetch_object_if_tmp_stored(str_keywords) 133 | 134 | if dataset_dicts is not None: 135 | return dataset_dicts 136 | else: 137 | dataset_dicts = [] 138 | 139 | for e in self.data_val_coco_2017: 140 | record = copy.deepcopy(e) 141 | # filter annotations 142 | annotations_filtered = [] 143 | for ann in record['annotations']: 144 | if ann['category_id'] in IDS_BASE_CLASSES: 145 | annotations_filtered.append(ann) 146 | 147 | record['annotations'] = annotations_filtered 148 | 149 | if len(annotations_filtered) > 0: 150 | dataset_dicts.append(record) 151 | 152 | store_object_in_tmp(str_keywords, dataset_dicts) 153 | return dataset_dicts 154 | 155 | def get_support_set_base_training(self, ): 156 | """ 157 | All samples in the class-based bins have at least one of their respective class's bbox 158 | """ 159 | str_keywords = "support_set_base_training" 160 | dataset_dicts_label_wise = fetch_object_if_tmp_stored(str_keywords) 161 | 162 | if dataset_dicts_label_wise is not None: 163 | return dataset_dicts_label_wise 164 | else: 165 | dataset_dicts_label_wise = {e: [] for e in IDS_BASE_CLASSES} 166 | 167 | for e in self.data_train_coco_2017: 168 | for ann in e['annotations']: 169 | if ann['iscrowd'] != 0: 170 | continue 171 | if ann['category_id'] not in IDS_BASE_CLASSES: 172 | continue 173 | 174 | record = copy.deepcopy(e) 175 | 176 | # filter annotations 177 | annotations_filtered = [ann] 178 | record['annotations'] = annotations_filtered 179 | 180 | dataset_dicts_label_wise[ann['category_id']].append(record) 181 | 182 | 
store_object_in_tmp(str_keywords, dataset_dicts_label_wise) 183 | return dataset_dicts_label_wise 184 | 185 | 186 | def fetch_object_if_tmp_stored(str_keywords): 187 | str_file_name = "tmp/{}.pkl".format(str_keywords) 188 | 189 | object_loaded = None 190 | if os.path.isfile(str_file_name): 191 | with open(str_file_name, 'rb') as f: 192 | object_loaded = pickle.load(f) 193 | return object_loaded 194 | 195 | 196 | def store_object_in_tmp(str_keywords, object_to_store): 197 | str_file_name = "tmp/{}.pkl".format(str_keywords) 198 | 199 | os.makedirs('tmp', exist_ok=True) 200 | with open(str_file_name, 'wb') as f: 201 | pickle.dump(object_to_store, f) 202 | -------------------------------------------------------------------------------- /data/datasets/coco_note/register_coco_note.py: -------------------------------------------------------------------------------- 1 | from .base_training import BaseTrainData 2 | from .fine_tuning import FineTuneData 3 | 4 | class RegisterCOCONote: 5 | def __init__(self, num_shots): 6 | self.num_shots = num_shots 7 | 8 | def _register_datasets(self, ): 9 | self.base_train_data_instance = BaseTrainData() 10 | self.fine_tune_data_instance = FineTuneData(num_shots=self.num_shots) 11 | 12 | self.base_train_data_instance._register_datasets() 13 | self.fine_tune_data_instance._register_datasets() -------------------------------------------------------------------------------- /data/datasets/voc/register_voc.py: -------------------------------------------------------------------------------- 1 | from .base_training import BaseTrainData 2 | from .fine_tuning import FineTuneData 3 | 4 | 5 | class RegisterVOC: 6 | def __init__(self, split_id, num_shots, filter_07_base=False): 7 | self.split_id = split_id 8 | self.num_shots = num_shots 9 | self.filter_07_base = filter_07_base 10 | 11 | def _register_datasets(self, ): 12 | self.base_train_data_instance = BaseTrainData( 13 | novel_id=self.split_id - 1, 14 | filter_07_base=self.filter_07_base 15 | ) 16 | self.fine_tune_data_instance = FineTuneData( 17 | novel_id=self.split_id - 1, 18 | num_shots=self.num_shots 19 | ) 20 | 21 | self.base_train_data_instance._register_datasets() 22 | self.fine_tune_data_instance._register_datasets() -------------------------------------------------------------------------------- /data/embeddings/glove_mean: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ubc-vision/UniT/87ace8710ea7e84f104163a9adc3d3fed24fdcda/data/embeddings/glove_mean -------------------------------------------------------------------------------- /data/pipelines_adaptor/coco/base_training.py: -------------------------------------------------------------------------------- 1 | 2 | # FSOD-SPECIFIC IMPORTS 3 | from ...data_utils import parse_cfg, cfg, listDataset, MetaDataset, build_dataset 4 | 5 | # GENERAL IMPORTS 6 | import yaml 7 | 8 | from torchvision import transforms 9 | 10 | 11 | class BaseTrainFSODAdaptor: 12 | def __init__(self, novel_id=3, 13 | path_config='config_base_training.yaml', 14 | gpus=[0,], num_workers=2, 15 | batch_size_query=5, batch_size_support=5, 16 | width_query=416, height_query=416, 17 | width_support=416, height_support=416): 18 | # user args 19 | self.novel_id = novel_id 20 | assert(self.novel_id == 3) 21 | 22 | # defaults 23 | self.gpus = gpus 24 | self.num_workers = num_workers 25 | 26 | # not used in any downstream usage 27 | self.width_query = width_query 28 | self.height_query = height_query 29 | 30 | # not used in any 
downstream usage 31 | self.width_support = width_support 32 | self.height_support = height_support 33 | 34 | # not used in any downstream usage 35 | self.batch_size_query = batch_size_query 36 | self.batch_size_support = batch_size_support 37 | 38 | # read config options 39 | self.path_config = path_config 40 | self.data_options = self._read_data_options_config(self.path_config) 41 | # pprint(self.data_options) 42 | 43 | # override them with user args 44 | self.data_options['novelid'] = self.novel_id 45 | 46 | # config query 47 | cfg.config_data(self.data_options) 48 | 49 | # config support 50 | cfg['batch_size'] = self.batch_size_query 51 | cfg.config_meta({ 52 | 'feat_layer': 0, 53 | 'channels': 4, 54 | 'width': self.width_support, 55 | 'height': self.height_support 56 | }) 57 | 58 | # to make cfg accessible to this class's instances. Kinda sad how the 59 | # original code is written! 60 | self.cfg = cfg 61 | 62 | # TRAIN 63 | query_set_filtered = self._pre_filter_query_list(self.data_options) 64 | self.query_dataset_train = listDataset( 65 | query_set_filtered, 66 | shape=(self.width_query, self.height_query), 67 | shuffle=False, 68 | transform=transforms.Compose([transforms.ToTensor(),]), 69 | train=True, 70 | seen=0, 71 | batch_size=self.batch_size_query, # this is not doing anything! 72 | # `num_workers` as an argument isn't doing anything except updating 73 | # `seen` that controls width for data aug 74 | num_workers=self.num_workers 75 | ) 76 | # this works both for train and validation case 77 | self.support_set = MetaDataset( 78 | self.data_options['meta'], train=True) 79 | 80 | # VALIDATION 81 | self.query_dataset_val = listDataset( 82 | self.data_options['valid'], 83 | shape=(self.width_query, self.height_query), 84 | shuffle=False, 85 | transform=transforms.Compose([transforms.ToTensor(),]) 86 | ) 87 | 88 | def _pre_filter_query_list(self, data_options): 89 | return build_dataset(data_options) 90 | 91 | def _read_data_options_config(self, path): 92 | with open(path, 'r') as f: 93 | data_options = yaml.safe_load(f) 94 | 95 | data_options['gpus'] = ','.join([str(e) for e in self.gpus]) 96 | data_options['neg'] = str(data_options['neg']) 97 | return data_options 98 | 99 | 100 | if __name__ == "__main__": 101 | base_pipe_instance = BaseTrainFSODAdaptor() -------------------------------------------------------------------------------- /data/pipelines_adaptor/coco/config_base_training.yaml: -------------------------------------------------------------------------------- 1 | # Adapted from: https://github.com/bingykang/Fewshot_Detection/blob/master/cfg/metayolo.data 2 | # paths roots are changed 3 | 4 | metayolo: 1 5 | metain_type: 2 6 | data: coco 7 | neg: 1 8 | rand: 0 9 | novel: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_novels.txt 10 | novelid: 3 11 | # scale: 0 # commented out originally 12 | meta: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_traindict_full.txt 13 | train: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/trainvalno5k.txt 14 | valid: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/5k.txt 15 | backup: tmp/metayolo 16 | gpus: 0 17 | num_workers: 2 -------------------------------------------------------------------------------- /data/pipelines_adaptor/coco/config_fine_tuning.yaml: -------------------------------------------------------------------------------- 1 | # Adapted from: https://github.com/bingykang/Fewshot_Detection/blob/master/cfg/metatune.data 2 | # paths roots are changed 3 | 4 | 5 | 
metayolo: 1 6 | metain_type: 2 7 | data: coco 8 | tuning: 1 9 | neg: 0 10 | rand: 0 11 | novel: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_novels.txt 12 | novelid: 3 13 | max_epoch: 2000 14 | repeat: 1 # 200 --> 1 (changed to 1 since we are batching in detectron now!) 15 | dynamic: 0 16 | scale: 1 17 | train: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/trainvalno5k.txt 18 | meta: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/coco_full_10shot.txt 19 | valid: /h/skhandel/FewshotDetection/WSASOD/data/data_utils/data/MSCOCO/5k.txt 20 | backup: tmp/metatunetest1 21 | gpus: 0 22 | num_workers: 2 -------------------------------------------------------------------------------- /data/pipelines_adaptor/coco/fine_tuning.py: -------------------------------------------------------------------------------- 1 | # FSOD-SPECIFIC IMPORTS 2 | from ...data_utils import parse_cfg, cfg, listDataset, MetaDataset, build_dataset 3 | 4 | # GENERAL IMPORTS 5 | import yaml 6 | 7 | from torchvision import transforms 8 | 9 | 10 | class FineTuneFSODAdaptor: 11 | def __init__(self, novel_id=3, num_shots=10, 12 | path_config='config_fine_tuning.yaml', 13 | gpus=[0,], num_workers=2, 14 | batch_size_query=5, batch_size_support=5, 15 | width_query=416, height_query=416, 16 | width_support=416, height_support=416): 17 | # user args 18 | self.novel_id = novel_id 19 | self.num_shots = num_shots 20 | 21 | assert(self.novel_id == 3) 22 | assert(self.num_shots in [10, 30]) 23 | 24 | # defaults 25 | self.gpus = gpus 26 | self.num_workers = num_workers 27 | 28 | # not used in any downstream usage 29 | self.width_query = width_query 30 | self.height_query = height_query 31 | 32 | # not used in any downstream usage 33 | self.width_support = width_support 34 | self.height_support = height_support 35 | 36 | # not used in any downstream usage 37 | self.batch_size_query = batch_size_query 38 | self.batch_size_support = batch_size_support 39 | 40 | # read config options 41 | self.path_config = path_config 42 | self.data_options = self._read_data_options_config(self.path_config) 43 | 44 | # override them with user args 45 | self.data_options['novelid'] = self.novel_id 46 | self.data_options['meta'] = \ 47 | self.data_options['meta'].replace('10shot', '{}shot'.format(self.num_shots)) 48 | 49 | # config query 50 | cfg.config_data(self.data_options) 51 | 52 | # config support 53 | cfg['batch_size'] = self.batch_size_query 54 | cfg.config_meta({ 55 | 'feat_layer': 0, 56 | 'channels': 4, 57 | 'width': self.width_support, 58 | 'height': self.height_support 59 | }) 60 | 61 | # to make cfg accessible to this class's instances. Kinda sad how the 62 | # original code is written! 63 | self.cfg = cfg 64 | 65 | # TRAIN 66 | query_set_filtered = self._pre_filter_query_list(self.data_options) 67 | self.query_dataset_train = listDataset( 68 | query_set_filtered, 69 | shape=(self.width_query, self.height_query), 70 | shuffle=False, 71 | transform=transforms.Compose([transforms.ToTensor(),]), 72 | train=True, 73 | seen=0, 74 | batch_size=self.batch_size_query, # this is not doing anything! 
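# (batching is handled by detectron2's own dataloader downstream; the legacy
# args on this call are kept only so the original pipeline's signature works)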
75 | # `num_workers` as an argument isn't doing anything except updating 76 | # `seen` that controls width for data aug 77 | num_workers=self.num_workers 78 | ) 79 | # this works both for train and validation case 80 | self.support_set = MetaDataset( 81 | self.data_options['meta'], train=True) 82 | 83 | # VALIDATION 84 | self.query_dataset_val = listDataset( 85 | self.data_options['valid'], 86 | shape=(self.width_query, self.height_query), 87 | shuffle=False, 88 | transform=transforms.Compose([transforms.ToTensor(),]) 89 | ) 90 | 91 | def _pre_filter_query_list(self, data_options): 92 | return build_dataset(data_options) 93 | 94 | def _read_data_options_config(self, path): 95 | with open(path, 'r') as f: 96 | data_options = yaml.safe_load(f) 97 | 98 | data_options['gpus'] = ','.join([str(e) for e in self.gpus]) 99 | data_options['neg'] = str(data_options['neg']) 100 | return data_options 101 | 102 | 103 | if __name__ == "__main__": 104 | base_pipe_instance = FineTuneFSODAdaptor(novel_id=1, num_shots=10) -------------------------------------------------------------------------------- /data/pipelines_adaptor/voc/base_training.py: -------------------------------------------------------------------------------- 1 | 2 | # FSOD-SPECIFIC IMPORTS 3 | from ...data_utils import parse_cfg, cfg, listDataset, MetaDataset, build_dataset 4 | 5 | # GENERAL IMPORTS 6 | import yaml 7 | import os 8 | from torchvision import transforms 9 | 10 | 11 | class BaseTrainFSODAdaptor: 12 | def __init__(self, novel_id=0, 13 | path_config='config_base_training.yaml', 14 | gpus=[0,], num_workers=2, 15 | batch_size_query=5, batch_size_support=5, 16 | width_query=416, height_query=416, 17 | width_support=416, height_support=416): 18 | # user args 19 | self.novel_id = novel_id 20 | assert(self.novel_id in [0, 1, 2]) 21 | 22 | # defaults 23 | self.gpus = gpus 24 | self.num_workers = num_workers 25 | 26 | # not used in any downstream usage 27 | self.width_query = width_query 28 | self.height_query = height_query 29 | 30 | # not used in any downstream usage 31 | self.width_support = width_support 32 | self.height_support = height_support 33 | 34 | # not used in any downstream usage 35 | self.batch_size_query = batch_size_query 36 | self.batch_size_support = batch_size_support 37 | 38 | # read config options 39 | self.path_config = path_config 40 | self.data_options = self._read_data_options_config(self.path_config) 41 | # pprint(self.data_options) 42 | 43 | # override them with user args 44 | self.data_options['novelid'] = self.novel_id 45 | 46 | # config query 47 | cfg.config_data(self.data_options) 48 | 49 | # config support 50 | cfg['batch_size'] = self.batch_size_query 51 | cfg.config_meta({ 52 | 'feat_layer': 0, 53 | 'channels': 4, 54 | 'width': self.width_support, 55 | 'height': self.height_support 56 | }) 57 | 58 | # to make cfg accessible to this class's instances. Kinda sad how the 59 | # original code is written! 60 | self.cfg = cfg 61 | 62 | # TRAIN 63 | query_set_filtered = self._pre_filter_query_list(self.data_options) 64 | self.query_dataset_train = listDataset( 65 | query_set_filtered, 66 | shape=(self.width_query, self.height_query), 67 | shuffle=False, 68 | transform=transforms.Compose([transforms.ToTensor(),]), 69 | train=True, 70 | seen=0, 71 | batch_size=self.batch_size_query, # this is not doing anything! 
72 | # `num_workers` as an argument isn't doing anything except updating 73 | # `seen` that controls width for data aug 74 | num_workers=self.num_workers 75 | ) 76 | # this works both for train and validation case 77 | self.support_set = MetaDataset( 78 | self.data_options['meta'], train=True) 79 | 80 | # VALIDATION 81 | self.query_dataset_val = listDataset( 82 | self.data_options['valid'], 83 | shape=(self.width_query, self.height_query), 84 | shuffle=False, 85 | transform=transforms.Compose([transforms.ToTensor(),]) 86 | ) 87 | 88 | def _pre_filter_query_list(self, data_options): 89 | return build_dataset(data_options) 90 | 91 | def _read_data_options_config(self, path): 92 | with open(path, 'r') as f: 93 | data_options = yaml.safe_load(f) 94 | 95 | data_options['gpus'] = ','.join([str(e) for e in self.gpus]) 96 | data_options['neg'] = str(data_options['neg']) 97 | return data_options 98 | 99 | 100 | if __name__ == "__main__": 101 | base_pipe_instance = BaseTrainFSODAdaptor(novel_id=2) -------------------------------------------------------------------------------- /data/pipelines_adaptor/voc/config_base_training.yaml: -------------------------------------------------------------------------------- 1 | backup: tmp/metayolo 2 | code_root: /cpu008/skhandel/UniT/ 3 | data: voc 4 | data_root: /cpu008/skhandel/UniT/data/VOC/ 5 | gpus: 0 6 | meta: /cpu008/skhandel/UniT/data/VOC/voc_traindict_full.txt 7 | metain_type: 2 8 | metayolo: 1 9 | neg: 1 10 | novel: /cpu008/skhandel/UniT/data/VOC/voc_novels.txt 11 | novelid: 0 12 | num_workers: 2 13 | rand: 0 14 | train: /cpu008/skhandel/UniT/data/VOC/voc_train.txt 15 | valid: /cpu008/skhandel/UniT/data/VOC/2007_test.txt 16 | -------------------------------------------------------------------------------- /data/pipelines_adaptor/voc/config_fine_tuning.yaml: -------------------------------------------------------------------------------- 1 | backup: tmp/metatunetest1 2 | code_root: /cpu008/skhandel/UniT/ 3 | data: voc 4 | data_root: /cpu008/skhandel/UniT/data/VOC/ 5 | dynamic: 0 6 | gpus: 0 7 | max_epoch: 2000 8 | meta: /cpu008/skhandel/UniT/data/VOC/voc_traindict_bbox_5shot.txt 9 | metain_type: 2 10 | metayolo: 1 11 | neg: 0 12 | novel: /cpu008/skhandel/UniT/data/VOC/voc_novels.txt 13 | novelid: 0 14 | num_workers: 2 15 | rand: 0 16 | repeat: 1 17 | scale: 1 18 | train: /cpu008/skhandel/UniT/data/VOC/voc_train.txt 19 | tuning: 1 20 | valid: /cpu008/skhandel/UniT/data/VOC/2007_test.txt 21 | -------------------------------------------------------------------------------- /data/pipelines_adaptor/voc/fine_tuning.py: -------------------------------------------------------------------------------- 1 | 2 | # FSOD-SPECIFIC IMPORTS 3 | from ...data_utils import parse_cfg, cfg, listDataset, MetaDataset, build_dataset 4 | 5 | # GENERAL IMPORTS 6 | import yaml 7 | 8 | from torchvision import transforms 9 | 10 | 11 | class FineTuneFSODAdaptor: 12 | def __init__(self, novel_id=0, num_shots=5, 13 | path_config='config_fine_tuning.yaml', 14 | gpus=[0,], num_workers=2, 15 | batch_size_query=5, batch_size_support=5, 16 | width_query=416, height_query=416, 17 | width_support=416, height_support=416): 18 | # user args 19 | self.novel_id = novel_id 20 | self.num_shots = num_shots 21 | 22 | assert(self.novel_id in [0, 1, 2]) 23 | assert(self.num_shots in [1, 2, 3, 5, 10]) 24 | 25 | # defaults 26 | self.gpus = gpus 27 | self.num_workers = num_workers 28 | 29 | # not used in any downstream usage 30 | self.width_query = width_query 31 | self.height_query = height_query 32 | 33 
| # not used in any downstream usage
34 |         self.width_support = width_support
35 |         self.height_support = height_support
36 |
37 |         # not used in any downstream usage
38 |         self.batch_size_query = batch_size_query
39 |         self.batch_size_support = batch_size_support
40 |
41 |         # read config options
42 |         self.path_config = path_config
43 |         self.data_options = self._read_data_options_config(self.path_config)
44 |
45 |         # override them with user args
46 |         self.data_options['novelid'] = self.novel_id
47 |         self.data_options['meta'] = \
48 |             self.data_options['meta'].replace('5shot', '{}shot'.format(self.num_shots))
49 |
50 |         # config query
51 |         cfg.config_data(self.data_options)
52 |
53 |         # config support
54 |         cfg['batch_size'] = self.batch_size_query
55 |         cfg.config_meta({
56 |             'feat_layer': 0,
57 |             'channels': 4,
58 |             'width': self.width_support,
59 |             'height': self.height_support
60 |         })
61 |
62 |         # to make cfg accessible to this class's instances. Kinda sad how the
63 |         # original code is written!
64 |         self.cfg = cfg
65 |
66 |         # TRAIN
67 |         query_set_filtered = self._pre_filter_query_list(self.data_options)
68 |         self.query_dataset_train = listDataset(
69 |             query_set_filtered,
70 |             shape=(self.width_query, self.height_query),
71 |             shuffle=False,
72 |             transform=transforms.Compose([transforms.ToTensor(),]),
73 |             train=True,
74 |             seen=0,
75 |             batch_size=self.batch_size_query, # this is not doing anything!
76 |             # `num_workers` as an argument isn't doing anything except updating
77 |             # `seen` that controls width for data aug
78 |             num_workers=self.num_workers
79 |         )
80 |         # this works both for train and validation case
81 |         self.support_set = MetaDataset(
82 |             self.data_options['meta'], train=True)
83 |
84 |         # VALIDATION
85 |         self.query_dataset_val = listDataset(
86 |             self.data_options['valid'],
87 |             shape=(self.width_query, self.height_query),
88 |             shuffle=False,
89 |             transform=transforms.Compose([transforms.ToTensor(),])
90 |         )
91 |
92 |     def _pre_filter_query_list(self, data_options):
93 |         return build_dataset(data_options)
94 |
95 |     def _read_data_options_config(self, path):
96 |         with open(path, 'r') as f:
97 |             data_options = yaml.safe_load(f)
98 |
99 |         data_options['gpus'] = ','.join([str(e) for e in self.gpus])
100 |         data_options['neg'] = str(data_options['neg'])
101 |         return data_options
102 |
103 |
104 | if __name__ == "__main__":
105 |     base_pipe_instance = FineTuneFSODAdaptor(novel_id=1, num_shots=10)  # num_shots must be one of [1, 2, 3, 5, 10]
-------------------------------------------------------------------------------- /data/prepare_coco.py: --------------------------------------------------------------------------------
1 | import os
2 | import argparse
3 | import logging
4 |
5 | parser = argparse.ArgumentParser()
6 | parser.add_argument('--DATA-ROOT', required=True, help='Directory to store COCO data')
7 |
8 | logging.basicConfig(level=logging.NOTSET)
9 | logger = logging.getLogger("COCO-Setup")
10 |
11 | def download_data(args):
12 |     logger.info("Downloading Data")
13 |     os.system('mkdir -p {}'.format(args.DATA_ROOT))
14 |     # COCO-2014 images and annotations (the MSCOCO splits this codebase reads)
15 |     os.system('wget -P {} http://images.cocodataset.org/zips/train2014.zip'.format(args.DATA_ROOT))
16 |     os.system('wget -P {} http://images.cocodataset.org/zips/val2014.zip'.format(args.DATA_ROOT))
17 |     os.system('wget -P {} http://images.cocodataset.org/annotations/annotations_trainval2014.zip'.format(args.DATA_ROOT))
18 |
19 |     logger.info("Extracting Data")
20 |     os.system('unzip -q {}/train2014.zip -d {}'.format(args.DATA_ROOT, args.DATA_ROOT))
21 |     os.system('unzip -q {}/val2014.zip -d {}'.format(args.DATA_ROOT, args.DATA_ROOT))
22 |     os.system('unzip -q {}/annotations_trainval2014.zip -d {}'.format(args.DATA_ROOT, args.DATA_ROOT))
23 |
24 |     logger.info("Data Extracted")
25 |     os.system('rm -f {}/train2014.zip'.format(args.DATA_ROOT))
26 |     os.system('rm -f {}/val2014.zip'.format(args.DATA_ROOT))
27 |     os.system('rm -f {}/annotations_trainval2014.zip'.format(args.DATA_ROOT))
28 |
29 | if __name__ == "__main__":
30 |     download_data(parser.parse_args())
-------------------------------------------------------------------------------- /data/utils.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import time
4 | import json
5 | import torch
6 | import logging
7 | import datetime
8 | import tempfile
9 | import detectron2.utils.comm as comm
10 | import numpy as np
11 | from collections import defaultdict, OrderedDict
12 | from detectron2.data import MetadataCatalog
13 | from detectron2.evaluation import (
14 |     CityscapesInstanceEvaluator, COCOEvaluator,
15 |     COCOPanopticEvaluator,
16 |     DatasetEvaluators,
17 |     LVISEvaluator,
18 |     PascalVOCDetectionEvaluator,
19 |     SemSegEvaluator
20 | )
21 |
22 | from detectron2.evaluation.pascal_voc_evaluation import voc_eval
23 | from detectron2.evaluation import inference_context
24 | from detectron2.utils.logger import log_every_n_seconds
25 | from .datasets.voc.register_voc import RegisterVOC
26 | from .datasets.coco.register_coco import RegisterCOCO
27 | from .datasets.coco_note.register_coco_note import RegisterCOCONote
28 | from .datasets.coco_dock.register_coco_dock import RegisterCOCODOCK
29 | from .evaluators import PascalVOCEvaluator, PascalVOCDetectionWeakEvaluator, COCOEvaluatorWeakEvaluator
30 |
31 |
32 | def register_datasets(args_data):
33 |     if args_data.DATASETS.FEWSHOT.TYPE == 'VOC':
34 |         register_instance = RegisterVOC(
35 |             args_data.DATASETS.FEWSHOT.SPLIT_ID, args_data.DATASETS.FEWSHOT.NUM_SHOTS)
36 |         register_instance._register_datasets()
37 |     elif args_data.DATASETS.FEWSHOT.TYPE == 'COCO':
38 |         register_instance = RegisterCOCO(4, args_data.DATASETS.FEWSHOT.NUM_SHOTS)
39 |         register_instance._register_datasets()
40 |     elif args_data.DATASETS.FEWSHOT.TYPE == 'VOC2007':
41 |         register_instance = RegisterVOC(
42 |             args_data.DATASETS.FEWSHOT.SPLIT_ID, args_data.DATASETS.FEWSHOT.NUM_SHOTS, filter_07_base=True)
43 |         register_instance._register_datasets()
44 |     elif args_data.DATASETS.FEWSHOT.TYPE == 'COCO_NOTE':
45 |         register_instance = RegisterCOCONote(args_data.DATASETS.FEWSHOT.NUM_SHOTS)
46 |         register_instance._register_datasets()
47 |     elif args_data.DATASETS.FEWSHOT.TYPE == 'COCO_DOCK':
48 |         register_instance = RegisterCOCODOCK(args_data.DATASETS.FEWSHOT.NUM_SHOTS)
49 |         register_instance._register_datasets()
50 |     else:
51 |         raise ValueError("Dataset type not recognized: {}".format(args_data.DATASETS.FEWSHOT.TYPE))
52 |
53 | def get_evaluator(cfg, dataset_name, output_folder=None):
54 |     """
55 |     Create evaluator(s) for a given dataset.
56 |     This uses the special metadata "evaluator_type" associated with each builtin dataset.
57 |     For your own dataset, you can simply create an evaluator manually in your
58 |     script and do not have to worry about the hacky if-else logic here.
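    Illustrative usage (assumes the dataset has already been registered,
    e.g. via `register_datasets` above):
        evaluator = get_evaluator(cfg, "coco_dock_base_training_query_val")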
59 | """ 60 | if output_folder is None: 61 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 62 | evaluator_list = [] 63 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 64 | if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: 65 | evaluator_list.append( 66 | SemSegEvaluator( 67 | dataset_name, 68 | distributed=True, 69 | num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 70 | ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 71 | output_dir=output_folder, 72 | ) 73 | ) 74 | if evaluator_type in ["coco", "coco_panoptic_seg"]: 75 | print (dataset_name) 76 | evaluator_list.append(COCOEvaluatorWeakEvaluator(dataset_name, cfg, True, output_folder)) 77 | if evaluator_type == "coco_panoptic_seg": 78 | evaluator_list.append(COCOPanopticEvaluator(dataset_name, output_folder)) 79 | if evaluator_type == "cityscapes": 80 | assert ( 81 | torch.cuda.device_count() >= comm.get_rank() 82 | ), "CityscapesEvaluator currently do not work with multiple machines." 83 | return CityscapesEvaluator(dataset_name) 84 | if evaluator_type == "pascal_voc": 85 | return PascalVOCDetectionWeakEvaluator(dataset_name, cfg=cfg) 86 | if evaluator_type == "lvis": 87 | return LVISEvaluator(dataset_name, cfg, True, output_folder) 88 | if len(evaluator_list) == 0: 89 | raise NotImplementedError( 90 | "no Evaluator for the dataset {} with the type {}".format(dataset_name, evaluator_type) 91 | ) 92 | if len(evaluator_list) == 1: 93 | return evaluator_list[0] 94 | return DatasetEvaluators(evaluator_list) 95 | 96 | 97 | 98 | def inference_on_dataset_meta(model, data_loader, att_vecs_support, evaluator): 99 | """ 100 | Run model on the data_loader and evaluate the metrics with evaluator. 101 | The model will be used in eval mode. 102 | Args: 103 | model (nn.Module): a module which accepts an object from 104 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode. 105 | If you wish to evaluate a model in `training` mode instead, you can 106 | wrap the given model and override its behavior of `.eval()` and `.train()`. 107 | data_loader: an iterable object with a length. 108 | The elements it generates will be the inputs to the model. 109 | evaluator (DatasetEvaluator): the evaluator to run. Use 110 | :class:`DatasetEvaluators([])` if you only want to benchmark, but 111 | don't want to do any evaluation. 
112 | Returns: 113 | The return value of `evaluator.evaluate()` 114 | """ 115 | num_devices = torch.distributed.get_world_size() if torch.distributed.is_initialized() else 1 116 | logger = logging.getLogger(__name__) 117 | logger.info("Start inference on {} images".format(len(data_loader))) 118 | 119 | total = len(data_loader) # inference data loader must have a fixed length 120 | evaluator.reset() 121 | 122 | num_warmup = min(5, total - 1) 123 | start_time = time.perf_counter() 124 | total_compute_time = 0 125 | with inference_context(model), torch.no_grad(): 126 | for idx, inputs in enumerate(data_loader): 127 | if idx == num_warmup: 128 | start_time = time.perf_counter() 129 | total_compute_time = 0 130 | 131 | start_compute_time = time.perf_counter() 132 | outputs = model(inputs, att_vecs_support) 133 | if torch.cuda.is_available(): 134 | torch.cuda.synchronize() 135 | total_compute_time += time.perf_counter() - start_compute_time 136 | evaluator.process(inputs, outputs) 137 | 138 | iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) 139 | seconds_per_img = total_compute_time / iters_after_start 140 | if idx >= num_warmup * 2 or seconds_per_img > 5: 141 | total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start 142 | eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) 143 | log_every_n_seconds( 144 | logging.INFO, 145 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 146 | idx + 1, total, seconds_per_img, str(eta) 147 | ), 148 | n=5, 149 | ) 150 | 151 | # Measure the time only for this worker (before the synchronization barrier) 152 | total_time = time.perf_counter() - start_time 153 | total_time_str = str(datetime.timedelta(seconds=total_time)) 154 | # NOTE this format is parsed by grep 155 | logger.info( 156 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( 157 | total_time_str, total_time / (total - num_warmup), num_devices 158 | ) 159 | ) 160 | total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) 161 | logger.info( 162 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( 163 | total_compute_time_str, total_compute_time / (total - num_warmup), num_devices 164 | ) 165 | ) 166 | 167 | results = evaluator.evaluate() 168 | # An evaluator may return None when not in main process. 
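    # (metrics are aggregated on the main worker; the other ranks hand back None)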
169 |     # Replace it by an empty dict instead to make it easier for downstream code to handle
170 |     if results is None:
171 |         results = {}
172 |     return results
173 |
174 |
175 | def include_dependencies(path_json):
176 |     with open(path_json, 'r') as fp:
177 |         f = json.load(fp)
178 |     for dependency in f['dependencies']:
179 |         print(" > Including dependency: {}".format(dependency))
180 |         sys.path.append(dependency)
-------------------------------------------------------------------------------- /engine/__init__.py: --------------------------------------------------------------------------------
1 | from .defaults import *
-------------------------------------------------------------------------------- /evalutation/__init__.py: --------------------------------------------------------------------------------
1 | from .evaulator import inference_on_dataset
-------------------------------------------------------------------------------- /evalutation/evaulator.py: --------------------------------------------------------------------------------
1 | import datetime
2 | import logging
3 | import time
4 | from collections import OrderedDict
5 | from contextlib import contextmanager
6 | import torch
7 | import multiprocessing as mp
8 | import numpy as np
9 | import os
10 | from pathlib import Path
11 | import tempfile
12 | import xml.etree.ElementTree as ET
13 | from collections import OrderedDict, defaultdict
14 | from functools import lru_cache
15 | from detectron2.utils.comm import get_world_size, is_main_process
16 | from detectron2.utils.logger import log_every_n_seconds
17 | from detectron2.evaluation.evaluator import inference_context
18 | from detectron2.data import MetadataCatalog
19 | from detectron2.utils import comm
20 | from detectron2.evaluation.evaluator import DatasetEvaluator, DatasetEvaluators
21 |
22 | def inference_on_dataset(model, data_loader, evaluator, meta_data_loader=None, meta_attention=None):
23 |     """
24 |     Run model on the data_loader and evaluate the metrics with evaluator.
25 |     Also benchmark the inference speed of `model.forward` accurately.
26 |     The model will be used in eval mode.
27 |     Args:
28 |         model (nn.Module): a module which accepts an object from
29 |             `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
30 |             If you wish to evaluate a model in `training` mode instead, you can
31 |             wrap the given model and override its behavior of `.eval()` and `.train()`.
32 |         data_loader: an iterable object with a length.
33 |             The elements it generates will be the inputs to the model.
34 |         evaluator (DatasetEvaluator): the evaluator to run. Use `None` if you only want
35 |             to benchmark, but don't want to do any evaluation.
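        meta_data_loader: optional loader over the support set; when
            `meta_attention` is None, a single batch from it is used to
            compute the attention vectors once.
        meta_attention: optional precomputed support-set attention, forwarded
            to the model as `model(inputs, meta_attention=meta_attention)`
            on every batch.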
36 | Returns: 37 | The return value of `evaluator.evaluate()` 38 | """ 39 | num_devices = get_world_size() 40 | logger = logging.getLogger('detectron2') 41 | logger.info("Start inference on {} images".format(len(data_loader))) 42 | 43 | total = len(data_loader) # inference data loader must have a fixed length 44 | if evaluator is None: 45 | # create a no-op evaluator 46 | evaluator = DatasetEvaluators([]) 47 | evaluator.reset() 48 | 49 | num_warmup = min(5, total - 1) 50 | start_time = time.perf_counter() 51 | total_compute_time = 0 52 | with inference_context(model), torch.no_grad(): 53 | for idx, inputs in enumerate(data_loader): 54 | # Get Attention Vectors 55 | if meta_attention is None: 56 | if meta_data_loader is not None: 57 | data = next(iter(meta_data_loader)) 58 | meta_attention = model(None, meta_data=data, return_attention=True) 59 | if idx == num_warmup: 60 | start_time = time.perf_counter() 61 | total_compute_time = 0 62 | 63 | start_compute_time = time.perf_counter() 64 | outputs = model(inputs, meta_attention=meta_attention) 65 | if torch.cuda.is_available(): 66 | torch.cuda.synchronize() 67 | total_compute_time += time.perf_counter() - start_compute_time 68 | evaluator.process(inputs, outputs) 69 | 70 | iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup) 71 | seconds_per_img = total_compute_time / iters_after_start 72 | if idx >= num_warmup * 2 or seconds_per_img > 5: 73 | total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start 74 | eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1))) 75 | log_every_n_seconds( 76 | logging.INFO, 77 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 78 | idx + 1, total, seconds_per_img, str(eta) 79 | ), 80 | n=5, 81 | ) 82 | 83 | # Measure the time only for this worker (before the synchronization barrier) 84 | total_time = time.perf_counter() - start_time 85 | total_time_str = str(datetime.timedelta(seconds=total_time)) 86 | # NOTE this format is parsed by grep 87 | logger.info( 88 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)".format( 89 | total_time_str, total_time / (total - num_warmup), num_devices 90 | ) 91 | ) 92 | total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time))) 93 | logger.info( 94 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)".format( 95 | total_compute_time_str, total_compute_time / (total - num_warmup), num_devices 96 | ) 97 | ) 98 | 99 | results = evaluator.evaluate() 100 | # An evaluator may return None when not in main process. 
101 | # Replace it by an empty dict instead to make it easier for downstream code to handle 102 | if results is None: 103 | results = {} 104 | return results 105 | 106 | 107 | -------------------------------------------------------------------------------- /modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from .meta_arch import * 2 | from .roi_heads import * 3 | from .proposal_generator import * 4 | from .backbone import * -------------------------------------------------------------------------------- /modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import * -------------------------------------------------------------------------------- /modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | import math 2 | import fvcore.nn.weight_init as weight_init 3 | import torch.nn.functional as F 4 | from torch import nn 5 | 6 | from detectron2.layers import Conv2d, ShapeSpec, get_norm 7 | 8 | from detectron2.modeling.backbone import Backbone 9 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 10 | from detectron2.modeling.backbone.resnet import build_resnet_backbone 11 | from torchvision import models as M, ops 12 | from .inception_resnet_v2 import inceptionresnetv2 13 | from .vgg_cnn_f import extract_vgg_cnn_f_components 14 | from collections import OrderedDict 15 | import logging 16 | 17 | 18 | def extract_components(model_fn, pretrained=False): 19 | model = model_fn(pretrained) 20 | convs = model.features[:-1] 21 | fc = model.classifier[:-1] 22 | return convs, fc 23 | 24 | def dilate_convs(convs): 25 | i = -1 26 | while not isinstance(convs[i], nn.MaxPool2d): 27 | if isinstance(convs[i], nn.Conv2d): 28 | convs[i].dilation = (2, 2) 29 | convs[i].padding = (2, 2) 30 | i -= 1 31 | del convs[i] 32 | return convs 33 | 34 | def freeze_convs(convs, k): 35 | """ 36 | Freezes `k` conv layers 37 | """ 38 | i = 0 39 | while k > 0: 40 | if isinstance(convs[i], nn.Conv2d): 41 | k -= 1 42 | for p in convs[i].parameters(): 43 | p.requires_grad = False 44 | i += 1 45 | 46 | def get_conv_scale(convs): 47 | """ 48 | Determines the downscaling performed by a sequence of convolutional and pooling layers 49 | """ 50 | scale = 1. 51 | channels = 3 52 | for c in convs: 53 | stride = getattr(c, 'stride', 1.) 
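        # `stride` can be an int/float (e.g. on MaxPool2d) or an (h, w) tuple
        # (e.g. on Conv2d); only its first entry feeds the running scale below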
54 |         scale *= stride if isinstance(stride, (int, float)) else stride[0]
55 |         channels = getattr(c, 'out_channels') if isinstance(c, nn.Conv2d) else channels
56 |     return scale, channels
57 |
58 | @BACKBONE_REGISTRY.register()
59 | class VGG(Backbone):
60 |     def __init__(self, cfg, input_shape):
61 |         super().__init__()
62 |         dilated = cfg.MODEL.BACKBONE.DILATED
63 |         convs, _ = extract_components(M.vgg16, pretrained=True)
64 |         if dilated:
65 |             convs = dilate_convs(convs)
66 |         freeze_convs(convs, cfg.MODEL.BACKBONE.FREEZE_CONVS)
67 |         self.convs = convs
68 |         self.scale, self.channels = get_conv_scale(convs)
69 |         self._out_features = ['vgg_conv']
70 |
71 |     def output_shape(self):
72 |         return {
73 |             name: ShapeSpec(
74 |                 channels=self.channels, stride=self.scale
75 |             )
76 |             for name in self._out_features
77 |         }
78 |     def forward(self, x):
79 |         output = self.convs(x)
80 |         return {self._out_features[0]: output}
81 |
82 | @BACKBONE_REGISTRY.register()
83 | class VGG_CNN_F(Backbone):
84 |     def __init__(self, cfg, input_shape):
85 |         super().__init__()
86 |         convs, _ = extract_vgg_cnn_f_components(pretrained=True)
87 |         self.convs = convs
88 |         self.scale, self.channels = 16, 256
89 |         self._out_features = ['vgg_conv']
90 |
91 |     def output_shape(self):
92 |         return {
93 |             name: ShapeSpec(
94 |                 channels=self.channels, stride=self.scale
95 |             )
96 |             for name in self._out_features
97 |         }
98 |
99 |     def forward(self, x):
100 |         output = self.convs(x)
101 |         return {self._out_features[0]: output}
102 |
103 | @BACKBONE_REGISTRY.register()
104 | class InceptionResNetV2(Backbone):
105 |     def __init__(self, cfg, input_shape):
106 |         super().__init__()
107 |         layers = inceptionresnetv2(num_classes=1000, pretrained='imagenet')
108 |         self.conv2d_1a = layers.conv2d_1a
109 |         self.conv2d_2a = layers.conv2d_2a
110 |         self.conv2d_2b = layers.conv2d_2b
111 |         self.maxpool_3a = layers.maxpool_3a
112 |         self.conv2d_3b = layers.conv2d_3b
113 |         self.conv2d_4a = layers.conv2d_4a
114 |         self.maxpool_5a = layers.maxpool_5a
115 |         self.mixed_5b = layers.mixed_5b
116 |         self.repeat = layers.repeat
117 |         self.mixed_6a = layers.mixed_6a
118 |         self.repeat_1 = layers.repeat_1
119 |         self.scale, self.channels = 16, 1088
120 |         self._out_features = ['block17']
121 |         if cfg.MODEL.BACKBONE.FREEZE_CONVS > 0:
122 |             logging.getLogger("detectron2").warning("FREEZING BACKBONE LAYERS")
123 |             for layer in [self.conv2d_1a, self.conv2d_2a, self.conv2d_2b, self.conv2d_3b, self.conv2d_4a]:
124 |                 for name, param in layer.named_parameters():
125 |                     param.requires_grad = False
126 |
127 |
128 |     def output_shape(self):
129 |         return {
130 |             name: ShapeSpec(
131 |                 channels=self.channels, stride=self.scale
132 |             )
133 |             for name in self._out_features
134 |         }
135 |
136 |     def features(self, x):
137 |         x = self.conv2d_1a(x)
138 |         x = self.conv2d_2a(x)
139 |         x = self.conv2d_2b(x)
140 |         x = self.maxpool_3a(x)
141 |         x = self.conv2d_3b(x)
142 |         x = self.conv2d_4a(x)
143 |         x = self.maxpool_5a(x)
144 |         x = self.mixed_5b(x)
145 |         x = self.repeat(x)
146 |         x = self.mixed_6a(x)
147 |         x = self.repeat_1(x)
148 |         return x
149 |
150 |     def forward(self, x):
151 |         output = self.features(x)
152 |         return {self._out_features[0]: output}
-------------------------------------------------------------------------------- /modeling/backbone/vgg_cnn_f.py: --------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 | from functools import reduce
5 | from torch.nn.modules.normalization import CrossMapLRN2d
6
| import os
7 |
8 | class LambdaBase(nn.Sequential):
9 |     def __init__(self, fn, *args):
10 |         super(LambdaBase, self).__init__(*args)
11 |         self.lambda_func = fn
12 |
13 |     def forward_prepare(self, input):
14 |         output = []
15 |         for module in self._modules.values():
16 |             output.append(module(input))
17 |         return output if output else input
18 |
19 | class Lambda(LambdaBase):
20 |     def forward(self, input):
21 |         return self.lambda_func(self.forward_prepare(input))
22 |
23 | class LambdaMap(LambdaBase):
24 |     def forward(self, input):
25 |         return list(map(self.lambda_func,self.forward_prepare(input)))
26 |
27 | class LambdaReduce(LambdaBase):
28 |     def forward(self, input):
29 |         return reduce(self.lambda_func,self.forward_prepare(input))
30 |
31 | def extract_vgg_cnn_f_components(pretrained=False):
32 |     VGG_CNN_F_torch = nn.Sequential( # Sequential,
33 |         nn.Conv2d(3,64,(11, 11),(4, 4)),
34 |         nn.ReLU(),
35 |         # Lambda(lambda x,lrn=torch.legacy.nn.SpatialCrossMapLRN(*(5, 0.0005, 0.75, 2)): Variable(lrn.forward(x.data))),
36 |         Lambda(lambda x,lrn=CrossMapLRN2d(*(5, 0.0005, 0.75, 2)): lrn.forward(x)),
37 |         nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True),
38 |         nn.Conv2d(64,256,(5, 5),(1, 1),(2, 2)),
39 |         nn.ReLU(),
40 |         # Lambda(lambda x,lrn=torch.legacy.nn.SpatialCrossMapLRN(*(5, 0.0005, 0.75, 2)): Variable(lrn.forward(x.data))),
41 |         Lambda(lambda x,lrn=CrossMapLRN2d(*(5, 0.0005, 0.75, 2)): lrn.forward(x)),
42 |         nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True),
43 |         nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
44 |         nn.ReLU(),
45 |         nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
46 |         nn.ReLU(),
47 |         nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
48 |         nn.ReLU(),
49 |         nn.MaxPool2d((3, 3),(2, 2),(0, 0),ceil_mode=True),
50 |         Lambda(lambda x: x.view(x.size(0),-1)), # View,
51 |         nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(9216,4096)), # Linear,
52 |         nn.ReLU(),
53 |         nn.Dropout(0.5),
54 |         nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,4096)), # Linear,
55 |         nn.ReLU(),
56 |         nn.Dropout(0.5),
57 |         nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,1000)), # Linear,
58 |         nn.Softmax(),
59 |     )
60 |     if pretrained:
61 |         VGG_CNN_F_torch.load_state_dict(torch.load(os.path.abspath(__file__ + '/../../../models/VGG_CNN_F_torch.pth')))
62 |     layers = list(VGG_CNN_F_torch.children())
63 |     convs = nn.Sequential(*layers[:14])
64 |     features = nn.Sequential(*layers[15:22])
65 |     return convs, features
66 |
67 |
68 |
69 |
-------------------------------------------------------------------------------- /modeling/matcher.py: --------------------------------------------------------------------------------
1 | from typing import List
2 | import torch
3 |
4 | from detectron2.layers import nonzero_tuple
5 |
6 | class Matcher(object):
7 |     """
8 |     This class assigns to each predicted "element" (e.g., a box) a ground-truth
9 |     element. Each predicted element will have exactly zero or one matches; each
10 |     ground-truth element may be matched to zero or more predicted elements.
11 |     The matching is determined by the MxN match_quality_matrix, which characterizes
12 |     how well each (ground-truth, prediction) pair matches. For example,
13 |     if the elements are boxes, this matrix may contain box intersection-over-union
14 |     overlap values.
15 |     The matcher returns (a) a vector of length N containing the index of the
16 |     ground-truth element m in [0, M) that matches to prediction n in [0, N).
17 |     (b) a vector of length N containing the labels for each prediction.
18 |     """
19 |
20 |     def __init__(self, thresholds, labels, allow_low_quality_matches=False):
21 |         """
22 |         Args:
23 |             thresholds (list): a list of thresholds used to stratify predictions
24 |                 into levels.
25 |             labels (list): a list of values to label predictions belonging to
26 |                 each level. A label can be one of {-1, 0, 1} signifying
27 |                 {ignore, negative class, positive class}, respectively.
28 |             allow_low_quality_matches (bool): if True, produce additional matches
29 |                 for predictions with maximum match quality lower than high_threshold.
30 |                 See set_low_quality_matches_ for more details.
31 |             For example,
32 |                 thresholds = [0.3, 0.5]
33 |                 labels = [0, -1, 1]
34 |                 All predictions with iou < 0.3 will be marked with 0 and
35 |                 thus will be considered as false positives while training.
36 |                 All predictions with 0.3 <= iou < 0.5 will be marked with -1 and
37 |                 thus will be ignored.
38 |                 All predictions with 0.5 <= iou will be marked with 1 and
39 |                 thus will be considered as true positives.
40 |         """
41 |         # Add -inf and +inf to first and last position in thresholds
42 |         thresholds = thresholds[:]
43 |         assert thresholds[0] > 0
44 |         thresholds.insert(0, -float("inf"))
45 |         thresholds.append(float("inf"))
46 |         # Currently torchscript does not support all + generator
47 |         assert all([low <= high for (low, high) in zip(thresholds[:-1], thresholds[1:])])
48 |         assert all([l in [-1, 0, 1] for l in labels])
49 |         assert len(labels) == len(thresholds) - 1
50 |         self.thresholds = thresholds
51 |         self.labels = labels
52 |         self.allow_low_quality_matches = allow_low_quality_matches
53 |
54 |     def __call__(self, match_quality_matrix):
55 |         """
56 |         Args:
57 |             match_quality_matrix (Tensor[float]): an MxN tensor, containing the
58 |                 pairwise quality between M ground-truth elements and N predicted
59 |                 elements. All elements must be >= 0 (due to the use of `torch.nonzero`
60 |                 for selecting indices in :meth:`set_low_quality_matches_`).
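        Illustrative example: with thresholds=[0.3, 0.5] and labels=[0, -1, 1],
        calling `matches, match_labels, matched_vals = matcher(iou)` on an MxN
        IoU tensor labels predictions whose best IoU is below 0.3 as 0
        (negative), in [0.3, 0.5) as -1 (ignored), and at or above 0.5 as 1
        (positive).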
61 |         Returns:
62 |             matches (Tensor[int64]): a vector of length N, where matches[i] is a matched
63 |                 ground-truth index in [0, M)
64 |             match_labels (Tensor[int8]): a vector of length N, where match_labels[i] indicates
65 |                 whether a prediction is a true or false positive or ignored
66 |         """
67 |         assert match_quality_matrix.dim() == 2
68 |         if match_quality_matrix.numel() == 0:
69 |             default_matches = match_quality_matrix.new_full(
70 |                 (match_quality_matrix.size(1),), 0, dtype=torch.int64
71 |             )
72 |             # When no gt boxes exist, we define IOU = 0 and therefore set labels
73 |             # to `self.labels[0]`, which usually defaults to background class 0
74 |             # To choose to ignore instead, can make labels=[-1,0,-1,1] + set appropriate thresholds
75 |             default_match_labels = match_quality_matrix.new_full(
76 |                 (match_quality_matrix.size(1),), self.labels[0], dtype=torch.int8
77 |             )
78 |
79 |             default_match_vals = match_quality_matrix.new_full(
80 |                 (match_quality_matrix.size(1),), 0, dtype=torch.float32
81 |             )
82 |             return default_matches, default_match_labels, default_match_vals
83 |
84 |         assert torch.all(match_quality_matrix >= 0)
85 |
86 |         # match_quality_matrix is M (gt) x N (predicted)
87 |         # Max over gt elements (dim 0) to find best gt candidate for each prediction
88 |         matched_vals, matches = match_quality_matrix.max(dim=0)
89 |         match_labels = matches.new_full(matches.size(), 1, dtype=torch.int8)
90 |
91 |         for (l, low, high) in zip(self.labels, self.thresholds[:-1], self.thresholds[1:]):
92 |             low_high = (matched_vals >= low) & (matched_vals < high)
93 |             match_labels[low_high] = l
94 |
95 |         if self.allow_low_quality_matches:
96 |             self.set_low_quality_matches_(match_labels, match_quality_matrix)
97 |
98 |         return matches, match_labels, matched_vals
99 |
100 |     def set_low_quality_matches_(self, match_labels, match_quality_matrix):
101 |         """
102 |         Produce additional matches for predictions that have only low-quality matches.
103 |         Specifically, for each ground-truth G find the set of predictions that have
104 |         maximum overlap with it (including ties); for each prediction in that set, if
105 |         it is unmatched, then match it to the ground-truth G.
106 |         This function implements the RPN assignment case (i) in Sec. 3.1.2 of
107 |         :paper:`Faster R-CNN`.
108 |         """
109 |         # For each gt, find the prediction with which it has highest quality
110 |         highest_quality_foreach_gt, _ = match_quality_matrix.max(dim=1)
111 |         # Find the highest quality match available, even if it is low, including ties.
112 |         # Note that the match qualities must be positive due to the use of
113 |         # `torch.nonzero`.
114 |         _, pred_inds_with_highest_quality = nonzero_tuple(
115 |             match_quality_matrix == highest_quality_foreach_gt[:, None]
116 |         )
117 |         # If an anchor was labeled positive only due to a low-quality match
118 |         # with gt_A, but it has larger overlap with gt_B, its matched index will still be gt_B.
119 |         # This follows the implementation in Detectron, and is found to have no significant impact.
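        # Illustrative walk-through: if a gt row of the quality matrix is
        # [0.10, 0.25, 0.25], its maximum 0.25 is attained by predictions 1 and 2
        # (a tie), so both are promoted to positives below even though 0.25 may
        # sit under the positive threshold.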
120 | match_labels[pred_inds_with_highest_quality] = 1 -------------------------------------------------------------------------------- /modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | from .rcnn import * -------------------------------------------------------------------------------- /modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | from .rpn import * -------------------------------------------------------------------------------- /modeling/proposal_generator/rpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from torch import nn 4 | from detectron2.modeling.proposal_generator import PROPOSAL_GENERATOR_REGISTRY, RPN 5 | from typing import Dict, List, Optional, Tuple, Union 6 | import torch 7 | import torch.nn.functional as F 8 | from fvcore.nn import giou_loss, smooth_l1_loss 9 | from torch import nn 10 | 11 | from detectron2.config import configurable 12 | from detectron2.layers import ShapeSpec, cat 13 | from detectron2.structures import Boxes, ImageList, Instances, pairwise_iou 14 | from detectron2.utils.events import get_event_storage 15 | from detectron2.utils.memory import retry_if_cuda_oom 16 | from detectron2.utils.registry import Registry 17 | 18 | @PROPOSAL_GENERATOR_REGISTRY.register() 19 | class WSRPN(RPN): 20 | def forward(self, images, features, gt_instances=None, loss_weights=None): 21 | features = [features[f] for f in self.in_features] 22 | anchors = self.anchor_generator(features) 23 | 24 | pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) 25 | # Transpose the Hi*Wi*A dimension to the middle: 26 | pred_objectness_logits = [ 27 | # (N, A, Hi, Wi) -> (N, Hi, Wi, A) -> (N, Hi*Wi*A) 28 | score.permute(0, 2, 3, 1).flatten(1) 29 | for score in pred_objectness_logits 30 | ] 31 | pred_anchor_deltas = [ 32 | # (N, A*B, Hi, Wi) -> (N, A, B, Hi, Wi) -> (N, Hi, Wi, A, B) -> (N, Hi*Wi*A, B) 33 | x.view(x.shape[0], -1, self.anchor_generator.box_dim, x.shape[-2], x.shape[-1]) 34 | .permute(0, 3, 4, 1, 2) 35 | .flatten(1, -2) 36 | for x in pred_anchor_deltas 37 | ] 38 | 39 | if self.training and gt_instances is not None: 40 | assert gt_instances is not None, "RPN requires gt_instances in training!" 
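# assign each anchor a gt box and an objectness label, then compute the RPN
# losses; when the optional per-image `loss_weights` are given, `self.losses`
# (below) keeps the losses unreduced ("none") so the caller can weight them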
41 | gt_labels, gt_boxes = self.label_and_sample_anchors(anchors, gt_instances) 42 | losses = self.losses( 43 | anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes, loss_weights=loss_weights 44 | ) 45 | else: 46 | losses = {} 47 | if images is not None: 48 | proposals = self.predict_proposals( 49 | anchors, pred_objectness_logits, pred_anchor_deltas, images.image_sizes 50 | ) 51 | else: 52 | proposals = None 53 | return proposals, losses 54 | 55 | @torch.jit.unused 56 | def losses(self, anchors, pred_objectness_logits, gt_labels, pred_anchor_deltas, gt_boxes, loss_weights=None): 57 | num_images = len(gt_labels) 58 | gt_labels = torch.stack(gt_labels) # (N, sum(Hi*Wi*Ai)) 59 | 60 | # Log the number of positive/negative anchors per-image that's used in training 61 | pos_mask = gt_labels == 1 62 | num_pos_anchors = pos_mask.sum().item() 63 | num_neg_anchors = (gt_labels == 0).sum().item() 64 | storage = get_event_storage() 65 | storage.put_scalar("rpn/num_pos_anchors", num_pos_anchors / num_images) 66 | storage.put_scalar("rpn/num_neg_anchors", num_neg_anchors / num_images) 67 | reduction = "sum" if loss_weights is None else "none" 68 | if self.box_reg_loss_type == "smooth_l1": 69 | anchors = type(anchors[0]).cat(anchors).tensor # Ax(4 or 5) 70 | gt_anchor_deltas = [self.box2box_transform.get_deltas(anchors, k) for k in gt_boxes] 71 | gt_anchor_deltas = torch.stack(gt_anchor_deltas) # (N, sum(Hi*Wi*Ai), 4 or 5) 72 | localization_loss = smooth_l1_loss( 73 | cat(pred_anchor_deltas, dim=1)[pos_mask], 74 | gt_anchor_deltas[pos_mask], 75 | self.smooth_l1_beta, 76 | reduction=reduction, 77 | ) 78 | elif self.box_reg_loss_type == "giou": 79 | pred_proposals = self._decode_proposals(anchors, pred_anchor_deltas) 80 | pred_proposals = cat(pred_proposals, dim=1) 81 | pred_proposals = pred_proposals.view(-1, pred_proposals.shape[-1]) 82 | pos_mask = pos_mask.view(-1) 83 | localization_loss = giou_loss( 84 | pred_proposals[pos_mask], cat(gt_boxes)[pos_mask], reduction=reduction 85 | ) 86 | else: 87 | raise ValueError(f"Invalid rpn box reg loss type '{self.box_reg_loss_type}'") 88 | 89 | valid_mask = gt_labels >= 0 90 | objectness_loss = F.binary_cross_entropy_with_logits( 91 | cat(pred_objectness_logits, dim=1)[valid_mask], 92 | gt_labels[valid_mask].to(torch.float32), 93 | reduction=reduction, 94 | ) 95 | normalizer = self.batch_size_per_image * num_images 96 | losses = { 97 | "loss_rpn_cls": objectness_loss / normalizer, 98 | "loss_rpn_loc": localization_loss / normalizer, 99 | } 100 | losses = {k: v * self.loss_weight.get(k, 1.0) for k, v in losses.items()} 101 | return losses -------------------------------------------------------------------------------- /modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_heads import * 2 | from .fast_rcnn import * 3 | from .visual_attention_head import * 4 | from .box_head import * 5 | from .weak_detector_fast_rcnn import * 6 | from .mask_head import * -------------------------------------------------------------------------------- /modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from typing import List 3 | import fvcore.nn.weight_init as weight_init 4 | import torch 5 | from torch import nn 6 | import logging 7 | from detectron2.config import configurable 8 | from detectron2.layers import Conv2d, Linear, ShapeSpec, get_norm 9 | from detectron2.utils.registry import Registry 10 | 
from detectron2.modeling.roi_heads.box_head import ROI_BOX_HEAD_REGISTRY 11 | 12 | from ..backbone import extract_components 13 | from torchvision import models as M, ops 14 | from detectron2.modeling.backbone.resnet import BottleneckBlock, ResNet 15 | from ..backbone.inception_resnet_v2 import inceptionresnetv2 16 | from ..backbone.vgg_cnn_f import extract_vgg_cnn_f_components 17 | 18 | @ROI_BOX_HEAD_REGISTRY.register() 19 | class VGGConvFCHead(nn.Module): 20 | def __init__(self, cfg, input_shape): 21 | super().__init__() 22 | _, fc = extract_components(M.vgg16, pretrained=True) 23 | _output_size = input_shape.channels 24 | for c in fc: 25 | _output_size = c.out_features if isinstance(c, nn.Linear) else _output_size 26 | self.fc = fc 27 | self._output_size = _output_size 28 | 29 | def forward(self, x): 30 | x = x.flatten(1) 31 | return self.fc(x) 32 | 33 | @property 34 | @torch.jit.unused 35 | def output_shape(self): 36 | """ 37 | Returns: 38 | ShapeSpec: the output feature shape 39 | """ 40 | o = self._output_size 41 | if isinstance(o, int): 42 | return ShapeSpec(channels=o) 43 | else: 44 | return ShapeSpec(channels=o[0], height=o[1], width=o[2]) 45 | 46 | @ROI_BOX_HEAD_REGISTRY.register() 47 | class Res5BoxHead(nn.Module): 48 | def __init__(self, cfg, input_shape): 49 | super().__init__() 50 | self.res5, self.out_channels = self._build_res5_block(cfg) 51 | 52 | def _build_res5_block(self, cfg): 53 | # fmt: off 54 | stage_channel_factor = 2 ** 3 # res5 is 8x res2 55 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS 56 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP 57 | bottleneck_channels = num_groups * width_per_group * stage_channel_factor 58 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor 59 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 60 | norm = cfg.MODEL.RESNETS.NORM 61 | assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ 62 | "Deformable conv is not yet supported in res5 head." 63 | # fmt: on 64 | 65 | blocks = ResNet.make_stage( 66 | BottleneckBlock, 67 | 3, 68 | stride_per_block=[2, 1, 1], 69 | in_channels=out_channels // 2, 70 | bottleneck_channels=bottleneck_channels, 71 | out_channels=out_channels, 72 | num_groups=num_groups, 73 | norm=norm, 74 | stride_in_1x1=stride_in_1x1, 75 | ) 76 | return nn.Sequential(*blocks), out_channels 77 | 78 | def forward(self, x): 79 | x = self.res5(x) 80 | return x.mean(dim=[2,3]) 81 | 82 | @property 83 | @torch.jit.unused 84 | def output_shape(self): 85 | """ 86 | Returns: 87 | ShapeSpec: the output feature shape 88 | """ 89 | return ShapeSpec(channels=self.out_channels, height=1, width=1) 90 | 91 | @ROI_BOX_HEAD_REGISTRY.register() 92 | class Res5BoxHeadNOTE(nn.Module): 93 | def __init__(self, cfg, input_shape): 94 | super().__init__() 95 | self.res5, self.out_channels = self._build_res5_block(cfg) 96 | self.out_channels = 1536 # override: the NOTE res5 block below is built with a hardcoded width of 1536, not the config-derived value returned by _build_res5_block 97 | 98 | def _build_res5_block(self, cfg): 99 | # fmt: off 100 | stage_channel_factor = 2 ** 3 # res5 is 8x res2 101 | num_groups = cfg.MODEL.RESNETS.NUM_GROUPS 102 | width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP 103 | bottleneck_channels = num_groups * width_per_group * stage_channel_factor 104 | out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor 105 | stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1 106 | norm = cfg.MODEL.RESNETS.NORM 107 | assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \ 108 | "Deformable conv is not yet supported in res5 head."
109 | # fmt: on 110 | 111 | blocks = ResNet.make_stage( 112 | BottleneckBlock, 113 | 3, 114 | stride_per_block=[2, 1, 1], 115 | in_channels=1088, 116 | bottleneck_channels=bottleneck_channels, 117 | out_channels=1536, 118 | num_groups=num_groups, 119 | norm=norm, 120 | stride_in_1x1=stride_in_1x1, 121 | ) 122 | return nn.Sequential(*blocks), out_channels 123 | 124 | def forward(self, x): 125 | x = self.res5(x) 126 | return x.mean(dim=[2,3]) 127 | 128 | @property 129 | @torch.jit.unused 130 | def output_shape(self): 131 | """ 132 | Returns: 133 | ShapeSpec: the output feature shape 134 | """ 135 | return ShapeSpec(channels=self.out_channels, height=1, width=1) 136 | 137 | @ROI_BOX_HEAD_REGISTRY.register() 138 | class Res5BoxHeadWithMask(Res5BoxHead): 139 | def forward(self, x): 140 | x = self.res5(x) 141 | return x 142 | 143 | @ROI_BOX_HEAD_REGISTRY.register() 144 | class VGGCNNFBoxHead(nn.Module): 145 | def __init__(self, cfg, input_shape): 146 | super().__init__() 147 | _, features = extract_vgg_cnn_f_components(pretrained=True) 148 | self.fc = features 149 | self._output_size = 4096 150 | 151 | def forward(self, x): 152 | x = x.flatten(1) 153 | return self.fc(x) 154 | 155 | @property 156 | @torch.jit.unused 157 | def output_shape(self): 158 | """ 159 | Returns: 160 | ShapeSpec: the output feature shape 161 | """ 162 | o = self._output_size 163 | if isinstance(o, int): 164 | return ShapeSpec(channels=o) 165 | else: 166 | return ShapeSpec(channels=o[0], height=o[1], width=o[2]) 167 | 168 | @ROI_BOX_HEAD_REGISTRY.register() 169 | class InceptionResNetHead(nn.Module): 170 | def __init__(self, cfg, input_shape): 171 | super().__init__() 172 | layers = inceptionresnetv2(num_classes=1000, pretrained='imagenet') 173 | self.mixed_7a = layers.mixed_7a 174 | self.repeat_2 = layers.repeat_2 175 | self.block8 = layers.block8 176 | self.conv2d_7b = layers.conv2d_7b 177 | self.avgpool_1a = layers.avgpool_1a 178 | self._output_size = 1536 179 | self._freeze_layers(cfg.MODEL.FREEZE_LAYERS.BOX_HEAD) 180 | 181 | def _freeze_layers(self, layers): 182 | # Freeze layers 183 | for name, param in self.named_parameters(): 184 | if any(layer == name.split(".")[0] for layer in layers): 185 | logging.getLogger('detectron2').log(logging.WARN, "Frozen layer: {}".format(name)) 186 | param.requires_grad = False 187 | 188 | def forward(self, x): 189 | x = self.mixed_7a(x) 190 | x = self.repeat_2(x) 191 | x = self.block8(x) 192 | x = self.conv2d_7b(x) 193 | x = self.avgpool_1a(x) 194 | return x.flatten(1) 195 | 196 | @property 197 | @torch.jit.unused 198 | def output_shape(self): 199 | """ 200 | Returns: 201 | ShapeSpec: the output feature shape 202 | """ 203 | o = self._output_size 204 | if isinstance(o, int): 205 | return ShapeSpec(channels=o) 206 | else: 207 | return ShapeSpec(channels=o[0], height=o[1], width=o[2]) 208 | -------------------------------------------------------------------------------- /modeling/roi_heads/mask_head.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | import fvcore.nn.weight_init as weight_init 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from detectron2.config import configurable 7 | from detectron2.layers import Conv2d, ConvTranspose2d, ShapeSpec, cat, get_norm 8 | from detectron2.structures import Instances 9 | from detectron2.utils.events import get_event_storage 10 | from detectron2.utils.registry import Registry 11 | from detectron2.modeling.roi_heads.mask_head import
ROI_MASK_HEAD_REGISTRY, MaskRCNNConvUpsampleHead, mask_rcnn_inference, mask_rcnn_loss 12 | import logging 13 | 14 | @ROI_MASK_HEAD_REGISTRY.register() 15 | class MaskRCNNConvUpsampleHeadWithSimilarity(MaskRCNNConvUpsampleHead): 16 | def forward(self, x, instances, similarity=None, base_classes=None, novel_classes=None): 17 | x = self.layers(x) 18 | if similarity is not None: 19 | if x.numel() > 0: 20 | similarity_mask = similarity['seg'] 21 | mask_base = x.index_select(1, index=base_classes) 22 | mask_base_reshaped = mask_base.view(*mask_base.size()[:2], -1) 23 | if len(similarity_mask.size()) > 2: 24 | mask_combination = torch.bmm(similarity_mask, mask_base_reshaped) 25 | else: 26 | mask_combination = torch.matmul(mask_base_reshaped.transpose(1,2), similarity_mask.transpose(0,1)).transpose(1,2) 27 | mask_novel = mask_combination.view(mask_base.size(0), -1, *mask_base.size()[2:]) 28 | mask_final = torch.zeros_like(x) 29 | mask_final = mask_final.index_copy(1, novel_classes, mask_novel) 30 | mask_final = mask_final.index_copy(1, base_classes, mask_base) 31 | x = mask_final 32 | if self.training: 33 | assert not torch.jit.is_scripting() 34 | return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period)} 35 | else: 36 | mask_rcnn_inference(x, instances) 37 | return instances 38 | 39 | @ROI_MASK_HEAD_REGISTRY.register() 40 | class MaskRCNNConvUpsampleHeadWithFineTune(MaskRCNNConvUpsampleHead): 41 | @configurable 42 | def __init__(self, input_shape, *, num_classes, conv_dims, conv_norm="", **kwargs): 43 | freeze_layers = kwargs.pop('freeze_layers') 44 | # popped so the base class __init__ does not receive an unexpected kwarg 45 | super().__init__(input_shape, num_classes=num_classes, conv_dims=conv_dims, conv_norm=conv_norm, **kwargs) 46 | self.predictor_delta = Conv2d(self.predictor.in_channels, num_classes, kernel_size=1, stride=1, padding=0) 47 | nn.init.constant_(self.predictor_delta.weight, 0.) 48 | if self.predictor_delta.bias is not None: 49 | nn.init.constant_(self.predictor_delta.bias, 0.)
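# Because predictor_delta is zero-initialized, fine-tuning starts out reproducing the base
# predictor's output exactly (x_fixed + 0); the delta branch then learns a residual correction
# on top of the (optionally similarity-transferred) base mask logits.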
50 | self._freeze_layers(freeze_layers) 51 | 52 | @classmethod 53 | def from_config(cls, cfg, input_shape): 54 | ret = super().from_config(cfg, input_shape) 55 | ret['freeze_layers'] = cfg.MODEL.FREEZE_LAYERS.MASK_HEAD 56 | return ret 57 | 58 | def _freeze_layers(self, layers): 59 | # Freeze layers 60 | for name, param in self.named_parameters(): 61 | if any(layer == name.split(".")[0] for layer in layers): 62 | logging.getLogger('detectron2').log(logging.WARN, "Frozen layer: {}".format(name)) 63 | param.requires_grad = False 64 | 65 | def layers(self, x): 66 | x = self.deconv(x) 67 | x = self.deconv_relu(x) 68 | x_fixed = self.predictor(x) 69 | x_delta = self.predictor_delta(x) 70 | return x_fixed, x_delta 71 | 72 | def forward(self, x, instances, similarity=None, base_classes=None, novel_classes=None): 73 | x, x_delta = self.layers(x) 74 | if similarity is not None: 75 | if x.numel() > 0: 76 | similarity_mask = similarity['seg'] 77 | mask_base = x.index_select(1, index=base_classes) 78 | mask_base_reshaped = mask_base.view(*mask_base.size()[:2], -1) 79 | if len(similarity_mask.size()) > 2: 80 | mask_combination = torch.bmm(similarity_mask, mask_base_reshaped) 81 | else: 82 | mask_combination = torch.matmul(mask_base_reshaped.transpose(1,2), similarity_mask.transpose(0,1)).transpose(1,2) 83 | mask_novel = mask_combination.view(mask_base.size(0), -1, *mask_base.size()[2:]) 84 | mask_final = torch.zeros_like(x) 85 | mask_final = mask_final.index_copy(1, novel_classes, mask_novel) 86 | mask_final = mask_final.index_copy(1, base_classes, mask_base) 87 | x = mask_final 88 | x = x + x_delta 89 | if self.training: 90 | assert not torch.jit.is_scripting() 91 | return {"loss_mask": mask_rcnn_loss(x, instances, self.vis_period)} 92 | else: 93 | mask_rcnn_inference(x, instances) 94 | return instances -------------------------------------------------------------------------------- /modeling/roi_heads/pcl_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from torch.nn import functional as F 5 | 6 | class PCLFunction(torch.autograd.Function): 7 | @staticmethod 8 | def forward(ctx, pcl_probs, labels, cls_weights, 9 | gt_assignment, pc_labels, pc_probs, pc_count, 10 | img_cls_weights, im_labels): 11 | ctx.pcl_probs = pcl_probs 12 | ctx.labels = labels 13 | ctx.cls_weights = cls_weights 14 | ctx.gt_assignment = gt_assignment 15 | ctx.pc_labels = pc_labels 16 | ctx.pc_probs = pc_probs 17 | ctx.pc_count = pc_count 18 | ctx.img_cls_weights = img_cls_weights 19 | ctx.im_labels = im_labels 20 | 21 | batch_size, channels = pcl_probs.size() 22 | loss = 0 23 | ctx.mark_non_differentiable(labels, cls_weights, 24 | gt_assignment, pc_labels, pc_probs, 25 | pc_count, img_cls_weights, im_labels) 26 | for im_label in im_labels: 27 | if im_label == (channels - 1): 28 | labels_mask = (labels == im_label).nonzero()[:,0] 29 | loss -= (cls_weights[labels_mask] * pcl_probs.index_select(1, im_label).squeeze(-1)[labels_mask].log()).sum() 30 | else: 31 | labels_mask = (pc_labels == im_label).nonzero()[:,0] 32 | loss -= (img_cls_weights[labels_mask] * pc_probs[labels_mask].log()).sum() 33 | return loss / batch_size 34 | 35 | @staticmethod 36 | def backward(ctx, grad_output): 37 | pcl_probs = ctx.pcl_probs 38 | labels = ctx.labels 39 | cls_weights = ctx.cls_weights 40 | gt_assignment = ctx.gt_assignment 41 | pc_labels = ctx.pc_labels 42 | pc_probs = ctx.pc_probs 43 | pc_count = ctx.pc_count 44 | img_cls_weights =
ctx.img_cls_weights 45 | im_labels = ctx.im_labels 46 | 47 | grad_input = grad_output.new(pcl_probs.size()).zero_() 48 | 49 | batch_size, channels = pcl_probs.size() 50 | 51 | for im_label in im_labels: 52 | labels_mask = (labels == im_label) 53 | if im_label == (channels - 1): 54 | grad_input[labels_mask, im_label] = -cls_weights[labels_mask]/pcl_probs[labels_mask, im_label] 55 | else: 56 | pc_index = gt_assignment[labels_mask] 57 | if (im_label != pc_labels[pc_index]).all(): 58 | print ("Labels Mismatch.") 59 | grad_input[labels_mask, im_label] = -img_cls_weights[pc_index] / (pc_count[pc_index] * pc_probs[pc_index]) 60 | 61 | grad_input /= batch_size 62 | return grad_input, None, None, None, None, None, None, None, None -------------------------------------------------------------------------------- /models/MODELS.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ubc-vision/UniT/87ace8710ea7e84f104163a9adc3d3fed24fdcda/models/MODELS.md -------------------------------------------------------------------------------- /scripts/finetune_VOC.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import numpy as np 4 | import torch 5 | sys.path.insert(0, '../../') 6 | sys.path.insert(0, '../') 7 | 8 | import detectron2.utils.comm as comm 9 | from detectron2.utils.logger import setup_logger 10 | from detectron2.engine import default_argument_parser, default_setup, launch 11 | from detectron2.config import get_cfg 12 | from detectron2.checkpoint import DetectionCheckpointer 13 | from detectron2.data.datasets.pascal_voc import register_pascal_voc 14 | from UniT.engine import TrainerFineTune 15 | from UniT.data import register_datasets 16 | from UniT.configs import add_config 17 | from detectron2.engine import DefaultTrainer 18 | 19 | parser = default_argument_parser() 20 | 21 | 22 | def register_voc_data(args): 23 | register_pascal_voc("pascal_trainval_2007", args.DATASETS.CLASSIFIER_DATAROOT + 'VOC2007/', 'trainval', 2007) 24 | register_pascal_voc("pascal_trainval_2012", args.DATASETS.CLASSIFIER_DATAROOT + 'VOC2012/', 'trainval', 2012) 25 | register_pascal_voc("pascal_test_2007", args.DATASETS.CLASSIFIER_DATAROOT + 'VOC2007/', 'test', 2007) 26 | 27 | def setup(args): 28 | cfg = get_cfg() 29 | add_config(cfg) 30 | cfg.merge_from_file(args.config_file) 31 | cfg.merge_from_list(args.opts) 32 | cfg.freeze() 33 | register_voc_data(cfg) 34 | register_datasets(args_data=cfg) 35 | default_setup(cfg, args) 36 | setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="UniT") 37 | return cfg 38 | 39 | def main(args): 40 | cfg = setup(args) 41 | if args.eval_only: 42 | model = TrainerFineTune.build_model(cfg) 43 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 44 | cfg.MODEL.WEIGHTS, resume=args.resume 45 | ) 46 | res = TrainerFineTune.test(cfg, model) 47 | return res 48 | 49 | trainer = TrainerFineTune(cfg) 50 | trainer.resume_or_load(resume=args.resume) 51 | return trainer.train() 52 | 53 | 54 | if __name__ == '__main__': 55 | args = parser.parse_args() 56 | try: 57 | # use the last 4 numbers in the job id as the id 58 | default_port = os.environ['SLURM_JOB_ID'] 59 | default_port = default_port[-4:] 60 | 61 | # all ports should be in the 10k+ range 62 | default_port = int(default_port) + 15000 63 | 64 | except Exception: 65 | default_port = 59482 66 | 67 | args.dist_url = 'tcp://127.0.0.1:'+str(default_port) 68 | print (args) 69 | 70 
| launch( 71 | main, 72 | args.num_gpus, 73 | num_machines=args.num_machines, 74 | machine_rank=args.machine_rank, 75 | dist_url=args.dist_url, 76 | args=(args,), 77 | ) -------------------------------------------------------------------------------- /scripts/train_VOC.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import numpy as np 4 | import torch 5 | sys.path.insert(0, '../../') 6 | sys.path.insert(0, '../') 7 | 8 | import detectron2.utils.comm as comm 9 | from detectron2.utils.logger import setup_logger 10 | from detectron2.engine import default_argument_parser, default_setup, launch 11 | from detectron2.config import get_cfg 12 | from detectron2.checkpoint import DetectionCheckpointer 13 | from detectron2.data.datasets.pascal_voc import register_pascal_voc 14 | from UniT.engine import TrainerNoMeta 15 | from UniT.data import register_datasets 16 | from UniT.configs import add_config 17 | from detectron2.engine import DefaultTrainer 18 | 19 | parser = default_argument_parser() 20 | 21 | 22 | def register_voc_data(args): 23 | register_pascal_voc("pascal_trainval_2007", args.DATASETS.CLASSIFIER_DATAROOT + 'VOC2007/', 'trainval', 2007) 24 | register_pascal_voc("pascal_trainval_2012", args.DATASETS.CLASSIFIER_DATAROOT + 'VOC2012/', 'trainval', 2012) 25 | register_pascal_voc("pascal_test_2007", args.DATASETS.CLASSIFIER_DATAROOT + 'VOC2007/', 'test', 2007) 26 | 27 | def setup(args): 28 | cfg = get_cfg() 29 | add_config(cfg) 30 | cfg.merge_from_file(args.config_file) 31 | cfg.merge_from_list(args.opts) 32 | cfg.freeze() 33 | register_voc_data(cfg) 34 | register_datasets(args_data=cfg) 35 | default_setup(cfg, args) 36 | setup_logger(output=cfg.OUTPUT_DIR, distributed_rank=comm.get_rank(), name="UniT") 37 | return cfg 38 | 39 | def main(args): 40 | cfg = setup(args) 41 | if args.eval_only: 42 | model = TrainerNoMeta.build_model(cfg) 43 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 44 | cfg.MODEL.WEIGHTS, resume=args.resume 45 | ) 46 | res = TrainerNoMeta.test(cfg, model) 47 | return res 48 | 49 | trainer = TrainerNoMeta(cfg) 50 | trainer.resume_or_load(resume=args.resume) 51 | return trainer.train() 52 | 53 | 54 | if __name__ == '__main__': 55 | args = parser.parse_args() 56 | try: 57 | # use the last 4 numbers in the job id as the id 58 | default_port = os.environ['SLURM_JOB_ID'] 59 | default_port = default_port[-4:] 60 | 61 | # all ports should be in the 10k+ range 62 | default_port = int(default_port) + 15000 63 | 64 | except Exception: 65 | default_port = 59482 66 | 67 | args.dist_url = 'tcp://127.0.0.1:'+str(default_port) 68 | print (args) 69 | 70 | launch( 71 | main, 72 | args.num_gpus, 73 | num_machines=args.num_machines, 74 | machine_rank=args.machine_rank, 75 | dist_url=args.dist_url, 76 | args=(args,), 77 | ) -------------------------------------------------------------------------------- /solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import * -------------------------------------------------------------------------------- /solver/build.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from typing import Any, Callable, Dict, Iterable, List, Set, Type, Union 3 | import torch 4 | 5 | from detectron2.config import CfgNode 6 | 7 | from detectron2.solver.lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 8 | from detectron2.solver.build import 
maybe_add_gradient_clipping 9 | import logging 10 | 11 | def build_optimizer(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: 12 | """ 13 | Build an optimizer from config. 14 | """ 15 | norm_module_types = ( 16 | torch.nn.BatchNorm1d, 17 | torch.nn.BatchNorm2d, 18 | torch.nn.BatchNorm3d, 19 | torch.nn.SyncBatchNorm, 20 | # NaiveSyncBatchNorm inherits from BatchNorm2d 21 | torch.nn.GroupNorm, 22 | torch.nn.InstanceNorm1d, 23 | torch.nn.InstanceNorm2d, 24 | torch.nn.InstanceNorm3d, 25 | torch.nn.LayerNorm, 26 | torch.nn.LocalResponseNorm, 27 | ) 28 | params: List[Dict[str, Any]] = [] 29 | memo: Set[torch.nn.parameter.Parameter] = set() 30 | for module_name, module in model.named_modules(): 31 | for key, value in module.named_parameters(recurse=False): 32 | if not value.requires_grad: 33 | continue 34 | # Avoid duplicating parameters 35 | if value in memo: 36 | continue 37 | memo.add(value) 38 | lr = cfg.SOLVER.BASE_LR 39 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 40 | if isinstance(module, norm_module_types): 41 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM 42 | elif key == "bias": 43 | # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 44 | # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer 45 | # hyperparameters are by default exactly the same as for regular 46 | # weights. 47 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 48 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 49 | if 'oicr_predictors' in module_name or 'regression_branch' in module_name: 50 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.REFINEMENT_LR_FACTOR)) 51 | lr = lr * cfg.SOLVER.REFINEMENT_LR_FACTOR 52 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 53 | 54 | optimizer = torch.optim.SGD( 55 | params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV 56 | ) 57 | optimizer = maybe_add_gradient_clipping(cfg, optimizer) 58 | return optimizer 59 | 60 | 61 | def build_optimizer_C4(cfg: CfgNode, model: torch.nn.Module) -> torch.optim.Optimizer: 62 | """ 63 | Build an optimizer from config. 64 | """ 65 | norm_module_types = ( 66 | torch.nn.BatchNorm1d, 67 | torch.nn.BatchNorm2d, 68 | torch.nn.BatchNorm3d, 69 | torch.nn.SyncBatchNorm, 70 | # NaiveSyncBatchNorm inherits from BatchNorm2d 71 | torch.nn.GroupNorm, 72 | torch.nn.InstanceNorm1d, 73 | torch.nn.InstanceNorm2d, 74 | torch.nn.InstanceNorm3d, 75 | torch.nn.LayerNorm, 76 | torch.nn.LocalResponseNorm, 77 | ) 78 | params: List[Dict[str, Any]] = [] 79 | memo: Set[torch.nn.parameter.Parameter] = set() 80 | for module_name, module in model.named_modules(): 81 | for key, value in module.named_parameters(recurse=False): 82 | if not value.requires_grad: 83 | continue 84 | # Avoid duplicating parameters 85 | if value in memo: 86 | continue 87 | memo.add(value) 88 | lr = cfg.SOLVER.BASE_LR 89 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 90 | if isinstance(module, norm_module_types): 91 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM 92 | elif key == "bias": 93 | # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 94 | # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer 95 | # hyperparameters are by default exactly the same as for regular 96 | # weights. 
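# The per-module factors below compose multiplicatively with the (possibly bias-scaled) LR,
# e.g. a bias parameter inside an 'oicr_predictors' module ends up with
# BASE_LR * BIAS_LR_FACTOR * REFINEMENT_LR_FACTOR.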
97 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 98 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 99 | if 'oicr_predictors' in module_name or 'regression_branch' in module_name: 100 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.REFINEMENT_LR_FACTOR)) 101 | lr = lr * cfg.SOLVER.REFINEMENT_LR_FACTOR 102 | if 'classifier_stream' in module_name or 'detection_stream' in module_name: 103 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.MIL_LR_FACTOR)) 104 | lr = lr * cfg.SOLVER.MIL_LR_FACTOR 105 | if 'cls_score_delta' in module_name or 'bbox_pred_delta' in module_name: 106 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.DELTA_LR_FACTOR)) 107 | lr = lr * cfg.SOLVER.DELTA_LR_FACTOR 108 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 109 | 110 | optimizer = torch.optim.SGD( 111 | params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV 112 | ) 113 | optimizer = maybe_add_gradient_clipping(cfg, optimizer) 114 | return optimizer 115 | 116 | 117 | def modify_optimizer_C4(cfg, model, train_only_weak=False, freezed_params=None): 118 | """Rebuild the optimizer from config, optionally freezing every non-weak ROI-head parameter.""" 119 | # Avoid a mutable default argument: a shared default list would accumulate module names across calls 120 | freezed_params = [] if freezed_params is None else freezed_params 121 | norm_module_types = ( 122 | torch.nn.BatchNorm1d, 123 | torch.nn.BatchNorm2d, 124 | torch.nn.BatchNorm3d, 125 | torch.nn.SyncBatchNorm, 126 | # NaiveSyncBatchNorm inherits from BatchNorm2d 127 | torch.nn.GroupNorm, 128 | torch.nn.InstanceNorm1d, 129 | torch.nn.InstanceNorm2d, 130 | torch.nn.InstanceNorm3d, 131 | torch.nn.LayerNorm, 132 | torch.nn.LocalResponseNorm, 133 | ) 134 | params: List[Dict[str, Any]] = [] 135 | memo: Set[torch.nn.parameter.Parameter] = set() 136 | multi_box_head = cfg.MODEL.ROI_HEADS.MULTI_BOX_HEAD 137 | for module_name, module in model.named_modules(): 138 | for key, value in module.named_parameters(recurse=False): 139 | if not value.requires_grad: 140 | if module_name not in freezed_params: 141 | continue 142 | 143 | # Avoid duplicating parameters 144 | if value in memo: 145 | continue 146 | memo.add(value) 147 | lr = cfg.SOLVER.BASE_LR 148 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 149 | if isinstance(module, norm_module_types): 150 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_NORM 151 | elif key == "bias": 152 | # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 153 | # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer 154 | # hyperparameters are by default exactly the same as for regular 155 | # weights.
156 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 157 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 158 | if 'oicr_predictors' in module_name or 'regression_branch' in module_name: 159 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.REFINEMENT_LR_FACTOR)) 160 | lr = lr * cfg.SOLVER.REFINEMENT_LR_FACTOR 161 | if 'classifier_stream' in module_name or 'detection_stream' in module_name: 162 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.MIL_LR_FACTOR)) 163 | lr = lr * cfg.SOLVER.MIL_LR_FACTOR 164 | if 'cls_score_delta' in module_name or 'bbox_pred_delta' in module_name: 165 | logging.getLogger('detectron2').log(logging.INFO, "Setting learning rate of {} to {}".format(module_name, lr * cfg.SOLVER.DELTA_LR_FACTOR)) 166 | lr = lr * cfg.SOLVER.DELTA_LR_FACTOR 167 | if train_only_weak: 168 | if 'roi_heads' in module_name: 169 | if 'weak' not in module_name: 170 | if 'box_head' in module_name: 171 | if multi_box_head: 172 | value.requires_grad = False 173 | freezed_params.append(module_name) 174 | continue 175 | else: 176 | value.requires_grad = False 177 | freezed_params.append(module_name) 178 | continue 179 | else: 180 | value.requires_grad = True 181 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 182 | optimizer = torch.optim.SGD( 183 | params, cfg.SOLVER.BASE_LR, momentum=cfg.SOLVER.MOMENTUM, nesterov=cfg.SOLVER.NESTEROV 184 | ) 185 | optimizer = maybe_add_gradient_clipping(cfg, optimizer) 186 | return optimizer, freezed_params -------------------------------------------------------------------------------- /solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | from collections.abc import Iterable 2 | from math import log, cos, pi, floor 3 | 4 | from torch.optim.lr_scheduler import _LRScheduler 5 | 6 | class CyclicCosineDecayLR(_LRScheduler): 7 | def __init__(self, 8 | optimizer, 9 | init_interval, 10 | min_lr, 11 | restart_multiplier=None, 12 | restart_interval=None, 13 | restart_lr=None, 14 | last_epoch=-1): 15 | """ 16 | Initialize a new CyclicCosineDecayLR object. 17 | :param optimizer: (Optimizer) - Wrapped optimizer. 18 | :param init_interval: (int) - Initial decay cycle interval. 19 | :param min_lr: (float or iterable of floats) - Minimal learning rate. 20 | :param restart_multiplier: (float) - Multiplication coefficient for increasing cycle intervals, 21 | if this parameter is set, restart_interval must be None. 22 | :param restart_interval: (int) - Restart interval for fixed cycle intervals, 23 | if this parameter is set, restart_multiplier must be None. 24 | :param restart_lr: (float or iterable of floats) - Optional, the learning rate at cycle restarts, 25 | if not provided, initial learning rate will be used. 26 | :param last_epoch: (int) - Last epoch.
27 | """ 28 | 29 | if restart_interval is not None and restart_multiplier is not None: 30 | raise ValueError("You can either set restart_interval or restart_multiplier but not both") 31 | 32 | if isinstance(min_lr, Iterable) and len(min_lr) != len(optimizer.param_groups): 33 | raise ValueError("Expected len(min_lr) to be equal to len(optimizer.param_groups), " 34 | "got {} and {} instead".format(len(min_lr), len(optimizer.param_groups))) 35 | 36 | if isinstance(restart_lr, Iterable) and len(restart_lr) != len(optimizer.param_groups): 37 | raise ValueError("Expected len(restart_lr) to be equal to len(optimizer.param_groups), " 38 | "got {} and {} instead".format(len(restart_lr), len(optimizer.param_groups))) 39 | 40 | if init_interval <= 0: 41 | raise ValueError("init_interval must be a positive number, got {} instead".format(init_interval)) 42 | 43 | group_num = len(optimizer.param_groups) 44 | self._init_interval = init_interval 45 | self._min_lr = [min_lr] * group_num if isinstance(min_lr, float) else min_lr 46 | self._restart_lr = [restart_lr] * group_num if isinstance(restart_lr, float) else restart_lr 47 | self._restart_interval = restart_interval 48 | self._restart_multiplier = restart_multiplier 49 | super(CyclicCosineDecayLR, self).__init__(optimizer, last_epoch) 50 | 51 | def get_lr(self): 52 | if self.last_epoch < self._init_interval: 53 | return self._calc(self.last_epoch, 54 | self._init_interval, 55 | self.base_lrs) 56 | 57 | elif self._restart_interval is not None: 58 | cycle_epoch = (self.last_epoch - self._init_interval) % self._restart_interval 59 | lrs = self.base_lrs if self._restart_lr is None else self._restart_lr 60 | return self._calc(cycle_epoch, 61 | self._restart_interval, 62 | lrs) 63 | 64 | elif self._restart_multiplier is not None: 65 | n = self._get_n(self.last_epoch) 66 | sn_prev = self._partial_sum(n) 67 | cycle_epoch = self.last_epoch - sn_prev 68 | interval = self._init_interval * self._restart_multiplier ** n 69 | lrs = self.base_lrs if self._restart_lr is None else self._restart_lr 70 | return self._calc(cycle_epoch, 71 | interval, 72 | lrs) 73 | else: 74 | return self._min_lr 75 | 76 | def _calc(self, t, T, lrs): 77 | return [min_lr + (lr - min_lr) * (1 + cos(pi * t / T)) / 2 78 | for lr, min_lr in zip(lrs, self._min_lr)] 79 | 80 | def _get_n(self, epoch): 81 | a = self._init_interval 82 | r = self._restart_multiplier 83 | _t = 1 - (1 - r) * epoch / a 84 | return floor(log(_t, r)) 85 | 86 | def _partial_sum(self, n): 87 | a = self._init_interval 88 | r = self._restart_multiplier 89 | return a * (1 - r ** n) / (1 - r) --------------------------------------------------------------------------------