├── .gitignore ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── adet ├── __init__.py ├── checkpoint │ ├── __init__.py │ └── adet_checkpoint.py ├── config │ ├── __init__.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── augmentation.py │ ├── builtin.py │ ├── dataset_mapper.py │ ├── datasets │ │ └── text.py │ ├── detection_utils.py │ └── fcpose_dataset_mapper.py ├── evaluation │ ├── __init__.py │ ├── rrc_evaluation_funcs.py │ ├── rrc_evaluation_funcs_ic15.py │ ├── text_eval_script.py │ ├── text_eval_script_ic15.py │ └── text_evaluation_all.py ├── layers │ ├── __init__.py │ ├── bezier_align.py │ ├── conv_with_kaiming_uniform.py │ ├── csrc │ │ ├── BezierAlign │ │ │ ├── BezierAlign.h │ │ │ ├── BezierAlign_cpu.cpp │ │ │ └── BezierAlign_cuda.cu │ │ ├── DefROIAlign │ │ │ ├── DefROIAlign.h │ │ │ └── DefROIAlign_cuda.cu │ │ ├── cuda_version.cu │ │ ├── ml_nms │ │ │ ├── ml_nms.cu │ │ │ └── ml_nms.h │ │ └── vision.cpp │ ├── def_roi_align.py │ ├── deform_conv.py │ ├── gcn.py │ ├── iou_loss.py │ ├── ml_nms.py │ └── naive_group_norm.py ├── modeling │ ├── MEInst │ │ ├── LME │ │ │ ├── MaskLoader.py │ │ │ ├── __init__.py │ │ │ ├── mask_evaluation.py │ │ │ ├── mask_generation.py │ │ │ └── utils.py │ │ ├── MEInst.py │ │ ├── MEInst_outputs.py │ │ ├── MaskEncoding.py │ │ └── __init__.py │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── bifpn.py │ │ ├── dla.py │ │ ├── fpn.py │ │ ├── lpf.py │ │ ├── mobilenet.py │ │ ├── resnet_interval.py │ │ ├── resnet_lpf.py │ │ └── vovnet.py │ ├── batext │ │ ├── __init__.py │ │ ├── batext.py │ │ └── batext_outputs.py │ ├── blendmask │ │ ├── __init__.py │ │ ├── basis_module.py │ │ ├── blender.py │ │ └── blendmask.py │ ├── condinst │ │ ├── __init__.py │ │ ├── condinst.py │ │ ├── dynamic_mask_head.py │ │ └── mask_branch.py │ ├── fcos │ │ ├── __init__.py │ │ ├── fcos.py │ │ └── fcos_outputs.py │ ├── fcpose │ │ ├── __init__.py │ │ ├── basis_module.py │ │ ├── fcpose_framework.py │ │ ├── fcpose_head.py │ │ └── utils.py │ ├── one_stage_detector.py │ ├── poolers.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── attn_predictor.py │ │ └── text_head.py │ └── solov2 │ │ ├── __init__.py │ │ ├── loss.py │ │ ├── solov2.py │ │ └── utils.py ├── structures │ ├── __init__.py │ └── beziers.py └── utils │ ├── __init__.py │ ├── comm.py │ ├── measures.py │ └── visualizer.py ├── configs ├── BAText │ ├── Base-BAText.yaml │ ├── CTW1500 │ │ ├── Base-CTW1500.yaml │ │ ├── attn_R_50.yaml │ │ └── v2_attn_R_50.yaml │ ├── ICDAR2015 │ │ ├── Base-ic15.yaml │ │ ├── v1_attn_R_50.yaml │ │ └── v2_attn_R_50.yaml │ ├── Pretrain │ │ ├── Base-Chn-Pretrain.yaml │ │ ├── Base-Pretrain-ic15.yaml │ │ ├── Base-Pretrain.yaml │ │ ├── attn_R_50.yaml │ │ ├── v1_ic15_attn_R_50.yaml │ │ ├── v2_attn_R_50.yaml │ │ ├── v2_chn_attn_R_50.yaml │ │ └── v2_ic15_attn_R_50.yaml │ ├── README.md │ ├── ReCTS │ │ ├── Base-ReCTS.yaml │ │ └── v2_chn_attn_R_50.yaml │ └── TotalText │ │ ├── Base-TotalText.yaml │ │ ├── attn_R_50.yaml │ │ └── v2_attn_R_50.yaml ├── BlendMask │ ├── 550_R_50_1x.yaml │ ├── 550_R_50_3x.yaml │ ├── 550_R_50_dcni3_5x.yaml │ ├── Base-550.yaml │ ├── Base-BlendMask.yaml │ ├── Base-RT.yaml │ ├── DLA_34_syncbn_4x.yaml │ ├── Panoptic │ │ ├── Base-Panoptic.yaml │ │ ├── R_101_3x.yaml │ │ ├── R_101_dcni3_5x.yaml │ │ ├── R_50_1x.yaml │ │ ├── R_50_3x.yaml │ │ └── R_50_dcni3_5x.yaml │ ├── Person │ │ ├── Base-Person.yaml │ │ └── R_50_1x.yaml │ ├── README.md │ ├── RT_R_50_4x_bn-head_syncbn_shtw.yaml │ ├── RT_R_50_4x_syncbn_shtw.yaml │ ├── R_101_3x.yaml │ ├── R_101_dcni3_5x.yaml │ ├── R_50_1x.yaml │ └── R_50_3x.yaml 
├── BoxInst │ ├── Base-BoxInst.yaml │ ├── MS_R_101_1x.yaml │ ├── MS_R_101_3x.yaml │ ├── MS_R_101_BiFPN_3x.yaml │ ├── MS_R_101_BiFPN_dcni3_3x.yaml │ ├── MS_R_50_1x.yaml │ ├── MS_R_50_3x.yaml │ ├── MS_R_50_BiFPN_1x.yaml │ ├── MS_R_50_BiFPN_3x.yaml │ └── README.md ├── CondInst │ ├── Base-CondInst.yaml │ ├── MS_R_101_1x.yaml │ ├── MS_R_101_3x.yaml │ ├── MS_R_101_3x_sem.yaml │ ├── MS_R_101_BiFPN_3x.yaml │ ├── MS_R_101_BiFPN_3x_sem.yaml │ ├── MS_R_50_1x.yaml │ ├── MS_R_50_3x.yaml │ ├── MS_R_50_3x_sem.yaml │ ├── MS_R_50_BiFPN_1x.yaml │ ├── MS_R_50_BiFPN_3x.yaml │ ├── MS_R_50_BiFPN_3x_sem.yaml │ └── README.md ├── DenseCL │ ├── FCOS_R50_1x.yaml │ ├── FCOS_R50_1x_DenseCL.yaml │ ├── README.md │ ├── SOLOv2_R50_1x.yaml │ └── SOLOv2_R50_1x_DenseCL.yaml ├── FCOS-Detection │ ├── Base-FCOS.yaml │ ├── FCOS_RT │ │ ├── MS_DLA_34_4x_syncbn.yaml │ │ ├── MS_DLA_34_4x_syncbn_bn_head.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml │ │ ├── MS_R_50_4x_syncbn.yaml │ │ └── MS_R_50_4x_syncbn_bn_head.yaml │ ├── MS_R_101_2x.yaml │ ├── MS_R_101_2x_iou.yaml │ ├── MS_R_50_2x.yaml │ ├── MS_R_50_2x_iou.yaml │ ├── MS_X_101_32x8d_2x.yaml │ ├── MS_X_101_32x8d_2x_dcnv2.yaml │ ├── MS_X_101_32x8d_2x_dcnv2_iou.yaml │ ├── MS_X_101_32x8d_2x_iou.yaml │ ├── MS_X_101_64x4d_2x.yaml │ ├── MS_X_101_64x4d_2x_dcnv2.yaml │ ├── README.md │ ├── R_50_1x.yaml │ ├── R_50_1x_iou.yaml │ └── vovnet │ │ ├── MS_V_39_3x.yaml │ │ ├── MS_V_57_3x.yaml │ │ ├── MS_V_99_3x.yaml │ │ └── README.md ├── FCPose │ ├── Base-FCPose.yaml │ ├── README.md │ ├── R_101_3X.yaml │ └── R_50_3X.yaml ├── MEInst-InstanceSegmentation │ ├── Base-MEInst.yaml │ ├── MEInst_R_50_1x.yaml │ ├── MEInst_R_50_1x_none.yaml │ ├── MEInst_R_50_3x.yaml │ ├── MEInst_R_50_3x_512.yaml │ └── README.md ├── RCNN │ ├── 550_R_50_FPN_3x.yaml │ ├── Base-RCNN.yaml │ ├── LVIS │ │ └── R_50_1x.yaml │ └── R_101_3x.yaml └── SOLOv2 │ ├── Base-SOLOv2.yaml │ ├── R101_3x.yaml │ ├── R50_3x.yaml │ └── README.md ├── datasets ├── README.md ├── gen_coco_person.py ├── prepare_thing_sem_from_instance.py └── prepare_thing_sem_from_lvis.py ├── demo ├── demo.py └── predictor.py ├── docker └── Dockerfile ├── docs ├── .gitignore ├── Makefile ├── adel-logo.svg ├── conf.py ├── index.rst ├── modules │ ├── checkpoint.rst │ ├── config.rst │ ├── data.rst │ ├── index.rst │ ├── layers.rst │ ├── modeling.rst │ └── utils.rst └── requirements.txt ├── onnx ├── .gitignore ├── README.md ├── export_model_to_onnx.py ├── pytorch-onnx-caffe-ncnn-rt.sh ├── pytorch-onnx-caffe-ncnn.sh └── test_onnxruntime.py ├── setup.py └── tools ├── convert_fcos_weight.py ├── remove_optim_from_ckpt.py ├── rename_blendmask.py ├── train_net.py └── visualize_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.jpg 8 | *.png 9 | *.txt 10 | 11 | # compilation and distribution 12 | __pycache__ 13 | _ext 14 | *.pyc 15 | *.so 16 | AdelaiDet.egg-info/ 17 | build/ 18 | dist/ 19 | 20 | # pytorch/python/numpy formats 21 | *.pth 22 | *.pkl 23 | *.npy 24 | 25 | # ipython/jupyter notebooks 26 | *.ipynb 27 | **/.ipynb_checkpoints/ 28 | 29 | # Editor temporaries 30 | *.swn 31 | *.swo 32 | *.swp 33 | *~ 34 | 35 | # Pycharm editor settings 36 | .idea 37 | .vscode 38 | .python-version 39 | 40 | # project dirs 41 | /datasets/coco 42 | /datasets/lvis 43 | /datasets/pic 44 | /datasets/ytvos 45 | /models 46 | /demo_outputs 47 | /example_inputs 48 | /debug 49 | /weights 50 | /export 51 | 
eval.sh 52 | 53 | demo/performance.py 54 | demo/demo2.py 55 | train.sh 56 | benchmark.sh 57 | script -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | AdelaiDet for non-commercial purposes 2 | (For commercial use, contact chhshen@gmail.com for obtaining a commerical license.) 3 | 4 | Copyright (c) 2019 the authors 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # AdelaiDet Model Zoo and Baselines 2 | 3 | ## Introduction 4 | This file documents a collection of models trained with AdelaiDet in Nov, 2019. 5 | 6 | ## Models 7 | 8 | The inference time is measured on one 1080Ti based on the most recent commit on Detectron2 ([ffff8ac](https://github.com/facebookresearch/detectron2/commit/ffff8acc35ea88ad1cb1806ab0f00b4c1c5dbfd9)). 9 | 10 | More models will be released soon. Stay tuned. 
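To try any entry below, download the linked weights and point `MODEL.WEIGHTS` at the file. The following is a minimal sketch (not part of the original zoo docs) of loading a downloaded checkpoint through `AdetCheckpointer`; the local file name `fcos_R_50_1x.pth` is just a placeholder:

```python
# Minimal sketch: build a model from a zoo config and load downloaded weights.
from detectron2.modeling import build_model
from adet.config import get_cfg
from adet.checkpoint import AdetCheckpointer

cfg = get_cfg()
cfg.merge_from_file("configs/FCOS-Detection/R_50_1x.yaml")
cfg.MODEL.WEIGHTS = "fcos_R_50_1x.pth"  # placeholder: path to the downloaded checkpoint
model = build_model(cfg)
AdetCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
```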
11 | 12 | ### COCO Object Detecton Baselines with FCOS 13 | 14 | Name | box AP | download 15 | --- |:---:|:---: 16 | [FCOS_R_50_1x](configs/FCOS-Detection/R_50_1x.yaml) | 38.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/glqFc13cCoEyHYy/download) 17 | 18 | ### COCO Instance Segmentation Baselines with [BlendMask](https://arxiv.org/abs/2001.00309) 19 | 20 | Model | Name |inference time (ms/im) | box AP | mask AP | download 21 | --- |:---:|:---:|:---:|:---:|:---: 22 | Mask R-CNN | [550_R_50_3x](configs/RCNN/550_R_50_FPN_3x.yaml) | 63 | 39.1 | 35.3 | 23 | BlendMask | [550_R_50_3x](configs/BlendMask/550_R_50_3x.yaml) | 36 | 38.7 | 34.5 | [model](https://cloudstor.aarnet.edu.au/plus/s/R3Qintf7N8UCiIt/download) 24 | Mask R-CNN | [R_50_1x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml) | 80 | 38.6 | 35.2 | 25 | BlendMask | [R_50_1x](configs/BlendMask/R_50_1x.yaml) | 73 | 39.9 | 35.8 | [model](https://cloudstor.aarnet.edu.au/plus/s/zoxXPnr6Hw3OJgK/download) 26 | Mask R-CNN | [R_50_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml) | 80 | 41.0 | 37.2 | 27 | BlendMask | [R_50_3x](configs/BlendMask/R_50_3x.yaml) | 74 | 42.7 | 37.8 | [model](https://cloudstor.aarnet.edu.au/plus/s/ZnaInHFEKst6mvg/download) 28 | Mask R-CNN | [R_101_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml) | 100 | 42.9 | 38.6 | 29 | BlendMask | [R_101_3x](configs/BlendMask/R_101_3x.yaml) | 94 | 44.8 | 39.5 | [model](https://cloudstor.aarnet.edu.au/plus/s/e4fXrliAcMtyEBy/download) 30 | BlendMask | [R_101_dcni3_5x](configs/BlendMask/R_101_dcni3_5x.yaml) | 105 | 46.8 | 41.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/vbnKnQtaGlw8TKv/download) 31 | 32 | ### COCO Panoptic Segmentation Baselines with BlendMask 33 | Model | Name | PQ | PQTh | PQSt | download 34 | --- |:---:|:---:|:---:|:---:|:---: 35 | Panoptic FPN | [R_50_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml) | 41.5 | 48.3 | 31.2 | 36 | BlendMask | [R_50_3x](configs/BlendMask/Panoptic/R_50_3x.yaml) | 42.5 | 49.5 | 32.0 | [model](https://cloudstor.aarnet.edu.au/plus/s/oDgi0826JOJXCr5/download) 37 | Panoptic FPN | [R_101_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/panoptic_fpn_R_101_3x.yaml) | 43.0 | 49.7 | 32.9 | 38 | BlendMask | [R_101_3x](configs/BlendMask/Panoptic/R_101_3x.yaml) | 44.3 | 51.6 | 33.2 | [model](https://cloudstor.aarnet.edu.au/plus/s/u6gZwj06MWDEkYe/download) 39 | BlendMask | [R_101_dcni3_5x](configs/BlendMask/Panoptic/R_101_dcni3_5x.yaml) | 46.0 | 52.9 | 35.5 | [model](https://cloudstor.aarnet.edu.au/plus/s/Jwp41WEzDdrhWsN/download) 40 | 41 | ### Person in Context with BlendMask 42 | Model | Name | box AP | mask AP | download 43 | --- |:---:|:---:|:---:|:---: 44 | BlendMask | [R_50_1x](configs/BlendMask/Person/R_50_1x.yaml) | 70.6 | 66.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/nvpcKTFA5fsagc0/download) -------------------------------------------------------------------------------- /adet/__init__.py: -------------------------------------------------------------------------------- 1 | from adet import modeling 2 | 3 | __version__ = "0.1.1" 4 | -------------------------------------------------------------------------------- /adet/checkpoint/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .adet_checkpoint import AdetCheckpointer 2 | 3 | __all__ = ["AdetCheckpointer"] 4 | -------------------------------------------------------------------------------- /adet/checkpoint/adet_checkpoint.py: -------------------------------------------------------------------------------- 1 | import pickle, os 2 | from fvcore.common.file_io import PathManager 3 | from detectron2.checkpoint import DetectionCheckpointer 4 | 5 | 6 | class AdetCheckpointer(DetectionCheckpointer): 7 | """ 8 | Same as :class:`DetectronCheckpointer`, but is able to convert models 9 | in AdelaiDet, such as LPF backbone. 10 | """ 11 | def _load_file(self, filename): 12 | if filename.endswith(".pkl"): 13 | with PathManager.open(filename, "rb") as f: 14 | data = pickle.load(f, encoding="latin1") 15 | if "model" in data and "__author__" in data: 16 | # file is in Detectron2 model zoo format 17 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 18 | return data 19 | else: 20 | # assume file is from Caffe2 / Detectron1 model zoo 21 | if "blobs" in data: 22 | # Detection models have "blobs", but ImageNet models don't 23 | data = data["blobs"] 24 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 25 | if "weight_order" in data: 26 | del data["weight_order"] 27 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 28 | 29 | loaded = super()._load_file(filename) # load native pth checkpoint 30 | if "model" not in loaded: 31 | loaded = {"model": loaded} 32 | 33 | basename = os.path.basename(filename).lower() 34 | if "lpf" in basename or "dla" in basename: 35 | loaded["matching_heuristics"] = True 36 | return loaded 37 | -------------------------------------------------------------------------------- /adet/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | 3 | __all__ = [ 4 | "get_cfg", 5 | ] 6 | -------------------------------------------------------------------------------- /adet/config/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode 2 | 3 | 4 | def get_cfg() -> CfgNode: 5 | """ 6 | Get a copy of the default config. 7 | 8 | Returns: 9 | a detectron2 CfgNode instance. 10 | """ 11 | from .defaults import _C 12 | 13 | return _C.clone() 14 | -------------------------------------------------------------------------------- /adet/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import builtin # ensure the builtin datasets are registered 2 | from .dataset_mapper import DatasetMapperWithBasis 3 | from .fcpose_dataset_mapper import FCPoseDatasetMapper 4 | 5 | 6 | __all__ = ["DatasetMapperWithBasis"] 7 | -------------------------------------------------------------------------------- /adet/data/augmentation.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | from fvcore.transforms import transform as T 5 | 6 | from detectron2.data.transforms import RandomCrop, StandardAugInput 7 | from detectron2.structures import BoxMode 8 | 9 | 10 | def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True): 11 | """ 12 | Generate a CropTransform so that the cropping region contains 13 | the center of the given instance. 
14 | 15 | Args: 16 | crop_size (tuple): h, w in pixels 17 | image_size (tuple): h, w 18 | instance (dict): an annotation dict of one instance, in Detectron2's 19 | dataset format. 20 | """ 21 | bbox = random.choice(instances) 22 | bbox[::2] = np.clip(bbox[::2], 0, image_size[1]) 23 | bbox[1::2] = np.clip(bbox[1::2], 0, image_size[0]) 24 | crop_size = np.asarray(crop_size, dtype=np.int32) 25 | center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 26 | assert ( 27 | image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] 28 | ), "The annotation bounding box is outside of the image!" 29 | assert ( 30 | image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] 31 | ), "Crop size is larger than image size!" 32 | 33 | min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) 34 | max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) 35 | max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) 36 | 37 | y0 = np.random.randint(min_yx[0], max_yx[0] + 1) 38 | x0 = np.random.randint(min_yx[1], max_yx[1] + 1) 39 | 40 | # if some instance is cropped extend the box 41 | if not crop_box: 42 | num_modifications = 0 43 | modified = True 44 | 45 | # convert crop_size to float 46 | crop_size = crop_size.astype(np.float32) 47 | while modified: 48 | modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances) 49 | num_modifications += 1 50 | if num_modifications > 100: 51 | raise ValueError( 52 | "Cannot finished cropping adjustment within 100 tries (#instances {}).".format( 53 | len(instances) 54 | ) 55 | ) 56 | return T.CropTransform(0, 0, image_size[1], image_size[0]) 57 | 58 | return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0]))) 59 | 60 | 61 | def adjust_crop(x0, y0, crop_size, instances, eps=1e-3): 62 | modified = False 63 | 64 | x1 = x0 + crop_size[1] 65 | y1 = y0 + crop_size[0] 66 | 67 | for bbox in instances: 68 | 69 | if bbox[0] < x0 - eps and bbox[2] > x0 + eps: 70 | crop_size[1] += x0 - bbox[0] 71 | x0 = bbox[0] 72 | modified = True 73 | 74 | if bbox[0] < x1 - eps and bbox[2] > x1 + eps: 75 | crop_size[1] += bbox[2] - x1 76 | x1 = bbox[2] 77 | modified = True 78 | 79 | if bbox[1] < y0 - eps and bbox[3] > y0 + eps: 80 | crop_size[0] += y0 - bbox[1] 81 | y0 = bbox[1] 82 | modified = True 83 | 84 | if bbox[1] < y1 - eps and bbox[3] > y1 + eps: 85 | crop_size[0] += bbox[3] - y1 86 | y1 = bbox[3] 87 | modified = True 88 | 89 | return modified, x0, y0, crop_size 90 | 91 | 92 | class RandomCropWithInstance(RandomCrop): 93 | """ Instance-aware cropping. 
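    Example (a rough sketch; the image, boxes and crop settings below are
    made-up placeholders, not values taken from any config)::

        >>> import numpy as np
        >>> aug = RandomCropWithInstance("relative_range", (0.9, 0.9), crop_instance=False)
        >>> image = np.zeros((480, 640, 3), dtype=np.uint8)
        >>> boxes = np.array([[10.0, 20.0, 200.0, 240.0]])  # absolute XYXY, one instance
        >>> tfm = aug.get_transform(image, boxes)  # CropTransform keeping the box center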
94 | """ 95 | 96 | def __init__(self, crop_type, crop_size, crop_instance=True): 97 | """ 98 | Args: 99 | crop_instance (bool): if False, extend cropping boxes to avoid cropping instances 100 | """ 101 | super().__init__(crop_type, crop_size) 102 | self.crop_instance = crop_instance 103 | self.input_args = ("image", "boxes") 104 | 105 | def get_transform(self, img, boxes): 106 | image_size = img.shape[:2] 107 | crop_size = self.get_crop_size(image_size) 108 | return gen_crop_transform_with_instance( 109 | crop_size, image_size, boxes, crop_box=self.crop_instance 110 | ) 111 | -------------------------------------------------------------------------------- /adet/data/builtin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from detectron2.data.datasets.register_coco import register_coco_instances 4 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 5 | 6 | from .datasets.text import register_text_instances 7 | 8 | # register plane reconstruction 9 | 10 | _PREDEFINED_SPLITS_PIC = { 11 | "pic_person_train": ("pic/image/train", "pic/annotations/train_person.json"), 12 | "pic_person_val": ("pic/image/val", "pic/annotations/val_person.json"), 13 | } 14 | 15 | metadata_pic = { 16 | "thing_classes": ["person"] 17 | } 18 | 19 | _PREDEFINED_SPLITS_TEXT = { 20 | "totaltext_train": ("totaltext/train_images", "totaltext/train.json"), 21 | "totaltext_val": ("totaltext/test_images", "totaltext/test.json"), 22 | "ctw1500_word_train": ("CTW1500/ctwtrain_text_image", "CTW1500/annotations/train_ctw1500_maxlen100_v2.json"), 23 | "ctw1500_word_test": ("CTW1500/ctwtest_text_image","CTW1500/annotations/test_ctw1500_maxlen100.json"), 24 | "syntext1_train": ("syntext1/images", "syntext1/annotations/train.json"), 25 | "syntext2_train": ("syntext2/images", "syntext2/annotations/train.json"), 26 | "mltbezier_word_train": ("mlt2017/images","mlt2017/annotations/train.json"), 27 | "rects_train": ("ReCTS/ReCTS_train_images", "ReCTS/annotations/rects_train.json"), 28 | "rects_val": ("ReCTS/ReCTS_val_images", "ReCTS/annotations/rects_val.json"), 29 | "rects_test": ("ReCTS/ReCTS_test_images", "ReCTS/annotations/rects_test.json"), 30 | "art_train": ("ArT/rename_artimg_train", "ArT/annotations/abcnet_art_train.json"), 31 | "lsvt_train": ("LSVT/rename_lsvtimg_train", "LSVT/annotations/abcnet_lsvt_train.json"), 32 | "chnsyn_train": ("ChnSyn/syn_130k_images", "ChnSyn/annotations/chn_syntext.json"), 33 | "icdar2013_train": ("icdar2013/train_images", "icdar2013/ic13_train.json"), 34 | "icdar2015_train": ("icdar2015/train_images", "icdar2015/ic15_train.json"), 35 | "icdar2015_test": ("icdar2015/test_images", "icdar2015/ic15_test.json"), 36 | } 37 | 38 | metadata_text = { 39 | "thing_classes": ["text"] 40 | } 41 | 42 | 43 | def register_all_coco(root="datasets"): 44 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_PIC.items(): 45 | # Assume pre-defined datasets live in `./datasets`. 46 | register_coco_instances( 47 | key, 48 | metadata_pic, 49 | os.path.join(root, json_file) if "://" not in json_file else json_file, 50 | os.path.join(root, image_root), 51 | ) 52 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_TEXT.items(): 53 | # Assume pre-defined datasets live in `./datasets`. 
54 | register_text_instances( 55 | key, 56 | metadata_text, 57 | os.path.join(root, json_file) if "://" not in json_file else json_file, 58 | os.path.join(root, image_root), 59 | ) 60 | 61 | 62 | register_all_coco() 63 | -------------------------------------------------------------------------------- /adet/data/fcpose_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import os.path as osp 4 | 5 | import numpy as np 6 | import torch 7 | from fvcore.common.file_io import PathManager 8 | from PIL import Image 9 | from pycocotools import mask as maskUtils 10 | 11 | from detectron2.data import detection_utils as utils 12 | from detectron2.data import transforms as T 13 | from detectron2.data.dataset_mapper import DatasetMapper 14 | from detectron2.data.detection_utils import SizeMismatchError 15 | from detectron2.structures import BoxMode 16 | 17 | from .augmentation import RandomCropWithInstance 18 | from .detection_utils import (annotations_to_instances, build_augmentation, 19 | transform_instance_annotations) 20 | 21 | from adet.data.detection_utils import HeatmapGenerator 22 | from adet.data.dataset_mapper import DatasetMapperWithBasis 23 | """ 24 | This file contains the default mapping that's applied to "dataset dicts". 25 | """ 26 | 27 | __all__ = ["DatasetMapperWithBasis"] 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | class FCPoseDatasetMapper(DatasetMapperWithBasis): 32 | """ 33 | This caller enables the default Detectron2 mapper to read an additional basis semantic label 34 | """ 35 | 36 | def __init__(self, cfg, is_train=True): 37 | super().__init__(cfg, is_train) 38 | 39 | self.fcpose_on = cfg.MODEL.FCPOSE_ON 40 | if self.fcpose_on: 41 | self.gt_heatmap_stride = cfg.MODEL.FCPOSE.GT_HEATMAP_STRIDE 42 | self.sigma = cfg.MODEL.FCPOSE.HEATMAP_SIGMA 43 | self.head_sigma = cfg.MODEL.FCPOSE.HEAD_HEATMAP_SIGMA 44 | self.HeatmapGenerator = HeatmapGenerator(17, self.sigma, self.head_sigma) 45 | 46 | def __call__(self, dataset_dict): 47 | """ 48 | Args: 49 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 50 | 51 | Returns: 52 | dict: a format that builtin models in detectron2 accept 53 | """ 54 | for i in range(100): 55 | dataset_dict_temp = copy.deepcopy(dataset_dict) 56 | dataset_dict_temp = super().__call__(dataset_dict_temp) 57 | if len(dataset_dict_temp["instances"]) != 0: 58 | if self.is_train: 59 | dataset_dict_temp['instances'] = self.HeatmapGenerator(dataset_dict_temp['instances'], 60 | self.gt_heatmap_stride) 61 | return dataset_dict_temp 62 | raise 63 | -------------------------------------------------------------------------------- /adet/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .text_evaluation_all import TextEvaluator 2 | from .text_eval_script import text_eval_main 3 | from .text_eval_script_ic15 import text_eval_main_ic15 4 | from . import rrc_evaluation_funcs 5 | from . 
import rrc_evaluation_funcs_ic15 -------------------------------------------------------------------------------- /adet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import DFConv2d 2 | from .ml_nms import ml_nms 3 | from .iou_loss import IOULoss 4 | from .conv_with_kaiming_uniform import conv_with_kaiming_uniform 5 | from .bezier_align import BezierAlign 6 | from .def_roi_align import DefROIAlign 7 | from .naive_group_norm import NaiveGroupNorm 8 | from .gcn import GCN 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /adet/layers/bezier_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from adet import _C 8 | 9 | 10 | class _BezierAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | ctx.aligned = aligned 19 | output = _C.bezier_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.bezier_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ctx.aligned, 44 | ) 45 | return grad_input, None, None, None, None, None 46 | 47 | 48 | bezier_align = _BezierAlign.apply 49 | 50 | 51 | class BezierAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 53 | """ 54 | Args: 55 | output_size (tuple): h, w 56 | spatial_scale (float): scale the input boxes by this number 57 | sampling_ratio (int): number of inputs samples to take for each output 58 | sample. 0 to take samples densely. 59 | aligned (bool): if False, use the legacy implementation in 60 | Detectron. If True, align the results more perfectly. 61 | 62 | Note: 63 | The meaning of aligned=True: 64 | 65 | With `aligned=True`, 66 | we first appropriately scale the ROI and then shift it by -0.5 67 | prior to calling bezier_align. This produces the correct neighbors; see 68 | adet/tests/test_bezier_align.py for verification. 69 | 70 | The difference does not make a difference to the model's performance if 71 | ROIAlign is used together with conv layers. 72 | """ 73 | super(BezierAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.aligned = aligned 78 | 79 | def forward(self, input, rois): 80 | """ 81 | Args: 82 | input: NCHW images 83 | rois: Bx17 boxes. First column is the index into N. The other 16 columns are [xy]x8. 
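        Example (a shape-only sketch; running it needs the compiled ``adet._C``
        extension, and the control-point values below are arbitrary)::

            >>> import torch
            >>> pooler = BezierAlign((8, 32), spatial_scale=0.25, sampling_ratio=1)
            >>> feats = torch.randn(1, 256, 64, 64)
            >>> ctrl = torch.rand(3, 16) * 200          # 8 (x, y) Bezier control points per box
            >>> rois = torch.cat([torch.zeros(3, 1), ctrl], dim=1)  # prepend image index -> (3, 17)
            >>> out = pooler(feats, rois)               # (3, 256, 8, 32)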
84 | """ 85 | assert rois.dim() == 2 and rois.size(1) == 17 86 | return bezier_align( 87 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 88 | ) 89 | 90 | def __repr__(self): 91 | tmpstr = self.__class__.__name__ + "(" 92 | tmpstr += "output_size=" + str(self.output_size) 93 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 94 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 95 | tmpstr += ", aligned=" + str(self.aligned) 96 | tmpstr += ")" 97 | return tmpstr 98 | -------------------------------------------------------------------------------- /adet/layers/conv_with_kaiming_uniform.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from detectron2.layers import Conv2d 4 | from .deform_conv import DFConv2d 5 | from detectron2.layers.batch_norm import get_norm 6 | 7 | 8 | def conv_with_kaiming_uniform( 9 | norm=None, activation=None, 10 | use_deformable=False, use_sep=False): 11 | def make_conv( 12 | in_channels, out_channels, kernel_size, stride=1, dilation=1 13 | ): 14 | if use_deformable: 15 | conv_func = DFConv2d 16 | else: 17 | conv_func = Conv2d 18 | if use_sep: 19 | assert in_channels == out_channels 20 | groups = in_channels 21 | else: 22 | groups = 1 23 | conv = conv_func( 24 | in_channels, 25 | out_channels, 26 | kernel_size=kernel_size, 27 | stride=stride, 28 | padding=dilation * (kernel_size - 1) // 2, 29 | dilation=dilation, 30 | groups=groups, 31 | bias=(norm is None) 32 | ) 33 | if not use_deformable: 34 | # Caffe2 implementation uses XavierFill, which in fact 35 | # corresponds to kaiming_uniform_ in PyTorch 36 | nn.init.kaiming_uniform_(conv.weight, a=1) 37 | if norm is None: 38 | nn.init.constant_(conv.bias, 0) 39 | module = [conv,] 40 | if norm is not None and len(norm) > 0: 41 | if norm == "GN": 42 | norm_module = nn.GroupNorm(32, out_channels) 43 | else: 44 | norm_module = get_norm(norm, out_channels) 45 | module.append(norm_module) 46 | if activation is not None: 47 | module.append(nn.ReLU(inplace=True)) 48 | if len(module) > 1: 49 | return nn.Sequential(*module) 50 | return conv 51 | 52 | return make_conv 53 | -------------------------------------------------------------------------------- /adet/layers/csrc/BezierAlign/BezierAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace adet { 6 | 7 | at::Tensor BezierAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor BezierAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor BezierAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor BezierAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor BezierAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return BezierAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return BezierAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor BezierAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return BezierAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return BezierAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /adet/layers/csrc/DefROIAlign/DefROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace adet { 5 | 6 | #ifdef WITH_CUDA 7 | at::Tensor DefROIAlign_forward_cuda( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const at::Tensor& offsets, // def added 11 | const float spatial_scale, 12 | const int pooled_height, 13 | const int pooled_width, 14 | const int 
sampling_ratio, 15 | const float trans_std, // def added 16 | bool aligned); 17 | 18 | at::Tensor DefROIAlign_backward_cuda( 19 | const at::Tensor& input, // def added 20 | const at::Tensor& grad, 21 | const at::Tensor& rois, 22 | const at::Tensor& offsets, // def added 23 | const at::Tensor& grad_offsets, // def added 24 | const float spatial_scale, 25 | const int pooled_height, 26 | const int pooled_width, 27 | const int batch_size, 28 | const int channels, 29 | const int height, 30 | const int width, 31 | const int sampling_ratio, 32 | const float trans_std, // def added 33 | bool aligned); 34 | #endif 35 | 36 | // Interface for Python 37 | inline at::Tensor DefROIAlign_forward( 38 | const at::Tensor& input, 39 | const at::Tensor& rois, 40 | const at::Tensor& offsets, // def added 41 | const float spatial_scale, 42 | const int pooled_height, 43 | const int pooled_width, 44 | const int sampling_ratio, 45 | const float trans_std, // def added 46 | bool aligned) { 47 | if (input.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return DefROIAlign_forward_cuda( 50 | input, 51 | rois, 52 | offsets, 53 | spatial_scale, 54 | pooled_height, 55 | pooled_width, 56 | sampling_ratio, 57 | trans_std, 58 | aligned); 59 | #else 60 | AT_ERROR("Not compiled with GPU support"); 61 | #endif 62 | } 63 | AT_ERROR("CPU version not supported"); 64 | } 65 | 66 | inline at::Tensor DefROIAlign_backward( 67 | const at::Tensor& input, // def added 68 | const at::Tensor& grad, 69 | const at::Tensor& rois, 70 | const at::Tensor& offsets, // def added 71 | const at::Tensor& grad_offsets, // def added 72 | const float spatial_scale, 73 | const int pooled_height, 74 | const int pooled_width, 75 | const int batch_size, 76 | const int channels, 77 | const int height, 78 | const int width, 79 | const int sampling_ratio, 80 | const float trans_std, // def added 81 | bool aligned) { 82 | if (grad.type().is_cuda()) { 83 | #ifdef WITH_CUDA 84 | return DefROIAlign_backward_cuda( 85 | input, // def added 86 | grad, 87 | rois, 88 | offsets, // def added 89 | grad_offsets, // def added 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio, 98 | trans_std, // def added 99 | aligned); 100 | #else 101 | AT_ERROR("Not compiled with GPU support"); 102 | #endif 103 | } 104 | AT_ERROR("CPU version not supported"); 105 | } 106 | 107 | } // namespace adet 108 | -------------------------------------------------------------------------------- /adet/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace adet { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace adet 8 | -------------------------------------------------------------------------------- /adet/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace adet { 5 | 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor ml_nms_cuda( 9 | const at::Tensor dets, 10 | const float threshold); 11 | #endif 12 | 13 | at::Tensor ml_nms(const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const at::Tensor& labels, 16 | const float threshold) { 17 | 18 | if (dets.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | // TODO raise error if not compiled with CUDA 21 | if (dets.numel() == 0) 22 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 23 | auto b = at::cat({dets, scores.unsqueeze(1), 
labels.unsqueeze(1)}, 1); 24 | return ml_nms_cuda(b, threshold); 25 | #else 26 | AT_ERROR("Not compiled with GPU support"); 27 | #endif 28 | } 29 | AT_ERROR("CPU version not implemented"); 30 | } 31 | 32 | } // namespace adet 33 | -------------------------------------------------------------------------------- /adet/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "ml_nms/ml_nms.h" 4 | #include "DefROIAlign/DefROIAlign.h" 5 | #include "BezierAlign/BezierAlign.h" 6 | 7 | namespace adet { 8 | 9 | #ifdef WITH_CUDA 10 | extern int get_cudart_version(); 11 | #endif 12 | 13 | std::string get_cuda_version() { 14 | #ifdef WITH_CUDA 15 | std::ostringstream oss; 16 | 17 | // copied from 18 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 19 | auto printCudaStyleVersion = [&](int v) { 20 | oss << (v / 1000) << "." << (v / 10 % 100); 21 | if (v % 10 != 0) { 22 | oss << "." << (v % 10); 23 | } 24 | }; 25 | printCudaStyleVersion(get_cudart_version()); 26 | return oss.str(); 27 | #else 28 | return std::string("not available"); 29 | #endif 30 | } 31 | 32 | // similar to 33 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 34 | std::string get_compiler_version() { 35 | std::ostringstream ss; 36 | #if defined(__GNUC__) 37 | #ifndef __clang__ 38 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 39 | #endif 40 | #endif 41 | 42 | #if defined(__clang_major__) 43 | { 44 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 45 | << __clang_patchlevel__; 46 | } 47 | #endif 48 | 49 | #if defined(_MSC_VER) 50 | { ss << "MSVC " << _MSC_FULL_VER; } 51 | #endif 52 | return ss.str(); 53 | } 54 | 55 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 56 | m.def("ml_nms", &ml_nms, "Multi-Label NMS"); 57 | m.def("def_roi_align_forward", &DefROIAlign_forward, "def_roi_align_forward"); 58 | m.def("def_roi_align_backward", &DefROIAlign_backward, "def_roi_align_backward"); 59 | m.def("bezier_align_forward", &BezierAlign_forward, "bezier_align_forward"); 60 | m.def("bezier_align_backward", &BezierAlign_backward, "bezier_align_backward"); 61 | } 62 | 63 | } // namespace adet 64 | -------------------------------------------------------------------------------- /adet/layers/def_roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from adet import _C 8 | 9 | 10 | class _DefROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, offsets, output_size, spatial_scale, sampling_ratio, trans_std, aligned): 13 | ctx.save_for_backward(input, roi, offsets) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.trans_std = trans_std 18 | ctx.input_shape = input.size() 19 | ctx.aligned = aligned 20 | output = _C.def_roi_align_forward( 21 | input, roi, offsets, spatial_scale, output_size[0], output_size[1], 22 | sampling_ratio, trans_std, aligned 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | data, rois, offsets = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | 
sampling_ratio = ctx.sampling_ratio 33 | trans_std = ctx.trans_std 34 | bs, ch, h, w = ctx.input_shape 35 | grad_offsets = torch.zeros_like(offsets) 36 | 37 | grad_input = _C.def_roi_align_backward( 38 | data, 39 | grad_output, 40 | rois, 41 | offsets, 42 | grad_offsets, 43 | spatial_scale, 44 | output_size[0], 45 | output_size[1], 46 | bs, 47 | ch, 48 | h, 49 | w, 50 | sampling_ratio, 51 | trans_std, 52 | ctx.aligned, 53 | ) 54 | return grad_input, None, grad_offsets, None, None, None, None, None 55 | 56 | 57 | def_roi_align = _DefROIAlign.apply 58 | 59 | 60 | class DefROIAlign(nn.Module): 61 | def __init__(self, output_size, spatial_scale, 62 | sampling_ratio, trans_std, aligned=True): 63 | """ 64 | Args: 65 | output_size (tuple): h, w 66 | spatial_scale (float): scale the input boxes by this number 67 | sampling_ratio (int): number of inputs samples to take for each output 68 | sample. 0 to take samples densely. 69 | trans_std (float): offset scale according to the normalized roi size 70 | aligned (bool): if False, use the legacy implementation in 71 | Detectron. If True, align the results more perfectly. 72 | """ 73 | super(DefROIAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.trans_std = trans_std 78 | self.aligned = aligned 79 | 80 | def forward(self, input, rois, offsets): 81 | """ 82 | Args: 83 | input: NCHW images 84 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 85 | """ 86 | assert rois.dim() == 2 and rois.size(1) == 5 87 | return def_roi_align( 88 | input, rois, offsets, self.output_size, 89 | self.spatial_scale, self.sampling_ratio, 90 | self.trans_std, self.aligned 91 | ) 92 | 93 | def __repr__(self): 94 | tmpstr = self.__class__.__name__ + "(" 95 | tmpstr += "output_size=" + str(self.output_size) 96 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 97 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 98 | tmpstr += ", trans_std=" + str(self.trans_std) 99 | tmpstr += ", aligned=" + str(self.aligned) 100 | tmpstr += ")" 101 | return tmpstr 102 | -------------------------------------------------------------------------------- /adet/layers/deform_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from detectron2.layers import Conv2d 5 | 6 | 7 | class _NewEmptyTensorOp(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, x, new_shape): 10 | ctx.shape = x.shape 11 | return x.new_empty(new_shape) 12 | 13 | @staticmethod 14 | def backward(ctx, grad): 15 | shape = ctx.shape 16 | return _NewEmptyTensorOp.apply(grad, shape), None 17 | 18 | 19 | class DFConv2d(nn.Module): 20 | """ 21 | Deformable convolutional layer with configurable 22 | deformable groups, dilations and groups. 
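    Example (a rough sketch; it needs the deformable-conv CUDA ops from
    detectron2, so it only runs on a GPU build)::

        >>> import torch
        >>> conv = DFConv2d(256, 256, with_modulated_dcn=True, kernel_size=3).cuda()
        >>> x = torch.randn(2, 256, 32, 32, device="cuda")
        >>> y = conv(x)  # offsets/masks are predicted internally; spatial size stays 32x32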
23 | 24 | Code is from: 25 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py 26 | 27 | 28 | """ 29 | def __init__( 30 | self, 31 | in_channels, 32 | out_channels, 33 | with_modulated_dcn=True, 34 | kernel_size=3, 35 | stride=1, 36 | groups=1, 37 | dilation=1, 38 | deformable_groups=1, 39 | bias=False, 40 | padding=None 41 | ): 42 | super(DFConv2d, self).__init__() 43 | if isinstance(kernel_size, (list, tuple)): 44 | assert isinstance(stride, (list, tuple)) 45 | assert isinstance(dilation, (list, tuple)) 46 | assert len(kernel_size) == 2 47 | assert len(stride) == 2 48 | assert len(dilation) == 2 49 | padding = ( 50 | dilation[0] * (kernel_size[0] - 1) // 2, 51 | dilation[1] * (kernel_size[1] - 1) // 2 52 | ) 53 | offset_base_channels = kernel_size[0] * kernel_size[1] 54 | else: 55 | padding = dilation * (kernel_size - 1) // 2 56 | offset_base_channels = kernel_size * kernel_size 57 | if with_modulated_dcn: 58 | from detectron2.layers.deform_conv import ModulatedDeformConv 59 | offset_channels = offset_base_channels * 3 # default: 27 60 | conv_block = ModulatedDeformConv 61 | else: 62 | from detectron2.layers.deform_conv import DeformConv 63 | offset_channels = offset_base_channels * 2 # default: 18 64 | conv_block = DeformConv 65 | self.offset = Conv2d( 66 | in_channels, 67 | deformable_groups * offset_channels, 68 | kernel_size=kernel_size, 69 | stride=stride, 70 | padding=padding, 71 | groups=1, 72 | dilation=dilation 73 | ) 74 | for l in [self.offset, ]: 75 | nn.init.kaiming_uniform_(l.weight, a=1) 76 | torch.nn.init.constant_(l.bias, 0.) 77 | self.conv = conv_block( 78 | in_channels, 79 | out_channels, 80 | kernel_size=kernel_size, 81 | stride=stride, 82 | padding=padding, 83 | dilation=dilation, 84 | groups=groups, 85 | deformable_groups=deformable_groups, 86 | bias=bias 87 | ) 88 | self.with_modulated_dcn = with_modulated_dcn 89 | self.kernel_size = kernel_size 90 | self.stride = stride 91 | self.padding = padding 92 | self.dilation = dilation 93 | self.offset_split = offset_base_channels * deformable_groups * 2 94 | 95 | def forward(self, x, return_offset=False): 96 | if x.numel() > 0: 97 | if not self.with_modulated_dcn: 98 | offset_mask = self.offset(x) 99 | x = self.conv(x, offset_mask) 100 | else: 101 | offset_mask = self.offset(x) 102 | offset = offset_mask[:, :self.offset_split, :, :] 103 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid() 104 | x = self.conv(x, offset, mask) 105 | if return_offset: 106 | return x, offset_mask 107 | return x 108 | # get output shape 109 | output_shape = [ 110 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 111 | for i, p, di, k, d in zip( 112 | x.shape[-2:], 113 | self.padding, 114 | self.dilation, 115 | self.kernel_size, 116 | self.stride 117 | ) 118 | ] 119 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape 120 | return _NewEmptyTensorOp.apply(x, output_shape) 121 | -------------------------------------------------------------------------------- /adet/layers/gcn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Conv2D(nn.Module): 7 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', 8 | stride=1, dilation=1, groups=1): 9 | super(Conv2D, self).__init__() 10 | 11 | assert type(kernel_size) in [int, tuple], "Allowed kernel type [int or tuple], not {}".format(type(kernel_size)) 12 | assert padding == 'same', 
"Allowed padding type {}, not {}".format('same', padding) 13 | 14 | self.kernel_size = kernel_size 15 | if isinstance(kernel_size, tuple): 16 | self.h_kernel = kernel_size[0] 17 | self.w_kernel = kernel_size[1] 18 | else: 19 | self.h_kernel = kernel_size 20 | self.w_kernel = kernel_size 21 | 22 | self.padding = padding 23 | self.stride = stride 24 | self.dilation = dilation 25 | self.groups = groups 26 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 27 | stride=self.stride, dilation=self.dilation, groups=self.groups) 28 | 29 | def forward(self, x): 30 | 31 | if self.padding == 'same': 32 | 33 | height, width = x.shape[2:] 34 | 35 | h_pad_need = max(0, (height - 1) * self.stride + self.h_kernel - height) 36 | w_pad_need = max(0, (width - 1) * self.stride + self.w_kernel - width) 37 | 38 | pad_left = w_pad_need // 2 39 | pad_right = w_pad_need - pad_left 40 | pad_top = h_pad_need // 2 41 | pad_bottom = h_pad_need - pad_top 42 | 43 | padding = (pad_left, pad_right, pad_top, pad_bottom) 44 | 45 | x = F.pad(x, padding, 'constant', 0) 46 | 47 | x = self.conv(x) 48 | 49 | return x 50 | 51 | 52 | class GCN(nn.Module): 53 | """ 54 | Large Kernel Matters -- https://arxiv.org/abs/1703.02719 55 | """ 56 | def __init__(self, in_channels, out_channels, k=3): 57 | super(GCN, self).__init__() 58 | 59 | self.conv_l1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 60 | self.conv_l2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 61 | 62 | self.conv_r1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 63 | self.conv_r2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 64 | 65 | def forward(self, x): 66 | x1 = self.conv_l1(x) 67 | x1 = self.conv_l2(x1) 68 | 69 | x2 = self.conv_r1(x) 70 | x2 = self.conv_r2(x2) 71 | 72 | out = x1 + x2 73 | 74 | return out 75 | -------------------------------------------------------------------------------- /adet/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | """ 7 | Intersetion Over Union (IoU) loss which supports three 8 | different IoU computations: 9 | 10 | * IoU 11 | * Linear IoU 12 | * gIoU 13 | """ 14 | def __init__(self, loc_loss_type='iou'): 15 | super(IOULoss, self).__init__() 16 | self.loc_loss_type = loc_loss_type 17 | 18 | def forward(self, ious, gious=None, weight=None): 19 | if self.loc_loss_type == 'iou': 20 | losses = -torch.log(ious) 21 | elif self.loc_loss_type == 'linear_iou': 22 | losses = 1 - ious 23 | elif self.loc_loss_type == 'giou': 24 | assert gious is not None 25 | losses = 1 - gious 26 | else: 27 | raise NotImplementedError 28 | 29 | if weight is not None: 30 | return (losses * weight).sum() 31 | else: 32 | return losses.sum() 33 | -------------------------------------------------------------------------------- /adet/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import batched_nms 2 | 3 | 4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 5 | score_field="scores", label_field="labels"): 6 | """ 7 | Performs non-maximum suppression on a boxlist, with scores specified 8 | in a boxlist field via score_field. 
9 | 10 | Args: 11 | boxlist (detectron2.structures.Boxes): 12 | nms_thresh (float): 13 | max_proposals (int): if > 0, then only the top max_proposals are kept 14 | after non-maximum suppression 15 | score_field (str): 16 | """ 17 | if nms_thresh <= 0: 18 | return boxlist 19 | boxes = boxlist.pred_boxes.tensor 20 | scores = boxlist.scores 21 | labels = boxlist.pred_classes 22 | keep = batched_nms(boxes, scores, labels, nms_thresh) 23 | if max_proposals > 0: 24 | keep = keep[: max_proposals] 25 | boxlist = boxlist[keep] 26 | return boxlist 27 | -------------------------------------------------------------------------------- /adet/layers/naive_group_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, Parameter 3 | from torch.nn import init 4 | 5 | 6 | class NaiveGroupNorm(Module): 7 | r"""NaiveGroupNorm implements Group Normalization with the high-level matrix operations in PyTorch. 8 | It is a temporary solution to export GN by ONNX before the official GN can be exported by ONNX. 9 | The usage of NaiveGroupNorm is exactly the same as the official :class:`torch.nn.GroupNorm`. 10 | Args: 11 | num_groups (int): number of groups to separate the channels into 12 | num_channels (int): number of channels expected in input 13 | eps: a value added to the denominator for numerical stability. Default: 1e-5 14 | affine: a boolean value that when set to ``True``, this module 15 | has learnable per-channel affine parameters initialized to ones (for weights) 16 | and zeros (for biases). Default: ``True``. 17 | 18 | Shape: 19 | - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}` 20 | - Output: :math:`(N, C, *)` (same shape as input) 21 | 22 | Examples:: 23 | 24 | >>> input = torch.randn(20, 6, 10, 10) 25 | >>> # Separate 6 channels into 3 groups 26 | >>> m = NaiveGroupNorm(3, 6) 27 | >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) 28 | >>> m = NaiveGroupNorm(6, 6) 29 | >>> # Put all 6 channels into a single group (equivalent with LayerNorm) 30 | >>> m = NaiveGroupNorm(1, 6) 31 | >>> # Activating the module 32 | >>> output = m(input) 33 | 34 | .. 
_`Group Normalization`: https://arxiv.org/abs/1803.08494 35 | """ 36 | __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight', 37 | 'bias'] 38 | 39 | def __init__(self, num_groups, num_channels, eps=1e-5, affine=True): 40 | super(NaiveGroupNorm, self).__init__() 41 | self.num_groups = num_groups 42 | self.num_channels = num_channels 43 | self.eps = eps 44 | self.affine = affine 45 | if self.affine: 46 | self.weight = Parameter(torch.Tensor(num_channels)) 47 | self.bias = Parameter(torch.Tensor(num_channels)) 48 | else: 49 | self.register_parameter('weight', None) 50 | self.register_parameter('bias', None) 51 | self.reset_parameters() 52 | 53 | def reset_parameters(self): 54 | if self.affine: 55 | init.ones_(self.weight) 56 | init.zeros_(self.bias) 57 | 58 | def forward(self, input): 59 | N, C, H, W = input.size() 60 | assert C % self.num_groups == 0 61 | input = input.reshape(N, self.num_groups, -1) 62 | mean = input.mean(dim=-1, keepdim=True) 63 | var = (input ** 2).mean(dim=-1, keepdim=True) - mean ** 2 64 | std = torch.sqrt(var + self.eps) 65 | 66 | input = (input - mean) / std 67 | input = input.reshape(N, C, H, W) 68 | if self.affine: 69 | input = input * self.weight.reshape(1, C, 1, 1) + self.bias.reshape(1, C, 1, 1) 70 | return input 71 | 72 | def extra_repr(self): 73 | return '{num_groups}, {num_channels}, eps={eps}, ' \ 74 | 'affine={affine}'.format(**self.__dict__) 75 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/MaskLoader.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import os 4 | import json 5 | import numpy as np 6 | 7 | import torch.utils.data as data 8 | 9 | from detectron2.structures import ( 10 | Boxes, 11 | PolygonMasks, 12 | BoxMode 13 | ) 14 | 15 | 16 | DATASETS = { 17 | "coco_2017_train": { 18 | "img_dir": "coco/train2017", 19 | "ann_file": "coco/annotations/instances_train2017.json" 20 | }, 21 | "coco_2017_val": { 22 | "img_dir": "coco/val2017", 23 | "ann_file": "coco/annotations/instances_val2017.json" 24 | } 25 | } 26 | 27 | 28 | class MaskLoader(data.Dataset): 29 | """ 30 | Dataloader for Local Mask. 31 | 32 | Arguments: 33 | root (string): filepath to dataset folder. 34 | dataset (string): mask to use (eg. 'train', 'val'). 35 | size (tuple): The size used for train/val (height, width). 36 | transform (callable, optional): transformation to perform on the input mask. 37 | 38 | """ 39 | 40 | def __init__(self, root="datasets", dataset="coco_2017_train", size=28, transform=False): 41 | self.root = root 42 | self.dataset = dataset 43 | self.transform = transform 44 | 45 | if isinstance(size, int): 46 | self.size = size 47 | else: 48 | raise TypeError 49 | 50 | data_info = DATASETS[dataset] 51 | img_dir, ann_file = data_info['img_dir'], data_info['ann_file'] 52 | img_dir = os.path.join(self.root, img_dir) # actually we do not use it. 53 | ann_file = os.path.join(self.root, ann_file) 54 | 55 | with open(ann_file, 'r') as f: 56 | anns = json.load(f) 57 | anns = anns['annotations'] 58 | coco = list() 59 | for ann in anns: 60 | if ann.get('iscrowd', 0) == 0: 61 | coco.append(ann) 62 | self.coco = coco 63 | print("Removed {} images with no usable annotations. {} images left.".format( 64 | len(anns) - len(self.coco), len(self.coco))) 65 | 66 | def __len__(self): 67 | return len(self.coco) 68 | 69 | def __getitem__(self, index): 70 | ann = self.coco[index] 71 | 72 | # bbox transform. 
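        # COCO stores "bbox" as [x, y, w, h]; BoxMode.convert below turns it into
        # absolute [x1, y1, x2, y2] corners, which is what Boxes and
        # PolygonMasks.crop_and_resize expect.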
73 | bbox = np.array([ann["bbox"]]) # xmin, ymin, w, h 74 | bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) # x1y1x2y2 75 | bbox = Boxes(bbox) 76 | 77 | # mask transform. 78 | mask = PolygonMasks([ann["segmentation"]]) 79 | mask = mask.crop_and_resize(bbox.tensor, self.size).float() 80 | 81 | return mask 82 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .MaskLoader import MaskLoader 3 | from .utils import inverse_sigmoid, direct_sigmoid, IOUMetric, transform, inverse_transform 4 | 5 | __all__ = ["MaskLoader", "IOUMetric", 6 | "inverse_sigmoid", "direct_sigmoid", 7 | "transform", "inverse_transform"] 8 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/mask_evaluation.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import os 4 | import argparse 5 | import numpy as np 6 | from torch.utils.data import DataLoader 7 | 8 | from MaskLoader import MaskLoader 9 | from utils import ( 10 | IOUMetric, 11 | transform, 12 | inverse_transform, 13 | direct_sigmoid, 14 | inverse_sigmoid 15 | ) 16 | 17 | 18 | VALUE_MAX = 0.05 19 | VALUE_MIN = 0.01 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Evaluation for PCA Mask Encoding.') 24 | parser.add_argument('--root', default='datasets', type=str) 25 | parser.add_argument('--dataset', default='coco_2017_train', type=str) 26 | parser.add_argument('--matrix', default='coco/components/coco_2017_train' 27 | '_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz', type=str) 28 | # mask encoding params. 29 | parser.add_argument('--mask_size', default=28, type=int) 30 | parser.add_argument('--n_components', default=60, type=int) 31 | parser.add_argument('--class_agnostic', default=True, type=bool) 32 | parser.add_argument('--whiten', default=True, type=bool) 33 | parser.add_argument('--sigmoid', default=True, type=bool) 34 | parser.add_argument('--batch-size', default=1024, type=int) 35 | args = parser.parse_args() 36 | return args 37 | 38 | 39 | if __name__ == "__main__": 40 | args = parse_args() 41 | # parse args. 42 | mask_size = args.mask_size 43 | n_components = args.n_components 44 | class_agnostic = args.class_agnostic 45 | whiten = args.whiten 46 | sigmoid = args.sigmoid 47 | 48 | cur_path = os.path.abspath(os.path.dirname(__file__)) 49 | root_path = cur_path[:cur_path.find("AdelaiDet") + len("AdelaiDet")] 50 | dataset_root = os.path.join(root_path, args.root) 51 | matrix_path = os.path.join(dataset_root, args.matrix) 52 | 53 | # load matrix. 54 | print("Loading matrix parameters: {}".format(matrix_path)) 55 | parameters = np.load(matrix_path) 56 | components_c = parameters['components_c'] 57 | mean_c = parameters['mean_c'] 58 | ratio_c = parameters['ratio_c'] 59 | explained_variance_c = parameters['explained_variance_c'] 60 | if class_agnostic: 61 | components_c = np.squeeze(components_c) 62 | mean_c = np.squeeze(mean_c) 63 | explained_variance_c = np.squeeze(explained_variance_c) 64 | assert n_components == components_c.shape[0], \ 65 | print("The n_components in component_ must equal to the supposed shape.") 66 | else: 67 | # TODO: We have not achieve the function in class-specific. 68 | raise NotImplementedError 69 | 70 | # build data loader. 
71 | mask_data = MaskLoader(root=dataset_root, dataset=args.dataset, size=mask_size) 72 | mask_loader = DataLoader(mask_data, batch_size=args.batch_size, shuffle=False, num_workers=4) 73 | size_data = len(mask_loader) 74 | 75 | # evaluation. 76 | IoUevaluate = IOUMetric(2) 77 | print("Start Eva ...") 78 | for i, masks in enumerate(mask_loader): 79 | print("Eva [{} / {}]".format(i, size_data)) 80 | # generate the reconstruction mask. 81 | masks = masks.view(masks.shape[0], -1).numpy() 82 | masks = masks.astype(np.float32) 83 | # pre-process. 84 | if sigmoid: 85 | value_random = VALUE_MAX * np.random.rand(masks.shape[0], masks.shape[1]) 86 | value_random = np.maximum(value_random, VALUE_MIN) 87 | masks_random = np.where(masks > value_random, 1 - value_random, value_random) 88 | masks_random = inverse_sigmoid(masks_random) 89 | else: 90 | masks_random = masks 91 | # --> encode --> decode. 92 | mask_rc = transform(masks_random, components_=components_c, explained_variance_=explained_variance_c, 93 | mean_=mean_c, whiten=whiten) 94 | mask_rc = inverse_transform(mask_rc, components_=components_c, explained_variance_=explained_variance_c, 95 | mean_=mean_c, whiten=whiten) 96 | # post-process. 97 | if sigmoid: 98 | mask_rc = direct_sigmoid(mask_rc) 99 | # eva. 100 | mask_rc = np.where(mask_rc >= 0.5, 1, 0) 101 | IoUevaluate.add_batch(mask_rc, masks) 102 | 103 | _, _, _, mean_iu, _ = IoUevaluate.evaluate() 104 | print("The mIoU for {}: {}".format(args.matrix, mean_iu)) 105 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/utils.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | 5 | 6 | def direct_sigmoid(x): 7 | """Apply the sigmoid operation. 8 | """ 9 | y = 1./(1.+1./np.exp(x)) 10 | dy = y*(1-y) 11 | return y 12 | 13 | 14 | def inverse_sigmoid(x): 15 | """Apply the inverse sigmoid operation. 16 | y = -ln(1-x/x) 17 | """ 18 | y = -1 * np.log((1-x)/x) 19 | return y 20 | 21 | 22 | def transform(X, components_, explained_variance_, mean_=None, whiten=False): 23 | """Apply dimensionality reduction to X. 24 | X is projected on the first principal components previously extracted 25 | from a training set. 26 | Parameters 27 | ---------- 28 | X: array-like, shape (n_samples, n_features) 29 | New data, where n_samples is the number of samples 30 | and n_features is the number of features. 31 | components_: array-like, shape (n_components, n_features) 32 | mean_: array-like, shape (n_features,) 33 | explained_variance_: array-like, shape (n_components,) 34 | Variance explained by each of the selected components. 35 | whiten : bool, optional 36 | When True (False by default) the ``components_`` vectors are divided 37 | by ``n_samples`` times ``components_`` to ensure uncorrelated outputs 38 | with unit component-wise variances. 39 | Whitening will remove some information from the transformed signal 40 | (the relative variance scales of the components) but can sometimes 41 | improve the predictive accuracy of the downstream estimators by 42 | making data respect some hard-wired assumptions. 
43 | Returns 44 | ------- 45 | X_new : array-like, shape (n_samples, n_components) 46 | """ 47 | 48 | if mean_ is not None: 49 | X = X - mean_ 50 | X_transformed = np.dot(X, components_.T) 51 | if whiten: 52 | X_transformed /= np.sqrt(explained_variance_) 53 | return X_transformed 54 | 55 | 56 | def inverse_transform(X, components_, explained_variance_, mean_=None, whiten=False): 57 | """Transform data back to its original space. 58 | In other words, return an input X_original whose transform would be X. 59 | Parameters 60 | ---------- 61 | X : array-like, shape (n_samples, n_components) 62 | New data, where n_samples is the number of samples 63 | and n_components is the number of components. 64 | components_: array-like, shape (n_components, n_features) 65 | mean_: array-like, shape (n_features,) 66 | explained_variance_: array-like, shape (n_components,) 67 | Variance explained by each of the selected components. 68 | whiten : bool, optional 69 | When True (False by default) the ``components_`` vectors are divided 70 | by ``n_samples`` times ``components_`` to ensure uncorrelated outputs 71 | with unit component-wise variances. 72 | Whitening will remove some information from the transformed signal 73 | (the relative variance scales of the components) but can sometimes 74 | improve the predictive accuracy of the downstream estimators by 75 | making data respect some hard-wired assumptions. 76 | 77 | Returns 78 | ------- 79 | X_original array-like, shape (n_samples, n_features) 80 | """ 81 | if whiten: 82 | X_transformed = np.dot(X, np.sqrt(explained_variance_[:, np.newaxis]) * components_) 83 | else: 84 | X_transformed = np.dot(X, components_) 85 | 86 | if mean_ is not None: 87 | X_transformed = X_transformed + mean_ 88 | 89 | return X_transformed 90 | 91 | 92 | class IOUMetric(object): 93 | """ 94 | Class to calculate mean-iou using fast_hist method 95 | """ 96 | 97 | def __init__(self, num_classes): 98 | self.num_classes = num_classes 99 | self.hist = np.zeros((num_classes, num_classes)) 100 | 101 | def _fast_hist(self, label_pred, label_true): 102 | mask = (label_true >= 0) & (label_true < self.num_classes) 103 | hist = np.bincount( 104 | self.num_classes * label_true[mask].astype(int) + 105 | label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes) 106 | return hist 107 | 108 | def add_batch(self, predictions, gts): 109 | for lp, lt in zip(predictions, gts): 110 | self.hist += self._fast_hist(lp.flatten(), lt.flatten()) 111 | 112 | def evaluate(self): 113 | acc = np.diag(self.hist).sum() / self.hist.sum() 114 | acc_cls = np.diag(self.hist) / self.hist.sum(axis=1) 115 | acc_cls = np.nanmean(acc_cls) 116 | iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) 117 | mean_iu = np.nanmean(iu) 118 | freq = self.hist.sum(axis=1) / self.hist.sum() 119 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 120 | return acc, acc_cls, iu, mean_iu, fwavacc -------------------------------------------------------------------------------- /adet/modeling/MEInst/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .MEInst import MEInst 3 | from .MaskEncoding import PCAMaskEncoding 4 | -------------------------------------------------------------------------------- /adet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved 2 | from .fcos import FCOS 3 | from .blendmask import BlendMask 4 | from .backbone import build_fcos_resnet_fpn_backbone 5 | from .one_stage_detector import OneStageDetector, OneStageRCNN 6 | from .roi_heads.text_head import TextHead 7 | from .batext import BAText 8 | from .MEInst import MEInst 9 | from .condinst import condinst 10 | from .solov2 import SOLOv2 11 | from .fcpose import FCPose 12 | 13 | _EXCLUDE = {"torch", "ShapeSpec"} 14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 15 | -------------------------------------------------------------------------------- /adet/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import build_fcos_resnet_fpn_backbone 2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone 3 | from .dla import build_fcos_dla_fpn_backbone 4 | from .resnet_lpf import build_resnet_lpf_backbone 5 | from .bifpn import build_fcos_resnet_bifpn_backbone 6 | -------------------------------------------------------------------------------- /adet/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | import fvcore.nn.weight_init as weight_init 4 | 5 | from detectron2.modeling.backbone import FPN, build_resnet_backbone 6 | from detectron2.layers import ShapeSpec 7 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 8 | 9 | from .resnet_lpf import build_resnet_lpf_backbone 10 | from .resnet_interval import build_resnet_interval_backbone 11 | from .mobilenet import build_mnv2_backbone 12 | 13 | 14 | class LastLevelP6P7(nn.Module): 15 | """ 16 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from 17 | C5 or P5 feature. 18 | """ 19 | 20 | def __init__(self, in_channels, out_channels, in_features="res5"): 21 | super().__init__() 22 | self.num_levels = 2 23 | self.in_feature = in_features 24 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 25 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 26 | for module in [self.p6, self.p7]: 27 | weight_init.c2_xavier_fill(module) 28 | 29 | def forward(self, x): 30 | p6 = self.p6(x) 31 | p7 = self.p7(F.relu(p6)) 32 | return [p6, p7] 33 | 34 | 35 | class LastLevelP6(nn.Module): 36 | """ 37 | This module is used in FCOS to generate extra layers 38 | """ 39 | 40 | def __init__(self, in_channels, out_channels, in_features="res5"): 41 | super().__init__() 42 | self.num_levels = 1 43 | self.in_feature = in_features 44 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 45 | for module in [self.p6]: 46 | weight_init.c2_xavier_fill(module) 47 | 48 | def forward(self, x): 49 | p6 = self.p6(x) 50 | return [p6] 51 | 52 | 53 | @BACKBONE_REGISTRY.register() 54 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): 55 | """ 56 | Args: 57 | cfg: a detectron2 CfgNode 58 | 59 | Returns: 60 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
61 | """ 62 | if cfg.MODEL.BACKBONE.ANTI_ALIAS: 63 | bottom_up = build_resnet_lpf_backbone(cfg, input_shape) 64 | elif cfg.MODEL.RESNETS.DEFORM_INTERVAL > 1: 65 | bottom_up = build_resnet_interval_backbone(cfg, input_shape) 66 | elif cfg.MODEL.MOBILENET: 67 | bottom_up = build_mnv2_backbone(cfg, input_shape) 68 | else: 69 | bottom_up = build_resnet_backbone(cfg, input_shape) 70 | in_features = cfg.MODEL.FPN.IN_FEATURES 71 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 72 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 73 | in_channels_top = out_channels 74 | if top_levels == 2: 75 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 76 | if top_levels == 1: 77 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 78 | elif top_levels == 0: 79 | top_block = None 80 | backbone = FPN( 81 | bottom_up=bottom_up, 82 | in_features=in_features, 83 | out_channels=out_channels, 84 | norm=cfg.MODEL.FPN.NORM, 85 | top_block=top_block, 86 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 87 | ) 88 | return backbone 89 | -------------------------------------------------------------------------------- /adet/modeling/backbone/lpf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.parallel 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class Downsample(nn.Module): 9 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 10 | super(Downsample, self).__init__() 11 | self.filt_size = filt_size 12 | self.pad_off = pad_off 13 | self.pad_sizes = [int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)), int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2))] 14 | self.pad_sizes = [pad_size+pad_off for pad_size in self.pad_sizes] 15 | self.stride = stride 16 | self.off = int((self.stride-1)/2.) 17 | self.channels = channels 18 | 19 | # print('Filter size [%i]'%filt_size) 20 | if(self.filt_size==1): 21 | a = np.array([1.,]) 22 | elif(self.filt_size==2): 23 | a = np.array([1., 1.]) 24 | elif(self.filt_size==3): 25 | a = np.array([1., 2., 1.]) 26 | elif(self.filt_size==4): 27 | a = np.array([1., 3., 3., 1.]) 28 | elif(self.filt_size==5): 29 | a = np.array([1., 4., 6., 4., 1.]) 30 | elif(self.filt_size==6): 31 | a = np.array([1., 5., 10., 10., 5., 1.]) 32 | elif(self.filt_size==7): 33 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 34 | 35 | filt = torch.Tensor(a[:,None]*a[None,:]) 36 | filt = filt/torch.sum(filt) 37 | self.register_buffer('filt', filt[None,None,:,:].repeat((self.channels,1,1,1))) 38 | 39 | self.pad = get_pad_layer(pad_type)(self.pad_sizes) 40 | 41 | def forward(self, inp): 42 | if(self.filt_size==1): 43 | if(self.pad_off==0): 44 | return inp[:,:,::self.stride,::self.stride] 45 | else: 46 | return self.pad(inp)[:,:,::self.stride,::self.stride] 47 | else: 48 | return F.conv2d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 49 | 50 | def get_pad_layer(pad_type): 51 | if(pad_type in ['refl','reflect']): 52 | PadLayer = nn.ReflectionPad2d 53 | elif(pad_type in ['repl','replicate']): 54 | PadLayer = nn.ReplicationPad2d 55 | elif(pad_type=='zero'): 56 | PadLayer = nn.ZeroPad2d 57 | else: 58 | print('Pad type [%s] not recognized'%pad_type) 59 | return PadLayer 60 | 61 | 62 | class Downsample1D(nn.Module): 63 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 64 | super(Downsample1D, self).__init__() 65 | self.filt_size = filt_size 66 | self.pad_off = pad_off 67 | self.pad_sizes = [int(1. 
* (filt_size - 1) / 2), int(np.ceil(1. * (filt_size - 1) / 2))] 68 | self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes] 69 | self.stride = stride 70 | self.off = int((self.stride - 1) / 2.) 71 | self.channels = channels 72 | 73 | # print('Filter size [%i]' % filt_size) 74 | if(self.filt_size == 1): 75 | a = np.array([1., ]) 76 | elif(self.filt_size == 2): 77 | a = np.array([1., 1.]) 78 | elif(self.filt_size == 3): 79 | a = np.array([1., 2., 1.]) 80 | elif(self.filt_size == 4): 81 | a = np.array([1., 3., 3., 1.]) 82 | elif(self.filt_size == 5): 83 | a = np.array([1., 4., 6., 4., 1.]) 84 | elif(self.filt_size == 6): 85 | a = np.array([1., 5., 10., 10., 5., 1.]) 86 | elif(self.filt_size == 7): 87 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 88 | 89 | filt = torch.Tensor(a) 90 | filt = filt / torch.sum(filt) 91 | self.register_buffer('filt', filt[None, None, :].repeat((self.channels, 1, 1))) 92 | 93 | self.pad = get_pad_layer_1d(pad_type)(self.pad_sizes) 94 | 95 | def forward(self, inp): 96 | if(self.filt_size == 1): 97 | if(self.pad_off == 0): 98 | return inp[:, :, ::self.stride] 99 | else: 100 | return self.pad(inp)[:, :, ::self.stride] 101 | else: 102 | return F.conv1d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 103 | 104 | 105 | def get_pad_layer_1d(pad_type): 106 | if(pad_type in ['refl', 'reflect']): 107 | PadLayer = nn.ReflectionPad1d 108 | elif(pad_type in ['repl', 'replicate']): 109 | PadLayer = nn.ReplicationPad1d 110 | elif(pad_type == 'zero'): 111 | PadLayer = nn.ZeroPad1d 112 | else: 113 | print('Pad type [%s] not recognized' % pad_type) 114 | return PadLayer 115 | -------------------------------------------------------------------------------- /adet/modeling/batext/__init__.py: -------------------------------------------------------------------------------- 1 | from .batext import BAText 2 | -------------------------------------------------------------------------------- /adet/modeling/blendmask/__init__.py: -------------------------------------------------------------------------------- 1 | from .basis_module import build_basis_module 2 | from .blendmask import BlendMask 3 | -------------------------------------------------------------------------------- /adet/modeling/blendmask/basis_module.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from detectron2.utils.registry import Registry 6 | from detectron2.layers import ShapeSpec 7 | 8 | from adet.layers import conv_with_kaiming_uniform 9 | 10 | 11 | BASIS_MODULE_REGISTRY = Registry("BASIS_MODULE") 12 | BASIS_MODULE_REGISTRY.__doc__ = """ 13 | Registry for basis module, which produces global bases from feature maps. 14 | 15 | The registered object will be called with `obj(cfg, input_shape)`. 16 | The call should return a `nn.Module` object. 
17 | """ 18 | 19 | 20 | def build_basis_module(cfg, input_shape): 21 | name = cfg.MODEL.BASIS_MODULE.NAME 22 | return BASIS_MODULE_REGISTRY.get(name)(cfg, input_shape) 23 | 24 | 25 | @BASIS_MODULE_REGISTRY.register() 26 | class ProtoNet(nn.Module): 27 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 28 | """ 29 | TODO: support deconv and variable channel width 30 | """ 31 | # official protonet has a relu after each conv 32 | super().__init__() 33 | # fmt: off 34 | mask_dim = cfg.MODEL.BASIS_MODULE.NUM_BASES 35 | planes = cfg.MODEL.BASIS_MODULE.CONVS_DIM 36 | self.in_features = cfg.MODEL.BASIS_MODULE.IN_FEATURES 37 | self.loss_on = cfg.MODEL.BASIS_MODULE.LOSS_ON 38 | norm = cfg.MODEL.BASIS_MODULE.NORM 39 | num_convs = cfg.MODEL.BASIS_MODULE.NUM_CONVS 40 | self.visualize = cfg.MODEL.BLENDMASK.VISUALIZE 41 | # fmt: on 42 | 43 | feature_channels = {k: v.channels for k, v in input_shape.items()} 44 | 45 | conv_block = conv_with_kaiming_uniform(norm, True) # conv relu bn 46 | self.refine = nn.ModuleList() 47 | for in_feature in self.in_features: 48 | self.refine.append(conv_block( 49 | feature_channels[in_feature], planes, 3, 1)) 50 | tower = [] 51 | for i in range(num_convs): 52 | tower.append( 53 | conv_block(planes, planes, 3, 1)) 54 | tower.append( 55 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)) 56 | tower.append( 57 | conv_block(planes, planes, 3, 1)) 58 | tower.append( 59 | nn.Conv2d(planes, mask_dim, 1)) 60 | self.add_module('tower', nn.Sequential(*tower)) 61 | 62 | if self.loss_on: 63 | # fmt: off 64 | self.common_stride = cfg.MODEL.BASIS_MODULE.COMMON_STRIDE 65 | num_classes = cfg.MODEL.BASIS_MODULE.NUM_CLASSES + 1 66 | self.sem_loss_weight = cfg.MODEL.BASIS_MODULE.LOSS_WEIGHT 67 | # fmt: on 68 | 69 | inplanes = feature_channels[self.in_features[0]] 70 | self.seg_head = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=3, 71 | stride=1, padding=1, bias=False), 72 | nn.BatchNorm2d(planes), 73 | nn.ReLU(), 74 | nn.Conv2d(planes, planes, kernel_size=3, 75 | stride=1, padding=1, bias=False), 76 | nn.BatchNorm2d(planes), 77 | nn.ReLU(), 78 | nn.Conv2d(planes, num_classes, kernel_size=1, 79 | stride=1)) 80 | 81 | def forward(self, features, targets=None): 82 | for i, f in enumerate(self.in_features): 83 | if i == 0: 84 | x = self.refine[i](features[f]) 85 | else: 86 | x_p = self.refine[i](features[f]) 87 | x_p = F.interpolate(x_p, x.size()[2:], mode="bilinear", align_corners=False) 88 | # x_p = aligned_bilinear(x_p, x.size(3) // x_p.size(3)) 89 | x = x + x_p 90 | outputs = {"bases": [self.tower(x)]} 91 | losses = {} 92 | # auxiliary thing semantic loss 93 | if self.training and self.loss_on: 94 | sem_out = self.seg_head(features[self.in_features[0]]) 95 | # resize target to reduce memory 96 | gt_sem = targets.unsqueeze(1).float() 97 | gt_sem = F.interpolate( 98 | gt_sem, scale_factor=1 / self.common_stride) 99 | seg_loss = F.cross_entropy( 100 | sem_out, gt_sem.squeeze(1).long()) 101 | losses['loss_basis_sem'] = seg_loss * self.sem_loss_weight 102 | elif self.visualize and hasattr(self, "seg_head"): 103 | outputs["seg_thing_out"] = self.seg_head(features[self.in_features[0]]) 104 | return outputs, losses 105 | -------------------------------------------------------------------------------- /adet/modeling/blendmask/blender.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from detectron2.layers import cat 5 | from detectron2.modeling.poolers import 
ROIPooler 6 | 7 | 8 | def build_blender(cfg): 9 | return Blender(cfg) 10 | 11 | 12 | class Blender(object): 13 | def __init__(self, cfg): 14 | 15 | # fmt: off 16 | self.pooler_resolution = cfg.MODEL.BLENDMASK.BOTTOM_RESOLUTION 17 | sampling_ratio = cfg.MODEL.BLENDMASK.POOLER_SAMPLING_RATIO 18 | pooler_type = cfg.MODEL.BLENDMASK.POOLER_TYPE 19 | pooler_scales = cfg.MODEL.BLENDMASK.POOLER_SCALES 20 | self.attn_size = cfg.MODEL.BLENDMASK.ATTN_SIZE 21 | self.top_interp = cfg.MODEL.BLENDMASK.TOP_INTERP 22 | num_bases = cfg.MODEL.BASIS_MODULE.NUM_BASES 23 | # fmt: on 24 | 25 | self.attn_len = num_bases * self.attn_size * self.attn_size 26 | 27 | self.pooler = ROIPooler( 28 | output_size=self.pooler_resolution, 29 | scales=pooler_scales, 30 | sampling_ratio=sampling_ratio, 31 | pooler_type=pooler_type, 32 | canonical_level=2) 33 | 34 | def __call__(self, bases, proposals, gt_instances): 35 | if gt_instances is not None: 36 | # training 37 | # reshape attns 38 | dense_info = proposals["instances"] 39 | attns = dense_info.top_feats 40 | pos_inds = dense_info.pos_inds 41 | if pos_inds.numel() == 0: 42 | return None, {"loss_mask": sum([x.sum() * 0 for x in attns]) + bases[0].sum() * 0} 43 | 44 | gt_inds = dense_info.gt_inds 45 | 46 | rois = self.pooler(bases, [x.gt_boxes for x in gt_instances]) 47 | rois = rois[gt_inds] 48 | pred_mask_logits = self.merge_bases(rois, attns) 49 | 50 | # gen targets 51 | gt_masks = [] 52 | for instances_per_image in gt_instances: 53 | if len(instances_per_image.gt_boxes.tensor) == 0: 54 | continue 55 | gt_mask_per_image = instances_per_image.gt_masks.crop_and_resize( 56 | instances_per_image.gt_boxes.tensor, self.pooler_resolution 57 | ).to(device=pred_mask_logits.device) 58 | gt_masks.append(gt_mask_per_image) 59 | gt_masks = cat(gt_masks, dim=0) 60 | gt_masks = gt_masks[gt_inds] 61 | N = gt_masks.size(0) 62 | gt_masks = gt_masks.view(N, -1) 63 | 64 | gt_ctr = dense_info.gt_ctrs 65 | loss_denorm = proposals["loss_denorm"] 66 | mask_losses = F.binary_cross_entropy_with_logits( 67 | pred_mask_logits, gt_masks.to(dtype=torch.float32), reduction="none") 68 | mask_loss = ((mask_losses.mean(dim=-1) * gt_ctr).sum() 69 | / loss_denorm) 70 | return None, {"loss_mask": mask_loss} 71 | else: 72 | # no proposals 73 | total_instances = sum([len(x) for x in proposals]) 74 | if total_instances == 0: 75 | # add empty pred_masks results 76 | for box in proposals: 77 | box.pred_masks = box.pred_classes.view( 78 | -1, 1, self.pooler_resolution, self.pooler_resolution) 79 | return proposals, {} 80 | rois = self.pooler(bases, [x.pred_boxes for x in proposals]) 81 | attns = cat([x.top_feat for x in proposals], dim=0) 82 | pred_mask_logits = self.merge_bases(rois, attns).sigmoid() 83 | pred_mask_logits = pred_mask_logits.view( 84 | -1, 1, self.pooler_resolution, self.pooler_resolution) 85 | start_ind = 0 86 | for box in proposals: 87 | end_ind = start_ind + len(box) 88 | box.pred_masks = pred_mask_logits[start_ind:end_ind] 89 | start_ind = end_ind 90 | return proposals, {} 91 | 92 | def merge_bases(self, rois, coeffs, location_to_inds=None): 93 | # merge predictions 94 | N = coeffs.size(0) 95 | if location_to_inds is not None: 96 | rois = rois[location_to_inds] 97 | N, B, H, W = rois.size() 98 | 99 | coeffs = coeffs.view(N, -1, self.attn_size, self.attn_size) 100 | coeffs = F.interpolate(coeffs, (H, W), 101 | mode=self.top_interp).softmax(dim=1) 102 | masks_preds = (rois * coeffs).sum(dim=1) 103 | return masks_preds.view(N, -1) 104 | 
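A note on the shape bookkeeping in `Blender.merge_bases` above: each instance's attention coefficients are reshaped into a small K x A x A map, upsampled to the pooled basis resolution, softmax-normalized across the K bases, and used as per-pixel weights for the cropped bases. The following is a minimal, self-contained sketch of that operation on dummy tensors; the sizes (K=4 bases, 14x14 attentions, 56x56 pooled bases) are the typical BlendMask values and are assumptions here, since the real ones come from cfg.MODEL.BASIS_MODULE.NUM_BASES, cfg.MODEL.BLENDMASK.ATTN_SIZE and cfg.MODEL.BLENDMASK.BOTTOM_RESOLUTION.

import torch
import torch.nn.functional as F

N, K, A, R = 2, 4, 14, 56                  # instances, bases, attn size, pooled resolution (assumed defaults)
rois = torch.randn(N, K, R, R)             # per-instance crops of the K global bases (output of the pooler)
coeffs = torch.randn(N, K * A * A)         # flat attention vector predicted by the top module per instance

coeffs = coeffs.view(N, K, A, A)           # unflatten to one A x A map per basis
coeffs = F.interpolate(coeffs, (R, R), mode="bilinear", align_corners=False)
coeffs = coeffs.softmax(dim=1)             # normalize across the K bases at every pixel
masks = (rois * coeffs).sum(dim=1)         # weighted sum -> one R x R mask logit map per instance
print(masks.shape)                         # torch.Size([2, 56, 56])

Because of the softmax over dim=1, each output pixel is a convex combination of the K basis values at that location, which is what lets a handful of global bases be reused by every instance.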
-------------------------------------------------------------------------------- /adet/modeling/condinst/__init__.py: -------------------------------------------------------------------------------- 1 | from .condinst import CondInst 2 | -------------------------------------------------------------------------------- /adet/modeling/fcos/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | -------------------------------------------------------------------------------- /adet/modeling/fcpose/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcpose_framework import FCPose 2 | -------------------------------------------------------------------------------- /adet/modeling/fcpose/fcpose_framework.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Dict 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 7 | from detectron2.layers import ShapeSpec, NaiveSyncBatchNorm 8 | from adet.modeling.fcos import FCOS 9 | from .basis_module import basis_module 10 | from .fcpose_head import fcpose_head_module 11 | from .utils import compute_basis_stride, top_module, process_gt_instances 12 | 13 | 14 | 15 | __all__ = ["FCPose"] 16 | 17 | 18 | 19 | @PROPOSAL_GENERATOR_REGISTRY.register() 20 | class FCPose(nn.Module): 21 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 22 | super().__init__() 23 | self.fcos = FCOS(cfg, input_shape) 24 | self.top_module = top_module(256, cfg.MODEL.FCPOSE.ATTN_LEN) 25 | 26 | self.basis_module = basis_module(cfg,input_shape) 27 | 28 | self.fcpose_head = fcpose_head_module(cfg) 29 | 30 | self.gt_stride = cfg.MODEL.FCPOSE.GT_HEATMAP_STRIDE 31 | self.device = cfg.MODEL.DEVICE 32 | 33 | def forward(self, images, features, gt_instances=None): 34 | if gt_instances is not None: 35 | basis_gt_heatmap, head_gt_heatmap,p3_heatmap_list = process_gt_instances(gt_instances, self.gt_stride, self.device) 36 | else: 37 | basis_gt_heatmap, head_gt_heatmap,p3_heatmap_list = None, None, None 38 | 39 | proposals, proposal_losses = self.fcos(images, features, gt_instances, self.top_module) 40 | 41 | 42 | basis_out, basis_losses = self.basis_module(features, basis_gt_heatmap, p3_heatmap_list) 43 | del features, basis_gt_heatmap, p3_heatmap_list 44 | 45 | 46 | # base_stride = compute_basis_stride(images, basis_out) 47 | detector_results, detector_losses = self.fcpose_head( 48 | basis_out["bases"], proposals, 49 | head_gt_heatmap, gt_instances, basis_out["basis_seg"] 50 | ) 51 | 52 | losses = {} 53 | if self.training: 54 | losses.update(proposal_losses) 55 | losses.update(basis_losses) 56 | losses.update(detector_losses) 57 | 58 | 59 | return detector_results, losses -------------------------------------------------------------------------------- /adet/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /adet/modeling/solov2/__init__.py: -------------------------------------------------------------------------------- 1 | from .solov2 import SOLOv2 2 | -------------------------------------------------------------------------------- /adet/modeling/solov2/loss.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from fvcore.nn import sigmoid_focal_loss_jit 5 | 6 | 7 | def dice_loss(input, target): 8 | input = input.contiguous().view(input.size()[0], -1) 9 | target = target.contiguous().view(target.size()[0], -1).float() 10 | 11 | a = torch.sum(input * target, 1) 12 | b = torch.sum(input * input, 1) + 0.001 13 | c = torch.sum(target * target, 1) + 0.001 14 | d = (2 * a) / (b + c) 15 | return 1 - d 16 | 17 | 18 | def reduce_loss(loss, reduction): 19 | """Reduce loss as specified. 20 | Args: 21 | loss (Tensor): Elementwise loss tensor. 22 | reduction (str): Options are "none", "mean" and "sum". 23 | Return: 24 | Tensor: Reduced loss tensor. 25 | """ 26 | reduction_enum = F._Reduction.get_enum(reduction) 27 | # none: 0, elementwise_mean:1, sum: 2 28 | if reduction_enum == 0: 29 | return loss 30 | elif reduction_enum == 1: 31 | return loss.mean() 32 | elif reduction_enum == 2: 33 | return loss.sum() 34 | 35 | 36 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 37 | """Apply element-wise weight and reduce loss. 38 | Args: 39 | loss (Tensor): Element-wise loss. 40 | weight (Tensor): Element-wise weights. 41 | reduction (str): Same as built-in losses of PyTorch. 42 | avg_factor (float): Avarage factor when computing the mean of losses. 43 | Returns: 44 | Tensor: Processed loss values. 45 | """ 46 | # if weight is specified, apply element-wise weight 47 | if weight is not None: 48 | loss = loss * weight 49 | 50 | # if avg_factor is not specified, just reduce the loss 51 | if avg_factor is None: 52 | loss = reduce_loss(loss, reduction) 53 | else: 54 | # if reduction is mean, then average the loss by avg_factor 55 | if reduction == 'mean': 56 | loss = loss.sum() / avg_factor 57 | # if reduction is 'none', then do nothing, otherwise raise an error 58 | elif reduction != 'none': 59 | raise ValueError('avg_factor can not be used with reduction="sum"') 60 | return loss 61 | 62 | 63 | def sigmoid_focal_loss(pred, 64 | target, 65 | weight=None, 66 | gamma=2.0, 67 | alpha=0.25, 68 | reduction='mean', 69 | avg_factor=None): 70 | # Function.apply does not accept keyword arguments, so the decorator 71 | # "weighted_loss" is not applicable 72 | loss = sigmoid_focal_loss_jit(pred, target, gamma=gamma, alpha=alpha) 73 | if weight is not None: 74 | if weight.shape != loss.shape: 75 | if weight.size(0) == loss.size(0): 76 | # For most cases, weight is of shape (num_priors, ), 77 | # which means it does not have the second axis num_class 78 | weight = weight.view(-1, 1) 79 | else: 80 | # Sometimes, weight per anchor per class is also needed. e.g. 81 | # in FSAF. But it may be flattened of shape 82 | # (num_priors x num_class, ), while loss is still of shape 83 | # (num_priors, num_class). 84 | assert weight.numel() == loss.numel() 85 | weight = weight.view(loss.size(0), -1) 86 | assert weight.ndim == loss.ndim 87 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 88 | return loss 89 | 90 | 91 | class FocalLoss(nn.Module): 92 | 93 | def __init__(self, 94 | use_sigmoid=True, 95 | gamma=2.0, 96 | alpha=0.25, 97 | reduction='mean', 98 | loss_weight=1.0): 99 | super(FocalLoss, self).__init__() 100 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
101 | self.use_sigmoid = use_sigmoid 102 | self.gamma = gamma 103 | self.alpha = alpha 104 | self.reduction = reduction 105 | self.loss_weight = loss_weight 106 | 107 | def forward(self, 108 | pred, 109 | target, 110 | weight=None, 111 | avg_factor=None, 112 | reduction_override=None): 113 | assert reduction_override in (None, 'none', 'mean', 'sum') 114 | reduction = ( 115 | reduction_override if reduction_override else self.reduction) 116 | if self.use_sigmoid: 117 | loss_cls = self.loss_weight * sigmoid_focal_loss( 118 | pred, 119 | target, 120 | weight, 121 | gamma=self.gamma, 122 | alpha=self.alpha, 123 | reduction=reduction, 124 | avg_factor=avg_factor) 125 | else: 126 | raise NotImplementedError 127 | return loss_cls 128 | -------------------------------------------------------------------------------- /adet/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .beziers import Beziers -------------------------------------------------------------------------------- /adet/structures/beziers.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import torch 3 | 4 | 5 | class Beziers: 6 | """ 7 | This structure stores a list of bezier curves as a Nx16 torch.Tensor. 8 | It will support some common methods about bezier shapes 9 | (`area`, `clip`, `nonempty`, etc), 10 | and also behaves like a Tensor 11 | (support indexing, `to(device)`, `.device`, and iteration over all boxes) 12 | 13 | Attributes: 14 | tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). 15 | """ 16 | 17 | def __init__(self, tensor: torch.Tensor): 18 | """ 19 | Args: 20 | tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). 21 | """ 22 | device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") 23 | tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) 24 | if tensor.numel() == 0: 25 | # Use reshape, so we don't end up creating a new tensor that does not depend on 26 | # the inputs (and consequently confuses jit) 27 | tensor = tensor.reshape((0, 16)).to(dtype=torch.float32, device=device) 28 | assert tensor.dim() == 2 and tensor.size(-1) == 16, tensor.size() 29 | 30 | self.tensor = tensor 31 | 32 | def to(self, device: str) -> "Beziers": 33 | return Beziers(self.tensor.to(device)) 34 | 35 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Beziers": 36 | """ 37 | Returns: 38 | Beziers: Create a new :class:`Beziers` by indexing. 
39 | """ 40 | if isinstance(item, int): 41 | return Beziers(self.tensor[item].view(1, -1)) 42 | b = self.tensor[item] 43 | assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) 44 | return Beziers(b) -------------------------------------------------------------------------------- /adet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aim-uofa/AdelaiDet/5e19cb172b8363820b409ed1a2754fb19ad3acb8/adet/utils/__init__.py -------------------------------------------------------------------------------- /adet/utils/comm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.distributed as dist 4 | 5 | from detectron2.utils.comm import get_world_size 6 | 7 | 8 | def reduce_sum(tensor): 9 | world_size = get_world_size() 10 | if world_size < 2: 11 | return tensor 12 | tensor = tensor.clone() 13 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 14 | return tensor 15 | 16 | 17 | def reduce_mean(tensor): 18 | num_gpus = get_world_size() 19 | total = reduce_sum(tensor) 20 | return total.float() / num_gpus 21 | 22 | 23 | def aligned_bilinear(tensor, factor): 24 | assert tensor.dim() == 4 25 | assert factor >= 1 26 | assert int(factor) == factor 27 | 28 | if factor == 1: 29 | return tensor 30 | 31 | h, w = tensor.size()[2:] 32 | tensor = F.pad(tensor, pad=(0, 1, 0, 1), mode="replicate") 33 | oh = factor * h + 1 34 | ow = factor * w + 1 35 | tensor = F.interpolate( 36 | tensor, size=(oh, ow), 37 | mode='bilinear', 38 | align_corners=True 39 | ) 40 | tensor = F.pad( 41 | tensor, pad=(factor // 2, 0, factor // 2, 0), 42 | mode="replicate" 43 | ) 44 | 45 | return tensor[:, :, :oh - 1, :ow - 1] 46 | 47 | 48 | def compute_locations(h, w, stride, device): 49 | shifts_x = torch.arange( 50 | 0, w * stride, step=stride, 51 | dtype=torch.float32, device=device 52 | ) 53 | shifts_y = torch.arange( 54 | 0, h * stride, step=stride, 55 | dtype=torch.float32, device=device 56 | ) 57 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 58 | shift_x = shift_x.reshape(-1) 59 | shift_y = shift_y.reshape(-1) 60 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 61 | return locations 62 | 63 | 64 | def compute_ious(pred, target): 65 | """ 66 | Args: 67 | pred: Nx4 predicted bounding boxes 68 | target: Nx4 target bounding boxes 69 | Both are in the form of FCOS prediction (l, t, r, b) 70 | """ 71 | pred_left = pred[:, 0] 72 | pred_top = pred[:, 1] 73 | pred_right = pred[:, 2] 74 | pred_bottom = pred[:, 3] 75 | 76 | target_left = target[:, 0] 77 | target_top = target[:, 1] 78 | target_right = target[:, 2] 79 | target_bottom = target[:, 3] 80 | 81 | target_aera = (target_left + target_right) * \ 82 | (target_top + target_bottom) 83 | pred_aera = (pred_left + pred_right) * \ 84 | (pred_top + pred_bottom) 85 | 86 | w_intersect = torch.min(pred_left, target_left) + \ 87 | torch.min(pred_right, target_right) 88 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 89 | torch.min(pred_top, target_top) 90 | 91 | g_w_intersect = torch.max(pred_left, target_left) + \ 92 | torch.max(pred_right, target_right) 93 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 94 | torch.max(pred_top, target_top) 95 | ac_uion = g_w_intersect * g_h_intersect 96 | 97 | area_intersect = w_intersect * h_intersect 98 | area_union = target_aera + pred_aera - area_intersect 99 | 100 | ious = (area_intersect + 1.0) / 
(area_union + 1.0) 101 | gious = ious - (ac_uion - area_union) / ac_uion 102 | 103 | return ious, gious 104 | -------------------------------------------------------------------------------- /configs/BAText/Base-BAText.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageRCNN" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "BAText" 11 | FCOS: 12 | NMS_TH: 0.5 13 | THRESH_WITH_CTR: False 14 | USE_SCALE: False 15 | NUM_CLASSES: 1 16 | INFERENCE_TH_TRAIN: 0.7 17 | INFERENCE_TH_TEST: 0.45 18 | ROI_HEADS: 19 | NAME: "TextHead" 20 | IOU_THRESHOLDS: [0.5] 21 | SOLVER: 22 | CLIP_GRADIENTS: 23 | ENABLED: True 24 | INPUT: 25 | HFLIP_TRAIN: False 26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 832, 864, 896) 27 | MAX_SIZE_TRAIN: 1600 28 | MIN_SIZE_TEST: 1000 29 | MAX_SIZE_TEST: 1824 30 | CROP: 31 | ENABLED: True 32 | CROP_INSTANCE: False 33 | SIZE: [0.1, 0.1] -------------------------------------------------------------------------------- /configs/BAText/CTW1500/Base-CTW1500.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | MODEL: 3 | BATEXT: 4 | POOLER_RESOLUTION: (8,128) 5 | NUM_CHARS: 100 6 | FCOS: 7 | INFERENCE_TH_TEST: 0.6 8 | DATASETS: 9 | TRAIN: ("ctw1500_word_train",) 10 | TEST: ("ctw1500_word_test",) 11 | INPUT: 12 | MIN_SIZE_TEST: 800 13 | MAX_SIZE_TEST: 1024 14 | -------------------------------------------------------------------------------- /configs/BAText/CTW1500/attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CTW1500.yaml" 2 | MODEL: 3 | WEIGHTS: "weights/batext/pretrain_attn_R_50.pth" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" # "attn" "rnn" 8 | SOLVER: 9 | IMS_PER_BATCH: 8 10 | BASE_LR: 0.001 11 | STEPS: (80000,) 12 | MAX_ITER: 120000 13 | CHECKPOINT_PERIOD: 10000 14 | TEST: 15 | EVAL_PERIOD: 10000 16 | OUTPUT_DIR: "output/batext/ctw1500/attn_R_50" 17 | -------------------------------------------------------------------------------- /configs/BAText/CTW1500/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CTW1500.yaml" 2 | MODEL: 3 | WEIGHTS: "model_v2_pretrain.pth" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | FCOS: 18 | # Best e2e: 0.5; Best det: 0.3 19 | INFERENCE_TH_TEST: 0.5 20 | SOLVER: 21 | IMS_PER_BATCH: 8 22 | BASE_LR: 0.001 23 | STEPS: (80000, ) 24 | MAX_ITER: 100000 25 | CHECKPOINT_PERIOD: 10000 26 | TEST: 27 | EVAL_PERIOD: 10000 28 | OUTPUT_DIR: "output/batext/ctw1500/v2_attn_R_50" 29 | -------------------------------------------------------------------------------- /configs/BAText/ICDAR2015/Base-ic15.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("icdar2015_train",) 4 | TEST: ("icdar2015_test",) -------------------------------------------------------------------------------- /configs/BAText/ICDAR2015/v1_attn_R_50.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "Base-ic15.yaml" 2 | MODEL: 3 | RESNETS: 4 | DEPTH: 50 5 | BATEXT: 6 | RECOGNIZER: "attn" 7 | SOLVER: 8 | IMS_PER_BATCH: 4 9 | BASE_LR: 0.001 10 | MAX_ITER: 5500 11 | CHECKPOINT_PERIOD: 500 12 | INPUT: 13 | MIN_SIZE_TRAIN: (980, 1044, 1108, 1172, 1236, 1300, 1364, 1428, 1492) 14 | MAX_SIZE_TRAIN: 2900 15 | MIN_SIZE_TEST: 2000 16 | MAX_SIZE_TEST: 4000 17 | IS_ROTATE: True 18 | TEST: 19 | EVAL_PERIOD: 500 20 | OUTPUT_DIR: "output/batext/ic15/v1_attn_R_50" 21 | -------------------------------------------------------------------------------- /configs/BAText/ICDAR2015/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-ic15.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_bifpn_backbone" 5 | BiFPN: 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | OUT_CHANNELS: 256 8 | NUM_REPEATS: 2 9 | NORM: "SyncBN" 10 | RESNETS: 11 | DEPTH: 50 12 | BATEXT: 13 | RECOGNIZER: "attn" 14 | USE_COORDCONV: True 15 | USE_AET: True 16 | POOLER_RESOLUTION: (16, 64) 17 | FCOS: 18 | INFERENCE_TH_TEST: 0.4 19 | NMS_TH: 0.4 20 | SOLVER: 21 | IMS_PER_BATCH: 4 22 | BASE_LR: 0.001 23 | MAX_ITER: 5500 24 | CHECKPOINT_PERIOD: 500 25 | INPUT: 26 | MIN_SIZE_TRAIN: (980, 1044, 1108, 1172, 1236, 1300, 1364, 1428, 1492) 27 | MAX_SIZE_TRAIN: 2900 28 | MIN_SIZE_TEST: 2000 29 | MAX_SIZE_TEST: 4000 30 | IS_ROTATE: True 31 | TEST: 32 | EVAL_PERIOD: 500 33 | OUTPUT_DIR: "output/batext/ic15/v2_attn_R_50" 34 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/Base-Chn-Pretrain.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("chnsyn_train", "rects_train", "rects_val", "lsvt_train", "art_train", ) 4 | TEST: ("rects_test", ) 5 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/Base-Pretrain-ic15.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("mltbezier_word_train", "totaltext_train", "syntext1_train", "syntext2_train", "icdar2013_train", "icdar2015_train") 4 | TEST: ("icdar2015_test",) -------------------------------------------------------------------------------- /configs/BAText/Pretrain/Base-Pretrain.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("mltbezier_word_train", "totaltext_train", "syntext1_train", "syntext2_train",) 4 | TEST: ("totaltext_val",) -------------------------------------------------------------------------------- /configs/BAText/Pretrain/attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Pretrain.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" 8 | SOLVER: 9 | IMS_PER_BATCH: 8 10 | BASE_LR: 0.01 11 | STEPS: (160000, 220000) 12 | MAX_ITER: 260000 13 | CHECKPOINT_PERIOD: 20000 14 | TEST: 15 | EVAL_PERIOD: 20000 16 | OUTPUT_DIR: "output/batext/pretrain/attn_R_50" 17 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v1_ic15_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
"Base-Pretrain-ic15.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" 8 | POOLER_RESOLUTION: (16, 64) 9 | SOLVER: 10 | IMS_PER_BATCH: 8 11 | BASE_LR: 0.01 12 | STEPS: (160000, 220000) 13 | MAX_ITER: 260000 14 | CHECKPOINT_PERIOD: 5000 15 | TEST: 16 | EVAL_PERIOD: 20000 17 | INPUT: 18 | IS_ROTATE: True 19 | OUTPUT_DIR: "output/batext/pretrain/v1_ic15_attn_R_50" 20 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Pretrain.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | SOLVER: 18 | IMS_PER_BATCH: 8 19 | BASE_LR: 0.01 20 | STEPS: (160000, 220000) 21 | MAX_ITER: 260000 22 | CHECKPOINT_PERIOD: 20000 23 | TEST: 24 | EVAL_PERIOD: 20000 25 | OUTPUT_DIR: "output/batext/pretrain/v2_attn_R_50" 26 | 27 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v2_chn_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Chn-Pretrain.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | VOC_SIZE: 5462 18 | CUSTOM_DICT: "chn_cls_list" 19 | SOLVER: 20 | IMS_PER_BATCH: 8 21 | BASE_LR: 0.01 22 | STEPS: (160000, 220000) 23 | MAX_ITER: 260000 24 | CHECKPOINT_PERIOD: 10000 25 | INPUT: 26 | CROP: 27 | ENABLED: False 28 | OUTPUT_DIR: "output/batext/chn_pretrain/v2_attn_R_50" 29 | 30 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v2_ic15_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Pretrain-ic15.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | POOLER_RESOLUTION: (16, 64) 18 | SOLVER: 19 | IMS_PER_BATCH: 8 20 | BASE_LR: 0.01 21 | STEPS: (160000, 220000) 22 | MAX_ITER: 260000 23 | CHECKPOINT_PERIOD: 20000 24 | TEST: 25 | EVAL_PERIOD: 20000 26 | INPUT: 27 | IS_ROTATE: True 28 | OUTPUT_DIR: "output/batext/pretrain/v2_ic15_attn_R_50" 29 | 30 | -------------------------------------------------------------------------------- /configs/BAText/ReCTS/Base-ReCTS.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("rects_train", "rects_val") 4 | TEST: ("rects_test",) 5 | 
-------------------------------------------------------------------------------- /configs/BAText/ReCTS/v2_chn_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-ReCTS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | VOC_SIZE: 5462 18 | CUSTOM_DICT: "chn_cls_list" 19 | SOLVER: 20 | IMS_PER_BATCH: 8 21 | BASE_LR: 0.001 22 | STEPS: (140000, 160000) 23 | MAX_ITER: 180000 24 | CHECKPOINT_PERIOD: 10000 25 | INPUT: 26 | CROP: 27 | ENABLED: False 28 | OUTPUT_DIR: "output/batext/rects/v2_attn_R_50" 29 | 30 | -------------------------------------------------------------------------------- /configs/BAText/TotalText/Base-TotalText.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("totaltext_train",) 4 | TEST: ("totaltext_val",) -------------------------------------------------------------------------------- /configs/BAText/TotalText/attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-TotalText.yaml" 2 | MODEL: 3 | WEIGHTS: "weights/batext/pretrain_attn_R_50.pth" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" # "attn" "rnn" 8 | SOLVER: 9 | IMS_PER_BATCH: 8 10 | BASE_LR: 0.001 11 | MAX_ITER: 5000 12 | CHECKPOINT_PERIOD: 1000 13 | TEST: 14 | EVAL_PERIOD: 1000 15 | OUTPUT_DIR: "output/batext/totaltext/attn_R_50" 16 | -------------------------------------------------------------------------------- /configs/BAText/TotalText/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-TotalText.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | FCOS: 18 | # Best e2e: 0.5; Best det: 0.4 19 | INFERENCE_TH_TEST: 0.5 20 | SOLVER: 21 | IMS_PER_BATCH: 8 22 | BASE_LR: 0.001 23 | MAX_ITER: 5000 24 | CHECKPOINT_PERIOD: 1000 25 | TEST: 26 | EVAL_PERIOD: 1000 27 | OUTPUT_DIR: "output/batext/pretrain/v2_attn_R_50" 28 | -------------------------------------------------------------------------------- /configs/BlendMask/550_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-550.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/blendmask/550_R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/550_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-550.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/blendmask/550_R_50_3x" 10 | 
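The 1x/3x suffixes in the BlendMask configs refer to the training schedule: 550_R_50_1x.yaml keeps the Base-BlendMask schedule (MAX_ITER 90000 with LR decay at STEPS 60000/80000), while 550_R_50_3x.yaml triples both (MAX_ITER 270000, STEPS 210000/250000). A quick back-of-the-envelope check of what those iteration counts mean in epochs, under the assumption that coco_2017_train has roughly 118k images (the exact count is not part of these configs):

ims_per_batch = 16            # SOLVER.IMS_PER_BATCH from Base-BlendMask.yaml
coco_train_images = 118_000   # approximate size of coco_2017_train (assumed)

for name, max_iter in [("1x", 90_000), ("3x", 270_000)]:
    epochs = max_iter * ims_per_batch / coco_train_images
    print(f"{name}: {max_iter} iters ~= {epochs:.1f} epochs")
# 1x: 90000 iters ~= 12.2 epochs
# 3x: 270000 iters ~= 36.6 epochs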
-------------------------------------------------------------------------------- /configs/BlendMask/550_R_50_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-550.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | INPUT: 10 | MIN_SIZE_TRAIN: (440, 594) 11 | MIN_SIZE_TRAIN_SAMPLING: "range" 12 | MAX_SIZE_TRAIN: 990 13 | CROP: 14 | ENABLED: True 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/blendmask/550_R_50_dcni3_5x" 19 | -------------------------------------------------------------------------------- /configs/BlendMask/Base-550.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | FCOS: 4 | TOP_LEVELS: 1 5 | IN_FEATURES: ["p3", "p4", "p5", "p6"] 6 | FPN_STRIDES: [8, 16, 32, 64] 7 | SIZES_OF_INTEREST: [64, 128, 256] 8 | NUM_SHARE_CONVS: 3 9 | NUM_CLS_CONVS: 0 10 | NUM_BOX_CONVS: 0 11 | BASIS_MODULE: 12 | NUM_CONVS: 2 13 | INPUT: 14 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550) 15 | MAX_SIZE_TRAIN: 916 16 | MIN_SIZE_TEST: 550 17 | MAX_SIZE_TEST: 916 18 | -------------------------------------------------------------------------------- /configs/BlendMask/Base-BlendMask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "BlendMask" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | BASIS_MODULE: 13 | LOSS_ON: True 14 | PANOPTIC_FPN: 15 | COMBINE: 16 | ENABLED: False 17 | FCOS: 18 | THRESH_WITH_CTR: True 19 | USE_SCALE: False 20 | DATASETS: 21 | TRAIN: ("coco_2017_train",) 22 | TEST: ("coco_2017_val",) 23 | SOLVER: 24 | IMS_PER_BATCH: 16 25 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 26 | STEPS: (60000, 80000) 27 | MAX_ITER: 90000 28 | INPUT: 29 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 30 | -------------------------------------------------------------------------------- /configs/BlendMask/Base-RT.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | FCOS: 9 | TOP_LEVELS: 0 10 | SIZES_OF_INTEREST: [64, 128] 11 | FPN_STRIDES: [8, 16, 32] 12 | IN_FEATURES: ['p3', 'p4', 'p5'] 13 | SOLVER: 14 | STEPS: (300000, 340000) 15 | MAX_ITER: 360000 -------------------------------------------------------------------------------- /configs/BlendMask/DLA_34_syncbn_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RT.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_fcos_dla_fpn_backbone" 5 | FREEZE_AT: -1 6 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 7 | DLA: 8 | CONV_BODY: "DLA34" 9 | NORM: "SyncBN" 10 | FPN: 11 | IN_FEATURES: ["level3", "level4", "level5"] 12 | OUTPUT_DIR: "output/blendmask/DLA_34_syncbn_4x" 13 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/Base-Panoptic.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BlendMask.yaml" 2 | MODEL: 3 | RESNETS: 4 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 5 | FPN: 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | SEM_SEG_HEAD: 8 | LOSS_WEIGHT: 0.5 9 | PANOPTIC_FPN: 10 | COMBINE: 11 | ENABLED: True 12 | INSTANCES_CONFIDENCE_THRESH: 0.45 13 | OVERLAP_THRESH: 0.4 14 | DATASETS: 15 | TRAIN: ("coco_2017_train_panoptic_separated",) 16 | TEST: ("coco_2017_val_panoptic_separated",) 17 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/panoptic/blendmask/R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_101_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | SOLVER: 10 | STEPS: (280000, 360000) 11 | MAX_ITER: 400000 12 | INPUT: 13 | MIN_SIZE_TRAIN: (640, 864) 14 | MIN_SIZE_TRAIN_SAMPLING: "range" 15 | MAX_SIZE_TRAIN: 1333 16 | CROP: 17 | ENABLED: True 18 | OUTPUT_DIR: "output/panoptic/blendmask/R_101_dcni3_5x" 19 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/panoptic/blendmask/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/panoptic/blendmask/R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_50_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | SOLVER: 10 | STEPS: (280000, 360000) 11 | MAX_ITER: 400000 12 | INPUT: 13 | MIN_SIZE_TRAIN: (640, 864) 14 | MIN_SIZE_TRAIN_SAMPLING: "range" 15 | MAX_SIZE_TRAIN: 1440 16 | CROP: 17 | ENABLED: True 18 | OUTPUT_DIR: "output/panoptic/blendmask/R_50_dcni3_5x" 19 | -------------------------------------------------------------------------------- /configs/BlendMask/Person/Base-Person.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BlendMask.yaml" 2 | MODEL: 3 | BASIS_MODULE: 4 | NUM_CLASSES: 1 5 | FCOS: 6 
| NUM_CLASSES: 1 7 | DATASETS: 8 | TRAIN: ("pic_person_train",) 9 | TEST: ("pic_person_val",) 10 | -------------------------------------------------------------------------------- /configs/BlendMask/Person/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Person.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9u1cG2zXvEva5SM/download#R_50_3x.pth" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/person/blendmask/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/RT_R_50_4x_bn-head_syncbn_shtw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "RT_R_50_4x_syncbn_shtw.yaml" 2 | MODEL: 3 | FCOS: 4 | NORM: "SyncBN" 5 | OUTPUT_DIR: "output/blendmask/RT_R_50_4x_bn-head_syncbn_shtw" 6 | -------------------------------------------------------------------------------- /configs/BlendMask/RT_R_50_4x_syncbn_shtw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RT.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | NORM: "SyncBN" 7 | BACKBONE: 8 | FREEZE_AT: -1 9 | FCOS: 10 | NUM_SHARE_CONVS: 3 11 | NUM_CLS_CONVS: 0 12 | NUM_BOX_CONVS: 0 13 | BASIS_MODULE: 14 | NUM_CONVS: 2 15 | OUTPUT_DIR: "output/blendmask/RT_R_50_4x_syncbn_shtw" 16 | -------------------------------------------------------------------------------- /configs/BlendMask/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/blendmask/R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/BlendMask/R_101_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | SOLVER: 10 | STEPS: (280000, 360000) 11 | MAX_ITER: 400000 12 | INPUT: 13 | MIN_SIZE_TRAIN: (640, 864) 14 | MIN_SIZE_TRAIN_SAMPLING: "range" 15 | MAX_SIZE_TRAIN: 1440 16 | CROP: 17 | ENABLED: True 18 | TEST: 19 | EVAL_PERIOD: 20000 20 | OUTPUT_DIR: "output/blendmask/R_101_dcni3_5x" 21 | -------------------------------------------------------------------------------- /configs/BlendMask/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/blendmask/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/blendmask/R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/BoxInst/Base-BoxInst.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CondInst" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | FCOS: 13 | THRESH_WITH_CTR: True 14 | USE_SCALE: True 15 | CONDINST: 16 | TOPK_PROPOSALS_PER_IM: 64 17 | MASK_BRANCH: 18 | OUT_CHANNELS: 16 19 | BOXINST: 20 | ENABLED: True 21 | BOTTOM_PIXELS_REMOVED: 10 22 | PAIRWISE: 23 | SIZE: 3 24 | DILATION: 2 25 | COLOR_THRESH: 0.3 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | SOLVER: 30 | IMS_PER_BATCH: 16 31 | BASE_LR: 0.01 32 | STEPS: (60000, 80000) 33 | MAX_ITER: 90000 34 | INPUT: 35 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 36 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | OUTPUT_DIR: "output/boxinst_MS_R_101_1x" 7 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/boxinst_MS_R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/boxinst_MS_R_101_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_BiFPN_dcni3_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | DEFORM_ON_PER_STAGE: [False, True, True, True] 9 | DEFORM_MODULATED: True 10 | DEFORM_INTERVAL: 3 11 | BiFPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS: 160 14 | NORM: "SyncBN" 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/boxinst_MS_R_101_BiFPN_dcni3_3x" 19 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/boxinst_MS_R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/boxinst_MS_R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_BiFPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | OUTPUT_DIR: "output/boxinst_MS_R_50_1x_bifpn" 13 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/boxinst_MS_R_50_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/BoxInst/README.md: -------------------------------------------------------------------------------- 1 | # BoxInst: High-Performance Instance Segmentation with Box Annotations 2 | 3 | BoxInst: High-Performance Instance Segmentation with Box Annotations; 4 | Zhi Tian, Chunhua Shen, Xinlong Wang and Hao Chen; 5 | In: Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR), 2021. 6 | arXiv preprint arXiv:2012.02310 7 | 8 | [[`Paper`](https://arxiv.org/abs/2012.02310)] [[`BibTeX`](#citing-boxinst)] [[`Video Demo`](https://www.youtube.com/watch?v=NuF8NAYf5L8)] 9 | 10 | 11 | # Installation & Quick Start 12 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 13 | set up the datasets (e.g., MS-COCO). 14 | 15 | For demo, run the following command lines: 16 | ``` 17 | wget https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_50_3x.pth?download=true -O BoxInst_MS_R_50_3x.pth 18 | python demo/demo.py \ 19 | --config-file configs/BoxInst/MS_R_50_3x.yaml \ 20 | --input input1.jpg input2.jpg \ 21 | --opts MODEL.WEIGHTS BoxInst_MS_R_50_3x.pth 22 | ``` 23 | 24 | For training on COCO, run: 25 | ``` 26 | OMP_NUM_THREADS=1 python tools/train_net.py \ 27 | --config-file configs/BoxInst/MS_R_50_1x.yaml \ 28 | --num-gpus 8 \ 29 | OUTPUT_DIR training_dir/BoxInst_MS_R_50_1x 30 | ``` 31 | 32 | For evaluation on COCO, run: 33 | ``` 34 | OMP_NUM_THREADS=1 python tools/train_net.py \ 35 | --config-file configs/BoxInst/MS_R_50_1x.yaml \ 36 | --eval-only \ 37 | --num-gpus 8 \ 38 | OUTPUT_DIR training_dir/BoxInst_MS_R_50_1x \ 39 | MODEL.WEIGHTS training_dir/BoxInst_MS_R_50_1x/model_final.pth 40 | ``` 41 | 42 | 43 | ## Models 44 | ### COCO Instance Segmentation Baselines with [BoxInst](https://arxiv.org/abs/2012.02310) 45 | 46 | Only **box annotations** are used during training. 47 | 48 | Name | inf. 
time | box AP | mask AP | mask AP (test-dev)| download 49 | --- |:---:|:---:|:---:|:---:|:---: 50 | [BoxInst_MS_R_50_1x](MS_R_50_1x.yaml) | 14 FPS | 39.4 | 30.7 | - | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_50_1x.pth?download=true) 51 | [BoxInst_MS_R_50_3x](MS_R_50_3x.yaml) | 14 FPS | 41.5 | 31.8 | 32.1 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_50_3x.pth?download=true) 52 | [BoxInst_MS_R_101_1x](MS_R_101_1x.yaml) | 11 FPS | 41.4 | 32.2 | 32.5 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_1x.pth?download=true) 53 | [BoxInst_MS_R_101_3x](MS_R_101_3x.yaml) | 11 FPS | 43.3 | 33.0 | 33.2 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_3x.pth?download=true) 54 | [BoxInst_MS_R_101_BiFPN_3x](MS_R_101_BiFPN_3x.yaml) | 10 FPS | 45.4 | 34.1 | 33.9 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_BiFPN_3x.pth?download=true) 55 | [BoxInst_MS_R_101_BiFPN_dcni3_3x](MS_R_101_BiFPN_dcni3_3x.yaml) | 8 FPS | 46.4 | 34.8 | 35.0 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_BiFPN_dcni3_3x.pth?download=true) 56 | 57 | Disclaimer: 58 | - All models are trained with multi-scale data augmentation. Inference time is measured on a single NVIDIA 1080Ti with batch size 1. 59 | - This is a reimplementation. Thus, the numbers might be slightly different from the ones reported in our original paper. 60 | 61 | 62 | # Citing BoxInst 63 | If you use BoxInst in your research or wish to refer to the baseline results, please use the following BibTeX entries. 64 | ```BibTeX 65 | @inproceedings{tian2020boxinst, 66 | title = {{BoxInst}: High-Performance Instance Segmentation with Box Annotations}, 67 | author = {Tian, Zhi and Shen, Chunhua and Wang, Xinlong and Chen, Hao}, 68 | booktitle = {Proc. IEEE Conf. 
Computer Vision and Pattern Recognition (CVPR)}, 69 | year = {2021} 70 | } 71 | ``` 72 | -------------------------------------------------------------------------------- /configs/CondInst/Base-CondInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CondInst" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | FCOS: 13 | THRESH_WITH_CTR: True 14 | USE_SCALE: True 15 | CONDINST: 16 | MAX_PROPOSALS: 500 17 | DATASETS: 18 | TRAIN: ("coco_2017_train",) 19 | TEST: ("coco_2017_val",) 20 | SOLVER: 21 | IMS_PER_BATCH: 16 22 | BASE_LR: 0.01 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | INPUT: 26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 27 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | OUTPUT_DIR: "output/condinst_MS_R_101_1x" 7 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/condinst_MS_R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | CONDINST: 7 | MASK_BRANCH: 8 | SEMANTIC_LOSS_ON: True 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "output/condinst_MS_R_101_3x_sem" 13 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/condinst_MS_R_101_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_BiFPN_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | CONDINST: 13 | MASK_BRANCH: 14 | SEMANTIC_LOSS_ON: True 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/condinst_MS_R_101_3x_bifpn_sem" 19 | 
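The `*_sem` variants above differ from their plain counterparts only in `MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON` (and `OUTPUT_DIR`). A small sketch of applying the same override programmatically, again assuming `adet.config.get_cfg` as in the earlier sketch; the command-line equivalent is appending the KEY VALUE pairs after the `tools/train_net.py` flags:

```
from adet.config import get_cfg  # assumed AdelaiDet config entry point

cfg = get_cfg()
cfg.merge_from_file("configs/CondInst/MS_R_101_BiFPN_3x.yaml")
# Turn on the auxiliary semantic-segmentation loss and redirect the output dir,
# which is all that MS_R_101_BiFPN_3x_sem.yaml adds on top of the base file.
cfg.merge_from_list([
    "MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON", "True",
    "OUTPUT_DIR", "output/condinst_MS_R_101_3x_bifpn_sem",
])
```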
-------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/condinst_MS_R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/condinst_MS_R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | CONDINST: 7 | MASK_BRANCH: 8 | SEMANTIC_LOSS_ON: True 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "output/condinst_MS_R_50_3x_sem" 13 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_BiFPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | OUTPUT_DIR: "output/condinst_MS_R_50_1x_bifpn" 13 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/condinst_MS_R_50_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_BiFPN_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | CONDINST: 13 | MASK_BRANCH: 14 | SEMANTIC_LOSS_ON: True 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/condinst_MS_R_50_3x_bifpn_sem" 19 | -------------------------------------------------------------------------------- /configs/CondInst/README.md: -------------------------------------------------------------------------------- 1 | # Conditional Convolutions for Instance Segmentation (Oral) 2 | 3 | Conditional Convolutions for Instance Segmentation; 4 | Zhi Tian, Chunhua Shen and Hao Chen; 5 | In: Proc. 
European Conference on Computer Vision (ECCV), 2020. 6 | arXiv preprint arXiv:2003.05664 7 | 8 | [[`Paper`](https://arxiv.org/abs/2003.05664)] [[`BibTeX`](#citing-condinst)] 9 | 10 | 11 | # Installation & Quick Start 12 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 13 | set up the datasets (e.g., MS-COCO). 14 | 15 | For demo, run the following command lines: 16 | ``` 17 | wget https://cloudstor.aarnet.edu.au/plus/s/M8nNxSR5iNP4qyO/download -O CondInst_MS_R_101_3x_sem.pth 18 | python demo/demo.py \ 19 | --config-file configs/CondInst/MS_R_101_3x_sem.yaml \ 20 | --input input1.jpg input2.jpg \ 21 | --opts MODEL.WEIGHTS CondInst_MS_R_101_3x_sem.pth 22 | ``` 23 | 24 | For training on COCO, run: 25 | ``` 26 | OMP_NUM_THREADS=1 python tools/train_net.py \ 27 | --config-file configs/CondInst/MS_R_50_1x.yaml \ 28 | --num-gpus 8 \ 29 | OUTPUT_DIR training_dir/CondInst_MS_R_50_1x 30 | ``` 31 | 32 | For evaluation on COCO, run: 33 | ``` 34 | OMP_NUM_THREADS=1 python tools/train_net.py \ 35 | --config-file configs/CondInst/MS_R_50_1x.yaml \ 36 | --eval-only \ 37 | --num-gpus 8 \ 38 | OUTPUT_DIR training_dir/CondInst_MS_R_50_1x \ 39 | MODEL.WEIGHTS training_dir/CondInst_MS_R_50_1x/model_final.pth 40 | ``` 41 | 42 | 43 | ## Models 44 | ### COCO Instance Segmentation Baselines with [CondInst](https://arxiv.org/abs/2003.05664) 45 | 46 | Name | inf. time | box AP | mask AP | download 47 | --- |:---:|:---:|:---:|:---: 48 | [CondInst_MS_R_50_1x](MS_R_50_1x.yaml) | 14 FPS | 39.7 | 35.7 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_1x.pth?download=true) 49 | [CondInst_MS_R_50_3x](MS_R_50_3x.yaml) | 14 FPS | 41.9 | 37.5 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_3x.pth?download=true) 50 | [CondInst_MS_R_101_3x](MS_R_101_3x.yaml) | 11 FPS | 43.3 | 38.6 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_101_3x.pth?download=true) 51 | 52 | With an auxiliary semantic segmentation task (set `MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON = True` to enable it): 53 | 54 | Name | inf. time | box AP | mask AP | mask AP (test-dev) | download 55 | --- |:---:|:---:|:---:|:---:|:---: 56 | [CondInst_MS_R_50_3x_sem](MS_R_50_3x_sem.yaml) | 14 FPS | 42.6 | 38.2 | 38.7 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_3x_sem.pth?download=true) 57 | [CondInst_MS_R_101_3x_sem](MS_R_101_3x_sem.yaml) | 11 FPS | 44.6 | 39.8 | 40.1 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_101_3x_sem.pth?download=true) 58 | 59 | With BiFPN: 60 | 61 | Name | inf. 
time | box AP | mask AP | download 62 | --- |:---:|:---:|:---:|:---: 63 | [CondInst_MS_R_50_BiFPN_1x](MS_R_50_BiFPN_1x.yaml) | 13 FPS | 42.5 | 37.3 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_BiFPN_1x.pth?download=true) 64 | [CondInst_MS_R_50_BiFPN_3x](MS_R_50_BiFPN_3x.yaml) | 13 FPS | 44.3 | 38.9 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_BiFPN_3x.pth?download=true) 65 | [CondInst_MS_R_50_BiFPN_3x_sem](MS_R_50_BiFPN_3x_sem.yaml) | 13 FPS | 44.7 | 39.4 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_BiFPN_3x_sem.pth?download=true) 66 | [CondInst_MS_R_101_BiFPN_3x](MS_R_101_BiFPN_3x.yaml) | 10 FPS | 45.3 | 39.6 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_101_BiFPN_3x.pth?download=true) 67 | [CondInst_MS_R_101_BiFPN_3x_sem](MS_R_101_BiFPN_3x_sem.yaml) | 10 FPS | 45.7 | 40.2 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_R_101_BiFPN_3x_sem.pth?download=true) 68 | 69 | 70 | *Disclaimer:* 71 | 72 | - All models are trained with multi-scale data augmentation. Inference time is measured on a single NVIDIA 1080Ti with batch size 1. 73 | - The final mask's resolution is 1/4 of the input image (i.e., `MODEL.CONDINST.MASK_OUT_STRIDE = 4`, which is enough on MS-COCO and different from our original paper. In the paper, we used `MODEL.CONDINST.MASK_OUT_STRIDE = 2`. If you want high-resolution mask results, please reduce it. 74 | - This is a reimplementation. Thus, the numbers are slightly different from our original paper (within 0.1% in mask AP). 75 | 76 | 77 | # Citing CondInst 78 | If you use CondInst in your research or wish to refer to the baseline results, please use the following BibTeX entries. 79 | ```BibTeX 80 | @inproceedings{tian2020conditional, 81 | title = {Conditional Convolutions for Instance Segmentation}, 82 | author = {Tian, Zhi and Shen, Chunhua and Chen, Hao}, 83 | booktitle = {Proc. Eur. Conf. Computer Vision (ECCV)}, 84 | year = {2020} 85 | } 86 | ``` 87 | -------------------------------------------------------------------------------- /configs/DenseCL/FCOS_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../FCOS-Detection/Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | FREEZE_AT: 0 6 | RESNETS: 7 | DEPTH: 50 8 | NORM: "SyncBN" 9 | FPN: 10 | NORM: "SyncBN" 11 | TEST: 12 | PRECISE_BN: 13 | ENABLED: True 14 | -------------------------------------------------------------------------------- /configs/DenseCL/FCOS_R50_1x_DenseCL.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "FCOS_R50_1x.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | -------------------------------------------------------------------------------- /configs/DenseCL/README.md: -------------------------------------------------------------------------------- 1 | # Dense Contrastive Learning for Self-Supervised Visual Pre-Training 2 | 3 | Here we provide instructions and results for applying DenseCL pre-trained models to AdelaiDet. Please refer to [https://git.io/DenseCL 4 | ](https://git.io/DenseCL 5 | ) for the pre-training code. 
6 | 7 | > [**Dense Contrastive Learning for Self-Supervised Visual Pre-Training**](https://arxiv.org/abs/2011.09157), 8 | > Xinlong Wang, Rufeng Zhang, Chunhua Shen, Tao Kong, Lei Li 9 | > In: Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR), 2021, **Oral** 10 | > *arXiv preprint ([arXiv 2011.09157](https://arxiv.org/abs/2011.09157))* 11 | 12 | 13 | # Installation 14 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 15 | set up the datasets (e.g., MS-COCO). 16 | 17 | 18 | # DenseCL Pre-trained Models 19 | pre-train method | pre-train dataset | backbone | #epoch | Link 20 | --- |:---:|:---:|:---:|:---: 21 | DenseCL | COCO | ResNet-50 | 800 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r50_coco_800ep.pth) 22 | DenseCL | COCO | ResNet-50 | 1600 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r50_coco_1600ep.pth) 23 | DenseCL | ImageNet | ResNet-50 | 200 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r50_imagenet_200ep.pth) 24 | DenseCL | ImageNet | ResNet-101 | 200 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r101_imagenet_200ep.pth) 25 | 26 | 27 | # Usage 28 | 29 | ## Download the pre-trained model 30 | ``` 31 | PRETRAIN_DIR=./ 32 | wget https://cloudstor.aarnet.edu.au/plus/s/hdAg5RYm8NNM2QP/download -O ${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 33 | ``` 34 | 35 | ## Convert it to detectron2's format 36 | Use [convert-pretrain-to-detectron2.py](https://github.com/WXinlong/DenseCL/blob/main/benchmarks/detection/convert-pretrain-to-detectron2.py) to convert the pre-trained backbone weights: 37 | ``` 38 | WEIGHT_FILE=${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pth 39 | OUTPUT_FILE=${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 40 | python convert-pretrain-to-detectron2.py ${WEIGHT_FILE} ${OUTPUT_FILE} 41 | ``` 42 | 43 | ## Train the downstream models 44 | 45 | For training a SOLOv2, run: 46 | ``` 47 | OMP_NUM_THREADS=1 python tools/train_net.py \ 48 | --config-file configs/DenseCL/SOLOv2_R50_1x_DenseCL.yaml \ 49 | --num-gpus 8 \ 50 | OUTPUT_DIR training_dir/SOLOv2_R50_1x_DenseCL \ 51 | MODEL.WEIGHTS ${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 52 | ``` 53 | 54 | For training a FCOS, run: 55 | ``` 56 | OMP_NUM_THREADS=1 python tools/train_net.py \ 57 | --config-file configs/DenseCL/FCOS_R50_1x_DenseCL.yaml \ 58 | --num-gpus 8 \ 59 | OUTPUT_DIR training_dir/FCOS_R50_1x_DenseCL \ 60 | MODEL.WEIGHTS ${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 61 | ``` 62 | 63 | 64 | # Performance 65 | ## SOLOv2 on COCO Instance Segmentation 66 | 67 | pre-train method | pre-train dataset | mask AP | 68 | --- |:---:|:---:| 69 | Supervised | ImageNet | 35.2 70 | MoCo-v2 | ImageNet | 35.2 71 | DenseCL | ImageNet | 35.7 (+0.5) 72 | 73 | ## FCOS on COCO Object Detection 74 | 75 | pre-train method | pre-train dataset | box AP | 76 | --- |:---:|:---:| 77 | Supervised | ImageNet | 39.9 78 | MoCo-v2 | ImageNet | 40.3 79 | DenseCL | ImageNet | 40.9 (+1.0) 80 | 81 | 82 | 83 | # Citation 84 | Please consider citing our paper in your publications if the project helps your research. BibTeX reference is as follows. 85 | ```BibTeX 86 | @inproceedings{wang2020densecl, 87 | title = {Dense Contrastive Learning for Self-Supervised Visual Pre-Training}, 88 | author = {Wang, Xinlong and Zhang, Rufeng and Shen, Chunhua and Kong, Tao and Li, Lei}, 89 | booktitle = {Proc. 
IEEE Conf. Computer Vision and Pattern Recognition (CVPR)}, 90 | year = {2021} 91 | } 92 | ``` 93 | -------------------------------------------------------------------------------- /configs/DenseCL/SOLOv2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../SOLOv2/Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | FREEZE_AT: 0 6 | RESNETS: 7 | DEPTH: 50 8 | NORM: "SyncBN" 9 | FPN: 10 | NORM: "SyncBN" 11 | TEST: 12 | PRECISE_BN: 13 | ENABLED: True 14 | -------------------------------------------------------------------------------- /configs/DenseCL/SOLOv2_R50_1x_DenseCL.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "SOLOv2_R50_1x.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/Base-FCOS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "FCOS" 11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | SOLVER: 23 | STEPS: (300000, 340000) 24 | MAX_ITER: 360000 25 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn" 26 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 
32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NORM: "SyncBN" 23 | SOLVER: 24 | STEPS: (300000, 340000) 25 | MAX_ITER: 360000 26 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_bn_head" 27 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | SOLVER: 26 | STEPS: (300000, 340000) 27 | MAX_ITER: 360000 28 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers" 29 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | NORM: "SyncBN" 26 | SOLVER: 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers_bn_head" 30 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | SOLVER: 18 | STEPS: (300000, 340000) 19 | MAX_ITER: 360000 20 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn" 21 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | NORM: "SyncBN" 18 | SOLVER: 19 | STEPS: (300000, 340000) 20 | MAX_ITER: 360000 21 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn_bn_head" 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_101_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_101_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_101_2x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | FCOS: 7 | BOX_QUALITY: "iou" 8 | SOLVER: 9 | STEPS: (120000, 160000) 10 | MAX_ITER: 180000 11 | OUTPUT_DIR: "output/fcos/MS_R_101_2x_iou" 12 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_50_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_50_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_50_2x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | FCOS: 7 | BOX_QUALITY: "iou" 8 | SOLVER: 9 | STEPS: (120000, 160000) 10 | MAX_ITER: 180000 11 | OUTPUT_DIR: "output/fcos/MS_R_50_2x_iou" 12 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/X_101_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2" 18 | 
-------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | BOX_QUALITY: "iou" 15 | SOLVER: 16 | STEPS: (120000, 160000) 17 | MAX_ITER: 180000 18 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2_iou" 19 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | FCOS: 11 | BOX_QUALITY: "iou" 12 | SOLVER: 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | OUTPUT_DIR: "output/fcos/X_101_2x_iou" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x_dcnv2" 18 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | INPUT: 7 | MIN_SIZE_TRAIN: (800,) 8 | OUTPUT_DIR: "output/fcos/R_50_1x" 9 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/R_50_1x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | FCOS: 7 | BOX_QUALITY: "iou" 8 | INPUT: 9 | MIN_SIZE_TRAIN: (800,) 10 | OUTPUT_DIR: "output/fcos/R_50_1x_iou" 11 | 
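A minimal inference sketch for one of the FCOS configs above, assuming the standard detectron2 `DefaultPredictor` flow (this mirrors what the `demo/demo.py` commands in the READMEs do, but it is only an illustrative outline, and the checkpoint path is a placeholder):

```
import cv2
import adet  # assumed to register AdelaiDet's meta-architectures and proposal generators
from adet.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file("configs/FCOS-Detection/R_50_1x.yaml")
cfg.MODEL.WEIGHTS = "path/to/FCOS_R_50_1x.pth"  # hypothetical local checkpoint
cfg.MODEL.FCOS.INFERENCE_TH_TEST = 0.3          # keep only reasonably confident detections

predictor = DefaultPredictor(cfg)
image = cv2.imread("input1.jpg")                # BGR, as detectron2 expects by default
instances = predictor(image)["instances"]
print(instances.pred_boxes, instances.scores, instances.pred_classes)
```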
-------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_39_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-39-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_39_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_57_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-57-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_57_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_99_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-99-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_99_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/README.md: -------------------------------------------------------------------------------- 1 | # [VoVNet-v2](https://github.com/youngwanLEE/CenterMask) backbone networks in [FCOS](https://github.com/aim-uofa/adet) 2 | **Efficient Backbone Network for Object Detection and Segmentation**\ 3 | Youngwan Lee 4 | 5 | 6 | [[`vovnet-detectron2`](https://github.com/youngwanLEE/vovnet-detectron2)][[`CenterMask(code)`](https://github.com/youngwanLEE/CenterMask)] [[`VoVNet-v1(arxiv)`](https://arxiv.org/abs/1904.09730)] [[`VoVNet-v2(arxiv)`](https://arxiv.org/abs/1911.06667)] [[`BibTeX`](#CitingVoVNet)] 7 | 8 | 9 |
10 | 11 |
12 | 13 | 14 | ## Comparison with Faster R-CNN and ResNet 15 | 16 | ### Note 17 | 18 | We measure the inference time of all models with batch size 1 on the same V100 GPU machine. 19 | 20 | - pytorch1.3.1 21 | - CUDA 10.1 22 | - cuDNN 7.3 23 | 24 | 25 | |Method|Backbone|lr sched|inference time|AP|APs|APm|APl|download| 26 | |---|:--------:|:---:|:--:|--|----|----|---|--------| 27 | |Faster|R-50-FPN|3x|0.047|40.2|24.2|43.5|52.0|model \| metrics 28 | |Faster|**V2-39-FPN**|3x|0.047|42.7|27.1|45.6|54.0|model \| metrics 29 | |**FCOS**|**V2-39-FPN**|3x|0.045|43.5|28.1|47.2|54.5|model \| metrics 30 | || 31 | |Faster|R-101-FPN|3x|0.063|42.0|25.2|45.6|54.6|model \| metrics 32 | |Faster|**V2-57-FPN**|3x|0.054|43.3|27.5|46.7|55.3|model \| metrics 33 | |**FCOS**|**V2-57-FPN**|3x|0.051|44.4|28.8|47.2|56.3|model \| metrics 34 | || 35 | |Faster|X-101-FPN|3x|0.120|43.0|27.2|46.1|54.9|model \| metrics| 36 | |Faster|**V2-99-FPN**|3x|0.073|44.1|28.1|47.0|56.4|model \| metrics| 37 | |**FCOS**|**V2-99-FPN**|3x|0.070|45.2|29.2|48.4|57.3|model \| metrics| 38 | 39 | 40 | 41 | ## Citing VoVNet 42 | 43 | If you use VoVNet, please use the following BibTeX entry. 44 | 45 | ```BibTeX 46 | @inproceedings{lee2019energy, 47 | title = {An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection}, 48 | author = {Lee, Youngwan and Hwang, Joong-won and Lee, Sangrok and Bae, Yuseok and Park, Jongyoul}, 49 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops}, 50 | year = {2019} 51 | } 52 | 53 | @article{lee2019centermask, 54 | title={CenterMask: Real-Time Anchor-Free Instance Segmentation}, 55 | author={Lee, Youngwan and Park, Jongyoul}, 56 | journal={arXiv preprint arXiv:1911.06667}, 57 | year={2019} 58 | } 59 | ``` 60 | -------------------------------------------------------------------------------- /configs/FCPose/Base-FCPose.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 3 | RESNETS: 4 | DEPTH: 50 5 | KEYPOINT_ON: True 6 | FCPOSE_ON: True 7 | META_ARCHITECTURE: "OneStageDetector" 8 | BACKBONE: 9 | NAME: "build_fcos_resnet_fpn_backbone" 10 | RESNETS: 11 | OUT_FEATURES: ["res3", "res4", "res5"] 12 | FPN: 13 | IN_FEATURES: ["res3", "res4", "res5"] 14 | PROPOSAL_GENERATOR: 15 | NAME: "FCPose" 16 | DATASETS: 17 | TRAIN: ("keypoints_coco_2017_train",) 18 | TEST: ("keypoints_coco_2017_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.01 22 | STEPS: (60000, 80000) 23 | MAX_ITER: 90000 24 | CLIP_GRADIENTS: 25 | ENABLED: True 26 | CLIP_VALUE: 1.0 27 | INPUT: 28 | MIN_SIZE_TRAIN_SAMPLING: "range" 29 | MIN_SIZE_TRAIN: [320,800] 30 | MAX_SIZE_TRAIN: 1333 31 | CROP: 32 | ENABLED: True 33 | TYPE: "relative_range" 34 | SIZE: [0.4, 0.4] 35 | TEST: 36 | EVAL_PERIOD: 5000 37 | -------------------------------------------------------------------------------- /configs/FCPose/README.md: -------------------------------------------------------------------------------- 1 | # FCPose: Fully Convolutional Multi-Person Pose Estimation with Dynamic Instance-Aware Convolutions 2 | 3 | 4 | 5 | # Installation & Quick Start 6 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 7 | set up the datasets (e.g., MS-COCO). 
8 | 9 | For training on COCO, run: 10 | ``` 11 | python tools/train_net.py \ 12 | --num-gpus 8 \ 13 | --config-file configs/FCPose/R_50_3X.yaml \ 14 | --dist-url tcp://127.0.0.1:$(( RANDOM % 1000 + 50000 )) \ 15 | OUTPUT_DIR training_dir/R_50_3X 16 | ``` 17 | 18 | For evaluation on COCO, run: 19 | ``` 20 | python tools/train_net.py \ 21 | --num-gpus 8 \ 22 | --eval-only \ 23 | --config-file configs/FCPose/R_50_3X.yaml \ 24 | --dist-url tcp://127.0.0.1:$(( RANDOM % 1000 + 50000 )) \ 25 | OUTPUT_DIR training_dir/R_50_3X \ 26 | MODEL.WEIGHTS training_dir/R_50_3X/model_final.pth 27 | ``` 28 | 29 | 30 | ## Models 31 | ### COCO Keypoint Detection Baselines with FCPose 32 | 33 | Name | inf. time | box AP | keypoint AP | download 34 | --- |:---:|:---:|:---:|:---: 35 | [FCPose_R50_3x](R_50_3X.yaml) | 45ms | 57.9 | 65.2 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/fcpose50.pth) 36 | [FCPose_R101_3x](R_101_3X.yaml) | 58ms | 58.7 | 67.0 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/fcpose101.pth) 37 | 38 | 39 | *Disclaimer:* 40 | 41 | - Inference time is measured on 8 V100 GPUs. 42 | - This is a reimplementation. Thus, the numbers are slightly different from our original paper. 43 | - This is an alpha version. We will update our implementation later, including adding a real-time version of FCPose and fixing the issue of the loss becoming NaN. If your loss becomes NaN during training, please try again. 44 | 45 | 46 | # Citations 47 | Please consider citing our papers in your publications if the project helps your research. BibTeX reference is as follows. 48 | ```BibTeX 49 | @inproceedings{mao2021fcpose, 50 | title={FCPose: Fully Convolutional Multi-Person Pose Estimation with Dynamic Instance-Aware Convolutions}, 51 | author={Mao, Weian and Tian, Zhi and Wang, Xinlong and Shen, Chunhua}, 52 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 53 | pages={9034--9043}, 54 | year={2021} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /configs/FCPose/R_101_3X.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCPose.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | FCOS: 7 | NUM_CLASSES: 1 8 | SOLVER: 9 | STEPS: (180000, 240000) 10 | MAX_ITER: 270000 11 | # WARMUP_FACTOR: 1.0 / 3000 12 | # WARMUP_ITERS: 3000 -------------------------------------------------------------------------------- /configs/FCPose/R_50_3X.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCPose.yaml" 2 | SOLVER: 3 | STEPS: (180000, 240000) 4 | MAX_ITER: 270000 5 | MODEL: 6 | FCOS: 7 | NUM_CLASSES: 1 8 | # FCPOSE: 9 | # LOSS_WEIGHT_DIRECTION: 0.0 10 | # LOSS_WEIGHT_KEYPOINT: 0.0 11 | # BASIS_MODULE: 12 | # LOSS_WEIGHT: 0.0 -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/Base-MEInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | MASK_ON: True 3 | META_ARCHITECTURE: "OneStageDetector" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "MEInst" 12 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 13 | DATASETS: 14 | TRAIN: ("coco_2017_train",) 15 | TEST: ("coco_2017_val",) 16 | SOLVER: 17 | 
IMS_PER_BATCH: 16 18 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 19 | STEPS: (60000, 80000) 20 | MAX_ITER: 90000 21 | INPUT: 22 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | USE_DEFORMABLE: True 10 | LAST_DEFORMABLE: True 11 | TYPE_DEFORMABLE: "DCNv1" 12 | INPUT: 13 | MIN_SIZE_TRAIN: (800,) 14 | SOLVER: 15 | WARMUP_METHOD: "constant" 16 | WARMUP_FACTOR: 0.3333 17 | WARMUP_ITERS: 500 18 | OUTPUT_DIR: "output/MEInst/R_50_1x" 19 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_1x_none.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | INPUT: 10 | MIN_SIZE_TRAIN: (800,) 11 | SOLVER: 12 | WARMUP_METHOD: "constant" 13 | WARMUP_FACTOR: 0.3333 14 | WARMUP_ITERS: 500 15 | OUTPUT_DIR: "output/MEInst/R_50_1x_none" 16 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | USE_DEFORMABLE: True 10 | LAST_DEFORMABLE: True 11 | TYPE_DEFORMABLE: "DCNv1" 12 | SOLVER: 13 | STEPS: (180000, 240000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/MEInst/R_50_3x" 16 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_3x_512.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | USE_DEFORMABLE: True 10 | LAST_DEFORMABLE: True 11 | TYPE_DEFORMABLE: "DCNv1" 12 | INPUT: 13 | MIN_SIZE_TRAIN: (384, 416, 448, 480, 512, 544) 14 | MIN_SIZE_TEST: 512 15 | SOLVER: 16 | STEPS: (180000, 240000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/MEInst/R_50_3x_512" 19 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # Mask Encoding for Single Shot Instance Segmentation 2 | 3 | Rufeng Zhang, Zhi Tian, Chunhua Shen, Mingyu You, Youliang Yan 4 | 5 | [[`arXiv`](https://arxiv.org/abs/2003.11712)] [[`BibTeX`](#CitingMEInst)] 6 | 7 | ## Models 8 | 9 | ### COCO Instance Segmentation Baselines with [MEInst](https://arxiv.org/abs/2003.11712) 10 | 11 | Name | inf. 
time | box AP | mask AP | download 12 | --- |:---:|:---:|:---:|:---: 13 | [MEInst_R_50_1x_none](MEInst_R_50_1x_none.yaml) | 13 FPS | 39.5 | 30.7 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_1x_none.pth) 14 | [MEInst_R_50_1x](MEInst_R_50_1x.yaml) | 12 FPS | 40.1 | 31.7 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_1x.pth) 15 | [MEInst_R_50_3x](MEInst_R_50_3x.yaml) | 12 FPS | 43.6 | 34.5 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_3x.pth) 16 | [MEInst_R_50_3x_512](MEInst_R_50_3x_512.yaml) | 19 FPS | 40.8 | 32.2 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_3x_512.pth) 17 | 18 | *Inference time is measured on a NVIDIA 1080Ti with batch size 1.* 19 | 20 | ## Quick Start 21 | 22 | 1. Download the [matrix](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz) file for mask encoding during training 23 | 2. Symlink the matrix path to datasets/components/xxx.npz, e.g., 24 | `coco/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz` 25 | 3. Follow [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) for install, train and inference 26 | 27 | ### Step by step for Mask Encoding (Optional) 28 | 29 | We recommend to directly download the [matrix](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz) file and use it, as it can already handle most cases. 30 | And we also provide tools to generate encoding matrix yourself. 31 | 32 | Example: 33 | 34 | * Generate encoding matrix 35 | 36 | `python adet/modeling/MEInst/LME/mask_generation.py` 37 | 38 | * Evaluate the quality of reconstruction 39 | 40 | `python adet/modeling/MEInst/LME/mask_evaluation.py` 41 | 42 | ## Citing MEInst 43 | 44 | If you use MEInst, please use the following BibTeX entry. 45 | 46 | ```BibTeX 47 | @inproceedings{zhang2020MEInst, 48 | title = {Mask Encoding for Single Shot Instance Segmentation}, 49 | author = {Zhang, Rufeng and Tian, Zhi and Shen, Chunhua and You, Mingyu and Yan, Youliang}, 50 | booktitle = {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)}, 51 | year = {2020} 52 | } 53 | ``` 54 | 55 | ## License 56 | 57 | For academic use, this project is licensed under the 2-clause BSD License - see the LICENSE file for details. For commercial use, please contact [Chunhua Shen](https://cs.adelaide.edu.au/~chhshen/). 
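As a final sanity check on the downloaded or self-generated encoding matrix (see Quick Start above), the `.npz` file can be inspected before training. A short sketch (the array names stored inside the file are not assumed here, only listed):

```python
import numpy as np

# Path as used in the Quick Start symlink step above.
path = "datasets/coco/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz"
with np.load(path) as data:
    for name in data.files:  # list whatever arrays the matrix file contains
        print(name, data[name].shape, data[name].dtype)
```
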
58 | -------------------------------------------------------------------------------- /configs/RCNN/550_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "output/mask_rcnn/550_R_50_3x/model_final.pth" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | INPUT: 11 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550) 12 | MAX_SIZE_TRAIN: 916 13 | MIN_SIZE_TEST: 550 14 | MAX_SIZE_TEST: 916 15 | OUTPUT_DIR: "output/mask_rcnn/550_R_50_3x" 16 | -------------------------------------------------------------------------------- /configs/RCNN/Base-RCNN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/RCNN/LVIS/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-LVIS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/lvis/mask_rcnn/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/RCNN/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/SOLOv2/Base-SOLOv2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SOLOv2" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | TEST: ("coco_2017_val",) 13 | SOLVER: 14 | IMS_PER_BATCH: 16 15 | BASE_LR: 0.01 16 | 
WARMUP_FACTOR: 0.01 17 | WARMUP_ITERS: 1000 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | MASK_FORMAT: "bitmask" 23 | VERSION: 2 24 | 25 | 26 | -------------------------------------------------------------------------------- /configs/SOLOv2/R101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/SOLOv2/R50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/SOLOv2/README.md: -------------------------------------------------------------------------------- 1 | # SOLOv2: Dynamic and Fast Instance Segmentation 2 | 3 | 4 | > [**SOLOv2: Dynamic and Fast Instance Segmentation**](https://arxiv.org/abs/2003.10152), 5 | > Xinlong Wang, Rufeng Zhang, Tao Kong, Lei Li, Chunhua Shen 6 | > In: Proc. Advances in Neural Information Processing Systems (NeurIPS), 2020 7 | > *arXiv preprint ([arXiv 2003.10152](https://arxiv.org/abs/2003.10152))* 8 | 9 | 10 | 11 | # Installation & Quick Start 12 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 13 | set up the datasets (e.g., MS-COCO). 14 | 15 | For demo, run the following command lines: 16 | ``` 17 | wget https://cloudstor.aarnet.edu.au/plus/s/chF3VKQT4RDoEqC/download -O SOLOv2_R50_3x.pth 18 | python demo/demo.py \ 19 | --config-file configs/SOLOv2/R50_3x.yaml \ 20 | --input input1.jpg input2.jpg \ 21 | --opts MODEL.WEIGHTS SOLOv2_R50_3x.pth 22 | ``` 23 | 24 | For training on COCO, run: 25 | ``` 26 | OMP_NUM_THREADS=1 python tools/train_net.py \ 27 | --config-file configs/SOLOv2/R50_3x.yaml \ 28 | --num-gpus 8 \ 29 | OUTPUT_DIR training_dir/SOLOv2_R50_3x 30 | ``` 31 | 32 | For evaluation on COCO, run: 33 | ``` 34 | OMP_NUM_THREADS=1 python tools/train_net.py \ 35 | --config-file configs/SOLOv2/R50_3x.yaml \ 36 | --eval-only \ 37 | --num-gpus 8 \ 38 | OUTPUT_DIR training_dir/SOLOv2_R50_3x \ 39 | MODEL.WEIGHTS training_dir/SOLOv2_R50_3x/model_final.pth 40 | ``` 41 | 42 | 43 | ## Models 44 | ### COCO Instance Segmentation Baselines with SOLOv2 45 | 46 | Name | inf. time | train. time | Mem | box AP | mask AP | download 47 | --- |:---:|:---:|:---:|:---:|:---:|:---: 48 | [SOLOv2_R50_3x](R50_3x.yaml) | 47ms | ~25h(36 epochs) | 3.7GB | - | 37.6 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/SOLOv2_R50_3x.pth) 49 | [SOLOv2_R101_3x](R101_3x.yaml) | 61ms | ~30h(36 epochs) | 4.7GB | - | 39.0 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/SOLOv2_R101_3x.pth) 50 | 51 | 52 | *Disclaimer:* 53 | 54 | - All models are trained with multi-scale data augmentation. 55 | - Inference time is measured on a single V100 GPU. Training time is measured on 8 V100 GPUs. 56 | - This is a reimplementation. Thus, the numbers are slightly different from our original paper (within 0.3% in mask AP). 
57 | - The implementation on mmdetection is available at [https://github.com/WXinlong/SOLO](https://github.com/WXinlong/SOLO). 58 | 59 | 60 | # Citations 61 | Please consider citing our papers in your publications if the project helps your research. BibTeX reference is as follows. 62 | ```BibTeX 63 | @inproceedings{wang2020solo, 64 | title = {{SOLO}: Segmenting Objects by Locations}, 65 | author = {Wang, Xinlong and Kong, Tao and Shen, Chunhua and Jiang, Yuning and Li, Lei}, 66 | booktitle = {Proc. Eur. Conf. Computer Vision (ECCV)}, 67 | year = {2020} 68 | } 69 | 70 | ``` 71 | 72 | ```BibTeX 73 | @inproceedings{wang2020solov2, 74 | title = {{SOLOv2}: Dynamic and Fast Instance Segmentation}, 75 | author = {Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua}, 76 | booktitle = {Proc. Advances in Neural Information Processing Systems (NeurIPS)}, 77 | year = {2020} 78 | } 79 | ``` 80 | 81 | ```BibTeX 82 | @article{wang2021solo, 83 | title = {{SOLO}: A Simple Framework for Instance Segmentation}, 84 | author = {Wang, Xinlong and Zhang, Rufeng and Shen, Chunhua and Kong, Tao and Li, Lei}, 85 | journal = {IEEE T. Pattern Analysis and Machine Intelligence (TPAMI)}, 86 | year = {2021} 87 | } 88 | ``` 89 | -------------------------------------------------------------------------------- /datasets/gen_coco_person.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import json 5 | error_list = ['23382.png', '23441.png', '20714.png', '20727.png', '23300.png', '21200.png'] 6 | 7 | def mask2box(mask): 8 | index = np.argwhere(mask == 1) 9 | rows = index[:, 0] 10 | clos = index[:, 1] 11 | y1 = int(np.min(rows)) # y 12 | x1 = int(np.min(clos)) # x 13 | y2 = int(np.max(rows)) 14 | x2 = int(np.max(clos)) 15 | return (x1, y1, x2, y2) 16 | 17 | def gen_coco(phase): 18 | result = { 19 | "info": {"description": "PIC2.0 dataset."}, 20 | "categories": [ 21 | {"supercategory": "none", "id": 1, "name": "person"} 22 | ] 23 | } 24 | out_json = phase +'_person.json' 25 | store_segmentation = True 26 | 27 | images_info = [] 28 | labels_info = [] 29 | img_id = 0 30 | files = tuple(open("pic/list5/"+phase+'_id', 'r')) 31 | files = (_.strip() for _ in files) 32 | 33 | for index, image_name in enumerate(files): 34 | image_name = image_name+".png" 35 | print(index, image_name) 36 | if image_name in error_list: 37 | continue 38 | instance = cv2.imread(os.path.join('instance', phase, image_name), flags=cv2.IMREAD_GRAYSCALE) 39 | semantic = cv2.imread(os.path.join('semantic', phase, image_name), flags=cv2.IMREAD_GRAYSCALE) 40 | # print(instance.shape, semantic.shape) 41 | h = instance.shape[0] 42 | w = instance.shape[1] 43 | images_info.append( 44 | { 45 | "file_name": image_name[:-4]+'.jpg', 46 | "height": h, 47 | "width": w, 48 | "id": index 49 | } 50 | ) 51 | instance_max_num = instance.max() 52 | instance_ids = np.unique(instance) 53 | for instance_id in instance_ids: 54 | if instance_id == 0: 55 | continue 56 | instance_part = instance == instance_id 57 | object_pos = instance_part.nonzero() 58 | # category_id_ = int(semantic[object_pos[0][0], object_pos[1][0]]) 59 | category_id = int(np.max(semantic[object_pos[0], object_pos[1]])) 60 | # assert category_id_ == category_id, (category_id_, category_id) 61 | if category_id != 1: 62 | continue 63 | area = int(instance_part.sum()) 64 | x1, y1, x2, y2 = mask2box(instance_part) 65 | w = x2 - x1 + 1 66 | h = y2 - y1 + 1 67 | segmentation = [] 68 | if store_segmentation: 
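# The boolean instance mask is converted to a binary uint8 image for OpenCV;
# cv2.findContours (OpenCV 4.x two-value signature) extracts its contours, and
# CHAIN_APPROX_SIMPLE keeps only the end points of straight segments. Each
# contour is flattened to [x1, y1, x2, y2, ...] below to form a COCO polygon.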
69 | contours, hierarchy = cv2.findContours((instance_part * 255).astype(np.uint8), cv2.RETR_TREE, 70 | cv2.CHAIN_APPROX_SIMPLE) 71 | for contour in contours: 72 | contour = contour.flatten().tolist() 73 | if len(contour) > 4: 74 | segmentation.append(contour) 75 | if len(segmentation) == 0: 76 | print('error: empty segmentation for instance, skipped') 77 | continue 78 | labels_info.append( 79 | { 80 | "segmentation": segmentation, # poly 81 | "area": area, # segmentation area 82 | "iscrowd": 0, 83 | "image_id": index, 84 | "bbox": [x1, y1, w, h], 85 | "category_id": category_id, 86 | "id": img_id 87 | }, 88 | ) 89 | img_id += 1 90 | # break 91 | result["images"] = images_info 92 | result["annotations"] = labels_info 93 | with open('pic/annotations/' + out_json, 'w') as f: 94 | json.dump(result, f, indent=4) 95 | 96 | if __name__ == "__main__": 97 | if not os.path.exists('pic/annotations/'): 98 | os.makedirs('pic/annotations/') 99 | gen_coco("train") 100 | gen_coco("val") 101 | #gen_coco("test") 102 | -------------------------------------------------------------------------------- /datasets/prepare_thing_sem_from_instance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import time 5 | import functools 6 | import multiprocessing as mp 7 | import numpy as np 8 | import os 9 | import argparse 10 | from pycocotools.coco import COCO 11 | from pycocotools import mask as maskUtils 12 | 13 | from detectron2.data.datasets.builtin_meta import _get_coco_instances_meta 14 | 15 | 16 | def annToRLE(ann, img_size): 17 | h, w = img_size 18 | segm = ann['segmentation'] 19 | if type(segm) == list: 20 | # polygon -- a single object might consist of multiple parts 21 | # we merge all parts into one mask rle code 22 | rles = maskUtils.frPyObjects(segm, h, w) 23 | rle = maskUtils.merge(rles) 24 | elif type(segm['counts']) == list: 25 | # uncompressed RLE 26 | rle = maskUtils.frPyObjects(segm, h, w) 27 | else: 28 | # rle 29 | rle = ann['segmentation'] 30 | return rle 31 | 32 | 33 | def annToMask(ann, img_size): 34 | rle = annToRLE(ann, img_size) 35 | m = maskUtils.decode(rle) 36 | return m 37 | 38 | 39 | def _process_instance_to_semantic(anns, output_semantic, img, categories): 40 | img_size = (img["height"], img["width"]) 41 | output = np.zeros(img_size, dtype=np.uint8) 42 | for ann in anns: 43 | mask = annToMask(ann, img_size) 44 | output[mask == 1] = categories[ann["category_id"]] + 1 45 | # save as compressed npz 46 | np.savez_compressed(output_semantic, mask=output) 47 | # Image.fromarray(output).save(output_semantic) 48 | 49 | 50 | def create_coco_semantic_from_instance(instance_json, sem_seg_root, categories): 51 | """ 52 | Create semantic segmentation annotations from instance segmentation 53 | annotations, to be used by PanopticFPN. 54 | 55 | It maps all thing categories to contiguous ids starting from 1, and maps all unlabeled pixels to class 0 56 | 57 | Args: 58 | instance_json (str): path to the instance json file, in COCO's format. 59 | sem_seg_root (str): a directory to output semantic annotation files 60 | categories (dict): 
maps each dataset "category_id" 61 | to a contiguous thing id starting from 0 62 | (the stored semantic label is that id plus 1). 63 | """ 64 | os.makedirs(sem_seg_root, exist_ok=True) 65 | 66 | coco_detection = COCO(instance_json) 67 | 68 | def iter_annotations(): 69 | for img_id in coco_detection.getImgIds(): 70 | anns_ids = coco_detection.getAnnIds(img_id) 71 | anns = coco_detection.loadAnns(anns_ids) 72 | img = coco_detection.loadImgs(int(img_id))[0] 73 | file_name = os.path.splitext(img["file_name"])[0] 74 | output = os.path.join(sem_seg_root, file_name + '.npz') 75 | yield anns, output, img 76 | 77 | # single process 78 | # print("Start writing to {} ...".format(sem_seg_root)) 79 | # start = time.time() 80 | # for anno, oup, img in iter_annotations(): 81 | # _process_instance_to_semantic( 82 | # anno, oup, img, categories) 83 | # print("Finished. time: {:.2f}s".format(time.time() - start)) 84 | # return 85 | 86 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 87 | 88 | print("Start writing to {} ...".format(sem_seg_root)) 89 | start = time.time() 90 | pool.starmap( 91 | functools.partial( 92 | _process_instance_to_semantic, 93 | categories=categories), 94 | iter_annotations(), 95 | chunksize=100, 96 | ) 97 | print("Finished. time: {:.2f}s".format(time.time() - start)) 98 | 99 | 100 | def get_parser(): 101 | parser = argparse.ArgumentParser(description="Generate thing semantic segmentation annotations from instance annotations") 102 | parser.add_argument( 103 | "--dataset-name", 104 | default="coco", 105 | help="dataset to generate", 106 | ) 107 | return parser 108 | 109 | 110 | if __name__ == "__main__": 111 | args = get_parser().parse_args() 112 | dataset_dir = os.path.join(os.path.dirname(__file__), args.dataset_name) 113 | if args.dataset_name == "coco": 114 | thing_id_to_contiguous_id = _get_coco_instances_meta()["thing_dataset_id_to_contiguous_id"] 115 | split_name = 'train2017' 116 | annotation_name = "annotations/instances_{}.json" 117 | else: 118 | thing_id_to_contiguous_id = {1: 0} 119 | split_name = 'train' 120 | annotation_name = "annotations/{}_person.json" 121 | for s in [split_name]: 122 | create_coco_semantic_from_instance( 123 | os.path.join(dataset_dir, annotation_name.format(s)), 124 | os.path.join(dataset_dir, "thing_{}".format(s)), 125 | thing_id_to_contiguous_id 126 | ) 127 | -------------------------------------------------------------------------------- /datasets/prepare_thing_sem_from_lvis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import time 5 | import functools 6 | import multiprocessing as mp 7 | import numpy as np 8 | import os 9 | from lvis import LVIS 10 | from pycocotools import mask as maskUtils 11 | 12 | 13 | def annToRLE(ann, img_size): 14 | h, w = img_size 15 | segm = ann['segmentation'] 16 | if type(segm) == list: 17 | # polygon -- a single object might consist of multiple parts 18 | # we merge all parts into one mask rle code 19 | rles = maskUtils.frPyObjects(segm, h, w) 20 | rle = maskUtils.merge(rles) 21 | elif type(segm['counts']) == list: 22 | # uncompressed RLE 23 | rle = maskUtils.frPyObjects(segm, h, w) 24 | else: 25 | # rle 26 | rle = ann['segmentation'] 27 | return rle 28 | 29 | 30 | def annToMask(ann, img_size): 31 | rle = annToRLE(ann, img_size) 32 | m = maskUtils.decode(rle) 33 | return m 34 | 35 | 36 | def _process_instance_to_semantic(anns, output_semantic, img): 37 | img_size = (img["height"], img["width"]) 38 | output = np.zeros(img_size, dtype=np.uint8) 39 | for ann in anns: 40 | mask = annToMask(ann, img_size) 41 | output[mask == 1] = ann["category_id"] // 5 42 | # save as compressed npz 43 | np.savez_compressed(output_semantic, mask=output) 44 | # Image.fromarray(output).save(output_semantic) 45 | 46 | 47 | def create_lvis_semantic_from_instance(instance_json, sem_seg_root): 48 | """ 49 | Create semantic segmentation annotations from panoptic segmentation 50 | annotations, to be used by PanopticFPN. 51 | 52 | It maps all thing categories to contiguous ids starting from 1, and maps all unlabeled pixels to class 0 53 | 54 | Args: 55 | instance_json (str): path to the instance json file, in COCO's format. 56 | sem_seg_root (str): a directory to output semantic annotation files 57 | """ 58 | os.makedirs(sem_seg_root, exist_ok=True) 59 | 60 | lvis_detection = LVIS(instance_json) 61 | 62 | def iter_annotations(): 63 | for img_id in lvis_detection.get_img_ids(): 64 | anns_ids = lvis_detection.get_ann_ids([img_id]) 65 | anns = lvis_detection.load_anns(anns_ids) 66 | img = lvis_detection.load_imgs([img_id])[0] 67 | file_name = os.path.splitext(img["file_name"])[0] 68 | output = os.path.join(sem_seg_root, file_name + '.npz') 69 | yield anns, output, img 70 | 71 | # # single process 72 | # print("Start writing to {} ...".format(sem_seg_root)) 73 | # start = time.time() 74 | # for anno, oup, img in iter_annotations(): 75 | # _process_instance_to_semantic( 76 | # anno, oup, img) 77 | # print("Finished. time: {:.2f}s".format(time.time() - start)) 78 | # return 79 | 80 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 81 | 82 | print("Start writing to {} ...".format(sem_seg_root)) 83 | start = time.time() 84 | pool.starmap( 85 | functools.partial( 86 | _process_instance_to_semantic), 87 | iter_annotations(), 88 | chunksize=100, 89 | ) 90 | print("Finished. 
time: {:.2f}s".format(time.time() - start)) 91 | 92 | 93 | if __name__ == "__main__": 94 | dataset_dir = os.path.join(os.path.dirname(__file__), "lvis") 95 | for s in ["train"]: 96 | create_lvis_semantic_from_instance( 97 | os.path.join(dataset_dir, "lvis_v0.5_{}.json".format(s)), 98 | os.path.join(dataset_dir, "thing_{}".format(s)), 99 | ) 100 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-devel-ubuntu18.04 2 | 3 | RUN apt-get update && apt-get install -y libglib2.0-0 && apt-get clean 4 | 5 | RUN apt-get install -y wget htop byobu git gcc g++ vim libsm6 libxext6 libxrender-dev lsb-core 6 | 7 | RUN cd /root && wget https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh 8 | 9 | RUN cd /root && bash Anaconda3-2020.07-Linux-x86_64.sh -b -p ./anaconda3 10 | 11 | RUN bash -c "source /root/anaconda3/etc/profile.d/conda.sh && conda install -y pytorch==1.5.0 torchvision cudatoolkit=10.2 -c pytorch" 12 | 13 | RUN bash -c "/root/anaconda3/bin/conda init bash" 14 | 15 | WORKDIR /root 16 | RUN mkdir code 17 | WORKDIR code 18 | 19 | RUN git clone https://github.com/facebookresearch/detectron2.git 20 | RUN bash -c "source /root/anaconda3/etc/profile.d/conda.sh && conda activate base && cd detectron2 && python setup.py build develop" 21 | 22 | RUN git clone https://github.com/aim-uofa/AdelaiDet.git adet 23 | 24 | WORKDIR adet 25 | RUN bash -c "source /root/anaconda3/etc/profile.d/conda.sh && conda activate base && python setup.py build develop" 26 | 27 | RUN rm /root/Anaconda3-2020.07-Linux-x86_64.sh 28 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. AdelaiDet documentation master file, created by 2 | sphinx-quickstart on Wed Feb 26 15:24:04 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to AdelaiDet's documentation! 7 | ===================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | 13 | modules/index -------------------------------------------------------------------------------- /docs/modules/checkpoint.rst: -------------------------------------------------------------------------------- 1 | adet.checkpoint package 2 | ============================= 3 | 4 | .. automodule:: adet.checkpoint 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/modules/config.rst: -------------------------------------------------------------------------------- 1 | adet.config package 2 | ========================= 3 | 4 | .. automodule:: adet.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | 11 | Config References 12 | ----------------- 13 | 14 | .. literalinclude:: ../../adet/config/defaults.py 15 | :language: python 16 | :linenos: 17 | :lines: 4- -------------------------------------------------------------------------------- /docs/modules/data.rst: -------------------------------------------------------------------------------- 1 | adet.data package 2 | ======================= 3 | 4 | .. automodule:: adet.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | checkpoint 7 | config 8 | data 9 | layers 10 | modeling 11 | utils -------------------------------------------------------------------------------- /docs/modules/layers.rst: -------------------------------------------------------------------------------- 1 | adet.layers package 2 | ========================= 3 | 4 | .. automodule:: adet.layers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/modules/modeling.rst: -------------------------------------------------------------------------------- 1 | adet.modeling package 2 | =========================== 3 | 4 | .. automodule:: adet.modeling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | adet.modeling.backbone module 10 | --------------------------------------- 11 | 12 | .. automodule:: adet.modeling.backbone 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | adet.modeling.poolers module 18 | --------------------------------------- 19 | 20 | .. automodule:: adet.modeling.poolers 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | 26 | Model Registries 27 | ----------------- 28 | 29 | These are different registries provided in modeling. 30 | Each registry provide you the ability to replace it with your customized component, 31 | without having to modify detectron2's code. 32 | 33 | Note that it is impossible to allow users to customize any line of code directly. 34 | Even just to add one line at some place, 35 | you'll likely need to find out the smallest registry which contains that line, 36 | and register your component to that registry. 
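For example, a minimal sketch of registering a custom proposal generator (the class and config value below are made up for illustration) so that a config can select it via ``MODEL.PROPOSAL_GENERATOR.NAME``:

.. code-block:: python

    import torch.nn as nn
    from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY

    @PROPOSAL_GENERATOR_REGISTRY.register()
    class MyProposalGenerator(nn.Module):  # hypothetical name
        def __init__(self, cfg, input_shape):
            super().__init__()
            # build layers from cfg here

        def forward(self, images, features, gt_instances=None):
            # return (proposals, losses), mirroring the proposal generators
            # in this project (e.g. FCOS, MEInst, FCPose)
            return [], {}

Setting ``MODEL.PROPOSAL_GENERATOR.NAME: "MyProposalGenerator"`` in a config then makes detectron2 build this module in place of the default one.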
37 | 38 | 39 | * detectron2.modeling.META_ARCH_REGISTRY 40 | * detectron2.modeling.BACKBONE_REGISTRY 41 | * detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY 42 | * detectron2.modeling.RPN_HEAD_REGISTRY 43 | * detectron2.modeling.ANCHOR_GENERATOR_REGISTRY 44 | * detectron2.modeling.ROI_HEADS_REGISTRY 45 | * detectron2.modeling.ROI_BOX_HEAD_REGISTRY 46 | * detectron2.modeling.ROI_MASK_HEAD_REGISTRY 47 | * detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY -------------------------------------------------------------------------------- /docs/modules/utils.rst: -------------------------------------------------------------------------------- 1 | adet.utils package 2 | ======================== 3 | 4 | adet.utils.comm module 5 | -------------------------------- 6 | 7 | .. automodule:: adet.utils.comm 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | adet.utils.measures module 13 | ---------------------------- 14 | 15 | .. automodule:: adet.utils.measures 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor 2 | numpy 3 | tqdm 4 | docutils>=0.14 5 | Sphinx>=1.7 6 | recommonmark==0.4.0 7 | sphinx_rtd_theme 8 | mock 9 | matplotlib 10 | termcolor 11 | yacs 12 | tabulate 13 | cloudpickle 14 | Pillow==8.1.1 15 | future 16 | requests 17 | six 18 | https://download.pytorch.org/whl/nightly/cpu/torch-1.3.0.dev20191010%2Bcpu-cp37-cp37m-linux_x86_64.whl 19 | https://download.pytorch.org/whl/nightly/cpu/torchvision-0.5.0.dev20191008%2Bcpu-cp37-cp37m-linux_x86_64.whl 20 | git+git://github.com/facebookresearch/fvcore.git 21 | https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/detectron2-0.1.1%2Bcu101-cp37-cp37m-linux_x86_64.whl -------------------------------------------------------------------------------- /onnx/.gitignore: -------------------------------------------------------------------------------- 1 | ncnn 2 | -------------------------------------------------------------------------------- /onnx/README.md: -------------------------------------------------------------------------------- 1 | 2 | # update history 3 | 4 | 2020.05.07: 5 | 6 | 1. added an [onnxruntime](https://github.com/microsoft/onnxruntime) verification demo 7 | 8 | 2. added an all-in-one script for the RT model 9 | 10 | # Export to onnx/caffe/ncnn 11 | 12 | Refer to the all-in-one script [pytorch-onnx-caffe-ncnn.sh](pytorch-onnx-caffe-ncnn.sh) (BN instead of GN is used in the FCOS head). 13 | 14 | Refer to another all-in-one script, [pytorch-onnx-caffe-ncnn-rt.sh](pytorch-onnx-caffe-ncnn-rt.sh), for the RT model along with an onnxruntime verification demo. 15 | 16 | Note: converting the model to *Caffe* and *NCNN* requires BN in the FCOS head. 17 | 18 | # Normalization in the FCOS head 19 | The normalization in the FCOS head is GroupNorm (GN) by default, as in the original paper. Unlike BN, GN calculates the mean and variance of features online, which costs extra time and memory. 20 | On the other hand, as BN can be merged into the previous convolution layer, BN introduces no computation overhead during inference. The following steps give a simple way to measure the impact of GN on speed; a sketch of the timing code for step 2 is shown below.
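A rough version of that timing code, wrapped around the per-image prediction call (the variable and helper names are illustrative; demo/demo.py and demo/predictor.py may differ in detail):

```python
import time
from detectron2.data.detection_utils import read_image

total_time, num_images = 0.0, 0
for path in input_paths:                     # illustrative: the demo's list of input images
    img = read_image(path, format="BGR")
    start = time.perf_counter()
    predictions, _ = demo.run_on_image(img)  # illustrative: VisualizationDemo from demo/predictor.py
    total_time += time.perf_counter() - start
    num_images += 1
print("total {:.4f}s, average {:.4f}s per image".format(total_time, total_time / num_images))
```
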
21 | 22 | * prepare some images (for example 1000) in the folder output/test/input/ 23 | 24 | * add time-measurement code to demo/demo.py (a sketch is given above) 25 | 26 | * GN + GPU: total execution time 285.1398s, average 0.0696s per image 27 | 28 | ```sh 29 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth 30 | ``` 31 | * BN + GPU: total execution time 257.4333s, average 0.0628s per image 32 | ```sh 33 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth MODEL.FCOS.NORM BN 34 | ``` 35 | * GN + CPU: total execution time 1125.4375s, average 1.0112s per image 36 | ```sh 37 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth MODEL.DEVICE cpu 38 | ``` 39 | * BN + CPU: total execution time 1068.0550s, average 0.9596s per image 40 | ```sh 41 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth MODEL.DEVICE cpu MODEL.FCOS.NORM BN 42 | ``` 43 | 44 | Tested on a 2080 Ti. The results show that GN is 5~10% slower than BN. 45 | 46 | # Result comparison between PyTorch and NCNN 47 | 48 | * PyTorch version: run demo/demo.py 49 | * NCNN version: refer to https://github.com/blueardour/ncnn/blob/master/examples/fcos.cpp 50 | 51 | Example: take coco/test2017/000000144041.jpg as the test image 52 | 53 | ``` 54 | #> cd AdelaiDet 55 | 56 | #> mkdir -p output/test/ 57 | 58 | #> cp $COCO/test2017/000000144041.jpg output/test/input.jpg 59 | 60 | #> python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input.jpg --output output/test/output.jpg --opts MODEL.WEIGHTS /data/pretrained/pytorch/fcos/FCOS_R_50_1x_bn_head.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 61 | 62 | 63 | #> cd $NCNN_ROOT # (build the project ahead of time) 64 | 65 | #> cd build-host-gcc-linux/examples 66 | 67 | #> ln -s /data/pretrained/ncnn/fcos/FCOS_R_50_1x_bn_head-update-opt.bin net.bin # (refer to pytorch-onnx-caffe-ncnn.sh to generate the file) 68 | 69 | #> ln -s /data/pretrained/ncnn/fcos/FCOS_R_50_1x_bn_head-update-opt.param net.param # (refer to pytorch-onnx-caffe-ncnn.sh to generate the file) 70 | 71 | #> ./fcos /workspace/git/uofa-AdelaiDet/output/test/input.jpg net.param net.bin 800 1088 72 | ``` 73 | 74 | -------------------------------------------------------------------------------- /onnx/pytorch-onnx-caffe-ncnn-rt.sh: -------------------------------------------------------------------------------- 1 | 2 | if [ "$1" == "update" ]; 3 | then 4 | update='update' 5 | else 6 | update= 7 | fi 8 | 9 | if [ "$1" == "test" ]; 10 | then 11 | onnx_rt='test' 12 | else 13 | onnx_rt= 14 | fi 15 | 16 | caffe_repo=/data/pretrained/caffe/fcos/ 17 | onnx_repo=/data/pretrained/onnx/fcos/ 18 | pytorch_repo=/data/pretrained/pytorch/fcos/ 19 | ncnn_repo=/data/pretrained/ncnn/fcos/ 20 | 21 | config=configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml 22 | case=FCOS_RT_MS_R_50_4x_syncbn_bn_head 23 | 24 | if [ ! 
-e $onnx_repo/$case.onnx ] || [ "$update" != "" ]; 25 | then 26 | cd /workspace/git/uofa-AdelaiDet/ # folder of project https://github.com/aim-uofa/AdelaiDet 27 | pwd 28 | python -V # ensure python3.x 29 | python onnx/export_model_to_onnx.py \ 30 | --config-file $config \ 31 | --output $onnx_repo/$case.onnx \ 32 | --width 768 --height 640 \ 33 | --opts MODEL.WEIGHTS $pytorch_repo/$case.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 34 | if [ $? -ne 0 ]; then exit; fi 35 | fi 36 | 37 | if [ ! -e $onnx_repo/$case-update.onnx ] || [ "$update" != "" ]; 38 | then 39 | # advise version 1.3.0 40 | cd /workspace/git/onnx-simplifier # folder of project: https://github.com/daquexian/onnx-simplifier 41 | pwd 42 | python -V # ensure python3.x 43 | python -m onnxsim $onnx_repo/$case.onnx $onnx_repo/$case-update.onnx 44 | if [ $? -ne 0 ]; then exit; fi 45 | fi 46 | 47 | # optional 48 | if [ ! -e $caffe_repo/$case-update.caffemodel ]; 49 | then 50 | # switch to python2 and ensure caffe (with the upsample patch) ready 51 | # refer: https://github.com/blueardour/caffe.git for patched version 52 | cd /workspace/git/onnx2caffe # folder of project: https://github.com/MTlab/onnx2caffe 53 | pwd 54 | python -V 55 | python convertCaffe.py $onnx_repo/$case-update.onnx $caffe_repo/$case-update.prototxt $caffe_repo/$case-update.caffemodel 56 | if [ $? -ne 0 ]; then exit; fi 57 | fi 58 | 59 | # ncnn 60 | if [ ! -e $ncnn_repo/$case-update-opt.bin ] || [ "$update" != "" ] 61 | then 62 | cd /workspace/git/ncnn # folder of project: https://github.com/Tencent/ncnn 63 | pwd 64 | mkdir -p $ncnn_repo 65 | ./build-host-gcc-linux/tools/onnx/onnx2ncnn $onnx_repo/$case-update.onnx $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin 66 | if [ $? -eq 0 ]; then 67 | echo "Optimizing" 68 | ./build-host-gcc-linux/tools/ncnnoptimize $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin \ 69 | $ncnn_repo/$case-update-opt.param $ncnn_repo/$case-update-opt.bin \ 70 | 0 #data 640 512 3 71 | else 72 | echo "Convert failed" 73 | fi 74 | fi 75 | 76 | if [ "$onnx_rt" == "test" ]; 77 | then 78 | cd /workspace/git/uofa-AdelaiDet/ # folder of project https://github.com/aim-uofa/AdelaiDet 79 | pwd 80 | python -V # ensure python3.x 81 | python onnx/test_onnxruntime.py \ 82 | --config-file $config \ 83 | --output $onnx_repo/$case-update.onnx \ 84 | --width 768 --height 640 \ 85 | --opts MODEL.WEIGHTS $pytorch_repo/$case.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 86 | if [ $? -ne 0 ]; then exit; fi 87 | fi 88 | 89 | 90 | -------------------------------------------------------------------------------- /onnx/pytorch-onnx-caffe-ncnn.sh: -------------------------------------------------------------------------------- 1 | 2 | update=$1 # force update 3 | caffe_repo=/data/pretrained/caffe/fcos/ 4 | onnx_repo=/data/pretrained/onnx/fcos/ 5 | pytorch_repo=/data/pretrained/pytorch/fcos/ 6 | ncnn_repo=/data/pretrained/ncnn/fcos/ 7 | case=FCOS_R_50_1x_bn_head 8 | 9 | mkdir -p $caffe_repo $onnx_repo $pytorch_repo $ncnn_repo 10 | 11 | if [ ! -e $onnx_repo/$case.onnx ] || [ "$update" != "" ]; 12 | then 13 | cd /workspace/git/uofa-AdelaiDet/ # folder of project https://github.com/aim-uofa/AdelaiDet 14 | pwd 15 | python -V # ensure python3.x 16 | python onnx/export_model_to_onnx.py \ 17 | --config-file configs/FCOS-Detection/R_50_1x.yaml \ 18 | --output $onnx_repo/$case.onnx \ 19 | --opts MODEL.WEIGHTS $pytorch_repo/$case.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 20 | fi 21 | 22 | if [ ! 
-e $onnx_repo/$case-update.onnx ] || [ "$update" != "" ]; 23 | then 24 | # advise version 1.3.0 25 | cd /workspace/git/onnx-simplifier # folder of project: https://github.com/daquexian/onnx-simplifier 26 | pwd 27 | python -V # ensure python3.x 28 | python -m onnxsim $onnx_repo/$case.onnx $onnx_repo/$case-update.onnx 29 | fi 30 | 31 | # optional 32 | if [ ! -e $caffe_repo/$case-update.caffemodel ]; 33 | then 34 | # switch to python2 and ensure caffe (with the upsample patch) ready 35 | # refer: https://github.com/blueardour/caffe.git for patched version 36 | cd /workspace/git/onnx2caffe # folder of project: https://github.com/MTlab/onnx2caffe 37 | pwd 38 | python -V 39 | python convertCaffe.py $onnx_repo/$case-update.onnx $caffe_repo/$case-update.prototxt $caffe_repo/$case-update.caffemodel 40 | fi 41 | 42 | # ncnn 43 | if [ ! -e $ncnn_repo/$case-opt.bin ] || [ "$update" != "" ] 44 | then 45 | cd /workspace/git/ncnn # folder of project: https://github.com/Tencent/ncnn 46 | pwd 47 | mkdir -p $ncnn_repo 48 | ./build-host-gcc-linux/tools/onnx/onnx2ncnn $onnx_repo/$case-update.onnx $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin 49 | if [ $? -eq 0 ]; then 50 | echo "Optimizing" 51 | ./build-host-gcc-linux/tools/ncnnoptimize $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin \ 52 | $ncnn_repo/$case-update-opt.param $ncnn_repo/$case-update-opt.bin \ 53 | 0 54 | else 55 | echo "Convert failed" 56 | fi 57 | fi 58 | 59 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "adet", "layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | if torch_ver < [1, 7]: 43 | # supported by https://github.com/pytorch/pytorch/pull/43931 44 | CC = os.environ.get("CC", None) 45 | if CC is not None: 46 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 47 | 48 | sources = [os.path.join(extensions_dir, s) for s in sources] 49 | 50 | include_dirs = [extensions_dir] 51 | 52 | ext_modules = [ 53 | extension( 54 | "adet._C", 55 | sources, 56 | include_dirs=include_dirs, 57 | define_macros=define_macros, 58 | extra_compile_args=extra_compile_args, 59 | ) 60 | ] 61 | 62 
| return ext_modules 63 | 64 | 65 | setup( 66 | name="AdelaiDet", 67 | version="0.2.0", 68 | author="Adelaide Intelligent Machines", 69 | url="https://github.com/stanstarks/AdelaiDet", 70 | description="AdelaiDet is AIM's research " 71 | "platform for instance-level detection tasks based on Detectron2.", 72 | packages=find_packages(exclude=("configs", "tests")), 73 | python_requires=">=3.6", 74 | install_requires=[ 75 | "termcolor>=1.1", 76 | "Pillow>=6.0", 77 | "yacs>=0.1.6", 78 | "tabulate", 79 | "cloudpickle", 80 | "matplotlib", 81 | "tqdm>4.29.0", 82 | "tensorboard", 83 | "rapidfuzz", 84 | "Polygon3", 85 | "shapely", 86 | "scikit-image", 87 | "editdistance" 88 | ], 89 | extras_require={"all": ["psutil"]}, 90 | ext_modules=get_extensions(), 91 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 92 | ) 93 | -------------------------------------------------------------------------------- /tools/convert_fcos_weight.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def get_parser(): 8 | parser = argparse.ArgumentParser(description="FCOS Detectron2 Converter") 9 | parser.add_argument( 10 | "--model", 11 | default="weights/fcos_R_50_1x_official.pth", 12 | metavar="FILE", 13 | help="path to model weights", 14 | ) 15 | parser.add_argument( 16 | "--output", 17 | default="weights/fcos_R_50_1x_converted.pth", 18 | metavar="FILE", 19 | help="path to model weights", 20 | ) 21 | return parser 22 | 23 | 24 | def rename_resnet_param_names(ckpt_state_dict): 25 | converted_state_dict = OrderedDict() 26 | for key in ckpt_state_dict.keys(): 27 | value = ckpt_state_dict[key] 28 | 29 | key = key.replace("module.", "") 30 | key = key.replace("body", "bottom_up") 31 | 32 | # adding a . 
ahead to avoid renaming the fpn modules 33 | # this can happen after fpn renaming 34 | key = key.replace(".layer1", ".res2") 35 | key = key.replace(".layer2", ".res3") 36 | key = key.replace(".layer3", ".res4") 37 | key = key.replace(".layer4", ".res5") 38 | key = key.replace("downsample.0", "shortcut") 39 | key = key.replace("downsample.1", "shortcut.norm") 40 | key = key.replace("bn1", "conv1.norm") 41 | key = key.replace("bn2", "conv2.norm") 42 | key = key.replace("bn3", "conv3.norm") 43 | key = key.replace("fpn_inner2", "fpn_lateral3") 44 | key = key.replace("fpn_inner3", "fpn_lateral4") 45 | key = key.replace("fpn_inner4", "fpn_lateral5") 46 | key = key.replace("fpn_layer2", "fpn_output3") 47 | key = key.replace("fpn_layer3", "fpn_output4") 48 | key = key.replace("fpn_layer4", "fpn_output5") 49 | key = key.replace("top_blocks", "top_block") 50 | key = key.replace("fpn.", "") 51 | key = key.replace("rpn", "proposal_generator") 52 | key = key.replace("head", "fcos_head") 53 | 54 | converted_state_dict[key] = value 55 | return converted_state_dict 56 | 57 | 58 | if __name__ == "__main__": 59 | args = get_parser().parse_args() 60 | ckpt = torch.load(args.model) 61 | model = rename_resnet_param_names(ckpt["model"]) 62 | torch.save(model, args.output) 63 | -------------------------------------------------------------------------------- /tools/remove_optim_from_ckpt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | 6 | def get_parser(): 7 | parser = argparse.ArgumentParser(description="Keep only model in ckpt") 8 | parser.add_argument( 9 | "--path", 10 | default="output/person/blendmask/R_50_1x/", 11 | help="path to model weights", 12 | ) 13 | parser.add_argument( 14 | "--name", 15 | default="R_50_1x.pth", 16 | help="name of output file", 17 | ) 18 | return parser 19 | 20 | 21 | if __name__ == "__main__": 22 | args = get_parser().parse_args() 23 | ckpt = torch.load(args.path + 'model_final.pth') 24 | model = ckpt["model"] 25 | torch.save(model, args.path + args.name) 26 | -------------------------------------------------------------------------------- /tools/rename_blendmask.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def get_parser(): 8 | parser = argparse.ArgumentParser(description="FCOS Detectron2 Converter") 9 | parser.add_argument( 10 | "--model", 11 | default="weights/blendmask/person/R_50_1x.pth", 12 | metavar="FILE", 13 | help="path to model weights", 14 | ) 15 | parser.add_argument( 16 | "--output", 17 | default="weights/blendmask/person/R_50_1x.pth", 18 | metavar="FILE", 19 | help="path to model weights", 20 | ) 21 | return parser 22 | 23 | 24 | def rename_resnet_param_names(ckpt_state_dict): 25 | converted_state_dict = OrderedDict() 26 | for key in ckpt_state_dict.keys(): 27 | value = ckpt_state_dict[key] 28 | key = key.replace("centerness", "ctrness") 29 | 30 | converted_state_dict[key] = value 31 | return converted_state_dict 32 | 33 | 34 | if __name__ == "__main__": 35 | args = get_parser().parse_args() 36 | ckpt = torch.load(args.model) 37 | if "model" in ckpt: 38 | model = rename_resnet_param_names(ckpt["model"]) 39 | else: 40 | model = rename_resnet_param_names(ckpt) 41 | torch.save(model, args.output) 42 | -------------------------------------------------------------------------------- /tools/visualize_data.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | import argparse 4 | import numpy as np 5 | import os 6 | from itertools import chain 7 | import cv2 8 | import tqdm 9 | from PIL import Image 10 | 11 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader 12 | from detectron2.data import detection_utils as utils 13 | from detectron2.data.build import filter_images_with_few_keypoints 14 | from detectron2.utils.logger import setup_logger 15 | from detectron2.utils.visualizer import Visualizer 16 | 17 | from adet.config import get_cfg 18 | from adet.data.dataset_mapper import DatasetMapperWithBasis 19 | 20 | 21 | def setup(args): 22 | cfg = get_cfg() 23 | if args.config_file: 24 | cfg.merge_from_file(args.config_file) 25 | cfg.merge_from_list(args.opts) 26 | cfg.freeze() 27 | return cfg 28 | 29 | 30 | def parse_args(in_args=None): 31 | parser = argparse.ArgumentParser(description="Visualize ground-truth data") 32 | parser.add_argument( 33 | "--source", 34 | choices=["annotation", "dataloader"], 35 | required=True, 36 | help="visualize the annotations or the data loader (with pre-processing)", 37 | ) 38 | parser.add_argument("--config-file", metavar="FILE", help="path to config file") 39 | parser.add_argument("--output-dir", default="./", help="path to output directory") 40 | parser.add_argument("--show", action="store_true", help="show output in a window") 41 | parser.add_argument( 42 | "--opts", 43 | help="Modify config options using the command-line", 44 | default=[], 45 | nargs=argparse.REMAINDER, 46 | ) 47 | return parser.parse_args(in_args) 48 | 49 | 50 | if __name__ == "__main__": 51 | args = parse_args() 52 | logger = setup_logger() 53 | logger.info("Arguments: " + str(args)) 54 | cfg = setup(args) 55 | 56 | dirname = args.output_dir 57 | os.makedirs(dirname, exist_ok=True) 58 | metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) 59 | 60 | def output(vis, fname): 61 | if args.show: 62 | print(fname) 63 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 64 | cv2.waitKey() 65 | else: 66 | filepath = os.path.join(dirname, fname) 67 | print("Saving to {} ...".format(filepath)) 68 | vis.save(filepath) 69 | 70 | scale = 2.0 if args.show else 1.0 71 | if args.source == "dataloader": 72 | mapper = DatasetMapperWithBasis(cfg, True) 73 | train_data_loader = build_detection_train_loader(cfg, mapper) 74 | for batch in train_data_loader: 75 | for per_image in batch: 76 | # Pytorch tensor is in (C, H, W) format 77 | img = per_image["image"].permute(1, 2, 0) 78 | if cfg.INPUT.FORMAT == "BGR": 79 | img = img[:, :, [2, 1, 0]] 80 | else: 81 | img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 82 | 83 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 84 | target_fields = per_image["instances"].get_fields() 85 | labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] 86 | vis = visualizer.overlay_instances( 87 | labels=labels, 88 | boxes=target_fields.get("gt_boxes", None), 89 | masks=target_fields.get("gt_masks", None), 90 | keypoints=target_fields.get("gt_keypoints", None), 91 | ) 92 | output(vis, str(per_image["image_id"]) + ".jpg") 93 | else: 94 | dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) 95 | if cfg.MODEL.KEYPOINT_ON: 96 | dicts = filter_images_with_few_keypoints(dicts, 1) 97 | for dic in tqdm.tqdm(dicts): 98 | img = 
utils.read_image(dic["file_name"], "RGB") 99 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 100 | vis = visualizer.draw_dataset_dict(dic) 101 | output(vis, os.path.basename(dic["file_name"])) --------------------------------------------------------------------------------
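For reference, tools/visualize_data.py above is typically invoked with one of the repo's configs, either on the raw annotations or through the training dataloader:

```
python tools/visualize_data.py \
    --source dataloader \
    --config-file configs/FCOS-Detection/R_50_1x.yaml \
    --output-dir output/vis_data
```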