├── .gitignore ├── LICENSE ├── MODEL_ZOO.md ├── README.md ├── adet ├── __init__.py ├── checkpoint │ ├── __init__.py │ └── adet_checkpoint.py ├── config │ ├── __init__.py │ ├── config.py │ └── defaults.py ├── data │ ├── __init__.py │ ├── augmentation.py │ ├── builtin.py │ ├── dataset_mapper.py │ ├── datasets │ │ └── text.py │ ├── detection_utils.py │ └── fcpose_dataset_mapper.py ├── evaluation │ ├── __init__.py │ ├── rrc_evaluation_funcs.py │ ├── rrc_evaluation_funcs_ic15.py │ ├── text_eval_script.py │ ├── text_eval_script_ic15.py │ └── text_evaluation_all.py ├── layers │ ├── __init__.py │ ├── bezier_align.py │ ├── conv_with_kaiming_uniform.py │ ├── csrc │ │ ├── BezierAlign │ │ │ ├── BezierAlign.h │ │ │ ├── BezierAlign_cpu.cpp │ │ │ └── BezierAlign_cuda.cu │ │ ├── DefROIAlign │ │ │ ├── DefROIAlign.h │ │ │ └── DefROIAlign_cuda.cu │ │ ├── cuda_version.cu │ │ ├── ml_nms │ │ │ ├── ml_nms.cu │ │ │ └── ml_nms.h │ │ └── vision.cpp │ ├── def_roi_align.py │ ├── deform_conv.py │ ├── gcn.py │ ├── iou_loss.py │ ├── ml_nms.py │ └── naive_group_norm.py ├── modeling │ ├── MEInst │ │ ├── LME │ │ │ ├── MaskLoader.py │ │ │ ├── __init__.py │ │ │ ├── mask_evaluation.py │ │ │ ├── mask_generation.py │ │ │ └── utils.py │ │ ├── MEInst.py │ │ ├── MEInst_outputs.py │ │ ├── MaskEncoding.py │ │ └── __init__.py │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── bifpn.py │ │ ├── dla.py │ │ ├── fpn.py │ │ ├── lpf.py │ │ ├── mobilenet.py │ │ ├── resnet_interval.py │ │ ├── resnet_lpf.py │ │ └── vovnet.py │ ├── batext │ │ ├── __init__.py │ │ ├── batext.py │ │ └── batext_outputs.py │ ├── blendmask │ │ ├── __init__.py │ │ ├── basis_module.py │ │ ├── blender.py │ │ └── blendmask.py │ ├── condinst │ │ ├── __init__.py │ │ ├── condinst.py │ │ ├── dynamic_mask_head.py │ │ └── mask_branch.py │ ├── fcos │ │ ├── __init__.py │ │ ├── fcos.py │ │ └── fcos_outputs.py │ ├── fcpose │ │ ├── __init__.py │ │ ├── basis_module.py │ │ ├── fcpose_framework.py │ │ ├── fcpose_head.py │ │ └── utils.py │ ├── one_stage_detector.py │ ├── poolers.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── attn_predictor.py │ │ └── text_head.py │ └── solov2 │ │ ├── __init__.py │ │ ├── loss.py │ │ ├── solov2.py │ │ └── utils.py ├── structures │ ├── __init__.py │ └── beziers.py └── utils │ ├── __init__.py │ ├── comm.py │ ├── measures.py │ └── visualizer.py ├── configs ├── BAText │ ├── Base-BAText.yaml │ ├── CTW1500 │ │ ├── Base-CTW1500.yaml │ │ ├── attn_R_50.yaml │ │ └── v2_attn_R_50.yaml │ ├── ICDAR2015 │ │ ├── Base-ic15.yaml │ │ ├── v1_attn_R_50.yaml │ │ └── v2_attn_R_50.yaml │ ├── Pretrain │ │ ├── Base-Chn-Pretrain.yaml │ │ ├── Base-Pretrain-ic15.yaml │ │ ├── Base-Pretrain.yaml │ │ ├── attn_R_50.yaml │ │ ├── v1_ic15_attn_R_50.yaml │ │ ├── v2_attn_R_50.yaml │ │ ├── v2_chn_attn_R_50.yaml │ │ └── v2_ic15_attn_R_50.yaml │ ├── README.md │ ├── ReCTS │ │ ├── Base-ReCTS.yaml │ │ └── v2_chn_attn_R_50.yaml │ └── TotalText │ │ ├── Base-TotalText.yaml │ │ ├── attn_R_50.yaml │ │ └── v2_attn_R_50.yaml ├── BlendMask │ ├── 550_R_50_1x.yaml │ ├── 550_R_50_3x.yaml │ ├── 550_R_50_dcni3_5x.yaml │ ├── Base-550.yaml │ ├── Base-BlendMask.yaml │ ├── Base-RT.yaml │ ├── DLA_34_syncbn_4x.yaml │ ├── Panoptic │ │ ├── Base-Panoptic.yaml │ │ ├── R_101_3x.yaml │ │ ├── R_101_dcni3_5x.yaml │ │ ├── R_50_1x.yaml │ │ ├── R_50_3x.yaml │ │ └── R_50_dcni3_5x.yaml │ ├── Person │ │ ├── Base-Person.yaml │ │ └── R_50_1x.yaml │ ├── README.md │ ├── RT_R_50_4x_bn-head_syncbn_shtw.yaml │ ├── RT_R_50_4x_syncbn_shtw.yaml │ ├── R_101_3x.yaml │ ├── R_101_dcni3_5x.yaml │ ├── R_50_1x.yaml │ └── R_50_3x.yaml 
├── BoxInst │ ├── Base-BoxInst.yaml │ ├── MS_R_101_1x.yaml │ ├── MS_R_101_3x.yaml │ ├── MS_R_101_BiFPN_3x.yaml │ ├── MS_R_101_BiFPN_dcni3_3x.yaml │ ├── MS_R_50_1x.yaml │ ├── MS_R_50_3x.yaml │ ├── MS_R_50_BiFPN_1x.yaml │ ├── MS_R_50_BiFPN_3x.yaml │ └── README.md ├── CondInst │ ├── Base-CondInst.yaml │ ├── MS_R_101_1x.yaml │ ├── MS_R_101_3x.yaml │ ├── MS_R_101_3x_sem.yaml │ ├── MS_R_101_BiFPN_3x.yaml │ ├── MS_R_101_BiFPN_3x_sem.yaml │ ├── MS_R_50_1x.yaml │ ├── MS_R_50_3x.yaml │ ├── MS_R_50_3x_sem.yaml │ ├── MS_R_50_BiFPN_1x.yaml │ ├── MS_R_50_BiFPN_3x.yaml │ ├── MS_R_50_BiFPN_3x_sem.yaml │ └── README.md ├── DenseCL │ ├── FCOS_R50_1x.yaml │ ├── FCOS_R50_1x_DenseCL.yaml │ ├── README.md │ ├── SOLOv2_R50_1x.yaml │ └── SOLOv2_R50_1x_DenseCL.yaml ├── FCOS-Detection │ ├── Base-FCOS.yaml │ ├── FCOS_RT │ │ ├── MS_DLA_34_4x_syncbn.yaml │ │ ├── MS_DLA_34_4x_syncbn_bn_head.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers.yaml │ │ ├── MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml │ │ ├── MS_R_50_4x_syncbn.yaml │ │ └── MS_R_50_4x_syncbn_bn_head.yaml │ ├── MS_R_101_2x.yaml │ ├── MS_R_101_2x_iou.yaml │ ├── MS_R_50_2x.yaml │ ├── MS_R_50_2x_iou.yaml │ ├── MS_X_101_32x8d_2x.yaml │ ├── MS_X_101_32x8d_2x_dcnv2.yaml │ ├── MS_X_101_32x8d_2x_dcnv2_iou.yaml │ ├── MS_X_101_32x8d_2x_iou.yaml │ ├── MS_X_101_64x4d_2x.yaml │ ├── MS_X_101_64x4d_2x_dcnv2.yaml │ ├── README.md │ ├── R_50_1x.yaml │ ├── R_50_1x_iou.yaml │ └── vovnet │ │ ├── MS_V_39_3x.yaml │ │ ├── MS_V_57_3x.yaml │ │ ├── MS_V_99_3x.yaml │ │ └── README.md ├── FCPose │ ├── Base-FCPose.yaml │ ├── README.md │ ├── R_101_3X.yaml │ └── R_50_3X.yaml ├── MEInst-InstanceSegmentation │ ├── Base-MEInst.yaml │ ├── MEInst_R_50_1x.yaml │ ├── MEInst_R_50_1x_none.yaml │ ├── MEInst_R_50_3x.yaml │ ├── MEInst_R_50_3x_512.yaml │ └── README.md ├── RCNN │ ├── 550_R_50_FPN_3x.yaml │ ├── Base-RCNN.yaml │ ├── LVIS │ │ └── R_50_1x.yaml │ └── R_101_3x.yaml └── SOLOv2 │ ├── Base-SOLOv2.yaml │ ├── R101_3x.yaml │ ├── R50_3x.yaml │ └── README.md ├── datasets ├── README.md ├── gen_coco_person.py ├── prepare_thing_sem_from_instance.py └── prepare_thing_sem_from_lvis.py ├── demo ├── demo.py └── predictor.py ├── docker └── Dockerfile ├── docs ├── .gitignore ├── Makefile ├── adel-logo.svg ├── conf.py ├── index.rst ├── modules │ ├── checkpoint.rst │ ├── config.rst │ ├── data.rst │ ├── index.rst │ ├── layers.rst │ ├── modeling.rst │ └── utils.rst └── requirements.txt ├── onnx ├── .gitignore ├── README.md ├── export_model_to_onnx.py ├── pytorch-onnx-caffe-ncnn-rt.sh ├── pytorch-onnx-caffe-ncnn.sh └── test_onnxruntime.py ├── setup.py └── tools ├── convert_fcos_weight.py ├── remove_optim_from_ckpt.py ├── rename_blendmask.py ├── train_net.py └── visualize_data.py /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.jpg 8 | *.png 9 | *.txt 10 | 11 | # compilation and distribution 12 | __pycache__ 13 | _ext 14 | *.pyc 15 | *.so 16 | AdelaiDet.egg-info/ 17 | build/ 18 | dist/ 19 | 20 | # pytorch/python/numpy formats 21 | *.pth 22 | *.pkl 23 | *.npy 24 | 25 | # ipython/jupyter notebooks 26 | *.ipynb 27 | **/.ipynb_checkpoints/ 28 | 29 | # Editor temporaries 30 | *.swn 31 | *.swo 32 | *.swp 33 | *~ 34 | 35 | # Pycharm editor settings 36 | .idea 37 | .vscode 38 | .python-version 39 | 40 | # project dirs 41 | /datasets/coco 42 | /datasets/lvis 43 | /datasets/pic 44 | /datasets/ytvos 45 | /models 46 | /demo_outputs 47 | /example_inputs 48 | /debug 49 | /weights 50 | /export 51 | 
eval.sh 52 | 53 | demo/performance.py 54 | demo/demo2.py 55 | train.sh 56 | benchmark.sh 57 | script -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | AdelaiDet for non-commercial purposes 2 | (For commercial use, contact chhshen@gmail.com for obtaining a commerical license.) 3 | 4 | Copyright (c) 2019 the authors 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /MODEL_ZOO.md: -------------------------------------------------------------------------------- 1 | # AdelaiDet Model Zoo and Baselines 2 | 3 | ## Introduction 4 | This file documents a collection of models trained with AdelaiDet in Nov, 2019. 5 | 6 | ## Models 7 | 8 | The inference time is measured on one 1080Ti based on the most recent commit on Detectron2 ([ffff8ac](https://github.com/facebookresearch/detectron2/commit/ffff8acc35ea88ad1cb1806ab0f00b4c1c5dbfd9)). 9 | 10 | More models will be released soon. Stay tuned. 
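To try any entry below, download the linked weights and point `MODEL.WEIGHTS` at the file. The following is a minimal sketch (not part of the original zoo docs) of loading a downloaded checkpoint through `AdetCheckpointer`; the local file name `fcos_R_50_1x.pth` is just a placeholder:

```python
# Minimal sketch: build a model from a zoo config and load downloaded weights.
from detectron2.modeling import build_model
from adet.config import get_cfg
from adet.checkpoint import AdetCheckpointer

cfg = get_cfg()
cfg.merge_from_file("configs/FCOS-Detection/R_50_1x.yaml")
cfg.MODEL.WEIGHTS = "fcos_R_50_1x.pth"  # placeholder: path to the downloaded checkpoint
model = build_model(cfg)
AdetCheckpointer(model).load(cfg.MODEL.WEIGHTS)
model.eval()
```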
11 | 12 | ### COCO Object Detecton Baselines with FCOS 13 | 14 | Name | box AP | download 15 | --- |:---:|:---: 16 | [FCOS_R_50_1x](configs/FCOS-Detection/R_50_1x.yaml) | 38.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/glqFc13cCoEyHYy/download) 17 | 18 | ### COCO Instance Segmentation Baselines with [BlendMask](https://arxiv.org/abs/2001.00309) 19 | 20 | Model | Name |inference time (ms/im) | box AP | mask AP | download 21 | --- |:---:|:---:|:---:|:---:|:---: 22 | Mask R-CNN | [550_R_50_3x](configs/RCNN/550_R_50_FPN_3x.yaml) | 63 | 39.1 | 35.3 | 23 | BlendMask | [550_R_50_3x](configs/BlendMask/550_R_50_3x.yaml) | 36 | 38.7 | 34.5 | [model](https://cloudstor.aarnet.edu.au/plus/s/R3Qintf7N8UCiIt/download) 24 | Mask R-CNN | [R_50_1x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml) | 80 | 38.6 | 35.2 | 25 | BlendMask | [R_50_1x](configs/BlendMask/R_50_1x.yaml) | 73 | 39.9 | 35.8 | [model](https://cloudstor.aarnet.edu.au/plus/s/zoxXPnr6Hw3OJgK/download) 26 | Mask R-CNN | [R_50_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml) | 80 | 41.0 | 37.2 | 27 | BlendMask | [R_50_3x](configs/BlendMask/R_50_3x.yaml) | 74 | 42.7 | 37.8 | [model](https://cloudstor.aarnet.edu.au/plus/s/ZnaInHFEKst6mvg/download) 28 | Mask R-CNN | [R_101_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml) | 100 | 42.9 | 38.6 | 29 | BlendMask | [R_101_3x](configs/BlendMask/R_101_3x.yaml) | 94 | 44.8 | 39.5 | [model](https://cloudstor.aarnet.edu.au/plus/s/e4fXrliAcMtyEBy/download) 30 | BlendMask | [R_101_dcni3_5x](configs/BlendMask/R_101_dcni3_5x.yaml) | 105 | 46.8 | 41.1 | [model](https://cloudstor.aarnet.edu.au/plus/s/vbnKnQtaGlw8TKv/download) 31 | 32 | ### COCO Panoptic Segmentation Baselines with BlendMask 33 | Model | Name | PQ | PQTh | PQSt | download 34 | --- |:---:|:---:|:---:|:---:|:---: 35 | Panoptic FPN | [R_50_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml) | 41.5 | 48.3 | 31.2 | 36 | BlendMask | [R_50_3x](configs/BlendMask/Panoptic/R_50_3x.yaml) | 42.5 | 49.5 | 32.0 | [model](https://cloudstor.aarnet.edu.au/plus/s/oDgi0826JOJXCr5/download) 37 | Panoptic FPN | [R_101_3x](https://github.com/facebookresearch/detectron2/blob/master/configs/COCO-InstanceSegmentation/panoptic_fpn_R_101_3x.yaml) | 43.0 | 49.7 | 32.9 | 38 | BlendMask | [R_101_3x](configs/BlendMask/Panoptic/R_101_3x.yaml) | 44.3 | 51.6 | 33.2 | [model](https://cloudstor.aarnet.edu.au/plus/s/u6gZwj06MWDEkYe/download) 39 | BlendMask | [R_101_dcni3_5x](configs/BlendMask/Panoptic/R_101_dcni3_5x.yaml) | 46.0 | 52.9 | 35.5 | [model](https://cloudstor.aarnet.edu.au/plus/s/Jwp41WEzDdrhWsN/download) 40 | 41 | ### Person in Context with BlendMask 42 | Model | Name | box AP | mask AP | download 43 | --- |:---:|:---:|:---:|:---: 44 | BlendMask | [R_50_1x](configs/BlendMask/Person/R_50_1x.yaml) | 70.6 | 66.7 | [model](https://cloudstor.aarnet.edu.au/plus/s/nvpcKTFA5fsagc0/download) -------------------------------------------------------------------------------- /adet/__init__.py: -------------------------------------------------------------------------------- 1 | from adet import modeling 2 | 3 | __version__ = "0.1.1" 4 | -------------------------------------------------------------------------------- /adet/checkpoint/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .adet_checkpoint import AdetCheckpointer 2 | 3 | __all__ = ["AdetCheckpointer"] 4 | -------------------------------------------------------------------------------- /adet/checkpoint/adet_checkpoint.py: -------------------------------------------------------------------------------- 1 | import pickle, os 2 | from fvcore.common.file_io import PathManager 3 | from detectron2.checkpoint import DetectionCheckpointer 4 | 5 | 6 | class AdetCheckpointer(DetectionCheckpointer): 7 | """ 8 | Same as :class:`DetectronCheckpointer`, but is able to convert models 9 | in AdelaiDet, such as LPF backbone. 10 | """ 11 | def _load_file(self, filename): 12 | if filename.endswith(".pkl"): 13 | with PathManager.open(filename, "rb") as f: 14 | data = pickle.load(f, encoding="latin1") 15 | if "model" in data and "__author__" in data: 16 | # file is in Detectron2 model zoo format 17 | self.logger.info("Reading a file from '{}'".format(data["__author__"])) 18 | return data 19 | else: 20 | # assume file is from Caffe2 / Detectron1 model zoo 21 | if "blobs" in data: 22 | # Detection models have "blobs", but ImageNet models don't 23 | data = data["blobs"] 24 | data = {k: v for k, v in data.items() if not k.endswith("_momentum")} 25 | if "weight_order" in data: 26 | del data["weight_order"] 27 | return {"model": data, "__author__": "Caffe2", "matching_heuristics": True} 28 | 29 | loaded = super()._load_file(filename) # load native pth checkpoint 30 | if "model" not in loaded: 31 | loaded = {"model": loaded} 32 | 33 | basename = os.path.basename(filename).lower() 34 | if "lpf" in basename or "dla" in basename: 35 | loaded["matching_heuristics"] = True 36 | return loaded 37 | -------------------------------------------------------------------------------- /adet/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import get_cfg 2 | 3 | __all__ = [ 4 | "get_cfg", 5 | ] 6 | -------------------------------------------------------------------------------- /adet/config/config.py: -------------------------------------------------------------------------------- 1 | from detectron2.config import CfgNode 2 | 3 | 4 | def get_cfg() -> CfgNode: 5 | """ 6 | Get a copy of the default config. 7 | 8 | Returns: 9 | a detectron2 CfgNode instance. 10 | """ 11 | from .defaults import _C 12 | 13 | return _C.clone() 14 | -------------------------------------------------------------------------------- /adet/data/__init__.py: -------------------------------------------------------------------------------- 1 | from . import builtin # ensure the builtin datasets are registered 2 | from .dataset_mapper import DatasetMapperWithBasis 3 | from .fcpose_dataset_mapper import FCPoseDatasetMapper 4 | 5 | 6 | __all__ = ["DatasetMapperWithBasis"] 7 | -------------------------------------------------------------------------------- /adet/data/augmentation.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | import numpy as np 4 | from fvcore.transforms import transform as T 5 | 6 | from detectron2.data.transforms import RandomCrop, StandardAugInput 7 | from detectron2.structures import BoxMode 8 | 9 | 10 | def gen_crop_transform_with_instance(crop_size, image_size, instances, crop_box=True): 11 | """ 12 | Generate a CropTransform so that the cropping region contains 13 | the center of the given instance. 
14 | 15 | Args: 16 | crop_size (tuple): h, w in pixels 17 | image_size (tuple): h, w 18 | instance (dict): an annotation dict of one instance, in Detectron2's 19 | dataset format. 20 | """ 21 | bbox = random.choice(instances) 22 | bbox[::2] = np.clip(bbox[::2], 0, image_size[1]) 23 | bbox[1::2] = np.clip(bbox[1::2], 0, image_size[0]) 24 | crop_size = np.asarray(crop_size, dtype=np.int32) 25 | center_yx = (bbox[1] + bbox[3]) * 0.5, (bbox[0] + bbox[2]) * 0.5 26 | assert ( 27 | image_size[0] >= center_yx[0] and image_size[1] >= center_yx[1] 28 | ), "The annotation bounding box is outside of the image!" 29 | assert ( 30 | image_size[0] >= crop_size[0] and image_size[1] >= crop_size[1] 31 | ), "Crop size is larger than image size!" 32 | 33 | min_yx = np.maximum(np.floor(center_yx).astype(np.int32) - crop_size, 0) 34 | max_yx = np.maximum(np.asarray(image_size, dtype=np.int32) - crop_size, 0) 35 | max_yx = np.minimum(max_yx, np.ceil(center_yx).astype(np.int32)) 36 | 37 | y0 = np.random.randint(min_yx[0], max_yx[0] + 1) 38 | x0 = np.random.randint(min_yx[1], max_yx[1] + 1) 39 | 40 | # if some instance is cropped extend the box 41 | if not crop_box: 42 | num_modifications = 0 43 | modified = True 44 | 45 | # convert crop_size to float 46 | crop_size = crop_size.astype(np.float32) 47 | while modified: 48 | modified, x0, y0, crop_size = adjust_crop(x0, y0, crop_size, instances) 49 | num_modifications += 1 50 | if num_modifications > 100: 51 | raise ValueError( 52 | "Cannot finished cropping adjustment within 100 tries (#instances {}).".format( 53 | len(instances) 54 | ) 55 | ) 56 | return T.CropTransform(0, 0, image_size[1], image_size[0]) 57 | 58 | return T.CropTransform(*map(int, (x0, y0, crop_size[1], crop_size[0]))) 59 | 60 | 61 | def adjust_crop(x0, y0, crop_size, instances, eps=1e-3): 62 | modified = False 63 | 64 | x1 = x0 + crop_size[1] 65 | y1 = y0 + crop_size[0] 66 | 67 | for bbox in instances: 68 | 69 | if bbox[0] < x0 - eps and bbox[2] > x0 + eps: 70 | crop_size[1] += x0 - bbox[0] 71 | x0 = bbox[0] 72 | modified = True 73 | 74 | if bbox[0] < x1 - eps and bbox[2] > x1 + eps: 75 | crop_size[1] += bbox[2] - x1 76 | x1 = bbox[2] 77 | modified = True 78 | 79 | if bbox[1] < y0 - eps and bbox[3] > y0 + eps: 80 | crop_size[0] += y0 - bbox[1] 81 | y0 = bbox[1] 82 | modified = True 83 | 84 | if bbox[1] < y1 - eps and bbox[3] > y1 + eps: 85 | crop_size[0] += bbox[3] - y1 86 | y1 = bbox[3] 87 | modified = True 88 | 89 | return modified, x0, y0, crop_size 90 | 91 | 92 | class RandomCropWithInstance(RandomCrop): 93 | """ Instance-aware cropping. 
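    Example (a rough sketch; the image, boxes and crop settings below are
    made-up placeholders, not values taken from any config)::

        >>> import numpy as np
        >>> aug = RandomCropWithInstance("relative_range", (0.9, 0.9), crop_instance=False)
        >>> image = np.zeros((480, 640, 3), dtype=np.uint8)
        >>> boxes = np.array([[10.0, 20.0, 200.0, 240.0]])  # absolute XYXY, one instance
        >>> tfm = aug.get_transform(image, boxes)  # CropTransform keeping the box center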
94 | """ 95 | 96 | def __init__(self, crop_type, crop_size, crop_instance=True): 97 | """ 98 | Args: 99 | crop_instance (bool): if False, extend cropping boxes to avoid cropping instances 100 | """ 101 | super().__init__(crop_type, crop_size) 102 | self.crop_instance = crop_instance 103 | self.input_args = ("image", "boxes") 104 | 105 | def get_transform(self, img, boxes): 106 | image_size = img.shape[:2] 107 | crop_size = self.get_crop_size(image_size) 108 | return gen_crop_transform_with_instance( 109 | crop_size, image_size, boxes, crop_box=self.crop_instance 110 | ) 111 | -------------------------------------------------------------------------------- /adet/data/builtin.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from detectron2.data.datasets.register_coco import register_coco_instances 4 | from detectron2.data.datasets.builtin_meta import _get_builtin_metadata 5 | 6 | from .datasets.text import register_text_instances 7 | 8 | # register plane reconstruction 9 | 10 | _PREDEFINED_SPLITS_PIC = { 11 | "pic_person_train": ("pic/image/train", "pic/annotations/train_person.json"), 12 | "pic_person_val": ("pic/image/val", "pic/annotations/val_person.json"), 13 | } 14 | 15 | metadata_pic = { 16 | "thing_classes": ["person"] 17 | } 18 | 19 | _PREDEFINED_SPLITS_TEXT = { 20 | "totaltext_train": ("totaltext/train_images", "totaltext/train.json"), 21 | "totaltext_val": ("totaltext/test_images", "totaltext/test.json"), 22 | "ctw1500_word_train": ("CTW1500/ctwtrain_text_image", "CTW1500/annotations/train_ctw1500_maxlen100_v2.json"), 23 | "ctw1500_word_test": ("CTW1500/ctwtest_text_image","CTW1500/annotations/test_ctw1500_maxlen100.json"), 24 | "syntext1_train": ("syntext1/images", "syntext1/annotations/train.json"), 25 | "syntext2_train": ("syntext2/images", "syntext2/annotations/train.json"), 26 | "mltbezier_word_train": ("mlt2017/images","mlt2017/annotations/train.json"), 27 | "rects_train": ("ReCTS/ReCTS_train_images", "ReCTS/annotations/rects_train.json"), 28 | "rects_val": ("ReCTS/ReCTS_val_images", "ReCTS/annotations/rects_val.json"), 29 | "rects_test": ("ReCTS/ReCTS_test_images", "ReCTS/annotations/rects_test.json"), 30 | "art_train": ("ArT/rename_artimg_train", "ArT/annotations/abcnet_art_train.json"), 31 | "lsvt_train": ("LSVT/rename_lsvtimg_train", "LSVT/annotations/abcnet_lsvt_train.json"), 32 | "chnsyn_train": ("ChnSyn/syn_130k_images", "ChnSyn/annotations/chn_syntext.json"), 33 | "icdar2013_train": ("icdar2013/train_images", "icdar2013/ic13_train.json"), 34 | "icdar2015_train": ("icdar2015/train_images", "icdar2015/ic15_train.json"), 35 | "icdar2015_test": ("icdar2015/test_images", "icdar2015/ic15_test.json"), 36 | } 37 | 38 | metadata_text = { 39 | "thing_classes": ["text"] 40 | } 41 | 42 | 43 | def register_all_coco(root="datasets"): 44 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_PIC.items(): 45 | # Assume pre-defined datasets live in `./datasets`. 46 | register_coco_instances( 47 | key, 48 | metadata_pic, 49 | os.path.join(root, json_file) if "://" not in json_file else json_file, 50 | os.path.join(root, image_root), 51 | ) 52 | for key, (image_root, json_file) in _PREDEFINED_SPLITS_TEXT.items(): 53 | # Assume pre-defined datasets live in `./datasets`. 
54 | register_text_instances( 55 | key, 56 | metadata_text, 57 | os.path.join(root, json_file) if "://" not in json_file else json_file, 58 | os.path.join(root, image_root), 59 | ) 60 | 61 | 62 | register_all_coco() 63 | -------------------------------------------------------------------------------- /adet/data/fcpose_dataset_mapper.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import logging 3 | import os.path as osp 4 | 5 | import numpy as np 6 | import torch 7 | from fvcore.common.file_io import PathManager 8 | from PIL import Image 9 | from pycocotools import mask as maskUtils 10 | 11 | from detectron2.data import detection_utils as utils 12 | from detectron2.data import transforms as T 13 | from detectron2.data.dataset_mapper import DatasetMapper 14 | from detectron2.data.detection_utils import SizeMismatchError 15 | from detectron2.structures import BoxMode 16 | 17 | from .augmentation import RandomCropWithInstance 18 | from .detection_utils import (annotations_to_instances, build_augmentation, 19 | transform_instance_annotations) 20 | 21 | from adet.data.detection_utils import HeatmapGenerator 22 | from adet.data.dataset_mapper import DatasetMapperWithBasis 23 | """ 24 | This file contains the default mapping that's applied to "dataset dicts". 25 | """ 26 | 27 | __all__ = ["DatasetMapperWithBasis"] 28 | 29 | logger = logging.getLogger(__name__) 30 | 31 | class FCPoseDatasetMapper(DatasetMapperWithBasis): 32 | """ 33 | This caller enables the default Detectron2 mapper to read an additional basis semantic label 34 | """ 35 | 36 | def __init__(self, cfg, is_train=True): 37 | super().__init__(cfg, is_train) 38 | 39 | self.fcpose_on = cfg.MODEL.FCPOSE_ON 40 | if self.fcpose_on: 41 | self.gt_heatmap_stride = cfg.MODEL.FCPOSE.GT_HEATMAP_STRIDE 42 | self.sigma = cfg.MODEL.FCPOSE.HEATMAP_SIGMA 43 | self.head_sigma = cfg.MODEL.FCPOSE.HEAD_HEATMAP_SIGMA 44 | self.HeatmapGenerator = HeatmapGenerator(17, self.sigma, self.head_sigma) 45 | 46 | def __call__(self, dataset_dict): 47 | """ 48 | Args: 49 | dataset_dict (dict): Metadata of one image, in Detectron2 Dataset format. 50 | 51 | Returns: 52 | dict: a format that builtin models in detectron2 accept 53 | """ 54 | for i in range(100): 55 | dataset_dict_temp = copy.deepcopy(dataset_dict) 56 | dataset_dict_temp = super().__call__(dataset_dict_temp) 57 | if len(dataset_dict_temp["instances"]) != 0: 58 | if self.is_train: 59 | dataset_dict_temp['instances'] = self.HeatmapGenerator(dataset_dict_temp['instances'], 60 | self.gt_heatmap_stride) 61 | return dataset_dict_temp 62 | raise 63 | -------------------------------------------------------------------------------- /adet/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .text_evaluation_all import TextEvaluator 2 | from .text_eval_script import text_eval_main 3 | from .text_eval_script_ic15 import text_eval_main_ic15 4 | from . import rrc_evaluation_funcs 5 | from . 
import rrc_evaluation_funcs_ic15 -------------------------------------------------------------------------------- /adet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import DFConv2d 2 | from .ml_nms import ml_nms 3 | from .iou_loss import IOULoss 4 | from .conv_with_kaiming_uniform import conv_with_kaiming_uniform 5 | from .bezier_align import BezierAlign 6 | from .def_roi_align import DefROIAlign 7 | from .naive_group_norm import NaiveGroupNorm 8 | from .gcn import GCN 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] -------------------------------------------------------------------------------- /adet/layers/bezier_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from adet import _C 8 | 9 | 10 | class _BezierAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, aligned): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | ctx.aligned = aligned 19 | output = _C.bezier_align_forward( 20 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio, aligned 21 | ) 22 | return output 23 | 24 | @staticmethod 25 | @once_differentiable 26 | def backward(ctx, grad_output): 27 | rois, = ctx.saved_tensors 28 | output_size = ctx.output_size 29 | spatial_scale = ctx.spatial_scale 30 | sampling_ratio = ctx.sampling_ratio 31 | bs, ch, h, w = ctx.input_shape 32 | grad_input = _C.bezier_align_backward( 33 | grad_output, 34 | rois, 35 | spatial_scale, 36 | output_size[0], 37 | output_size[1], 38 | bs, 39 | ch, 40 | h, 41 | w, 42 | sampling_ratio, 43 | ctx.aligned, 44 | ) 45 | return grad_input, None, None, None, None, None 46 | 47 | 48 | bezier_align = _BezierAlign.apply 49 | 50 | 51 | class BezierAlign(nn.Module): 52 | def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True): 53 | """ 54 | Args: 55 | output_size (tuple): h, w 56 | spatial_scale (float): scale the input boxes by this number 57 | sampling_ratio (int): number of inputs samples to take for each output 58 | sample. 0 to take samples densely. 59 | aligned (bool): if False, use the legacy implementation in 60 | Detectron. If True, align the results more perfectly. 61 | 62 | Note: 63 | The meaning of aligned=True: 64 | 65 | With `aligned=True`, 66 | we first appropriately scale the ROI and then shift it by -0.5 67 | prior to calling bezier_align. This produces the correct neighbors; see 68 | adet/tests/test_bezier_align.py for verification. 69 | 70 | The difference does not make a difference to the model's performance if 71 | ROIAlign is used together with conv layers. 72 | """ 73 | super(BezierAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.aligned = aligned 78 | 79 | def forward(self, input, rois): 80 | """ 81 | Args: 82 | input: NCHW images 83 | rois: Bx17 boxes. First column is the index into N. The other 16 columns are [xy]x8. 
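        Example (a shape-only sketch; running it needs the compiled ``adet._C``
        extension, and the control-point values below are arbitrary)::

            >>> import torch
            >>> pooler = BezierAlign((8, 32), spatial_scale=0.25, sampling_ratio=1)
            >>> feats = torch.randn(1, 256, 64, 64)
            >>> ctrl = torch.rand(3, 16) * 200          # 8 (x, y) Bezier control points per box
            >>> rois = torch.cat([torch.zeros(3, 1), ctrl], dim=1)  # prepend image index -> (3, 17)
            >>> out = pooler(feats, rois)               # (3, 256, 8, 32)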
84 | """ 85 | assert rois.dim() == 2 and rois.size(1) == 17 86 | return bezier_align( 87 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio, self.aligned 88 | ) 89 | 90 | def __repr__(self): 91 | tmpstr = self.__class__.__name__ + "(" 92 | tmpstr += "output_size=" + str(self.output_size) 93 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 94 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 95 | tmpstr += ", aligned=" + str(self.aligned) 96 | tmpstr += ")" 97 | return tmpstr 98 | -------------------------------------------------------------------------------- /adet/layers/conv_with_kaiming_uniform.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from detectron2.layers import Conv2d 4 | from .deform_conv import DFConv2d 5 | from detectron2.layers.batch_norm import get_norm 6 | 7 | 8 | def conv_with_kaiming_uniform( 9 | norm=None, activation=None, 10 | use_deformable=False, use_sep=False): 11 | def make_conv( 12 | in_channels, out_channels, kernel_size, stride=1, dilation=1 13 | ): 14 | if use_deformable: 15 | conv_func = DFConv2d 16 | else: 17 | conv_func = Conv2d 18 | if use_sep: 19 | assert in_channels == out_channels 20 | groups = in_channels 21 | else: 22 | groups = 1 23 | conv = conv_func( 24 | in_channels, 25 | out_channels, 26 | kernel_size=kernel_size, 27 | stride=stride, 28 | padding=dilation * (kernel_size - 1) // 2, 29 | dilation=dilation, 30 | groups=groups, 31 | bias=(norm is None) 32 | ) 33 | if not use_deformable: 34 | # Caffe2 implementation uses XavierFill, which in fact 35 | # corresponds to kaiming_uniform_ in PyTorch 36 | nn.init.kaiming_uniform_(conv.weight, a=1) 37 | if norm is None: 38 | nn.init.constant_(conv.bias, 0) 39 | module = [conv,] 40 | if norm is not None and len(norm) > 0: 41 | if norm == "GN": 42 | norm_module = nn.GroupNorm(32, out_channels) 43 | else: 44 | norm_module = get_norm(norm, out_channels) 45 | module.append(norm_module) 46 | if activation is not None: 47 | module.append(nn.ReLU(inplace=True)) 48 | if len(module) > 1: 49 | return nn.Sequential(*module) 50 | return conv 51 | 52 | return make_conv 53 | -------------------------------------------------------------------------------- /adet/layers/csrc/BezierAlign/BezierAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace adet { 6 | 7 | at::Tensor BezierAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor BezierAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor BezierAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor BezierAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor BezierAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return BezierAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return BezierAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor BezierAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return BezierAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return BezierAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace detectron2 131 | -------------------------------------------------------------------------------- /adet/layers/csrc/DefROIAlign/DefROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace adet { 5 | 6 | #ifdef WITH_CUDA 7 | at::Tensor DefROIAlign_forward_cuda( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const at::Tensor& offsets, // def added 11 | const float spatial_scale, 12 | const int pooled_height, 13 | const int pooled_width, 14 | const int 
sampling_ratio, 15 | const float trans_std, // def added 16 | bool aligned); 17 | 18 | at::Tensor DefROIAlign_backward_cuda( 19 | const at::Tensor& input, // def added 20 | const at::Tensor& grad, 21 | const at::Tensor& rois, 22 | const at::Tensor& offsets, // def added 23 | const at::Tensor& grad_offsets, // def added 24 | const float spatial_scale, 25 | const int pooled_height, 26 | const int pooled_width, 27 | const int batch_size, 28 | const int channels, 29 | const int height, 30 | const int width, 31 | const int sampling_ratio, 32 | const float trans_std, // def added 33 | bool aligned); 34 | #endif 35 | 36 | // Interface for Python 37 | inline at::Tensor DefROIAlign_forward( 38 | const at::Tensor& input, 39 | const at::Tensor& rois, 40 | const at::Tensor& offsets, // def added 41 | const float spatial_scale, 42 | const int pooled_height, 43 | const int pooled_width, 44 | const int sampling_ratio, 45 | const float trans_std, // def added 46 | bool aligned) { 47 | if (input.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return DefROIAlign_forward_cuda( 50 | input, 51 | rois, 52 | offsets, 53 | spatial_scale, 54 | pooled_height, 55 | pooled_width, 56 | sampling_ratio, 57 | trans_std, 58 | aligned); 59 | #else 60 | AT_ERROR("Not compiled with GPU support"); 61 | #endif 62 | } 63 | AT_ERROR("CPU version not supported"); 64 | } 65 | 66 | inline at::Tensor DefROIAlign_backward( 67 | const at::Tensor& input, // def added 68 | const at::Tensor& grad, 69 | const at::Tensor& rois, 70 | const at::Tensor& offsets, // def added 71 | const at::Tensor& grad_offsets, // def added 72 | const float spatial_scale, 73 | const int pooled_height, 74 | const int pooled_width, 75 | const int batch_size, 76 | const int channels, 77 | const int height, 78 | const int width, 79 | const int sampling_ratio, 80 | const float trans_std, // def added 81 | bool aligned) { 82 | if (grad.type().is_cuda()) { 83 | #ifdef WITH_CUDA 84 | return DefROIAlign_backward_cuda( 85 | input, // def added 86 | grad, 87 | rois, 88 | offsets, // def added 89 | grad_offsets, // def added 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio, 98 | trans_std, // def added 99 | aligned); 100 | #else 101 | AT_ERROR("Not compiled with GPU support"); 102 | #endif 103 | } 104 | AT_ERROR("CPU version not supported"); 105 | } 106 | 107 | } // namespace adet 108 | -------------------------------------------------------------------------------- /adet/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace adet { 4 | int get_cudart_version() { 5 | return CUDART_VERSION; 6 | } 7 | } // namespace adet 8 | -------------------------------------------------------------------------------- /adet/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace adet { 5 | 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor ml_nms_cuda( 9 | const at::Tensor dets, 10 | const float threshold); 11 | #endif 12 | 13 | at::Tensor ml_nms(const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const at::Tensor& labels, 16 | const float threshold) { 17 | 18 | if (dets.type().is_cuda()) { 19 | #ifdef WITH_CUDA 20 | // TODO raise error if not compiled with CUDA 21 | if (dets.numel() == 0) 22 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 23 | auto b = at::cat({dets, scores.unsqueeze(1), 
labels.unsqueeze(1)}, 1); 24 | return ml_nms_cuda(b, threshold); 25 | #else 26 | AT_ERROR("Not compiled with GPU support"); 27 | #endif 28 | } 29 | AT_ERROR("CPU version not implemented"); 30 | } 31 | 32 | } // namespace adet 33 | -------------------------------------------------------------------------------- /adet/layers/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | #include "ml_nms/ml_nms.h" 4 | #include "DefROIAlign/DefROIAlign.h" 5 | #include "BezierAlign/BezierAlign.h" 6 | 7 | namespace adet { 8 | 9 | #ifdef WITH_CUDA 10 | extern int get_cudart_version(); 11 | #endif 12 | 13 | std::string get_cuda_version() { 14 | #ifdef WITH_CUDA 15 | std::ostringstream oss; 16 | 17 | // copied from 18 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 19 | auto printCudaStyleVersion = [&](int v) { 20 | oss << (v / 1000) << "." << (v / 10 % 100); 21 | if (v % 10 != 0) { 22 | oss << "." << (v % 10); 23 | } 24 | }; 25 | printCudaStyleVersion(get_cudart_version()); 26 | return oss.str(); 27 | #else 28 | return std::string("not available"); 29 | #endif 30 | } 31 | 32 | // similar to 33 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 34 | std::string get_compiler_version() { 35 | std::ostringstream ss; 36 | #if defined(__GNUC__) 37 | #ifndef __clang__ 38 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 39 | #endif 40 | #endif 41 | 42 | #if defined(__clang_major__) 43 | { 44 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 45 | << __clang_patchlevel__; 46 | } 47 | #endif 48 | 49 | #if defined(_MSC_VER) 50 | { ss << "MSVC " << _MSC_FULL_VER; } 51 | #endif 52 | return ss.str(); 53 | } 54 | 55 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 56 | m.def("ml_nms", &ml_nms, "Multi-Label NMS"); 57 | m.def("def_roi_align_forward", &DefROIAlign_forward, "def_roi_align_forward"); 58 | m.def("def_roi_align_backward", &DefROIAlign_backward, "def_roi_align_backward"); 59 | m.def("bezier_align_forward", &BezierAlign_forward, "bezier_align_forward"); 60 | m.def("bezier_align_backward", &BezierAlign_backward, "bezier_align_backward"); 61 | } 62 | 63 | } // namespace adet 64 | -------------------------------------------------------------------------------- /adet/layers/def_roi_align.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from adet import _C 8 | 9 | 10 | class _DefROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, offsets, output_size, spatial_scale, sampling_ratio, trans_std, aligned): 13 | ctx.save_for_backward(input, roi, offsets) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.trans_std = trans_std 18 | ctx.input_shape = input.size() 19 | ctx.aligned = aligned 20 | output = _C.def_roi_align_forward( 21 | input, roi, offsets, spatial_scale, output_size[0], output_size[1], 22 | sampling_ratio, trans_std, aligned 23 | ) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | data, rois, offsets = ctx.saved_tensors 30 | output_size = ctx.output_size 31 | spatial_scale = ctx.spatial_scale 32 | 
sampling_ratio = ctx.sampling_ratio 33 | trans_std = ctx.trans_std 34 | bs, ch, h, w = ctx.input_shape 35 | grad_offsets = torch.zeros_like(offsets) 36 | 37 | grad_input = _C.def_roi_align_backward( 38 | data, 39 | grad_output, 40 | rois, 41 | offsets, 42 | grad_offsets, 43 | spatial_scale, 44 | output_size[0], 45 | output_size[1], 46 | bs, 47 | ch, 48 | h, 49 | w, 50 | sampling_ratio, 51 | trans_std, 52 | ctx.aligned, 53 | ) 54 | return grad_input, None, grad_offsets, None, None, None, None, None 55 | 56 | 57 | def_roi_align = _DefROIAlign.apply 58 | 59 | 60 | class DefROIAlign(nn.Module): 61 | def __init__(self, output_size, spatial_scale, 62 | sampling_ratio, trans_std, aligned=True): 63 | """ 64 | Args: 65 | output_size (tuple): h, w 66 | spatial_scale (float): scale the input boxes by this number 67 | sampling_ratio (int): number of inputs samples to take for each output 68 | sample. 0 to take samples densely. 69 | trans_std (float): offset scale according to the normalized roi size 70 | aligned (bool): if False, use the legacy implementation in 71 | Detectron. If True, align the results more perfectly. 72 | """ 73 | super(DefROIAlign, self).__init__() 74 | self.output_size = output_size 75 | self.spatial_scale = spatial_scale 76 | self.sampling_ratio = sampling_ratio 77 | self.trans_std = trans_std 78 | self.aligned = aligned 79 | 80 | def forward(self, input, rois, offsets): 81 | """ 82 | Args: 83 | input: NCHW images 84 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 85 | """ 86 | assert rois.dim() == 2 and rois.size(1) == 5 87 | return def_roi_align( 88 | input, rois, offsets, self.output_size, 89 | self.spatial_scale, self.sampling_ratio, 90 | self.trans_std, self.aligned 91 | ) 92 | 93 | def __repr__(self): 94 | tmpstr = self.__class__.__name__ + "(" 95 | tmpstr += "output_size=" + str(self.output_size) 96 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 97 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 98 | tmpstr += ", trans_std=" + str(self.trans_std) 99 | tmpstr += ", aligned=" + str(self.aligned) 100 | tmpstr += ")" 101 | return tmpstr 102 | -------------------------------------------------------------------------------- /adet/layers/deform_conv.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from detectron2.layers import Conv2d 5 | 6 | 7 | class _NewEmptyTensorOp(torch.autograd.Function): 8 | @staticmethod 9 | def forward(ctx, x, new_shape): 10 | ctx.shape = x.shape 11 | return x.new_empty(new_shape) 12 | 13 | @staticmethod 14 | def backward(ctx, grad): 15 | shape = ctx.shape 16 | return _NewEmptyTensorOp.apply(grad, shape), None 17 | 18 | 19 | class DFConv2d(nn.Module): 20 | """ 21 | Deformable convolutional layer with configurable 22 | deformable groups, dilations and groups. 
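    Example (a rough sketch; it needs the deformable-conv CUDA ops from
    detectron2, so it only runs on a GPU build)::

        >>> import torch
        >>> conv = DFConv2d(256, 256, with_modulated_dcn=True, kernel_size=3).cuda()
        >>> x = torch.randn(2, 256, 32, 32, device="cuda")
        >>> y = conv(x)  # offsets/masks are predicted internally; spatial size stays 32x32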
23 | 24 | Code is from: 25 | https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/layers/misc.py 26 | 27 | 28 | """ 29 | def __init__( 30 | self, 31 | in_channels, 32 | out_channels, 33 | with_modulated_dcn=True, 34 | kernel_size=3, 35 | stride=1, 36 | groups=1, 37 | dilation=1, 38 | deformable_groups=1, 39 | bias=False, 40 | padding=None 41 | ): 42 | super(DFConv2d, self).__init__() 43 | if isinstance(kernel_size, (list, tuple)): 44 | assert isinstance(stride, (list, tuple)) 45 | assert isinstance(dilation, (list, tuple)) 46 | assert len(kernel_size) == 2 47 | assert len(stride) == 2 48 | assert len(dilation) == 2 49 | padding = ( 50 | dilation[0] * (kernel_size[0] - 1) // 2, 51 | dilation[1] * (kernel_size[1] - 1) // 2 52 | ) 53 | offset_base_channels = kernel_size[0] * kernel_size[1] 54 | else: 55 | padding = dilation * (kernel_size - 1) // 2 56 | offset_base_channels = kernel_size * kernel_size 57 | if with_modulated_dcn: 58 | from detectron2.layers.deform_conv import ModulatedDeformConv 59 | offset_channels = offset_base_channels * 3 # default: 27 60 | conv_block = ModulatedDeformConv 61 | else: 62 | from detectron2.layers.deform_conv import DeformConv 63 | offset_channels = offset_base_channels * 2 # default: 18 64 | conv_block = DeformConv 65 | self.offset = Conv2d( 66 | in_channels, 67 | deformable_groups * offset_channels, 68 | kernel_size=kernel_size, 69 | stride=stride, 70 | padding=padding, 71 | groups=1, 72 | dilation=dilation 73 | ) 74 | for l in [self.offset, ]: 75 | nn.init.kaiming_uniform_(l.weight, a=1) 76 | torch.nn.init.constant_(l.bias, 0.) 77 | self.conv = conv_block( 78 | in_channels, 79 | out_channels, 80 | kernel_size=kernel_size, 81 | stride=stride, 82 | padding=padding, 83 | dilation=dilation, 84 | groups=groups, 85 | deformable_groups=deformable_groups, 86 | bias=bias 87 | ) 88 | self.with_modulated_dcn = with_modulated_dcn 89 | self.kernel_size = kernel_size 90 | self.stride = stride 91 | self.padding = padding 92 | self.dilation = dilation 93 | self.offset_split = offset_base_channels * deformable_groups * 2 94 | 95 | def forward(self, x, return_offset=False): 96 | if x.numel() > 0: 97 | if not self.with_modulated_dcn: 98 | offset_mask = self.offset(x) 99 | x = self.conv(x, offset_mask) 100 | else: 101 | offset_mask = self.offset(x) 102 | offset = offset_mask[:, :self.offset_split, :, :] 103 | mask = offset_mask[:, self.offset_split:, :, :].sigmoid() 104 | x = self.conv(x, offset, mask) 105 | if return_offset: 106 | return x, offset_mask 107 | return x 108 | # get output shape 109 | output_shape = [ 110 | (i + 2 * p - (di * (k - 1) + 1)) // d + 1 111 | for i, p, di, k, d in zip( 112 | x.shape[-2:], 113 | self.padding, 114 | self.dilation, 115 | self.kernel_size, 116 | self.stride 117 | ) 118 | ] 119 | output_shape = [x.shape[0], self.conv.weight.shape[0]] + output_shape 120 | return _NewEmptyTensorOp.apply(x, output_shape) 121 | -------------------------------------------------------------------------------- /adet/layers/gcn.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Conv2D(nn.Module): 7 | def __init__(self, in_channels, out_channels, kernel_size, padding='same', 8 | stride=1, dilation=1, groups=1): 9 | super(Conv2D, self).__init__() 10 | 11 | assert type(kernel_size) in [int, tuple], "Allowed kernel type [int or tuple], not {}".format(type(kernel_size)) 12 | assert padding == 'same', 
"Allowed padding type {}, not {}".format('same', padding) 13 | 14 | self.kernel_size = kernel_size 15 | if isinstance(kernel_size, tuple): 16 | self.h_kernel = kernel_size[0] 17 | self.w_kernel = kernel_size[1] 18 | else: 19 | self.h_kernel = kernel_size 20 | self.w_kernel = kernel_size 21 | 22 | self.padding = padding 23 | self.stride = stride 24 | self.dilation = dilation 25 | self.groups = groups 26 | self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size, 27 | stride=self.stride, dilation=self.dilation, groups=self.groups) 28 | 29 | def forward(self, x): 30 | 31 | if self.padding == 'same': 32 | 33 | height, width = x.shape[2:] 34 | 35 | h_pad_need = max(0, (height - 1) * self.stride + self.h_kernel - height) 36 | w_pad_need = max(0, (width - 1) * self.stride + self.w_kernel - width) 37 | 38 | pad_left = w_pad_need // 2 39 | pad_right = w_pad_need - pad_left 40 | pad_top = h_pad_need // 2 41 | pad_bottom = h_pad_need - pad_top 42 | 43 | padding = (pad_left, pad_right, pad_top, pad_bottom) 44 | 45 | x = F.pad(x, padding, 'constant', 0) 46 | 47 | x = self.conv(x) 48 | 49 | return x 50 | 51 | 52 | class GCN(nn.Module): 53 | """ 54 | Large Kernel Matters -- https://arxiv.org/abs/1703.02719 55 | """ 56 | def __init__(self, in_channels, out_channels, k=3): 57 | super(GCN, self).__init__() 58 | 59 | self.conv_l1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 60 | self.conv_l2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 61 | 62 | self.conv_r1 = Conv2D(in_channels=in_channels, out_channels=out_channels, kernel_size=(1, k), padding='same') 63 | self.conv_r2 = Conv2D(in_channels=out_channels, out_channels=out_channels, kernel_size=(k, 1), padding='same') 64 | 65 | def forward(self, x): 66 | x1 = self.conv_l1(x) 67 | x1 = self.conv_l2(x1) 68 | 69 | x2 = self.conv_r1(x) 70 | x2 = self.conv_r2(x2) 71 | 72 | out = x1 + x2 73 | 74 | return out 75 | -------------------------------------------------------------------------------- /adet/layers/iou_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class IOULoss(nn.Module): 6 | """ 7 | Intersetion Over Union (IoU) loss which supports three 8 | different IoU computations: 9 | 10 | * IoU 11 | * Linear IoU 12 | * gIoU 13 | """ 14 | def __init__(self, loc_loss_type='iou'): 15 | super(IOULoss, self).__init__() 16 | self.loc_loss_type = loc_loss_type 17 | 18 | def forward(self, ious, gious=None, weight=None): 19 | if self.loc_loss_type == 'iou': 20 | losses = -torch.log(ious) 21 | elif self.loc_loss_type == 'linear_iou': 22 | losses = 1 - ious 23 | elif self.loc_loss_type == 'giou': 24 | assert gious is not None 25 | losses = 1 - gious 26 | else: 27 | raise NotImplementedError 28 | 29 | if weight is not None: 30 | return (losses * weight).sum() 31 | else: 32 | return losses.sum() 33 | -------------------------------------------------------------------------------- /adet/layers/ml_nms.py: -------------------------------------------------------------------------------- 1 | from detectron2.layers import batched_nms 2 | 3 | 4 | def ml_nms(boxlist, nms_thresh, max_proposals=-1, 5 | score_field="scores", label_field="labels"): 6 | """ 7 | Performs non-maximum suppression on a boxlist, with scores specified 8 | in a boxlist field via score_field. 
9 | 10 | Args: 11 | boxlist (detectron2.structures.Boxes): 12 | nms_thresh (float): 13 | max_proposals (int): if > 0, then only the top max_proposals are kept 14 | after non-maximum suppression 15 | score_field (str): 16 | """ 17 | if nms_thresh <= 0: 18 | return boxlist 19 | boxes = boxlist.pred_boxes.tensor 20 | scores = boxlist.scores 21 | labels = boxlist.pred_classes 22 | keep = batched_nms(boxes, scores, labels, nms_thresh) 23 | if max_proposals > 0: 24 | keep = keep[: max_proposals] 25 | boxlist = boxlist[keep] 26 | return boxlist 27 | -------------------------------------------------------------------------------- /adet/layers/naive_group_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import Module, Parameter 3 | from torch.nn import init 4 | 5 | 6 | class NaiveGroupNorm(Module): 7 | r"""NaiveGroupNorm implements Group Normalization with the high-level matrix operations in PyTorch. 8 | It is a temporary solution to export GN by ONNX before the official GN can be exported by ONNX. 9 | The usage of NaiveGroupNorm is exactly the same as the official :class:`torch.nn.GroupNorm`. 10 | Args: 11 | num_groups (int): number of groups to separate the channels into 12 | num_channels (int): number of channels expected in input 13 | eps: a value added to the denominator for numerical stability. Default: 1e-5 14 | affine: a boolean value that when set to ``True``, this module 15 | has learnable per-channel affine parameters initialized to ones (for weights) 16 | and zeros (for biases). Default: ``True``. 17 | 18 | Shape: 19 | - Input: :math:`(N, C, *)` where :math:`C=\text{num\_channels}` 20 | - Output: :math:`(N, C, *)` (same shape as input) 21 | 22 | Examples:: 23 | 24 | >>> input = torch.randn(20, 6, 10, 10) 25 | >>> # Separate 6 channels into 3 groups 26 | >>> m = NaiveGroupNorm(3, 6) 27 | >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) 28 | >>> m = NaiveGroupNorm(6, 6) 29 | >>> # Put all 6 channels into a single group (equivalent with LayerNorm) 30 | >>> m = NaiveGroupNorm(1, 6) 31 | >>> # Activating the module 32 | >>> output = m(input) 33 | 34 | .. 
_`Group Normalization`: https://arxiv.org/abs/1803.08494 35 | """ 36 | __constants__ = ['num_groups', 'num_channels', 'eps', 'affine', 'weight', 37 | 'bias'] 38 | 39 | def __init__(self, num_groups, num_channels, eps=1e-5, affine=True): 40 | super(NaiveGroupNorm, self).__init__() 41 | self.num_groups = num_groups 42 | self.num_channels = num_channels 43 | self.eps = eps 44 | self.affine = affine 45 | if self.affine: 46 | self.weight = Parameter(torch.Tensor(num_channels)) 47 | self.bias = Parameter(torch.Tensor(num_channels)) 48 | else: 49 | self.register_parameter('weight', None) 50 | self.register_parameter('bias', None) 51 | self.reset_parameters() 52 | 53 | def reset_parameters(self): 54 | if self.affine: 55 | init.ones_(self.weight) 56 | init.zeros_(self.bias) 57 | 58 | def forward(self, input): 59 | N, C, H, W = input.size() 60 | assert C % self.num_groups == 0 61 | input = input.reshape(N, self.num_groups, -1) 62 | mean = input.mean(dim=-1, keepdim=True) 63 | var = (input ** 2).mean(dim=-1, keepdim=True) - mean ** 2 64 | std = torch.sqrt(var + self.eps) 65 | 66 | input = (input - mean) / std 67 | input = input.reshape(N, C, H, W) 68 | if self.affine: 69 | input = input * self.weight.reshape(1, C, 1, 1) + self.bias.reshape(1, C, 1, 1) 70 | return input 71 | 72 | def extra_repr(self): 73 | return '{num_groups}, {num_channels}, eps={eps}, ' \ 74 | 'affine={affine}'.format(**self.__dict__) 75 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/MaskLoader.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import os 4 | import json 5 | import numpy as np 6 | 7 | import torch.utils.data as data 8 | 9 | from detectron2.structures import ( 10 | Boxes, 11 | PolygonMasks, 12 | BoxMode 13 | ) 14 | 15 | 16 | DATASETS = { 17 | "coco_2017_train": { 18 | "img_dir": "coco/train2017", 19 | "ann_file": "coco/annotations/instances_train2017.json" 20 | }, 21 | "coco_2017_val": { 22 | "img_dir": "coco/val2017", 23 | "ann_file": "coco/annotations/instances_val2017.json" 24 | } 25 | } 26 | 27 | 28 | class MaskLoader(data.Dataset): 29 | """ 30 | Dataloader for Local Mask. 31 | 32 | Arguments: 33 | root (string): filepath to dataset folder. 34 | dataset (string): mask to use (eg. 'train', 'val'). 35 | size (tuple): The size used for train/val (height, width). 36 | transform (callable, optional): transformation to perform on the input mask. 37 | 38 | """ 39 | 40 | def __init__(self, root="datasets", dataset="coco_2017_train", size=28, transform=False): 41 | self.root = root 42 | self.dataset = dataset 43 | self.transform = transform 44 | 45 | if isinstance(size, int): 46 | self.size = size 47 | else: 48 | raise TypeError 49 | 50 | data_info = DATASETS[dataset] 51 | img_dir, ann_file = data_info['img_dir'], data_info['ann_file'] 52 | img_dir = os.path.join(self.root, img_dir) # actually we do not use it. 53 | ann_file = os.path.join(self.root, ann_file) 54 | 55 | with open(ann_file, 'r') as f: 56 | anns = json.load(f) 57 | anns = anns['annotations'] 58 | coco = list() 59 | for ann in anns: 60 | if ann.get('iscrowd', 0) == 0: 61 | coco.append(ann) 62 | self.coco = coco 63 | print("Removed {} images with no usable annotations. {} images left.".format( 64 | len(anns) - len(self.coco), len(self.coco))) 65 | 66 | def __len__(self): 67 | return len(self.coco) 68 | 69 | def __getitem__(self, index): 70 | ann = self.coco[index] 71 | 72 | # bbox transform. 
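        # COCO stores "bbox" as [x, y, w, h]; BoxMode.convert below turns it into
        # absolute [x1, y1, x2, y2] corners, which is what Boxes and
        # PolygonMasks.crop_and_resize expect.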
73 | bbox = np.array([ann["bbox"]]) # xmin, ymin, w, h 74 | bbox = BoxMode.convert(bbox, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS) # x1y1x2y2 75 | bbox = Boxes(bbox) 76 | 77 | # mask transform. 78 | mask = PolygonMasks([ann["segmentation"]]) 79 | mask = mask.crop_and_resize(bbox.tensor, self.size).float() 80 | 81 | return mask 82 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .MaskLoader import MaskLoader 3 | from .utils import inverse_sigmoid, direct_sigmoid, IOUMetric, transform, inverse_transform 4 | 5 | __all__ = ["MaskLoader", "IOUMetric", 6 | "inverse_sigmoid", "direct_sigmoid", 7 | "transform", "inverse_transform"] 8 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/mask_evaluation.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import os 4 | import argparse 5 | import numpy as np 6 | from torch.utils.data import DataLoader 7 | 8 | from MaskLoader import MaskLoader 9 | from utils import ( 10 | IOUMetric, 11 | transform, 12 | inverse_transform, 13 | direct_sigmoid, 14 | inverse_sigmoid 15 | ) 16 | 17 | 18 | VALUE_MAX = 0.05 19 | VALUE_MIN = 0.01 20 | 21 | 22 | def parse_args(): 23 | parser = argparse.ArgumentParser(description='Evaluation for PCA Mask Encoding.') 24 | parser.add_argument('--root', default='datasets', type=str) 25 | parser.add_argument('--dataset', default='coco_2017_train', type=str) 26 | parser.add_argument('--matrix', default='coco/components/coco_2017_train' 27 | '_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz', type=str) 28 | # mask encoding params. 29 | parser.add_argument('--mask_size', default=28, type=int) 30 | parser.add_argument('--n_components', default=60, type=int) 31 | parser.add_argument('--class_agnostic', default=True, type=bool) 32 | parser.add_argument('--whiten', default=True, type=bool) 33 | parser.add_argument('--sigmoid', default=True, type=bool) 34 | parser.add_argument('--batch-size', default=1024, type=int) 35 | args = parser.parse_args() 36 | return args 37 | 38 | 39 | if __name__ == "__main__": 40 | args = parse_args() 41 | # parse args. 42 | mask_size = args.mask_size 43 | n_components = args.n_components 44 | class_agnostic = args.class_agnostic 45 | whiten = args.whiten 46 | sigmoid = args.sigmoid 47 | 48 | cur_path = os.path.abspath(os.path.dirname(__file__)) 49 | root_path = cur_path[:cur_path.find("AdelaiDet") + len("AdelaiDet")] 50 | dataset_root = os.path.join(root_path, args.root) 51 | matrix_path = os.path.join(dataset_root, args.matrix) 52 | 53 | # load matrix. 54 | print("Loading matrix parameters: {}".format(matrix_path)) 55 | parameters = np.load(matrix_path) 56 | components_c = parameters['components_c'] 57 | mean_c = parameters['mean_c'] 58 | ratio_c = parameters['ratio_c'] 59 | explained_variance_c = parameters['explained_variance_c'] 60 | if class_agnostic: 61 | components_c = np.squeeze(components_c) 62 | mean_c = np.squeeze(mean_c) 63 | explained_variance_c = np.squeeze(explained_variance_c) 64 | assert n_components == components_c.shape[0], \ 65 | print("The n_components in component_ must equal to the supposed shape.") 66 | else: 67 | # TODO: We have not achieve the function in class-specific. 68 | raise NotImplementedError 69 | 70 | # build data loader. 
71 | mask_data = MaskLoader(root=dataset_root, dataset=args.dataset, size=mask_size) 72 | mask_loader = DataLoader(mask_data, batch_size=args.batch_size, shuffle=False, num_workers=4) 73 | size_data = len(mask_loader) 74 | 75 | # evaluation. 76 | IoUevaluate = IOUMetric(2) 77 | print("Start Eva ...") 78 | for i, masks in enumerate(mask_loader): 79 | print("Eva [{} / {}]".format(i, size_data)) 80 | # generate the reconstruction mask. 81 | masks = masks.view(masks.shape[0], -1).numpy() 82 | masks = masks.astype(np.float32) 83 | # pre-process. 84 | if sigmoid: 85 | value_random = VALUE_MAX * np.random.rand(masks.shape[0], masks.shape[1]) 86 | value_random = np.maximum(value_random, VALUE_MIN) 87 | masks_random = np.where(masks > value_random, 1 - value_random, value_random) 88 | masks_random = inverse_sigmoid(masks_random) 89 | else: 90 | masks_random = masks 91 | # --> encode --> decode. 92 | mask_rc = transform(masks_random, components_=components_c, explained_variance_=explained_variance_c, 93 | mean_=mean_c, whiten=whiten) 94 | mask_rc = inverse_transform(mask_rc, components_=components_c, explained_variance_=explained_variance_c, 95 | mean_=mean_c, whiten=whiten) 96 | # post-process. 97 | if sigmoid: 98 | mask_rc = direct_sigmoid(mask_rc) 99 | # eva. 100 | mask_rc = np.where(mask_rc >= 0.5, 1, 0) 101 | IoUevaluate.add_batch(mask_rc, masks) 102 | 103 | _, _, _, mean_iu, _ = IoUevaluate.evaluate() 104 | print("The mIoU for {}: {}".format(args.matrix, mean_iu)) 105 | -------------------------------------------------------------------------------- /adet/modeling/MEInst/LME/utils.py: -------------------------------------------------------------------------------- 1 | # coding:utf-8 2 | 3 | import numpy as np 4 | 5 | 6 | def direct_sigmoid(x): 7 | """Apply the sigmoid operation. 8 | """ 9 | y = 1./(1.+1./np.exp(x)) 10 | dy = y*(1-y) 11 | return y 12 | 13 | 14 | def inverse_sigmoid(x): 15 | """Apply the inverse sigmoid operation. 16 | y = -ln(1-x/x) 17 | """ 18 | y = -1 * np.log((1-x)/x) 19 | return y 20 | 21 | 22 | def transform(X, components_, explained_variance_, mean_=None, whiten=False): 23 | """Apply dimensionality reduction to X. 24 | X is projected on the first principal components previously extracted 25 | from a training set. 26 | Parameters 27 | ---------- 28 | X: array-like, shape (n_samples, n_features) 29 | New data, where n_samples is the number of samples 30 | and n_features is the number of features. 31 | components_: array-like, shape (n_components, n_features) 32 | mean_: array-like, shape (n_features,) 33 | explained_variance_: array-like, shape (n_components,) 34 | Variance explained by each of the selected components. 35 | whiten : bool, optional 36 | When True (False by default) the ``components_`` vectors are divided 37 | by ``n_samples`` times ``components_`` to ensure uncorrelated outputs 38 | with unit component-wise variances. 39 | Whitening will remove some information from the transformed signal 40 | (the relative variance scales of the components) but can sometimes 41 | improve the predictive accuracy of the downstream estimators by 42 | making data respect some hard-wired assumptions. 
43 | Returns 44 | ------- 45 | X_new : array-like, shape (n_samples, n_components) 46 | """ 47 | 48 | if mean_ is not None: 49 | X = X - mean_ 50 | X_transformed = np.dot(X, components_.T) 51 | if whiten: 52 | X_transformed /= np.sqrt(explained_variance_) 53 | return X_transformed 54 | 55 | 56 | def inverse_transform(X, components_, explained_variance_, mean_=None, whiten=False): 57 | """Transform data back to its original space. 58 | In other words, return an input X_original whose transform would be X. 59 | Parameters 60 | ---------- 61 | X : array-like, shape (n_samples, n_components) 62 | New data, where n_samples is the number of samples 63 | and n_components is the number of components. 64 | components_: array-like, shape (n_components, n_features) 65 | mean_: array-like, shape (n_features,) 66 | explained_variance_: array-like, shape (n_components,) 67 | Variance explained by each of the selected components. 68 | whiten : bool, optional 69 | When True (False by default) the ``components_`` vectors are divided 70 | by ``n_samples`` times ``components_`` to ensure uncorrelated outputs 71 | with unit component-wise variances. 72 | Whitening will remove some information from the transformed signal 73 | (the relative variance scales of the components) but can sometimes 74 | improve the predictive accuracy of the downstream estimators by 75 | making data respect some hard-wired assumptions. 76 | 77 | Returns 78 | ------- 79 | X_original array-like, shape (n_samples, n_features) 80 | """ 81 | if whiten: 82 | X_transformed = np.dot(X, np.sqrt(explained_variance_[:, np.newaxis]) * components_) 83 | else: 84 | X_transformed = np.dot(X, components_) 85 | 86 | if mean_ is not None: 87 | X_transformed = X_transformed + mean_ 88 | 89 | return X_transformed 90 | 91 | 92 | class IOUMetric(object): 93 | """ 94 | Class to calculate mean-iou using fast_hist method 95 | """ 96 | 97 | def __init__(self, num_classes): 98 | self.num_classes = num_classes 99 | self.hist = np.zeros((num_classes, num_classes)) 100 | 101 | def _fast_hist(self, label_pred, label_true): 102 | mask = (label_true >= 0) & (label_true < self.num_classes) 103 | hist = np.bincount( 104 | self.num_classes * label_true[mask].astype(int) + 105 | label_pred[mask], minlength=self.num_classes ** 2).reshape(self.num_classes, self.num_classes) 106 | return hist 107 | 108 | def add_batch(self, predictions, gts): 109 | for lp, lt in zip(predictions, gts): 110 | self.hist += self._fast_hist(lp.flatten(), lt.flatten()) 111 | 112 | def evaluate(self): 113 | acc = np.diag(self.hist).sum() / self.hist.sum() 114 | acc_cls = np.diag(self.hist) / self.hist.sum(axis=1) 115 | acc_cls = np.nanmean(acc_cls) 116 | iu = np.diag(self.hist) / (self.hist.sum(axis=1) + self.hist.sum(axis=0) - np.diag(self.hist)) 117 | mean_iu = np.nanmean(iu) 118 | freq = self.hist.sum(axis=1) / self.hist.sum() 119 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 120 | return acc, acc_cls, iu, mean_iu, fwavacc -------------------------------------------------------------------------------- /adet/modeling/MEInst/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .MEInst import MEInst 3 | from .MaskEncoding import PCAMaskEncoding 4 | -------------------------------------------------------------------------------- /adet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. 
and its affiliates. All Rights Reserved 2 | from .fcos import FCOS 3 | from .blendmask import BlendMask 4 | from .backbone import build_fcos_resnet_fpn_backbone 5 | from .one_stage_detector import OneStageDetector, OneStageRCNN 6 | from .roi_heads.text_head import TextHead 7 | from .batext import BAText 8 | from .MEInst import MEInst 9 | from .condinst import condinst 10 | from .solov2 import SOLOv2 11 | from .fcpose import FCPose 12 | 13 | _EXCLUDE = {"torch", "ShapeSpec"} 14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 15 | -------------------------------------------------------------------------------- /adet/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import build_fcos_resnet_fpn_backbone 2 | from .vovnet import build_vovnet_fpn_backbone, build_vovnet_backbone 3 | from .dla import build_fcos_dla_fpn_backbone 4 | from .resnet_lpf import build_resnet_lpf_backbone 5 | from .bifpn import build_fcos_resnet_bifpn_backbone 6 | -------------------------------------------------------------------------------- /adet/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | import torch.nn.functional as F 3 | import fvcore.nn.weight_init as weight_init 4 | 5 | from detectron2.modeling.backbone import FPN, build_resnet_backbone 6 | from detectron2.layers import ShapeSpec 7 | from detectron2.modeling.backbone.build import BACKBONE_REGISTRY 8 | 9 | from .resnet_lpf import build_resnet_lpf_backbone 10 | from .resnet_interval import build_resnet_interval_backbone 11 | from .mobilenet import build_mnv2_backbone 12 | 13 | 14 | class LastLevelP6P7(nn.Module): 15 | """ 16 | This module is used in RetinaNet and FCOS to generate extra layers, P6 and P7 from 17 | C5 or P5 feature. 18 | """ 19 | 20 | def __init__(self, in_channels, out_channels, in_features="res5"): 21 | super().__init__() 22 | self.num_levels = 2 23 | self.in_feature = in_features 24 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 25 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 26 | for module in [self.p6, self.p7]: 27 | weight_init.c2_xavier_fill(module) 28 | 29 | def forward(self, x): 30 | p6 = self.p6(x) 31 | p7 = self.p7(F.relu(p6)) 32 | return [p6, p7] 33 | 34 | 35 | class LastLevelP6(nn.Module): 36 | """ 37 | This module is used in FCOS to generate extra layers 38 | """ 39 | 40 | def __init__(self, in_channels, out_channels, in_features="res5"): 41 | super().__init__() 42 | self.num_levels = 1 43 | self.in_feature = in_features 44 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 45 | for module in [self.p6]: 46 | weight_init.c2_xavier_fill(module) 47 | 48 | def forward(self, x): 49 | p6 = self.p6(x) 50 | return [p6] 51 | 52 | 53 | @BACKBONE_REGISTRY.register() 54 | def build_fcos_resnet_fpn_backbone(cfg, input_shape: ShapeSpec): 55 | """ 56 | Args: 57 | cfg: a detectron2 CfgNode 58 | 59 | Returns: 60 | backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. 
61 | """ 62 | if cfg.MODEL.BACKBONE.ANTI_ALIAS: 63 | bottom_up = build_resnet_lpf_backbone(cfg, input_shape) 64 | elif cfg.MODEL.RESNETS.DEFORM_INTERVAL > 1: 65 | bottom_up = build_resnet_interval_backbone(cfg, input_shape) 66 | elif cfg.MODEL.MOBILENET: 67 | bottom_up = build_mnv2_backbone(cfg, input_shape) 68 | else: 69 | bottom_up = build_resnet_backbone(cfg, input_shape) 70 | in_features = cfg.MODEL.FPN.IN_FEATURES 71 | out_channels = cfg.MODEL.FPN.OUT_CHANNELS 72 | top_levels = cfg.MODEL.FCOS.TOP_LEVELS 73 | in_channels_top = out_channels 74 | if top_levels == 2: 75 | top_block = LastLevelP6P7(in_channels_top, out_channels, "p5") 76 | if top_levels == 1: 77 | top_block = LastLevelP6(in_channels_top, out_channels, "p5") 78 | elif top_levels == 0: 79 | top_block = None 80 | backbone = FPN( 81 | bottom_up=bottom_up, 82 | in_features=in_features, 83 | out_channels=out_channels, 84 | norm=cfg.MODEL.FPN.NORM, 85 | top_block=top_block, 86 | fuse_type=cfg.MODEL.FPN.FUSE_TYPE, 87 | ) 88 | return backbone 89 | -------------------------------------------------------------------------------- /adet/modeling/backbone/lpf.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.parallel 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class Downsample(nn.Module): 9 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 10 | super(Downsample, self).__init__() 11 | self.filt_size = filt_size 12 | self.pad_off = pad_off 13 | self.pad_sizes = [int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2)), int(1.*(filt_size-1)/2), int(np.ceil(1.*(filt_size-1)/2))] 14 | self.pad_sizes = [pad_size+pad_off for pad_size in self.pad_sizes] 15 | self.stride = stride 16 | self.off = int((self.stride-1)/2.) 17 | self.channels = channels 18 | 19 | # print('Filter size [%i]'%filt_size) 20 | if(self.filt_size==1): 21 | a = np.array([1.,]) 22 | elif(self.filt_size==2): 23 | a = np.array([1., 1.]) 24 | elif(self.filt_size==3): 25 | a = np.array([1., 2., 1.]) 26 | elif(self.filt_size==4): 27 | a = np.array([1., 3., 3., 1.]) 28 | elif(self.filt_size==5): 29 | a = np.array([1., 4., 6., 4., 1.]) 30 | elif(self.filt_size==6): 31 | a = np.array([1., 5., 10., 10., 5., 1.]) 32 | elif(self.filt_size==7): 33 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 34 | 35 | filt = torch.Tensor(a[:,None]*a[None,:]) 36 | filt = filt/torch.sum(filt) 37 | self.register_buffer('filt', filt[None,None,:,:].repeat((self.channels,1,1,1))) 38 | 39 | self.pad = get_pad_layer(pad_type)(self.pad_sizes) 40 | 41 | def forward(self, inp): 42 | if(self.filt_size==1): 43 | if(self.pad_off==0): 44 | return inp[:,:,::self.stride,::self.stride] 45 | else: 46 | return self.pad(inp)[:,:,::self.stride,::self.stride] 47 | else: 48 | return F.conv2d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 49 | 50 | def get_pad_layer(pad_type): 51 | if(pad_type in ['refl','reflect']): 52 | PadLayer = nn.ReflectionPad2d 53 | elif(pad_type in ['repl','replicate']): 54 | PadLayer = nn.ReplicationPad2d 55 | elif(pad_type=='zero'): 56 | PadLayer = nn.ZeroPad2d 57 | else: 58 | print('Pad type [%s] not recognized'%pad_type) 59 | return PadLayer 60 | 61 | 62 | class Downsample1D(nn.Module): 63 | def __init__(self, pad_type='reflect', filt_size=3, stride=2, channels=None, pad_off=0): 64 | super(Downsample1D, self).__init__() 65 | self.filt_size = filt_size 66 | self.pad_off = pad_off 67 | self.pad_sizes = [int(1. 
* (filt_size - 1) / 2), int(np.ceil(1. * (filt_size - 1) / 2))] 68 | self.pad_sizes = [pad_size + pad_off for pad_size in self.pad_sizes] 69 | self.stride = stride 70 | self.off = int((self.stride - 1) / 2.) 71 | self.channels = channels 72 | 73 | # print('Filter size [%i]' % filt_size) 74 | if(self.filt_size == 1): 75 | a = np.array([1., ]) 76 | elif(self.filt_size == 2): 77 | a = np.array([1., 1.]) 78 | elif(self.filt_size == 3): 79 | a = np.array([1., 2., 1.]) 80 | elif(self.filt_size == 4): 81 | a = np.array([1., 3., 3., 1.]) 82 | elif(self.filt_size == 5): 83 | a = np.array([1., 4., 6., 4., 1.]) 84 | elif(self.filt_size == 6): 85 | a = np.array([1., 5., 10., 10., 5., 1.]) 86 | elif(self.filt_size == 7): 87 | a = np.array([1., 6., 15., 20., 15., 6., 1.]) 88 | 89 | filt = torch.Tensor(a) 90 | filt = filt / torch.sum(filt) 91 | self.register_buffer('filt', filt[None, None, :].repeat((self.channels, 1, 1))) 92 | 93 | self.pad = get_pad_layer_1d(pad_type)(self.pad_sizes) 94 | 95 | def forward(self, inp): 96 | if(self.filt_size == 1): 97 | if(self.pad_off == 0): 98 | return inp[:, :, ::self.stride] 99 | else: 100 | return self.pad(inp)[:, :, ::self.stride] 101 | else: 102 | return F.conv1d(self.pad(inp), self.filt, stride=self.stride, groups=inp.shape[1]) 103 | 104 | 105 | def get_pad_layer_1d(pad_type): 106 | if(pad_type in ['refl', 'reflect']): 107 | PadLayer = nn.ReflectionPad1d 108 | elif(pad_type in ['repl', 'replicate']): 109 | PadLayer = nn.ReplicationPad1d 110 | elif(pad_type == 'zero'): 111 | PadLayer = nn.ZeroPad1d 112 | else: 113 | print('Pad type [%s] not recognized' % pad_type) 114 | return PadLayer 115 | -------------------------------------------------------------------------------- /adet/modeling/batext/__init__.py: -------------------------------------------------------------------------------- 1 | from .batext import BAText 2 | -------------------------------------------------------------------------------- /adet/modeling/blendmask/__init__.py: -------------------------------------------------------------------------------- 1 | from .basis_module import build_basis_module 2 | from .blendmask import BlendMask 3 | -------------------------------------------------------------------------------- /adet/modeling/blendmask/basis_module.py: -------------------------------------------------------------------------------- 1 | from typing import Dict 2 | from torch import nn 3 | from torch.nn import functional as F 4 | 5 | from detectron2.utils.registry import Registry 6 | from detectron2.layers import ShapeSpec 7 | 8 | from adet.layers import conv_with_kaiming_uniform 9 | 10 | 11 | BASIS_MODULE_REGISTRY = Registry("BASIS_MODULE") 12 | BASIS_MODULE_REGISTRY.__doc__ = """ 13 | Registry for basis module, which produces global bases from feature maps. 14 | 15 | The registered object will be called with `obj(cfg, input_shape)`. 16 | The call should return a `nn.Module` object. 
17 | """ 18 | 19 | 20 | def build_basis_module(cfg, input_shape): 21 | name = cfg.MODEL.BASIS_MODULE.NAME 22 | return BASIS_MODULE_REGISTRY.get(name)(cfg, input_shape) 23 | 24 | 25 | @BASIS_MODULE_REGISTRY.register() 26 | class ProtoNet(nn.Module): 27 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 28 | """ 29 | TODO: support deconv and variable channel width 30 | """ 31 | # official protonet has a relu after each conv 32 | super().__init__() 33 | # fmt: off 34 | mask_dim = cfg.MODEL.BASIS_MODULE.NUM_BASES 35 | planes = cfg.MODEL.BASIS_MODULE.CONVS_DIM 36 | self.in_features = cfg.MODEL.BASIS_MODULE.IN_FEATURES 37 | self.loss_on = cfg.MODEL.BASIS_MODULE.LOSS_ON 38 | norm = cfg.MODEL.BASIS_MODULE.NORM 39 | num_convs = cfg.MODEL.BASIS_MODULE.NUM_CONVS 40 | self.visualize = cfg.MODEL.BLENDMASK.VISUALIZE 41 | # fmt: on 42 | 43 | feature_channels = {k: v.channels for k, v in input_shape.items()} 44 | 45 | conv_block = conv_with_kaiming_uniform(norm, True) # conv relu bn 46 | self.refine = nn.ModuleList() 47 | for in_feature in self.in_features: 48 | self.refine.append(conv_block( 49 | feature_channels[in_feature], planes, 3, 1)) 50 | tower = [] 51 | for i in range(num_convs): 52 | tower.append( 53 | conv_block(planes, planes, 3, 1)) 54 | tower.append( 55 | nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)) 56 | tower.append( 57 | conv_block(planes, planes, 3, 1)) 58 | tower.append( 59 | nn.Conv2d(planes, mask_dim, 1)) 60 | self.add_module('tower', nn.Sequential(*tower)) 61 | 62 | if self.loss_on: 63 | # fmt: off 64 | self.common_stride = cfg.MODEL.BASIS_MODULE.COMMON_STRIDE 65 | num_classes = cfg.MODEL.BASIS_MODULE.NUM_CLASSES + 1 66 | self.sem_loss_weight = cfg.MODEL.BASIS_MODULE.LOSS_WEIGHT 67 | # fmt: on 68 | 69 | inplanes = feature_channels[self.in_features[0]] 70 | self.seg_head = nn.Sequential(nn.Conv2d(inplanes, planes, kernel_size=3, 71 | stride=1, padding=1, bias=False), 72 | nn.BatchNorm2d(planes), 73 | nn.ReLU(), 74 | nn.Conv2d(planes, planes, kernel_size=3, 75 | stride=1, padding=1, bias=False), 76 | nn.BatchNorm2d(planes), 77 | nn.ReLU(), 78 | nn.Conv2d(planes, num_classes, kernel_size=1, 79 | stride=1)) 80 | 81 | def forward(self, features, targets=None): 82 | for i, f in enumerate(self.in_features): 83 | if i == 0: 84 | x = self.refine[i](features[f]) 85 | else: 86 | x_p = self.refine[i](features[f]) 87 | x_p = F.interpolate(x_p, x.size()[2:], mode="bilinear", align_corners=False) 88 | # x_p = aligned_bilinear(x_p, x.size(3) // x_p.size(3)) 89 | x = x + x_p 90 | outputs = {"bases": [self.tower(x)]} 91 | losses = {} 92 | # auxiliary thing semantic loss 93 | if self.training and self.loss_on: 94 | sem_out = self.seg_head(features[self.in_features[0]]) 95 | # resize target to reduce memory 96 | gt_sem = targets.unsqueeze(1).float() 97 | gt_sem = F.interpolate( 98 | gt_sem, scale_factor=1 / self.common_stride) 99 | seg_loss = F.cross_entropy( 100 | sem_out, gt_sem.squeeze(1).long()) 101 | losses['loss_basis_sem'] = seg_loss * self.sem_loss_weight 102 | elif self.visualize and hasattr(self, "seg_head"): 103 | outputs["seg_thing_out"] = self.seg_head(features[self.in_features[0]]) 104 | return outputs, losses 105 | -------------------------------------------------------------------------------- /adet/modeling/blendmask/blender.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn import functional as F 3 | 4 | from detectron2.layers import cat 5 | from detectron2.modeling.poolers import 
ROIPooler 6 | 7 | 8 | def build_blender(cfg): 9 | return Blender(cfg) 10 | 11 | 12 | class Blender(object): 13 | def __init__(self, cfg): 14 | 15 | # fmt: off 16 | self.pooler_resolution = cfg.MODEL.BLENDMASK.BOTTOM_RESOLUTION 17 | sampling_ratio = cfg.MODEL.BLENDMASK.POOLER_SAMPLING_RATIO 18 | pooler_type = cfg.MODEL.BLENDMASK.POOLER_TYPE 19 | pooler_scales = cfg.MODEL.BLENDMASK.POOLER_SCALES 20 | self.attn_size = cfg.MODEL.BLENDMASK.ATTN_SIZE 21 | self.top_interp = cfg.MODEL.BLENDMASK.TOP_INTERP 22 | num_bases = cfg.MODEL.BASIS_MODULE.NUM_BASES 23 | # fmt: on 24 | 25 | self.attn_len = num_bases * self.attn_size * self.attn_size 26 | 27 | self.pooler = ROIPooler( 28 | output_size=self.pooler_resolution, 29 | scales=pooler_scales, 30 | sampling_ratio=sampling_ratio, 31 | pooler_type=pooler_type, 32 | canonical_level=2) 33 | 34 | def __call__(self, bases, proposals, gt_instances): 35 | if gt_instances is not None: 36 | # training 37 | # reshape attns 38 | dense_info = proposals["instances"] 39 | attns = dense_info.top_feats 40 | pos_inds = dense_info.pos_inds 41 | if pos_inds.numel() == 0: 42 | return None, {"loss_mask": sum([x.sum() * 0 for x in attns]) + bases[0].sum() * 0} 43 | 44 | gt_inds = dense_info.gt_inds 45 | 46 | rois = self.pooler(bases, [x.gt_boxes for x in gt_instances]) 47 | rois = rois[gt_inds] 48 | pred_mask_logits = self.merge_bases(rois, attns) 49 | 50 | # gen targets 51 | gt_masks = [] 52 | for instances_per_image in gt_instances: 53 | if len(instances_per_image.gt_boxes.tensor) == 0: 54 | continue 55 | gt_mask_per_image = instances_per_image.gt_masks.crop_and_resize( 56 | instances_per_image.gt_boxes.tensor, self.pooler_resolution 57 | ).to(device=pred_mask_logits.device) 58 | gt_masks.append(gt_mask_per_image) 59 | gt_masks = cat(gt_masks, dim=0) 60 | gt_masks = gt_masks[gt_inds] 61 | N = gt_masks.size(0) 62 | gt_masks = gt_masks.view(N, -1) 63 | 64 | gt_ctr = dense_info.gt_ctrs 65 | loss_denorm = proposals["loss_denorm"] 66 | mask_losses = F.binary_cross_entropy_with_logits( 67 | pred_mask_logits, gt_masks.to(dtype=torch.float32), reduction="none") 68 | mask_loss = ((mask_losses.mean(dim=-1) * gt_ctr).sum() 69 | / loss_denorm) 70 | return None, {"loss_mask": mask_loss} 71 | else: 72 | # no proposals 73 | total_instances = sum([len(x) for x in proposals]) 74 | if total_instances == 0: 75 | # add empty pred_masks results 76 | for box in proposals: 77 | box.pred_masks = box.pred_classes.view( 78 | -1, 1, self.pooler_resolution, self.pooler_resolution) 79 | return proposals, {} 80 | rois = self.pooler(bases, [x.pred_boxes for x in proposals]) 81 | attns = cat([x.top_feat for x in proposals], dim=0) 82 | pred_mask_logits = self.merge_bases(rois, attns).sigmoid() 83 | pred_mask_logits = pred_mask_logits.view( 84 | -1, 1, self.pooler_resolution, self.pooler_resolution) 85 | start_ind = 0 86 | for box in proposals: 87 | end_ind = start_ind + len(box) 88 | box.pred_masks = pred_mask_logits[start_ind:end_ind] 89 | start_ind = end_ind 90 | return proposals, {} 91 | 92 | def merge_bases(self, rois, coeffs, location_to_inds=None): 93 | # merge predictions 94 | N = coeffs.size(0) 95 | if location_to_inds is not None: 96 | rois = rois[location_to_inds] 97 | N, B, H, W = rois.size() 98 | 99 | coeffs = coeffs.view(N, -1, self.attn_size, self.attn_size) 100 | coeffs = F.interpolate(coeffs, (H, W), 101 | mode=self.top_interp).softmax(dim=1) 102 | masks_preds = (rois * coeffs).sum(dim=1) 103 | return masks_preds.view(N, -1) 104 | 
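A note on the shape bookkeeping in `Blender.merge_bases` above: each instance's attention coefficients are reshaped into a small K x A x A map, upsampled to the pooled basis resolution, softmax-normalized across the K bases, and used as per-pixel weights for the cropped bases. The following is a minimal, self-contained sketch of that operation on dummy tensors; the sizes (K=4 bases, 14x14 attentions, 56x56 pooled bases) are the typical BlendMask values and are assumptions here, since the real ones come from cfg.MODEL.BASIS_MODULE.NUM_BASES, cfg.MODEL.BLENDMASK.ATTN_SIZE and cfg.MODEL.BLENDMASK.BOTTOM_RESOLUTION.

import torch
import torch.nn.functional as F

N, K, A, R = 2, 4, 14, 56                  # instances, bases, attn size, pooled resolution (assumed defaults)
rois = torch.randn(N, K, R, R)             # per-instance crops of the K global bases (output of the pooler)
coeffs = torch.randn(N, K * A * A)         # flat attention vector predicted by the top module per instance

coeffs = coeffs.view(N, K, A, A)           # unflatten to one A x A map per basis
coeffs = F.interpolate(coeffs, (R, R), mode="bilinear", align_corners=False)
coeffs = coeffs.softmax(dim=1)             # normalize across the K bases at every pixel
masks = (rois * coeffs).sum(dim=1)         # weighted sum -> one R x R mask logit map per instance
print(masks.shape)                         # torch.Size([2, 56, 56])

Because of the softmax over dim=1, each output pixel is a convex combination of the K basis values at that location, which is what lets a handful of global bases be reused by every instance.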
-------------------------------------------------------------------------------- /adet/modeling/condinst/__init__.py: -------------------------------------------------------------------------------- 1 | from .condinst import CondInst 2 | -------------------------------------------------------------------------------- /adet/modeling/fcos/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos import FCOS 2 | -------------------------------------------------------------------------------- /adet/modeling/fcpose/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcpose_framework import FCPose 2 | -------------------------------------------------------------------------------- /adet/modeling/fcpose/fcpose_framework.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import List, Dict 3 | import torch 4 | from torch import nn 5 | from torch.nn import functional as F 6 | from detectron2.modeling.proposal_generator.build import PROPOSAL_GENERATOR_REGISTRY 7 | from detectron2.layers import ShapeSpec, NaiveSyncBatchNorm 8 | from adet.modeling.fcos import FCOS 9 | from .basis_module import basis_module 10 | from .fcpose_head import fcpose_head_module 11 | from .utils import compute_basis_stride, top_module, process_gt_instances 12 | 13 | 14 | 15 | __all__ = ["FCPose"] 16 | 17 | 18 | 19 | @PROPOSAL_GENERATOR_REGISTRY.register() 20 | class FCPose(nn.Module): 21 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 22 | super().__init__() 23 | self.fcos = FCOS(cfg, input_shape) 24 | self.top_module = top_module(256, cfg.MODEL.FCPOSE.ATTN_LEN) 25 | 26 | self.basis_module = basis_module(cfg,input_shape) 27 | 28 | self.fcpose_head = fcpose_head_module(cfg) 29 | 30 | self.gt_stride = cfg.MODEL.FCPOSE.GT_HEATMAP_STRIDE 31 | self.device = cfg.MODEL.DEVICE 32 | 33 | def forward(self, images, features, gt_instances=None): 34 | if gt_instances is not None: 35 | basis_gt_heatmap, head_gt_heatmap,p3_heatmap_list = process_gt_instances(gt_instances, self.gt_stride, self.device) 36 | else: 37 | basis_gt_heatmap, head_gt_heatmap,p3_heatmap_list = None, None, None 38 | 39 | proposals, proposal_losses = self.fcos(images, features, gt_instances, self.top_module) 40 | 41 | 42 | basis_out, basis_losses = self.basis_module(features, basis_gt_heatmap, p3_heatmap_list) 43 | del features, basis_gt_heatmap, p3_heatmap_list 44 | 45 | 46 | # base_stride = compute_basis_stride(images, basis_out) 47 | detector_results, detector_losses = self.fcpose_head( 48 | basis_out["bases"], proposals, 49 | head_gt_heatmap, gt_instances, basis_out["basis_seg"] 50 | ) 51 | 52 | losses = {} 53 | if self.training: 54 | losses.update(proposal_losses) 55 | losses.update(basis_losses) 56 | losses.update(detector_losses) 57 | 58 | 59 | return detector_results, losses -------------------------------------------------------------------------------- /adet/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /adet/modeling/solov2/__init__.py: -------------------------------------------------------------------------------- 1 | from .solov2 import SOLOv2 2 | -------------------------------------------------------------------------------- /adet/modeling/solov2/loss.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | from fvcore.nn import sigmoid_focal_loss_jit 5 | 6 | 7 | def dice_loss(input, target): 8 | input = input.contiguous().view(input.size()[0], -1) 9 | target = target.contiguous().view(target.size()[0], -1).float() 10 | 11 | a = torch.sum(input * target, 1) 12 | b = torch.sum(input * input, 1) + 0.001 13 | c = torch.sum(target * target, 1) + 0.001 14 | d = (2 * a) / (b + c) 15 | return 1 - d 16 | 17 | 18 | def reduce_loss(loss, reduction): 19 | """Reduce loss as specified. 20 | Args: 21 | loss (Tensor): Elementwise loss tensor. 22 | reduction (str): Options are "none", "mean" and "sum". 23 | Return: 24 | Tensor: Reduced loss tensor. 25 | """ 26 | reduction_enum = F._Reduction.get_enum(reduction) 27 | # none: 0, elementwise_mean:1, sum: 2 28 | if reduction_enum == 0: 29 | return loss 30 | elif reduction_enum == 1: 31 | return loss.mean() 32 | elif reduction_enum == 2: 33 | return loss.sum() 34 | 35 | 36 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 37 | """Apply element-wise weight and reduce loss. 38 | Args: 39 | loss (Tensor): Element-wise loss. 40 | weight (Tensor): Element-wise weights. 41 | reduction (str): Same as built-in losses of PyTorch. 42 | avg_factor (float): Avarage factor when computing the mean of losses. 43 | Returns: 44 | Tensor: Processed loss values. 45 | """ 46 | # if weight is specified, apply element-wise weight 47 | if weight is not None: 48 | loss = loss * weight 49 | 50 | # if avg_factor is not specified, just reduce the loss 51 | if avg_factor is None: 52 | loss = reduce_loss(loss, reduction) 53 | else: 54 | # if reduction is mean, then average the loss by avg_factor 55 | if reduction == 'mean': 56 | loss = loss.sum() / avg_factor 57 | # if reduction is 'none', then do nothing, otherwise raise an error 58 | elif reduction != 'none': 59 | raise ValueError('avg_factor can not be used with reduction="sum"') 60 | return loss 61 | 62 | 63 | def sigmoid_focal_loss(pred, 64 | target, 65 | weight=None, 66 | gamma=2.0, 67 | alpha=0.25, 68 | reduction='mean', 69 | avg_factor=None): 70 | # Function.apply does not accept keyword arguments, so the decorator 71 | # "weighted_loss" is not applicable 72 | loss = sigmoid_focal_loss_jit(pred, target, gamma=gamma, alpha=alpha) 73 | if weight is not None: 74 | if weight.shape != loss.shape: 75 | if weight.size(0) == loss.size(0): 76 | # For most cases, weight is of shape (num_priors, ), 77 | # which means it does not have the second axis num_class 78 | weight = weight.view(-1, 1) 79 | else: 80 | # Sometimes, weight per anchor per class is also needed. e.g. 81 | # in FSAF. But it may be flattened of shape 82 | # (num_priors x num_class, ), while loss is still of shape 83 | # (num_priors, num_class). 84 | assert weight.numel() == loss.numel() 85 | weight = weight.view(loss.size(0), -1) 86 | assert weight.ndim == loss.ndim 87 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 88 | return loss 89 | 90 | 91 | class FocalLoss(nn.Module): 92 | 93 | def __init__(self, 94 | use_sigmoid=True, 95 | gamma=2.0, 96 | alpha=0.25, 97 | reduction='mean', 98 | loss_weight=1.0): 99 | super(FocalLoss, self).__init__() 100 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
101 | self.use_sigmoid = use_sigmoid 102 | self.gamma = gamma 103 | self.alpha = alpha 104 | self.reduction = reduction 105 | self.loss_weight = loss_weight 106 | 107 | def forward(self, 108 | pred, 109 | target, 110 | weight=None, 111 | avg_factor=None, 112 | reduction_override=None): 113 | assert reduction_override in (None, 'none', 'mean', 'sum') 114 | reduction = ( 115 | reduction_override if reduction_override else self.reduction) 116 | if self.use_sigmoid: 117 | loss_cls = self.loss_weight * sigmoid_focal_loss( 118 | pred, 119 | target, 120 | weight, 121 | gamma=self.gamma, 122 | alpha=self.alpha, 123 | reduction=reduction, 124 | avg_factor=avg_factor) 125 | else: 126 | raise NotImplementedError 127 | return loss_cls 128 | -------------------------------------------------------------------------------- /adet/structures/__init__.py: -------------------------------------------------------------------------------- 1 | from .beziers import Beziers -------------------------------------------------------------------------------- /adet/structures/beziers.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | import torch 3 | 4 | 5 | class Beziers: 6 | """ 7 | This structure stores a list of bezier curves as a Nx16 torch.Tensor. 8 | It will support some common methods about bezier shapes 9 | (`area`, `clip`, `nonempty`, etc), 10 | and also behaves like a Tensor 11 | (support indexing, `to(device)`, `.device`, and iteration over all boxes) 12 | 13 | Attributes: 14 | tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2). 15 | """ 16 | 17 | def __init__(self, tensor: torch.Tensor): 18 | """ 19 | Args: 20 | tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2). 21 | """ 22 | device = tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu") 23 | tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device) 24 | if tensor.numel() == 0: 25 | # Use reshape, so we don't end up creating a new tensor that does not depend on 26 | # the inputs (and consequently confuses jit) 27 | tensor = tensor.reshape((0, 16)).to(dtype=torch.float32, device=device) 28 | assert tensor.dim() == 2 and tensor.size(-1) == 16, tensor.size() 29 | 30 | self.tensor = tensor 31 | 32 | def to(self, device: str) -> "Beziers": 33 | return Beziers(self.tensor.to(device)) 34 | 35 | def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Beziers": 36 | """ 37 | Returns: 38 | Beziers: Create a new :class:`Beziers` by indexing. 
39 | """ 40 | if isinstance(item, int): 41 | return Beziers(self.tensor[item].view(1, -1)) 42 | b = self.tensor[item] 43 | assert b.dim() == 2, "Indexing on Boxes with {} failed to return a matrix!".format(item) 44 | return Beziers(b) -------------------------------------------------------------------------------- /adet/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aim-uofa/AdelaiDet/5e19cb172b8363820b409ed1a2754fb19ad3acb8/adet/utils/__init__.py -------------------------------------------------------------------------------- /adet/utils/comm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | import torch.distributed as dist 4 | 5 | from detectron2.utils.comm import get_world_size 6 | 7 | 8 | def reduce_sum(tensor): 9 | world_size = get_world_size() 10 | if world_size < 2: 11 | return tensor 12 | tensor = tensor.clone() 13 | dist.all_reduce(tensor, op=dist.ReduceOp.SUM) 14 | return tensor 15 | 16 | 17 | def reduce_mean(tensor): 18 | num_gpus = get_world_size() 19 | total = reduce_sum(tensor) 20 | return total.float() / num_gpus 21 | 22 | 23 | def aligned_bilinear(tensor, factor): 24 | assert tensor.dim() == 4 25 | assert factor >= 1 26 | assert int(factor) == factor 27 | 28 | if factor == 1: 29 | return tensor 30 | 31 | h, w = tensor.size()[2:] 32 | tensor = F.pad(tensor, pad=(0, 1, 0, 1), mode="replicate") 33 | oh = factor * h + 1 34 | ow = factor * w + 1 35 | tensor = F.interpolate( 36 | tensor, size=(oh, ow), 37 | mode='bilinear', 38 | align_corners=True 39 | ) 40 | tensor = F.pad( 41 | tensor, pad=(factor // 2, 0, factor // 2, 0), 42 | mode="replicate" 43 | ) 44 | 45 | return tensor[:, :, :oh - 1, :ow - 1] 46 | 47 | 48 | def compute_locations(h, w, stride, device): 49 | shifts_x = torch.arange( 50 | 0, w * stride, step=stride, 51 | dtype=torch.float32, device=device 52 | ) 53 | shifts_y = torch.arange( 54 | 0, h * stride, step=stride, 55 | dtype=torch.float32, device=device 56 | ) 57 | shift_y, shift_x = torch.meshgrid(shifts_y, shifts_x) 58 | shift_x = shift_x.reshape(-1) 59 | shift_y = shift_y.reshape(-1) 60 | locations = torch.stack((shift_x, shift_y), dim=1) + stride // 2 61 | return locations 62 | 63 | 64 | def compute_ious(pred, target): 65 | """ 66 | Args: 67 | pred: Nx4 predicted bounding boxes 68 | target: Nx4 target bounding boxes 69 | Both are in the form of FCOS prediction (l, t, r, b) 70 | """ 71 | pred_left = pred[:, 0] 72 | pred_top = pred[:, 1] 73 | pred_right = pred[:, 2] 74 | pred_bottom = pred[:, 3] 75 | 76 | target_left = target[:, 0] 77 | target_top = target[:, 1] 78 | target_right = target[:, 2] 79 | target_bottom = target[:, 3] 80 | 81 | target_aera = (target_left + target_right) * \ 82 | (target_top + target_bottom) 83 | pred_aera = (pred_left + pred_right) * \ 84 | (pred_top + pred_bottom) 85 | 86 | w_intersect = torch.min(pred_left, target_left) + \ 87 | torch.min(pred_right, target_right) 88 | h_intersect = torch.min(pred_bottom, target_bottom) + \ 89 | torch.min(pred_top, target_top) 90 | 91 | g_w_intersect = torch.max(pred_left, target_left) + \ 92 | torch.max(pred_right, target_right) 93 | g_h_intersect = torch.max(pred_bottom, target_bottom) + \ 94 | torch.max(pred_top, target_top) 95 | ac_uion = g_w_intersect * g_h_intersect 96 | 97 | area_intersect = w_intersect * h_intersect 98 | area_union = target_aera + pred_aera - area_intersect 99 | 100 | ious = (area_intersect + 1.0) / 
(area_union + 1.0) 101 | gious = ious - (ac_uion - area_union) / ac_uion 102 | 103 | return ious, gious 104 | -------------------------------------------------------------------------------- /configs/BAText/Base-BAText.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageRCNN" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "BAText" 11 | FCOS: 12 | NMS_TH: 0.5 13 | THRESH_WITH_CTR: False 14 | USE_SCALE: False 15 | NUM_CLASSES: 1 16 | INFERENCE_TH_TRAIN: 0.7 17 | INFERENCE_TH_TEST: 0.45 18 | ROI_HEADS: 19 | NAME: "TextHead" 20 | IOU_THRESHOLDS: [0.5] 21 | SOLVER: 22 | CLIP_GRADIENTS: 23 | ENABLED: True 24 | INPUT: 25 | HFLIP_TRAIN: False 26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800, 832, 864, 896) 27 | MAX_SIZE_TRAIN: 1600 28 | MIN_SIZE_TEST: 1000 29 | MAX_SIZE_TEST: 1824 30 | CROP: 31 | ENABLED: True 32 | CROP_INSTANCE: False 33 | SIZE: [0.1, 0.1] -------------------------------------------------------------------------------- /configs/BAText/CTW1500/Base-CTW1500.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | MODEL: 3 | BATEXT: 4 | POOLER_RESOLUTION: (8,128) 5 | NUM_CHARS: 100 6 | FCOS: 7 | INFERENCE_TH_TEST: 0.6 8 | DATASETS: 9 | TRAIN: ("ctw1500_word_train",) 10 | TEST: ("ctw1500_word_test",) 11 | INPUT: 12 | MIN_SIZE_TEST: 800 13 | MAX_SIZE_TEST: 1024 14 | -------------------------------------------------------------------------------- /configs/BAText/CTW1500/attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CTW1500.yaml" 2 | MODEL: 3 | WEIGHTS: "weights/batext/pretrain_attn_R_50.pth" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" # "attn" "rnn" 8 | SOLVER: 9 | IMS_PER_BATCH: 8 10 | BASE_LR: 0.001 11 | STEPS: (80000,) 12 | MAX_ITER: 120000 13 | CHECKPOINT_PERIOD: 10000 14 | TEST: 15 | EVAL_PERIOD: 10000 16 | OUTPUT_DIR: "output/batext/ctw1500/attn_R_50" 17 | -------------------------------------------------------------------------------- /configs/BAText/CTW1500/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CTW1500.yaml" 2 | MODEL: 3 | WEIGHTS: "model_v2_pretrain.pth" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | FCOS: 18 | # Best e2e: 0.5; Best det: 0.3 19 | INFERENCE_TH_TEST: 0.5 20 | SOLVER: 21 | IMS_PER_BATCH: 8 22 | BASE_LR: 0.001 23 | STEPS: (80000, ) 24 | MAX_ITER: 100000 25 | CHECKPOINT_PERIOD: 10000 26 | TEST: 27 | EVAL_PERIOD: 10000 28 | OUTPUT_DIR: "output/batext/ctw1500/v2_attn_R_50" 29 | -------------------------------------------------------------------------------- /configs/BAText/ICDAR2015/Base-ic15.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("icdar2015_train",) 4 | TEST: ("icdar2015_test",) -------------------------------------------------------------------------------- /configs/BAText/ICDAR2015/v1_attn_R_50.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "Base-ic15.yaml" 2 | MODEL: 3 | RESNETS: 4 | DEPTH: 50 5 | BATEXT: 6 | RECOGNIZER: "attn" 7 | SOLVER: 8 | IMS_PER_BATCH: 4 9 | BASE_LR: 0.001 10 | MAX_ITER: 5500 11 | CHECKPOINT_PERIOD: 500 12 | INPUT: 13 | MIN_SIZE_TRAIN: (980, 1044, 1108, 1172, 1236, 1300, 1364, 1428, 1492) 14 | MAX_SIZE_TRAIN: 2900 15 | MIN_SIZE_TEST: 2000 16 | MAX_SIZE_TEST: 4000 17 | IS_ROTATE: True 18 | TEST: 19 | EVAL_PERIOD: 500 20 | OUTPUT_DIR: "output/batext/ic15/v1_attn_R_50" 21 | -------------------------------------------------------------------------------- /configs/BAText/ICDAR2015/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-ic15.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_bifpn_backbone" 5 | BiFPN: 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | OUT_CHANNELS: 256 8 | NUM_REPEATS: 2 9 | NORM: "SyncBN" 10 | RESNETS: 11 | DEPTH: 50 12 | BATEXT: 13 | RECOGNIZER: "attn" 14 | USE_COORDCONV: True 15 | USE_AET: True 16 | POOLER_RESOLUTION: (16, 64) 17 | FCOS: 18 | INFERENCE_TH_TEST: 0.4 19 | NMS_TH: 0.4 20 | SOLVER: 21 | IMS_PER_BATCH: 4 22 | BASE_LR: 0.001 23 | MAX_ITER: 5500 24 | CHECKPOINT_PERIOD: 500 25 | INPUT: 26 | MIN_SIZE_TRAIN: (980, 1044, 1108, 1172, 1236, 1300, 1364, 1428, 1492) 27 | MAX_SIZE_TRAIN: 2900 28 | MIN_SIZE_TEST: 2000 29 | MAX_SIZE_TEST: 4000 30 | IS_ROTATE: True 31 | TEST: 32 | EVAL_PERIOD: 500 33 | OUTPUT_DIR: "output/batext/ic15/v2_attn_R_50" 34 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/Base-Chn-Pretrain.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("chnsyn_train", "rects_train", "rects_val", "lsvt_train", "art_train", ) 4 | TEST: ("rects_test", ) 5 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/Base-Pretrain-ic15.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("mltbezier_word_train", "totaltext_train", "syntext1_train", "syntext2_train", "icdar2013_train", "icdar2015_train") 4 | TEST: ("icdar2015_test",) -------------------------------------------------------------------------------- /configs/BAText/Pretrain/Base-Pretrain.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("mltbezier_word_train", "totaltext_train", "syntext1_train", "syntext2_train",) 4 | TEST: ("totaltext_val",) -------------------------------------------------------------------------------- /configs/BAText/Pretrain/attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Pretrain.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" 8 | SOLVER: 9 | IMS_PER_BATCH: 8 10 | BASE_LR: 0.01 11 | STEPS: (160000, 220000) 12 | MAX_ITER: 260000 13 | CHECKPOINT_PERIOD: 20000 14 | TEST: 15 | EVAL_PERIOD: 20000 16 | OUTPUT_DIR: "output/batext/pretrain/attn_R_50" 17 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v1_ic15_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
"Base-Pretrain-ic15.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" 8 | POOLER_RESOLUTION: (16, 64) 9 | SOLVER: 10 | IMS_PER_BATCH: 8 11 | BASE_LR: 0.01 12 | STEPS: (160000, 220000) 13 | MAX_ITER: 260000 14 | CHECKPOINT_PERIOD: 5000 15 | TEST: 16 | EVAL_PERIOD: 20000 17 | INPUT: 18 | IS_ROTATE: True 19 | OUTPUT_DIR: "output/batext/pretrain/v1_ic15_attn_R_50" 20 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Pretrain.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | SOLVER: 18 | IMS_PER_BATCH: 8 19 | BASE_LR: 0.01 20 | STEPS: (160000, 220000) 21 | MAX_ITER: 260000 22 | CHECKPOINT_PERIOD: 20000 23 | TEST: 24 | EVAL_PERIOD: 20000 25 | OUTPUT_DIR: "output/batext/pretrain/v2_attn_R_50" 26 | 27 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v2_chn_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Chn-Pretrain.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | VOC_SIZE: 5462 18 | CUSTOM_DICT: "chn_cls_list" 19 | SOLVER: 20 | IMS_PER_BATCH: 8 21 | BASE_LR: 0.01 22 | STEPS: (160000, 220000) 23 | MAX_ITER: 260000 24 | CHECKPOINT_PERIOD: 10000 25 | INPUT: 26 | CROP: 27 | ENABLED: False 28 | OUTPUT_DIR: "output/batext/chn_pretrain/v2_attn_R_50" 29 | 30 | -------------------------------------------------------------------------------- /configs/BAText/Pretrain/v2_ic15_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Pretrain-ic15.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | POOLER_RESOLUTION: (16, 64) 18 | SOLVER: 19 | IMS_PER_BATCH: 8 20 | BASE_LR: 0.01 21 | STEPS: (160000, 220000) 22 | MAX_ITER: 260000 23 | CHECKPOINT_PERIOD: 20000 24 | TEST: 25 | EVAL_PERIOD: 20000 26 | INPUT: 27 | IS_ROTATE: True 28 | OUTPUT_DIR: "output/batext/pretrain/v2_ic15_attn_R_50" 29 | 30 | -------------------------------------------------------------------------------- /configs/BAText/ReCTS/Base-ReCTS.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("rects_train", "rects_val") 4 | TEST: ("rects_test",) 5 | 
-------------------------------------------------------------------------------- /configs/BAText/ReCTS/v2_chn_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-ReCTS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | VOC_SIZE: 5462 18 | CUSTOM_DICT: "chn_cls_list" 19 | SOLVER: 20 | IMS_PER_BATCH: 8 21 | BASE_LR: 0.001 22 | STEPS: (140000, 160000) 23 | MAX_ITER: 180000 24 | CHECKPOINT_PERIOD: 10000 25 | INPUT: 26 | CROP: 27 | ENABLED: False 28 | OUTPUT_DIR: "output/batext/rects/v2_attn_R_50" 29 | 30 | -------------------------------------------------------------------------------- /configs/BAText/TotalText/Base-TotalText.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BAText.yaml" 2 | DATASETS: 3 | TRAIN: ("totaltext_train",) 4 | TEST: ("totaltext_val",) -------------------------------------------------------------------------------- /configs/BAText/TotalText/attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-TotalText.yaml" 2 | MODEL: 3 | WEIGHTS: "weights/batext/pretrain_attn_R_50.pth" 4 | RESNETS: 5 | DEPTH: 50 6 | BATEXT: 7 | RECOGNIZER: "attn" # "attn" "rnn" 8 | SOLVER: 9 | IMS_PER_BATCH: 8 10 | BASE_LR: 0.001 11 | MAX_ITER: 5000 12 | CHECKPOINT_PERIOD: 1000 13 | TEST: 14 | EVAL_PERIOD: 1000 15 | OUTPUT_DIR: "output/batext/totaltext/attn_R_50" 16 | -------------------------------------------------------------------------------- /configs/BAText/TotalText/v2_attn_R_50.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-TotalText.yaml" 2 | MODEL: 3 | WEIGHTS: "https://dl.fbaipublicfiles.com/detectron/ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | BiFPN: 7 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 8 | OUT_CHANNELS: 256 9 | NUM_REPEATS: 2 10 | NORM: "SyncBN" 11 | RESNETS: 12 | DEPTH: 50 13 | BATEXT: 14 | RECOGNIZER: "attn" 15 | USE_COORDCONV: True 16 | USE_AET: True 17 | FCOS: 18 | # Best e2e: 0.5; Best det: 0.4 19 | INFERENCE_TH_TEST: 0.5 20 | SOLVER: 21 | IMS_PER_BATCH: 8 22 | BASE_LR: 0.001 23 | MAX_ITER: 5000 24 | CHECKPOINT_PERIOD: 1000 25 | TEST: 26 | EVAL_PERIOD: 1000 27 | OUTPUT_DIR: "output/batext/pretrain/v2_attn_R_50" 28 | -------------------------------------------------------------------------------- /configs/BlendMask/550_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-550.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/blendmask/550_R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/550_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-550.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/blendmask/550_R_50_3x" 10 | 
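The 1x/3x suffixes in the BlendMask configs refer to the training schedule: 550_R_50_1x.yaml keeps the Base-BlendMask schedule (MAX_ITER 90000 with LR decay at STEPS 60000/80000), while 550_R_50_3x.yaml triples both (MAX_ITER 270000, STEPS 210000/250000). A quick back-of-the-envelope check of what those iteration counts mean in epochs, under the assumption that coco_2017_train has roughly 118k images (the exact count is not part of these configs):

ims_per_batch = 16            # SOLVER.IMS_PER_BATCH from Base-BlendMask.yaml
coco_train_images = 118_000   # approximate size of coco_2017_train (assumed)

for name, max_iter in [("1x", 90_000), ("3x", 270_000)]:
    epochs = max_iter * ims_per_batch / coco_train_images
    print(f"{name}: {max_iter} iters ~= {epochs:.1f} epochs")
# 1x: 90000 iters ~= 12.2 epochs
# 3x: 270000 iters ~= 36.6 epochs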
-------------------------------------------------------------------------------- /configs/BlendMask/550_R_50_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-550.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | INPUT: 10 | MIN_SIZE_TRAIN: (440, 594) 11 | MIN_SIZE_TRAIN_SAMPLING: "range" 12 | MAX_SIZE_TRAIN: 990 13 | CROP: 14 | ENABLED: True 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/blendmask/550_R_50_dcni3_5x" 19 | -------------------------------------------------------------------------------- /configs/BlendMask/Base-550.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | FCOS: 4 | TOP_LEVELS: 1 5 | IN_FEATURES: ["p3", "p4", "p5", "p6"] 6 | FPN_STRIDES: [8, 16, 32, 64] 7 | SIZES_OF_INTEREST: [64, 128, 256] 8 | NUM_SHARE_CONVS: 3 9 | NUM_CLS_CONVS: 0 10 | NUM_BOX_CONVS: 0 11 | BASIS_MODULE: 12 | NUM_CONVS: 2 13 | INPUT: 14 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550) 15 | MAX_SIZE_TRAIN: 916 16 | MIN_SIZE_TEST: 550 17 | MAX_SIZE_TEST: 916 18 | -------------------------------------------------------------------------------- /configs/BlendMask/Base-BlendMask.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "BlendMask" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | BASIS_MODULE: 13 | LOSS_ON: True 14 | PANOPTIC_FPN: 15 | COMBINE: 16 | ENABLED: False 17 | FCOS: 18 | THRESH_WITH_CTR: True 19 | USE_SCALE: False 20 | DATASETS: 21 | TRAIN: ("coco_2017_train",) 22 | TEST: ("coco_2017_val",) 23 | SOLVER: 24 | IMS_PER_BATCH: 16 25 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 26 | STEPS: (60000, 80000) 27 | MAX_ITER: 90000 28 | INPUT: 29 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 30 | -------------------------------------------------------------------------------- /configs/BlendMask/Base-RT.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | FCOS: 9 | TOP_LEVELS: 0 10 | SIZES_OF_INTEREST: [64, 128] 11 | FPN_STRIDES: [8, 16, 32] 12 | IN_FEATURES: ['p3', 'p4', 'p5'] 13 | SOLVER: 14 | STEPS: (300000, 340000) 15 | MAX_ITER: 360000 -------------------------------------------------------------------------------- /configs/BlendMask/DLA_34_syncbn_4x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RT.yaml" 2 | MODEL: 3 | BACKBONE: 4 | NAME: "build_fcos_dla_fpn_backbone" 5 | FREEZE_AT: -1 6 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 7 | DLA: 8 | CONV_BODY: "DLA34" 9 | NORM: "SyncBN" 10 | FPN: 11 | IN_FEATURES: ["level3", "level4", "level5"] 12 | OUTPUT_DIR: "output/blendmask/DLA_34_syncbn_4x" 13 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/Base-Panoptic.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BlendMask.yaml" 2 | MODEL: 3 | RESNETS: 4 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 5 | FPN: 6 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 7 | SEM_SEG_HEAD: 8 | LOSS_WEIGHT: 0.5 9 | PANOPTIC_FPN: 10 | COMBINE: 11 | ENABLED: True 12 | INSTANCES_CONFIDENCE_THRESH: 0.45 13 | OVERLAP_THRESH: 0.4 14 | DATASETS: 15 | TRAIN: ("coco_2017_train_panoptic_separated",) 16 | TEST: ("coco_2017_val_panoptic_separated",) 17 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/panoptic/blendmask/R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_101_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | SOLVER: 10 | STEPS: (280000, 360000) 11 | MAX_ITER: 400000 12 | INPUT: 13 | MIN_SIZE_TRAIN: (640, 864) 14 | MIN_SIZE_TRAIN_SAMPLING: "range" 15 | MAX_SIZE_TRAIN: 1333 16 | CROP: 17 | ENABLED: True 18 | OUTPUT_DIR: "output/panoptic/blendmask/R_101_dcni3_5x" 19 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/panoptic/blendmask/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/panoptic/blendmask/R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/BlendMask/Panoptic/R_50_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | SOLVER: 10 | STEPS: (280000, 360000) 11 | MAX_ITER: 400000 12 | INPUT: 13 | MIN_SIZE_TRAIN: (640, 864) 14 | MIN_SIZE_TRAIN_SAMPLING: "range" 15 | MAX_SIZE_TRAIN: 1440 16 | CROP: 17 | ENABLED: True 18 | OUTPUT_DIR: "output/panoptic/blendmask/R_50_dcni3_5x" 19 | -------------------------------------------------------------------------------- /configs/BlendMask/Person/Base-Person.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-BlendMask.yaml" 2 | MODEL: 3 | BASIS_MODULE: 4 | NUM_CLASSES: 1 5 | FCOS: 6 
| NUM_CLASSES: 1 7 | DATASETS: 8 | TRAIN: ("pic_person_train",) 9 | TEST: ("pic_person_val",) 10 | -------------------------------------------------------------------------------- /configs/BlendMask/Person/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Person.yaml" 2 | MODEL: 3 | WEIGHTS: "https://cloudstor.aarnet.edu.au/plus/s/9u1cG2zXvEva5SM/download#R_50_3x.pth" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/person/blendmask/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/RT_R_50_4x_bn-head_syncbn_shtw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "RT_R_50_4x_syncbn_shtw.yaml" 2 | MODEL: 3 | FCOS: 4 | NORM: "SyncBN" 5 | OUTPUT_DIR: "output/blendmask/RT_R_50_4x_bn-head_syncbn_shtw" 6 | -------------------------------------------------------------------------------- /configs/BlendMask/RT_R_50_4x_syncbn_shtw.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RT.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | NORM: "SyncBN" 7 | BACKBONE: 8 | FREEZE_AT: -1 9 | FCOS: 10 | NUM_SHARE_CONVS: 3 11 | NUM_CLS_CONVS: 0 12 | NUM_BOX_CONVS: 0 13 | BASIS_MODULE: 14 | NUM_CONVS: 2 15 | OUTPUT_DIR: "output/blendmask/RT_R_50_4x_syncbn_shtw" 16 | -------------------------------------------------------------------------------- /configs/BlendMask/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/blendmask/R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/BlendMask/R_101_dcni3_5x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | DEFORM_ON_PER_STAGE: [False, True, True, True] 7 | DEFORM_MODULATED: True 8 | DEFORM_INTERVAL: 3 9 | SOLVER: 10 | STEPS: (280000, 360000) 11 | MAX_ITER: 400000 12 | INPUT: 13 | MIN_SIZE_TRAIN: (640, 864) 14 | MIN_SIZE_TRAIN_SAMPLING: "range" 15 | MAX_SIZE_TRAIN: 1440 16 | CROP: 17 | ENABLED: True 18 | TEST: 19 | EVAL_PERIOD: 20000 20 | OUTPUT_DIR: "output/blendmask/R_101_dcni3_5x" 21 | -------------------------------------------------------------------------------- /configs/BlendMask/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/blendmask/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BlendMask/R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BlendMask.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/blendmask/R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/BoxInst/Base-BoxInst.yaml: 
-------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CondInst" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | FCOS: 13 | THRESH_WITH_CTR: True 14 | USE_SCALE: True 15 | CONDINST: 16 | TOPK_PROPOSALS_PER_IM: 64 17 | MASK_BRANCH: 18 | OUT_CHANNELS: 16 19 | BOXINST: 20 | ENABLED: True 21 | BOTTOM_PIXELS_REMOVED: 10 22 | PAIRWISE: 23 | SIZE: 3 24 | DILATION: 2 25 | COLOR_THRESH: 0.3 26 | DATASETS: 27 | TRAIN: ("coco_2017_train",) 28 | TEST: ("coco_2017_val",) 29 | SOLVER: 30 | IMS_PER_BATCH: 16 31 | BASE_LR: 0.01 32 | STEPS: (60000, 80000) 33 | MAX_ITER: 90000 34 | INPUT: 35 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 36 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | OUTPUT_DIR: "output/boxinst_MS_R_101_1x" 7 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/boxinst_MS_R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/boxinst_MS_R_101_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_101_BiFPN_dcni3_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | DEFORM_ON_PER_STAGE: [False, True, True, True] 9 | DEFORM_MODULATED: True 10 | DEFORM_INTERVAL: 3 11 | BiFPN: 12 | IN_FEATURES: ["res3", "res4", "res5"] 13 | OUT_CHANNELS: 160 14 | NORM: "SyncBN" 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/boxinst_MS_R_101_BiFPN_dcni3_3x" 19 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/boxinst_MS_R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/boxinst_MS_R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_BiFPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | OUTPUT_DIR: "output/boxinst_MS_R_50_1x_bifpn" 13 | -------------------------------------------------------------------------------- /configs/BoxInst/MS_R_50_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-BoxInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/boxinst_MS_R_50_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/BoxInst/README.md: -------------------------------------------------------------------------------- 1 | # BoxInst: High-Performance Instance Segmentation with Box Annotations 2 | 3 | BoxInst: High-Performance Instance Segmentation with Box Annotations; 4 | Zhi Tian, Chunhua Shen, Xinlong Wang and Hao Chen; 5 | In: Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR), 2021. 6 | arXiv preprint arXiv:2012.02310 7 | 8 | [[`Paper`](https://arxiv.org/abs/2012.02310)] [[`BibTeX`](#citing-boxinst)] [[`Video Demo`](https://www.youtube.com/watch?v=NuF8NAYf5L8)] 9 | 10 | 11 | # Installation & Quick Start 12 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 13 | set up the datasets (e.g., MS-COCO). 14 | 15 | For demo, run the following command lines: 16 | ``` 17 | wget https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_50_3x.pth?download=true -O BoxInst_MS_R_50_3x.pth 18 | python demo/demo.py \ 19 | --config-file configs/BoxInst/MS_R_50_3x.yaml \ 20 | --input input1.jpg input2.jpg \ 21 | --opts MODEL.WEIGHTS BoxInst_MS_R_50_3x.pth 22 | ``` 23 | 24 | For training on COCO, run: 25 | ``` 26 | OMP_NUM_THREADS=1 python tools/train_net.py \ 27 | --config-file configs/BoxInst/MS_R_50_1x.yaml \ 28 | --num-gpus 8 \ 29 | OUTPUT_DIR training_dir/BoxInst_MS_R_50_1x 30 | ``` 31 | 32 | For evaluation on COCO, run: 33 | ``` 34 | OMP_NUM_THREADS=1 python tools/train_net.py \ 35 | --config-file configs/BoxInst/MS_R_50_1x.yaml \ 36 | --eval-only \ 37 | --num-gpus 8 \ 38 | OUTPUT_DIR training_dir/BoxInst_MS_R_50_1x \ 39 | MODEL.WEIGHTS training_dir/BoxInst_MS_R_50_1x/model_final.pth 40 | ``` 41 | 42 | 43 | ## Models 44 | ### COCO Instance Segmentation Baselines with [BoxInst](https://arxiv.org/abs/2012.02310) 45 | 46 | Only **box annotations** are used during training. 47 | 48 | Name | inf. 
time | box AP | mask AP | mask AP (test-dev)| download 49 | --- |:---:|:---:|:---:|:---:|:---: 50 | [BoxInst_MS_R_50_1x](MS_R_50_1x.yaml) | 14 FPS | 39.4 | 30.7 | - | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_50_1x.pth?download=true) 51 | [BoxInst_MS_R_50_3x](MS_R_50_3x.yaml) | 14 FPS | 41.5 | 31.8 | 32.1 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_50_3x.pth?download=true) 52 | [BoxInst_MS_R_101_1x](MS_R_101_1x.yaml) | 11 FPS | 41.4 | 32.2 | 32.5 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_1x.pth?download=true) 53 | [BoxInst_MS_R_101_3x](MS_R_101_3x.yaml) | 11 FPS | 43.3 | 33.0 | 33.2 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_3x.pth?download=true) 54 | [BoxInst_MS_R_101_BiFPN_3x](MS_R_101_BiFPN_3x.yaml) | 10 FPS | 45.4 | 34.1 | 33.9 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_BiFPN_3x.pth?download=true) 55 | [BoxInst_MS_R_101_BiFPN_dcni3_3x](MS_R_101_BiFPN_dcni3_3x.yaml) | 8 FPS | 46.4 | 34.8 | 35.0 | [model](https://huggingface.co/tianzhi/AdelaiDet-BoxInst/resolve/main/BoxInst_MS_R_101_BiFPN_dcni3_3x.pth?download=true) 56 | 57 | Disclaimer: 58 | - All models are trained with multi-scale data augmentation. Inference time is measured on a single NVIDIA 1080Ti with batch size 1. 59 | - This is a reimplementation. Thus, the numbers might be slightly different from the ones reported in our original paper. 60 | 61 | 62 | # Citing BoxInst 63 | If you use BoxInst in your research or wish to refer to the baseline results, please use the following BibTeX entries. 64 | ```BibTeX 65 | @inproceedings{tian2020boxinst, 66 | title = {{BoxInst}: High-Performance Instance Segmentation with Box Annotations}, 67 | author = {Tian, Zhi and Shen, Chunhua and Wang, Xinlong and Chen, Hao}, 68 | booktitle = {Proc. IEEE Conf. 
Computer Vision and Pattern Recognition (CVPR)}, 69 | year = {2021} 70 | } 71 | ``` 72 | -------------------------------------------------------------------------------- /configs/CondInst/Base-CondInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "CondInst" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "FCOS" 12 | FCOS: 13 | THRESH_WITH_CTR: True 14 | USE_SCALE: True 15 | CONDINST: 16 | MAX_PROPOSALS: 500 17 | DATASETS: 18 | TRAIN: ("coco_2017_train",) 19 | TEST: ("coco_2017_val",) 20 | SOLVER: 21 | IMS_PER_BATCH: 16 22 | BASE_LR: 0.01 23 | STEPS: (60000, 80000) 24 | MAX_ITER: 90000 25 | INPUT: 26 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 27 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | OUTPUT_DIR: "output/condinst_MS_R_101_1x" 7 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/condinst_MS_R_101_3x" 10 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | CONDINST: 7 | MASK_BRANCH: 8 | SEMANTIC_LOSS_ON: True 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "output/condinst_MS_R_101_3x_sem" 13 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/condinst_MS_R_101_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_101_BiFPN_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 101 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | CONDINST: 13 | MASK_BRANCH: 14 | SEMANTIC_LOSS_ON: True 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/condinst_MS_R_101_3x_bifpn_sem" 19 | 
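The `*_sem` variants above differ from their plain counterparts only in `MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON` (and `OUTPUT_DIR`). A small sketch of applying the same override programmatically, again assuming `adet.config.get_cfg` as in the earlier sketch; the command-line equivalent is appending the KEY VALUE pairs after the `tools/train_net.py` flags:

```
from adet.config import get_cfg  # assumed AdelaiDet config entry point

cfg = get_cfg()
cfg.merge_from_file("configs/CondInst/MS_R_101_BiFPN_3x.yaml")
# Turn on the auxiliary semantic-segmentation loss and redirect the output dir,
# which is all that MS_R_101_BiFPN_3x_sem.yaml adds on top of the base file.
cfg.merge_from_list([
    "MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON", "True",
    "OUTPUT_DIR", "output/condinst_MS_R_101_3x_bifpn_sem",
])
```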
-------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/condinst_MS_R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | OUTPUT_DIR: "output/condinst_MS_R_50_3x" 10 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | CONDINST: 7 | MASK_BRANCH: 8 | SEMANTIC_LOSS_ON: True 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | OUTPUT_DIR: "output/condinst_MS_R_50_3x_sem" 13 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_BiFPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | OUTPUT_DIR: "output/condinst_MS_R_50_1x_bifpn" 13 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_BiFPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/condinst_MS_R_50_3x_bifpn" 16 | -------------------------------------------------------------------------------- /configs/CondInst/MS_R_50_BiFPN_3x_sem.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-CondInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_bifpn_backbone" 6 | RESNETS: 7 | DEPTH: 50 8 | BiFPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | OUT_CHANNELS: 160 11 | NORM: "SyncBN" 12 | CONDINST: 13 | MASK_BRANCH: 14 | SEMANTIC_LOSS_ON: True 15 | SOLVER: 16 | STEPS: (210000, 250000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/condinst_MS_R_50_3x_bifpn_sem" 19 | -------------------------------------------------------------------------------- /configs/CondInst/README.md: -------------------------------------------------------------------------------- 1 | # Conditional Convolutions for Instance Segmentation (Oral) 2 | 3 | Conditional Convolutions for Instance Segmentation; 4 | Zhi Tian, Chunhua Shen and Hao Chen; 5 | In: Proc. 
European Conference on Computer Vision (ECCV), 2020. 6 | arXiv preprint arXiv:2003.05664 7 | 8 | [[`Paper`](https://arxiv.org/abs/2003.05664)] [[`BibTeX`](#citing-condinst)] 9 | 10 | 11 | # Installation & Quick Start 12 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 13 | set up the datasets (e.g., MS-COCO). 14 | 15 | For demo, run the following command lines: 16 | ``` 17 | wget https://cloudstor.aarnet.edu.au/plus/s/M8nNxSR5iNP4qyO/download -O CondInst_MS_R_101_3x_sem.pth 18 | python demo/demo.py \ 19 | --config-file configs/CondInst/MS_R_101_3x_sem.yaml \ 20 | --input input1.jpg input2.jpg \ 21 | --opts MODEL.WEIGHTS CondInst_MS_R_101_3x_sem.pth 22 | ``` 23 | 24 | For training on COCO, run: 25 | ``` 26 | OMP_NUM_THREADS=1 python tools/train_net.py \ 27 | --config-file configs/CondInst/MS_R_50_1x.yaml \ 28 | --num-gpus 8 \ 29 | OUTPUT_DIR training_dir/CondInst_MS_R_50_1x 30 | ``` 31 | 32 | For evaluation on COCO, run: 33 | ``` 34 | OMP_NUM_THREADS=1 python tools/train_net.py \ 35 | --config-file configs/CondInst/MS_R_50_1x.yaml \ 36 | --eval-only \ 37 | --num-gpus 8 \ 38 | OUTPUT_DIR training_dir/CondInst_MS_R_50_1x \ 39 | MODEL.WEIGHTS training_dir/CondInst_MS_R_50_1x/model_final.pth 40 | ``` 41 | 42 | 43 | ## Models 44 | ### COCO Instance Segmentation Baselines with [CondInst](https://arxiv.org/abs/2003.05664) 45 | 46 | Name | inf. time | box AP | mask AP | download 47 | --- |:---:|:---:|:---:|:---: 48 | [CondInst_MS_R_50_1x](MS_R_50_1x.yaml) | 14 FPS | 39.7 | 35.7 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_1x.pth?download=true) 49 | [CondInst_MS_R_50_3x](MS_R_50_3x.yaml) | 14 FPS | 41.9 | 37.5 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_3x.pth?download=true) 50 | [CondInst_MS_R_101_3x](MS_R_101_3x.yaml) | 11 FPS | 43.3 | 38.6 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_101_3x.pth?download=true) 51 | 52 | With an auxiliary semantic segmentation task (set `MODEL.CONDINST.MASK_BRANCH.SEMANTIC_LOSS_ON = True` to enable it): 53 | 54 | Name | inf. time | box AP | mask AP | mask AP (test-dev) | download 55 | --- |:---:|:---:|:---:|:---:|:---: 56 | [CondInst_MS_R_50_3x_sem](MS_R_50_3x_sem.yaml) | 14 FPS | 42.6 | 38.2 | 38.7 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_3x_sem.pth?download=true) 57 | [CondInst_MS_R_101_3x_sem](MS_R_101_3x_sem.yaml) | 11 FPS | 44.6 | 39.8 | 40.1 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_101_3x_sem.pth?download=true) 58 | 59 | With BiFPN: 60 | 61 | Name | inf. 
time | box AP | mask AP | download 62 | --- |:---:|:---:|:---:|:---: 63 | [CondInst_MS_R_50_BiFPN_1x](MS_R_50_BiFPN_1x.yaml) | 13 FPS | 42.5 | 37.3 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_BiFPN_1x.pth?download=true) 64 | [CondInst_MS_R_50_BiFPN_3x](MS_R_50_BiFPN_3x.yaml) | 13 FPS | 44.3 | 38.9 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_BiFPN_3x.pth?download=true) 65 | [CondInst_MS_R_50_BiFPN_3x_sem](MS_R_50_BiFPN_3x_sem.yaml) | 13 FPS | 44.7 | 39.4 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_50_BiFPN_3x_sem.pth?download=true) 66 | [CondInst_MS_R_101_BiFPN_3x](MS_R_101_BiFPN_3x.yaml) | 10 FPS | 45.3 | 39.6 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_MS_R_101_BiFPN_3x.pth?download=true) 67 | [CondInst_MS_R_101_BiFPN_3x_sem](MS_R_101_BiFPN_3x_sem.yaml) | 10 FPS | 45.7 | 40.2 | [model](https://huggingface.co/tianzhi/AdelaiDet-CondInst/resolve/main/CondInst_R_101_BiFPN_3x_sem.pth?download=true) 68 | 69 | 70 | *Disclaimer:* 71 | 72 | - All models are trained with multi-scale data augmentation. Inference time is measured on a single NVIDIA 1080Ti with batch size 1. 73 | - The final mask's resolution is 1/4 of the input image (i.e., `MODEL.CONDINST.MASK_OUT_STRIDE = 4`, which is enough on MS-COCO and different from our original paper. In the paper, we used `MODEL.CONDINST.MASK_OUT_STRIDE = 2`. If you want high-resolution mask results, please reduce it. 74 | - This is a reimplementation. Thus, the numbers are slightly different from our original paper (within 0.1% in mask AP). 75 | 76 | 77 | # Citing CondInst 78 | If you use CondInst in your research or wish to refer to the baseline results, please use the following BibTeX entries. 79 | ```BibTeX 80 | @inproceedings{tian2020conditional, 81 | title = {Conditional Convolutions for Instance Segmentation}, 82 | author = {Tian, Zhi and Shen, Chunhua and Chen, Hao}, 83 | booktitle = {Proc. Eur. Conf. Computer Vision (ECCV)}, 84 | year = {2020} 85 | } 86 | ``` 87 | -------------------------------------------------------------------------------- /configs/DenseCL/FCOS_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../FCOS-Detection/Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | FREEZE_AT: 0 6 | RESNETS: 7 | DEPTH: 50 8 | NORM: "SyncBN" 9 | FPN: 10 | NORM: "SyncBN" 11 | TEST: 12 | PRECISE_BN: 13 | ENABLED: True 14 | -------------------------------------------------------------------------------- /configs/DenseCL/FCOS_R50_1x_DenseCL.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "FCOS_R50_1x.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | -------------------------------------------------------------------------------- /configs/DenseCL/README.md: -------------------------------------------------------------------------------- 1 | # Dense Contrastive Learning for Self-Supervised Visual Pre-Training 2 | 3 | Here we provide instructions and results for applying DenseCL pre-trained models to AdelaiDet. Please refer to [https://git.io/DenseCL 4 | ](https://git.io/DenseCL 5 | ) for the pre-training code. 
6 | 7 | > [**Dense Contrastive Learning for Self-Supervised Visual Pre-Training**](https://arxiv.org/abs/2011.09157), 8 | > Xinlong Wang, Rufeng Zhang, Chunhua Shen, Tao Kong, Lei Li 9 | > In: Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR), 2021, **Oral** 10 | > *arXiv preprint ([arXiv 2011.09157](https://arxiv.org/abs/2011.09157))* 11 | 12 | 13 | # Installation 14 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 15 | set up the datasets (e.g., MS-COCO). 16 | 17 | 18 | # DenseCL Pre-trained Models 19 | pre-train method | pre-train dataset | backbone | #epoch | Link 20 | --- |:---:|:---:|:---:|:---: 21 | DenseCL | COCO | ResNet-50 | 800 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r50_coco_800ep.pth) 22 | DenseCL | COCO | ResNet-50 | 1600 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r50_coco_1600ep.pth) 23 | DenseCL | ImageNet | ResNet-50 | 200 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r50_imagenet_200ep.pth) 24 | DenseCL | ImageNet | ResNet-101 | 200 | [download](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/densecl_r101_imagenet_200ep.pth) 25 | 26 | 27 | # Usage 28 | 29 | ## Download the pre-trained model 30 | ``` 31 | PRETRAIN_DIR=./ 32 | wget https://cloudstor.aarnet.edu.au/plus/s/hdAg5RYm8NNM2QP/download -O ${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 33 | ``` 34 | 35 | ## Convert it to detectron2's format 36 | Use [convert-pretrain-to-detectron2.py](https://github.com/WXinlong/DenseCL/blob/main/benchmarks/detection/convert-pretrain-to-detectron2.py) to convert the pre-trained backbone weights: 37 | ``` 38 | WEIGHT_FILE=${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pth 39 | OUTPUT_FILE=${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 40 | python convert-pretrain-to-detectron2.py ${WEIGHT_FILE} ${OUTPUT_FILE} 41 | ``` 42 | 43 | ## Train the downstream models 44 | 45 | For training a SOLOv2, run: 46 | ``` 47 | OMP_NUM_THREADS=1 python tools/train_net.py \ 48 | --config-file configs/DenseCL/SOLOv2_R50_1x_DenseCL.yaml \ 49 | --num-gpus 8 \ 50 | OUTPUT_DIR training_dir/SOLOv2_R50_1x_DenseCL \ 51 | MODEL.WEIGHTS ${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 52 | ``` 53 | 54 | For training a FCOS, run: 55 | ``` 56 | OMP_NUM_THREADS=1 python tools/train_net.py \ 57 | --config-file configs/DenseCL/FCOS_R50_1x_DenseCL.yaml \ 58 | --num-gpus 8 \ 59 | OUTPUT_DIR training_dir/FCOS_R50_1x_DenseCL \ 60 | MODEL.WEIGHTS ${PRETRAIN_DIR}/densecl_r50_imagenet_200ep.pkl 61 | ``` 62 | 63 | 64 | # Performance 65 | ## SOLOv2 on COCO Instance Segmentation 66 | 67 | pre-train method | pre-train dataset | mask AP | 68 | --- |:---:|:---:| 69 | Supervised | ImageNet | 35.2 70 | MoCo-v2 | ImageNet | 35.2 71 | DenseCL | ImageNet | 35.7 (+0.5) 72 | 73 | ## FCOS on COCO Object Detection 74 | 75 | pre-train method | pre-train dataset | box AP | 76 | --- |:---:|:---:| 77 | Supervised | ImageNet | 39.9 78 | MoCo-v2 | ImageNet | 40.3 79 | DenseCL | ImageNet | 40.9 (+1.0) 80 | 81 | 82 | 83 | # Citation 84 | Please consider citing our paper in your publications if the project helps your research. BibTeX reference is as follows. 85 | ```BibTeX 86 | @inproceedings{wang2020densecl, 87 | title = {Dense Contrastive Learning for Self-Supervised Visual Pre-Training}, 88 | author = {Wang, Xinlong and Zhang, Rufeng and Shen, Chunhua and Kong, Tao and Li, Lei}, 89 | booktitle = {Proc. 
IEEE Conf. Computer Vision and Pattern Recognition (CVPR)}, 90 | year = {2021} 91 | } 92 | ``` 93 | -------------------------------------------------------------------------------- /configs/DenseCL/SOLOv2_R50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../SOLOv2/Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | BACKBONE: 5 | FREEZE_AT: 0 6 | RESNETS: 7 | DEPTH: 50 8 | NORM: "SyncBN" 9 | FPN: 10 | NORM: "SyncBN" 11 | TEST: 12 | PRECISE_BN: 13 | ENABLED: True 14 | -------------------------------------------------------------------------------- /configs/DenseCL/SOLOv2_R50_1x_DenseCL.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "SOLOv2_R50_1x.yaml" 2 | MODEL: 3 | PIXEL_MEAN: [123.675, 116.280, 103.530] 4 | PIXEL_STD: [58.395, 57.120, 57.375] 5 | WEIGHTS: "See Instructions" 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | INPUT: 9 | FORMAT: "RGB" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/Base-FCOS.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "OneStageDetector" 3 | BACKBONE: 4 | NAME: "build_fcos_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res3", "res4", "res5"] 9 | PROPOSAL_GENERATOR: 10 | NAME: "FCOS" 11 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 12 | DATASETS: 13 | TRAIN: ("coco_2017_train",) 14 | TEST: ("coco_2017_val",) 15 | SOLVER: 16 | IMS_PER_BATCH: 16 17 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | SOLVER: 23 | STEPS: (300000, 340000) 24 | MAX_ITER: 360000 25 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn" 26 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 
32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NORM: "SyncBN" 23 | SOLVER: 24 | STEPS: (300000, 340000) 25 | MAX_ITER: 360000 26 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_bn_head" 27 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | SOLVER: 26 | STEPS: (300000, 340000) 27 | MAX_ITER: 360000 28 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers" 29 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_DLA_34_4x_syncbn_shared_towers_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | BACKBONE: 9 | NAME: "build_fcos_dla_fpn_backbone" 10 | FREEZE_AT: -1 11 | WEIGHTS: "http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth" 12 | DLA: 13 | CONV_BODY: "DLA34" 14 | NORM: "SyncBN" 15 | FPN: 16 | IN_FEATURES: ["level3", "level4", "level5"] 17 | FCOS: 18 | TOP_LEVELS: 0 19 | SIZES_OF_INTEREST: [64, 128] 20 | FPN_STRIDES: [8, 16, 32] 21 | IN_FEATURES: ['p3', 'p4', 'p5'] 22 | NUM_SHARE_CONVS: 4 23 | NUM_BOX_CONVS: 0 24 | NUM_CLS_CONVS: 0 25 | NORM: "SyncBN" 26 | SOLVER: 27 | STEPS: (300000, 340000) 28 | MAX_ITER: 360000 29 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_DLA_34_4x_syncbn_shared_towers_bn_head" 30 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | SOLVER: 18 | STEPS: (300000, 340000) 19 | MAX_ITER: 360000 20 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn" 21 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | INPUT: 3 | MIN_SIZE_TRAIN: (256, 288, 320, 352, 384, 416, 448, 480, 512, 544, 576, 608) 4 | MAX_SIZE_TRAIN: 900 5 | MAX_SIZE_TEST: 736 6 | MIN_SIZE_TEST: 512 7 | MODEL: 8 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 9 | RESNETS: 10 | DEPTH: 50 11 | NORM: "SyncBN" 12 | FCOS: 13 | TOP_LEVELS: 0 14 | SIZES_OF_INTEREST: [64, 128] 15 | FPN_STRIDES: [8, 16, 32] 16 | IN_FEATURES: ['p3', 'p4', 'p5'] 17 | NORM: "SyncBN" 18 | SOLVER: 19 | STEPS: (300000, 340000) 20 | MAX_ITER: 360000 21 | OUTPUT_DIR: "output/fcos/FCOS_RT_MS_R_50_4x_syncbn_bn_head" 22 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_101_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_101_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_101_2x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | FCOS: 7 | BOX_QUALITY: "iou" 8 | SOLVER: 9 | STEPS: (120000, 160000) 10 | MAX_ITER: 180000 11 | OUTPUT_DIR: "output/fcos/MS_R_101_2x_iou" 12 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_50_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (120000, 160000) 8 | MAX_ITER: 180000 9 | OUTPUT_DIR: "output/fcos/R_50_2x" 10 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_R_50_2x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | FCOS: 7 | BOX_QUALITY: "iou" 8 | SOLVER: 9 | STEPS: (120000, 160000) 10 | MAX_ITER: 180000 11 | OUTPUT_DIR: "output/fcos/MS_R_50_2x_iou" 12 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/X_101_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2" 18 | 
-------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_dcnv2_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | BOX_QUALITY: "iou" 15 | SOLVER: 16 | STEPS: (120000, 160000) 17 | MAX_ITER: 180000 18 | OUTPUT_DIR: "output/fcos/MS_X_101_2x_dcnv2_iou" 19 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_32x8d_2x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | FCOS: 11 | BOX_QUALITY: "iou" 12 | SOLVER: 13 | STEPS: (120000, 160000) 14 | MAX_ITER: 180000 15 | OUTPUT_DIR: "output/fcos/X_101_2x_iou" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (120000, 160000) 12 | MAX_ITER: 180000 13 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x" 14 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/MS_X_101_64x4d_2x_dcnv2.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-101-64x4d" 4 | PIXEL_STD: [1.0, 1.0, 1.0] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 64 8 | WIDTH_PER_GROUP: 4 9 | DEPTH: 101 10 | DEFORM_ON_PER_STAGE: [False, False, True, True] # on Res4, Res5 11 | DEFORM_MODULATED: True 12 | FCOS: 13 | USE_DEFORMABLE: True 14 | SOLVER: 15 | STEPS: (120000, 160000) 16 | MAX_ITER: 180000 17 | OUTPUT_DIR: "output/fcos/MS_X_101_64x4d_2x_dcnv2" 18 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | INPUT: 7 | MIN_SIZE_TRAIN: (800,) 8 | OUTPUT_DIR: "output/fcos/R_50_1x" 9 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/R_50_1x_iou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | FCOS: 7 | BOX_QUALITY: "iou" 8 | INPUT: 9 | MIN_SIZE_TRAIN: (800,) 10 | OUTPUT_DIR: "output/fcos/R_50_1x_iou" 11 | 
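A minimal inference sketch for one of the FCOS configs above, assuming the standard detectron2 `DefaultPredictor` flow (this mirrors what the `demo/demo.py` commands in the READMEs do, but it is only an illustrative outline, and the checkpoint path is a placeholder):

```
import cv2
import adet  # assumed to register AdelaiDet's meta-architectures and proposal generators
from adet.config import get_cfg
from detectron2.engine import DefaultPredictor

cfg = get_cfg()
cfg.merge_from_file("configs/FCOS-Detection/R_50_1x.yaml")
cfg.MODEL.WEIGHTS = "path/to/FCOS_R_50_1x.pth"  # hypothetical local checkpoint
cfg.MODEL.FCOS.INFERENCE_TH_TEST = 0.3          # keep only reasonably confident detections

predictor = DefaultPredictor(cfg)
image = cv2.imread("input1.jpg")                # BGR, as detectron2 expects by default
instances = predictor(image)["instances"]
print(instances.pred_boxes, instances.scores, instances.pred_classes)
```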
-------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_39_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/q98pypf96rhtd8y/vovnet39_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-39-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_39_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_57_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/8xl0cb3jj51f45a/vovnet57_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-57-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_57_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/MS_V_99_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-FCOS.yaml" 2 | MODEL: 3 | WEIGHTS: "https://www.dropbox.com/s/1mlv31coewx8trd/vovnet99_ese_detectron2.pth?dl=1" 4 | BACKBONE: 5 | NAME: "build_fcos_vovnet_fpn_backbone" 6 | FREEZE_AT: 0 7 | VOVNET: 8 | CONV_BODY : "V-99-eSE" 9 | OUT_FEATURES: ["stage3", "stage4", "stage5"] 10 | FPN: 11 | IN_FEATURES: ["stage3", "stage4", "stage5"] 12 | SOLVER: 13 | STEPS: (210000, 250000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/fcos/V_99_ms_3x" 16 | -------------------------------------------------------------------------------- /configs/FCOS-Detection/vovnet/README.md: -------------------------------------------------------------------------------- 1 | # [VoVNet-v2](https://github.com/youngwanLEE/CenterMask) backbone networks in [FCOS](https://github.com/aim-uofa/adet) 2 | **Efficient Backbone Network for Object Detection and Segmentation**\ 3 | Youngwan Lee 4 | 5 | 6 | [[`vovnet-detectron2`](https://github.com/youngwanLEE/vovnet-detectron2)][[`CenterMask(code)`](https://github.com/youngwanLEE/CenterMask)] [[`VoVNet-v1(arxiv)`](https://arxiv.org/abs/1904.09730)] [[`VoVNet-v2(arxiv)`](https://arxiv.org/abs/1911.06667)] [[`BibTeX`](#CitingVoVNet)] 7 | 8 | 9 |
10 | 11 |
12 | 13 | 14 | ## Comparison with Faster R-CNN and ResNet 15 | 16 | ### Note 17 | 18 | We measure the inference time of all models with batch size 1 on the same V100 GPU machine. 19 | 20 | - pytorch1.3.1 21 | - CUDA 10.1 22 | - cuDNN 7.3 23 | 24 | 25 | |Method|Backbone|lr sched|inference time|AP|APs|APm|APl|download| 26 | |---|:--------:|:---:|:--:|--|----|----|---|--------| 27 | |Faster|R-50-FPN|3x|0.047|40.2|24.2|43.5|52.0|model \| metrics 28 | |Faster|**V2-39-FPN**|3x|0.047|42.7|27.1|45.6|54.0|model \| metrics 29 | |**FCOS**|**V2-39-FPN**|3x|0.045|43.5|28.1|47.2|54.5|model \| metrics 30 | || 31 | |Faster|R-101-FPN|3x|0.063|42.0|25.2|45.6|54.6|model \| metrics 32 | |Faster|**V2-57-FPN**|3x|0.054|43.3|27.5|46.7|55.3|model \| metrics 33 | |**FCOS**|**V2-57-FPN**|3x|0.051|44.4|28.8|47.2|56.3|model \| metrics 34 | || 35 | |Faster|X-101-FPN|3x|0.120|43.0|27.2|46.1|54.9|model \| metrics| 36 | |Faster|**V2-99-FPN**|3x|0.073|44.1|28.1|47.0|56.4|model \| metrics| 37 | |**FCOS**|**V2-99-FPN**|3x|0.070|45.2|29.2|48.4|57.3|model \| metrics| 38 | 39 | 40 | 41 | ## Citing VoVNet 42 | 43 | If you use VoVNet, please use the following BibTeX entry. 44 | 45 | ```BibTeX 46 | @inproceedings{lee2019energy, 47 | title = {An Energy and GPU-Computation Efficient Backbone Network for Real-Time Object Detection}, 48 | author = {Lee, Youngwan and Hwang, Joong-won and Lee, Sangrok and Bae, Yuseok and Park, Jongyoul}, 49 | booktitle = {Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition Workshops}, 50 | year = {2019} 51 | } 52 | 53 | @article{lee2019centermask, 54 | title={CenterMask: Real-Time Anchor-Free Instance Segmentation}, 55 | author={Lee, Youngwan and Park, Jongyoul}, 56 | journal={arXiv preprint arXiv:1911.06667}, 57 | year={2019} 58 | } 59 | ``` 60 | -------------------------------------------------------------------------------- /configs/FCPose/Base-FCPose.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 3 | RESNETS: 4 | DEPTH: 50 5 | KEYPOINT_ON: True 6 | FCPOSE_ON: True 7 | META_ARCHITECTURE: "OneStageDetector" 8 | BACKBONE: 9 | NAME: "build_fcos_resnet_fpn_backbone" 10 | RESNETS: 11 | OUT_FEATURES: ["res3", "res4", "res5"] 12 | FPN: 13 | IN_FEATURES: ["res3", "res4", "res5"] 14 | PROPOSAL_GENERATOR: 15 | NAME: "FCPose" 16 | DATASETS: 17 | TRAIN: ("keypoints_coco_2017_train",) 18 | TEST: ("keypoints_coco_2017_val",) 19 | SOLVER: 20 | IMS_PER_BATCH: 16 21 | BASE_LR: 0.01 22 | STEPS: (60000, 80000) 23 | MAX_ITER: 90000 24 | CLIP_GRADIENTS: 25 | ENABLED: True 26 | CLIP_VALUE: 1.0 27 | INPUT: 28 | MIN_SIZE_TRAIN_SAMPLING: "range" 29 | MIN_SIZE_TRAIN: [320,800] 30 | MAX_SIZE_TRAIN: 1333 31 | CROP: 32 | ENABLED: True 33 | TYPE: "relative_range" 34 | SIZE: [0.4, 0.4] 35 | TEST: 36 | EVAL_PERIOD: 5000 37 | -------------------------------------------------------------------------------- /configs/FCPose/README.md: -------------------------------------------------------------------------------- 1 | # FCPose: Fully Convolutional Multi-Person Pose Estimation with Dynamic Instance-Aware Convolutions 2 | 3 | 4 | 5 | # Installation & Quick Start 6 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 7 | set up the datasets (e.g., MS-COCO). 
8 | 9 | For training on COCO, run: 10 | ``` 11 | python tools/train_net.py \ 12 | --num-gpus 8 \ 13 | --config-file configs/FCPose/R_50_3X.yaml \ 14 | --dist-url tcp://127.0.0.1:$(( RANDOM % 1000 + 50000 )) \ 15 | OUTPUT_DIR training_dir/R_50_3X 16 | ``` 17 | 18 | For evaluation on COCO, run: 19 | ``` 20 | python tools/train_net.py \ 21 | --num-gpus 8 \ 22 | --eval-only \ 23 | --config-file configs/FCPose/R_50_3X.yaml \ 24 | --dist-url tcp://127.0.0.1:$(( RANDOM % 1000 + 50000 )) \ 25 | OUTPUT_DIR training_dir/R_50_3X \ 26 | MODEL.WEIGHTS training_dir/R_50_3X/model_final.pth 27 | ``` 28 | 29 | 30 | ## Models 31 | ### COCO Keypoint Detection Baselines with FCPose 32 | 33 | Name | inf. time | box AP | keypoint AP | download 34 | --- |:---:|:---:|:---:|:---: 35 | [FCPose_R50_3x](R_50_3X.yaml) | 45ms | 57.9 | 65.2 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/fcpose50.pth) 36 | [FCPose_R101_3x](R_101_3X.yaml) | 58ms | 58.7 | 67.0 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/fcpose101.pth) 37 | 38 | 39 | *Disclaimer:* 40 | 41 | - Inference time is measured on 8 V100 GPUs. 42 | - This is a reimplementation. Thus, the numbers are slightly different from our original paper. 43 | - This is an alpha version. We will update our implementation later, including adding a real-time version of FCPose and fixing the issue of the loss becoming NaN. If your loss becomes NaN during training, please try again. 44 | 45 | 46 | # Citations 47 | Please consider citing our papers in your publications if the project helps your research. BibTeX reference is as follows. 48 | ```BibTeX 49 | @inproceedings{mao2021fcpose, 50 | title={FCPose: Fully Convolutional Multi-Person Pose Estimation with Dynamic Instance-Aware Convolutions}, 51 | author={Mao, Weian and Tian, Zhi and Wang, Xinlong and Shen, Chunhua}, 52 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 53 | pages={9034--9043}, 54 | year={2021} 55 | } 56 | ``` 57 | -------------------------------------------------------------------------------- /configs/FCPose/R_101_3X.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCPose.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | FCOS: 7 | NUM_CLASSES: 1 8 | SOLVER: 9 | STEPS: (180000, 240000) 10 | MAX_ITER: 270000 11 | # WARMUP_FACTOR: 1.0 / 3000 12 | # WARMUP_ITERS: 3000 -------------------------------------------------------------------------------- /configs/FCPose/R_50_3X.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-FCPose.yaml" 2 | SOLVER: 3 | STEPS: (180000, 240000) 4 | MAX_ITER: 270000 5 | MODEL: 6 | FCOS: 7 | NUM_CLASSES: 1 8 | # FCPOSE: 9 | # LOSS_WEIGHT_DIRECTION: 0.0 10 | # LOSS_WEIGHT_KEYPOINT: 0.0 11 | # BASIS_MODULE: 12 | # LOSS_WEIGHT: 0.0 -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/Base-MEInst.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | MASK_ON: True 3 | META_ARCHITECTURE: "OneStageDetector" 4 | BACKBONE: 5 | NAME: "build_fcos_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res3", "res4", "res5"] 10 | PROPOSAL_GENERATOR: 11 | NAME: "MEInst" 12 | # PIXEL_MEAN: [102.9801, 115.9465, 122.7717] 13 | DATASETS: 14 | TRAIN: ("coco_2017_train",) 15 | TEST: ("coco_2017_val",) 16 | SOLVER: 17 | 
IMS_PER_BATCH: 16 18 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 19 | STEPS: (60000, 80000) 20 | MAX_ITER: 90000 21 | INPUT: 22 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 23 | VERSION: 2 24 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | USE_DEFORMABLE: True 10 | LAST_DEFORMABLE: True 11 | TYPE_DEFORMABLE: "DCNv1" 12 | INPUT: 13 | MIN_SIZE_TRAIN: (800,) 14 | SOLVER: 15 | WARMUP_METHOD: "constant" 16 | WARMUP_FACTOR: 0.3333 17 | WARMUP_ITERS: 500 18 | OUTPUT_DIR: "output/MEInst/R_50_1x" 19 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_1x_none.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | INPUT: 10 | MIN_SIZE_TRAIN: (800,) 11 | SOLVER: 12 | WARMUP_METHOD: "constant" 13 | WARMUP_FACTOR: 0.3333 14 | WARMUP_ITERS: 500 15 | OUTPUT_DIR: "output/MEInst/R_50_1x_none" 16 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | USE_DEFORMABLE: True 10 | LAST_DEFORMABLE: True 11 | TYPE_DEFORMABLE: "DCNv1" 12 | SOLVER: 13 | STEPS: (180000, 240000) 14 | MAX_ITER: 270000 15 | OUTPUT_DIR: "output/MEInst/R_50_3x" 16 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/MEInst_R_50_3x_512.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-MEInst.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | MEInst: 7 | DIM_MASK: 60 8 | MASK_SIZE: 28 9 | USE_DEFORMABLE: True 10 | LAST_DEFORMABLE: True 11 | TYPE_DEFORMABLE: "DCNv1" 12 | INPUT: 13 | MIN_SIZE_TRAIN: (384, 416, 448, 480, 512, 544) 14 | MIN_SIZE_TEST: 512 15 | SOLVER: 16 | STEPS: (180000, 240000) 17 | MAX_ITER: 270000 18 | OUTPUT_DIR: "output/MEInst/R_50_3x_512" 19 | -------------------------------------------------------------------------------- /configs/MEInst-InstanceSegmentation/README.md: -------------------------------------------------------------------------------- 1 | # Mask Encoding for Single Shot Instance Segmentation 2 | 3 | Rufeng Zhang, Zhi Tian, Chunhua Shen, Mingyu You, Youliang Yan 4 | 5 | [[`arXiv`](https://arxiv.org/abs/2003.11712)] [[`BibTeX`](#CitingMEInst)] 6 | 7 | ## Models 8 | 9 | ### COCO Instance Segmentation Baselines with [MEInst](https://arxiv.org/abs/2003.11712) 10 | 11 | Name | inf. 
time | box AP | mask AP | download 12 | --- |:---:|:---:|:---:|:---: 13 | [MEInst_R_50_1x_none](MEInst_R_50_1x_none.yaml) | 13 FPS | 39.5 | 30.7 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_1x_none.pth) 14 | [MEInst_R_50_1x](MEInst_R_50_1x.yaml) | 12 FPS | 40.1 | 31.7 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_1x.pth) 15 | [MEInst_R_50_3x](MEInst_R_50_3x.yaml) | 12 FPS | 43.6 | 34.5 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_3x.pth) 16 | [MEInst_R_50_3x_512](MEInst_R_50_3x_512.yaml) | 19 FPS | 40.8 | 32.2 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/MEInst_R_50_3x_512.pth) 17 | 18 | *Inference time is measured on a NVIDIA 1080Ti with batch size 1.* 19 | 20 | ## Quick Start 21 | 22 | 1. Download the [matrix](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz) file for mask encoding during training 23 | 2. Symlink the matrix path to datasets/components/xxx.npz, e.g., 24 | `coco/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz` 25 | 3. Follow [AdelaiDet](https://github.com/aim-uofa/AdelaiDet) for install, train and inference 26 | 27 | ### Step by step for Mask Encoding (Optional) 28 | 29 | We recommend to directly download the [matrix](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz) file and use it, as it can already handle most cases. 30 | And we also provide tools to generate encoding matrix yourself. 31 | 32 | Example: 33 | 34 | * Generate encoding matrix 35 | 36 | `python adet/modeling/MEInst/LME/mask_generation.py` 37 | 38 | * Evaluate the quality of reconstruction 39 | 40 | `python adet/modeling/MEInst/LME/mask_evaluation.py` 41 | 42 | ## Citing MEInst 43 | 44 | If you use MEInst, please use the following BibTeX entry. 45 | 46 | ```BibTeX 47 | @inproceedings{zhang2020MEInst, 48 | title = {Mask Encoding for Single Shot Instance Segmentation}, 49 | author = {Zhang, Rufeng and Tian, Zhi and Shen, Chunhua and You, Mingyu and Yan, Youliang}, 50 | booktitle = {Proc. IEEE Conf. Computer Vision and Pattern Recognition (CVPR)}, 51 | year = {2020} 52 | } 53 | ``` 54 | 55 | ## License 56 | 57 | For academic use, this project is licensed under the 2-clause BSD License - see the LICENSE file for details. For commercial use, please contact [Chunhua Shen](https://cs.adelaide.edu.au/~chhshen/). 
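As a final sanity check on the downloaded or self-generated encoding matrix (see Quick Start above), the `.npz` file can be inspected before training. A short sketch (the array names stored inside the file are not assumed here, only listed):

```python
import numpy as np

# Path as used in the Quick Start symlink step above.
path = "datasets/coco/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60.npz"
with np.load(path) as data:
    for name in data.files:  # list whatever arrays the matrix file contains
        print(name, data[name].shape, data[name].dtype)
```
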
58 | -------------------------------------------------------------------------------- /configs/RCNN/550_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "output/mask_rcnn/550_R_50_3x/model_final.pth" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | INPUT: 11 | MIN_SIZE_TRAIN: (440, 462, 484, 506, 528, 550) 12 | MAX_SIZE_TRAIN: 916 13 | MIN_SIZE_TEST: 550 14 | MAX_SIZE_TEST: 916 15 | OUTPUT_DIR: "output/mask_rcnn/550_R_50_3x" 16 | -------------------------------------------------------------------------------- /configs/RCNN/Base-RCNN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | ROI_BOX_HEAD: 25 | NAME: "FastRCNNConvFCHead" 26 | NUM_FC: 2 27 | POOLER_RESOLUTION: 7 28 | ROI_MASK_HEAD: 29 | NAME: "MaskRCNNConvUpsampleHead" 30 | NUM_CONV: 4 31 | POOLER_RESOLUTION: 14 32 | DATASETS: 33 | TRAIN: ("coco_2017_train",) 34 | TEST: ("coco_2017_val",) 35 | SOLVER: 36 | IMS_PER_BATCH: 16 37 | BASE_LR: 0.02 38 | STEPS: (60000, 80000) 39 | MAX_ITER: 90000 40 | INPUT: 41 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 42 | VERSION: 2 43 | -------------------------------------------------------------------------------- /configs/RCNN/LVIS/R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-LVIS.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | OUTPUT_DIR: "output/lvis/mask_rcnn/R_50_1x" 7 | -------------------------------------------------------------------------------- /configs/RCNN/R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-RCNN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/SOLOv2/Base-SOLOv2.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "SOLOv2" 3 | MASK_ON: True 4 | BACKBONE: 5 | NAME: "build_resnet_fpn_backbone" 6 | RESNETS: 7 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 8 | FPN: 9 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | TEST: ("coco_2017_val",) 13 | SOLVER: 14 | IMS_PER_BATCH: 16 15 | BASE_LR: 0.01 16 | 
WARMUP_FACTOR: 0.01 17 | WARMUP_ITERS: 1000 18 | STEPS: (60000, 80000) 19 | MAX_ITER: 90000 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | MASK_FORMAT: "bitmask" 23 | VERSION: 2 24 | 25 | 26 | -------------------------------------------------------------------------------- /configs/SOLOv2/R101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/SOLOv2/R50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-SOLOv2.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/SOLOv2/README.md: -------------------------------------------------------------------------------- 1 | # SOLOv2: Dynamic and Fast Instance Segmentation 2 | 3 | 4 | > [**SOLOv2: Dynamic and Fast Instance Segmentation**](https://arxiv.org/abs/2003.10152), 5 | > Xinlong Wang, Rufeng Zhang, Tao Kong, Lei Li, Chunhua Shen 6 | > In: Proc. Advances in Neural Information Processing Systems (NeurIPS), 2020 7 | > *arXiv preprint ([arXiv 2003.10152](https://arxiv.org/abs/2003.10152))* 8 | 9 | 10 | 11 | # Installation & Quick Start 12 | First, follow the [default instruction](../../README.md#Installation) to install the project and [datasets/README.md](https://github.com/facebookresearch/detectron2/blob/master/datasets/README.md) 13 | set up the datasets (e.g., MS-COCO). 14 | 15 | For demo, run the following command lines: 16 | ``` 17 | wget https://cloudstor.aarnet.edu.au/plus/s/chF3VKQT4RDoEqC/download -O SOLOv2_R50_3x.pth 18 | python demo/demo.py \ 19 | --config-file configs/SOLOv2/R50_3x.yaml \ 20 | --input input1.jpg input2.jpg \ 21 | --opts MODEL.WEIGHTS SOLOv2_R50_3x.pth 22 | ``` 23 | 24 | For training on COCO, run: 25 | ``` 26 | OMP_NUM_THREADS=1 python tools/train_net.py \ 27 | --config-file configs/SOLOv2/R50_3x.yaml \ 28 | --num-gpus 8 \ 29 | OUTPUT_DIR training_dir/SOLOv2_R50_3x 30 | ``` 31 | 32 | For evaluation on COCO, run: 33 | ``` 34 | OMP_NUM_THREADS=1 python tools/train_net.py \ 35 | --config-file configs/SOLOv2/R50_3x.yaml \ 36 | --eval-only \ 37 | --num-gpus 8 \ 38 | OUTPUT_DIR training_dir/SOLOv2_R50_3x \ 39 | MODEL.WEIGHTS training_dir/SOLOv2_R50_3x/model_final.pth 40 | ``` 41 | 42 | 43 | ## Models 44 | ### COCO Instance Segmentation Baselines with SOLOv2 45 | 46 | Name | inf. time | train. time | Mem | box AP | mask AP | download 47 | --- |:---:|:---:|:---:|:---:|:---:|:---: 48 | [SOLOv2_R50_3x](R50_3x.yaml) | 47ms | ~25h(36 epochs) | 3.7GB | - | 37.6 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/SOLOv2_R50_3x.pth) 49 | [SOLOv2_R101_3x](R101_3x.yaml) | 61ms | ~30h(36 epochs) | 4.7GB | - | 39.0 | [model](https://huggingface.co/ZjuCv/AdelaiDet/blob/main/SOLOv2_R101_3x.pth) 50 | 51 | 52 | *Disclaimer:* 53 | 54 | - All models are trained with multi-scale data augmentation. 55 | - Inference time is measured on a single V100 GPU. Training time is measured on 8 V100 GPUs. 56 | - This is a reimplementation. Thus, the numbers are slightly different from our original paper (within 0.3% in mask AP). 
57 | - The implementation on mmdetection is available at [https://github.com/WXinlong/SOLO](https://github.com/WXinlong/SOLO). 58 | 59 | 60 | # Citations 61 | Please consider citing our papers in your publications if the project helps your research. BibTeX reference is as follows. 62 | ```BibTeX 63 | @inproceedings{wang2020solo, 64 | title = {{SOLO}: Segmenting Objects by Locations}, 65 | author = {Wang, Xinlong and Kong, Tao and Shen, Chunhua and Jiang, Yuning and Li, Lei}, 66 | booktitle = {Proc. Eur. Conf. Computer Vision (ECCV)}, 67 | year = {2020} 68 | } 69 | 70 | ``` 71 | 72 | ```BibTeX 73 | @inproceedings{wang2020solov2, 74 | title = {{SOLOv2}: Dynamic and Fast Instance Segmentation}, 75 | author = {Wang, Xinlong and Zhang, Rufeng and Kong, Tao and Li, Lei and Shen, Chunhua}, 76 | booktitle = {Proc. Advances in Neural Information Processing Systems (NeurIPS)}, 77 | year = {2020} 78 | } 79 | ``` 80 | 81 | ```BibTeX 82 | @article{wang2021solo, 83 | title = {{SOLO}: A Simple Framework for Instance Segmentation}, 84 | author = {Wang, Xinlong and Zhang, Rufeng and Shen, Chunhua and Kong, Tao and Li, Lei}, 85 | journal = {IEEE T. Pattern Analysis and Machine Intelligence (TPAMI)}, 86 | year = {2021} 87 | } 88 | ``` 89 | -------------------------------------------------------------------------------- /datasets/gen_coco_person.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import os 4 | import json 5 | error_list = ['23382.png', '23441.png', '20714.png', '20727.png', '23300.png', '21200.png'] 6 | 7 | def mask2box(mask): 8 | index = np.argwhere(mask == 1) 9 | rows = index[:, 0] 10 | clos = index[:, 1] 11 | y1 = int(np.min(rows)) # y 12 | x1 = int(np.min(clos)) # x 13 | y2 = int(np.max(rows)) 14 | x2 = int(np.max(clos)) 15 | return (x1, y1, x2, y2) 16 | 17 | def gen_coco(phase): 18 | result = { 19 | "info": {"description": "PIC2.0 dataset."}, 20 | "categories": [ 21 | {"supercategory": "none", "id": 1, "name": "person"} 22 | ] 23 | } 24 | out_json = phase +'_person.json' 25 | store_segmentation = True 26 | 27 | images_info = [] 28 | labels_info = [] 29 | img_id = 0 30 | files = tuple(open("pic/list5/"+phase+'_id', 'r')) 31 | files = (_.strip() for _ in files) 32 | 33 | for index, image_name in enumerate(files): 34 | image_name = image_name+".png" 35 | print(index, image_name) 36 | if image_name in error_list: 37 | continue 38 | instance = cv2.imread(os.path.join('instance', phase, image_name), flags=cv2.IMREAD_GRAYSCALE) 39 | semantic = cv2.imread(os.path.join('semantic', phase, image_name), flags=cv2.IMREAD_GRAYSCALE) 40 | # print(instance.shape, semantic.shape) 41 | h = instance.shape[0] 42 | w = instance.shape[1] 43 | images_info.append( 44 | { 45 | "file_name": image_name[:-4]+'.jpg', 46 | "height": h, 47 | "width": w, 48 | "id": index 49 | } 50 | ) 51 | instance_max_num = instance.max() 52 | instance_ids = np.unique(instance) 53 | for instance_id in instance_ids: 54 | if instance_id == 0: 55 | continue 56 | instance_part = instance == instance_id 57 | object_pos = instance_part.nonzero() 58 | # category_id_ = int(semantic[object_pos[0][0], object_pos[1][0]]) 59 | category_id = int(np.max(semantic[object_pos[0], object_pos[1]])) 60 | # assert category_id_ == category_id, (category_id_, category_id) 61 | if category_id != 1: 62 | continue 63 | area = int(instance_part.sum()) 64 | x1, y1, x2, y2 = mask2box(instance_part) 65 | w = x2 - x1 + 1 66 | h = y2 - y1 + 1 67 | segmentation = [] 68 | if store_segmentation: 
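# The boolean instance mask is converted to a binary uint8 image for OpenCV;
# cv2.findContours (OpenCV 4.x two-value signature) extracts its contours, and
# CHAIN_APPROX_SIMPLE keeps only the end points of straight segments. Each
# contour is flattened to [x1, y1, x2, y2, ...] below to form a COCO polygon.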
69 | contours, hierarchy = cv2.findContours((instance_part * 255).astype(np.uint8), cv2.RETR_TREE, 70 | cv2.CHAIN_APPROX_SIMPLE) 71 | for contour in contours: 72 | contour = contour.flatten().tolist() 73 | if len(contour) > 4: 74 | segmentation.append(contour) 75 | if len(segmentation) == 0: 76 | print('error: empty segmentation for instance, skipped') 77 | continue 78 | labels_info.append( 79 | { 80 | "segmentation": segmentation, # poly 81 | "area": area, # segmentation area 82 | "iscrowd": 0, 83 | "image_id": index, 84 | "bbox": [x1, y1, w, h], 85 | "category_id": category_id, 86 | "id": img_id 87 | }, 88 | ) 89 | img_id += 1 90 | # break 91 | result["images"] = images_info 92 | result["annotations"] = labels_info 93 | with open('pic/annotations/' + out_json, 'w') as f: 94 | json.dump(result, f, indent=4) 95 | 96 | if __name__ == "__main__": 97 | if not os.path.exists('pic/annotations/'): 98 | os.makedirs('pic/annotations/') 99 | gen_coco("train") 100 | gen_coco("val") 101 | #gen_coco("test") 102 | -------------------------------------------------------------------------------- /datasets/prepare_thing_sem_from_instance.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import time 5 | import functools 6 | import multiprocessing as mp 7 | import numpy as np 8 | import os 9 | import argparse 10 | from pycocotools.coco import COCO 11 | from pycocotools import mask as maskUtils 12 | 13 | from detectron2.data.datasets.builtin_meta import _get_coco_instances_meta 14 | 15 | 16 | def annToRLE(ann, img_size): 17 | h, w = img_size 18 | segm = ann['segmentation'] 19 | if type(segm) == list: 20 | # polygon -- a single object might consist of multiple parts 21 | # we merge all parts into one mask rle code 22 | rles = maskUtils.frPyObjects(segm, h, w) 23 | rle = maskUtils.merge(rles) 24 | elif type(segm['counts']) == list: 25 | # uncompressed RLE 26 | rle = maskUtils.frPyObjects(segm, h, w) 27 | else: 28 | # rle 29 | rle = ann['segmentation'] 30 | return rle 31 | 32 | 33 | def annToMask(ann, img_size): 34 | rle = annToRLE(ann, img_size) 35 | m = maskUtils.decode(rle) 36 | return m 37 | 38 | 39 | def _process_instance_to_semantic(anns, output_semantic, img, categories): 40 | img_size = (img["height"], img["width"]) 41 | output = np.zeros(img_size, dtype=np.uint8) 42 | for ann in anns: 43 | mask = annToMask(ann, img_size) 44 | output[mask == 1] = categories[ann["category_id"]] + 1 45 | # save as compressed npz 46 | np.savez_compressed(output_semantic, mask=output) 47 | # Image.fromarray(output).save(output_semantic) 48 | 49 | 50 | def create_coco_semantic_from_instance(instance_json, sem_seg_root, categories): 51 | """ 52 | Create semantic segmentation annotations from instance segmentation 53 | annotations, to be used by PanopticFPN. 54 | 55 | It maps all thing categories to contiguous ids starting from 1, and maps all unlabeled pixels to class 0 56 | 57 | Args: 58 | instance_json (str): path to the instance json file, in COCO's format. 59 | sem_seg_root (str): a directory to output semantic annotation files 60 | categories (dict): 
maps each dataset "category_id" 61 | to a contiguous thing id starting from 0 62 | (the stored semantic label is that id plus 1). 63 | """ 64 | os.makedirs(sem_seg_root, exist_ok=True) 65 | 66 | coco_detection = COCO(instance_json) 67 | 68 | def iter_annotations(): 69 | for img_id in coco_detection.getImgIds(): 70 | anns_ids = coco_detection.getAnnIds(img_id) 71 | anns = coco_detection.loadAnns(anns_ids) 72 | img = coco_detection.loadImgs(int(img_id))[0] 73 | file_name = os.path.splitext(img["file_name"])[0] 74 | output = os.path.join(sem_seg_root, file_name + '.npz') 75 | yield anns, output, img 76 | 77 | # single process 78 | # print("Start writing to {} ...".format(sem_seg_root)) 79 | # start = time.time() 80 | # for anno, oup, img in iter_annotations(): 81 | # _process_instance_to_semantic( 82 | # anno, oup, img, categories) 83 | # print("Finished. time: {:.2f}s".format(time.time() - start)) 84 | # return 85 | 86 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 87 | 88 | print("Start writing to {} ...".format(sem_seg_root)) 89 | start = time.time() 90 | pool.starmap( 91 | functools.partial( 92 | _process_instance_to_semantic, 93 | categories=categories), 94 | iter_annotations(), 95 | chunksize=100, 96 | ) 97 | print("Finished. time: {:.2f}s".format(time.time() - start)) 98 | 99 | 100 | def get_parser(): 101 | parser = argparse.ArgumentParser(description="Generate thing semantic segmentation annotations from instance annotations") 102 | parser.add_argument( 103 | "--dataset-name", 104 | default="coco", 105 | help="dataset to generate", 106 | ) 107 | return parser 108 | 109 | 110 | if __name__ == "__main__": 111 | args = get_parser().parse_args() 112 | dataset_dir = os.path.join(os.path.dirname(__file__), args.dataset_name) 113 | if args.dataset_name == "coco": 114 | thing_id_to_contiguous_id = _get_coco_instances_meta()["thing_dataset_id_to_contiguous_id"] 115 | split_name = 'train2017' 116 | annotation_name = "annotations/instances_{}.json" 117 | else: 118 | thing_id_to_contiguous_id = {1: 0} 119 | split_name = 'train' 120 | annotation_name = "annotations/{}_person.json" 121 | for s in [split_name]: 122 | create_coco_semantic_from_instance( 123 | os.path.join(dataset_dir, annotation_name.format(s)), 124 | os.path.join(dataset_dir, "thing_{}".format(s)), 125 | thing_id_to_contiguous_id 126 | ) 127 | -------------------------------------------------------------------------------- /datasets/prepare_thing_sem_from_lvis.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import time 5 | import functools 6 | import multiprocessing as mp 7 | import numpy as np 8 | import os 9 | from lvis import LVIS 10 | from pycocotools import mask as maskUtils 11 | 12 | 13 | def annToRLE(ann, img_size): 14 | h, w = img_size 15 | segm = ann['segmentation'] 16 | if type(segm) == list: 17 | # polygon -- a single object might consist of multiple parts 18 | # we merge all parts into one mask rle code 19 | rles = maskUtils.frPyObjects(segm, h, w) 20 | rle = maskUtils.merge(rles) 21 | elif type(segm['counts']) == list: 22 | # uncompressed RLE 23 | rle = maskUtils.frPyObjects(segm, h, w) 24 | else: 25 | # rle 26 | rle = ann['segmentation'] 27 | return rle 28 | 29 | 30 | def annToMask(ann, img_size): 31 | rle = annToRLE(ann, img_size) 32 | m = maskUtils.decode(rle) 33 | return m 34 | 35 | 36 | def _process_instance_to_semantic(anns, output_semantic, img): 37 | img_size = (img["height"], img["width"]) 38 | output = np.zeros(img_size, dtype=np.uint8) 39 | for ann in anns: 40 | mask = annToMask(ann, img_size) 41 | output[mask == 1] = ann["category_id"] // 5 42 | # save as compressed npz 43 | np.savez_compressed(output_semantic, mask=output) 44 | # Image.fromarray(output).save(output_semantic) 45 | 46 | 47 | def create_lvis_semantic_from_instance(instance_json, sem_seg_root): 48 | """ 49 | Create semantic segmentation annotations from panoptic segmentation 50 | annotations, to be used by PanopticFPN. 51 | 52 | It maps all thing categories to contiguous ids starting from 1, and maps all unlabeled pixels to class 0 53 | 54 | Args: 55 | instance_json (str): path to the instance json file, in COCO's format. 56 | sem_seg_root (str): a directory to output semantic annotation files 57 | """ 58 | os.makedirs(sem_seg_root, exist_ok=True) 59 | 60 | lvis_detection = LVIS(instance_json) 61 | 62 | def iter_annotations(): 63 | for img_id in lvis_detection.get_img_ids(): 64 | anns_ids = lvis_detection.get_ann_ids([img_id]) 65 | anns = lvis_detection.load_anns(anns_ids) 66 | img = lvis_detection.load_imgs([img_id])[0] 67 | file_name = os.path.splitext(img["file_name"])[0] 68 | output = os.path.join(sem_seg_root, file_name + '.npz') 69 | yield anns, output, img 70 | 71 | # # single process 72 | # print("Start writing to {} ...".format(sem_seg_root)) 73 | # start = time.time() 74 | # for anno, oup, img in iter_annotations(): 75 | # _process_instance_to_semantic( 76 | # anno, oup, img) 77 | # print("Finished. time: {:.2f}s".format(time.time() - start)) 78 | # return 79 | 80 | pool = mp.Pool(processes=max(mp.cpu_count() // 2, 4)) 81 | 82 | print("Start writing to {} ...".format(sem_seg_root)) 83 | start = time.time() 84 | pool.starmap( 85 | functools.partial( 86 | _process_instance_to_semantic), 87 | iter_annotations(), 88 | chunksize=100, 89 | ) 90 | print("Finished. 
time: {:.2f}s".format(time.time() - start)) 91 | 92 | 93 | if __name__ == "__main__": 94 | dataset_dir = os.path.join(os.path.dirname(__file__), "lvis") 95 | for s in ["train"]: 96 | create_lvis_semantic_from_instance( 97 | os.path.join(dataset_dir, "lvis_v0.5_{}.json".format(s)), 98 | os.path.join(dataset_dir, "thing_{}".format(s)), 99 | ) 100 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:10.2-devel-ubuntu18.04 2 | 3 | RUN apt-get update && apt-get install -y libglib2.0-0 && apt-get clean 4 | 5 | RUN apt-get install -y wget htop byobu git gcc g++ vim libsm6 libxext6 libxrender-dev lsb-core 6 | 7 | RUN cd /root && wget https://repo.anaconda.com/archive/Anaconda3-2020.07-Linux-x86_64.sh 8 | 9 | RUN cd /root && bash Anaconda3-2020.07-Linux-x86_64.sh -b -p ./anaconda3 10 | 11 | RUN bash -c "source /root/anaconda3/etc/profile.d/conda.sh && conda install -y pytorch==1.5.0 torchvision cudatoolkit=10.2 -c pytorch" 12 | 13 | RUN bash -c "/root/anaconda3/bin/conda init bash" 14 | 15 | WORKDIR /root 16 | RUN mkdir code 17 | WORKDIR code 18 | 19 | RUN git clone https://github.com/facebookresearch/detectron2.git 20 | RUN bash -c "source /root/anaconda3/etc/profile.d/conda.sh && conda activate base && cd detectron2 && python setup.py build develop" 21 | 22 | RUN git clone https://github.com/aim-uofa/AdelaiDet.git adet 23 | 24 | WORKDIR adet 25 | RUN bash -c "source /root/anaconda3/etc/profile.d/conda.sh && conda activate base && python setup.py build develop" 26 | 27 | RUN rm /root/Anaconda3-2020.07-Linux-x86_64.sh 28 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | _build -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. AdelaiDet documentation master file, created by 2 | sphinx-quickstart on Wed Feb 26 15:24:04 2020. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to AdelaiDet's documentation! 7 | ===================================== 8 | 9 | .. 
toctree:: 10 | :maxdepth: 2 11 | 12 | 13 | modules/index -------------------------------------------------------------------------------- /docs/modules/checkpoint.rst: -------------------------------------------------------------------------------- 1 | adet.checkpoint package 2 | ============================= 3 | 4 | .. automodule:: adet.checkpoint 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/modules/config.rst: -------------------------------------------------------------------------------- 1 | adet.config package 2 | ========================= 3 | 4 | .. automodule:: adet.config 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | :inherited-members: 9 | 10 | 11 | Config References 12 | ----------------- 13 | 14 | .. literalinclude:: ../../adet/config/defaults.py 15 | :language: python 16 | :linenos: 17 | :lines: 4- -------------------------------------------------------------------------------- /docs/modules/data.rst: -------------------------------------------------------------------------------- 1 | adet.data package 2 | ======================= 3 | 4 | .. automodule:: adet.data 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/modules/index.rst: -------------------------------------------------------------------------------- 1 | API Documentation 2 | ================== 3 | 4 | .. toctree:: 5 | 6 | checkpoint 7 | config 8 | data 9 | layers 10 | modeling 11 | utils -------------------------------------------------------------------------------- /docs/modules/layers.rst: -------------------------------------------------------------------------------- 1 | adet.layers package 2 | ========================= 3 | 4 | .. automodule:: adet.layers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/modules/modeling.rst: -------------------------------------------------------------------------------- 1 | adet.modeling package 2 | =========================== 3 | 4 | .. automodule:: adet.modeling 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | adet.modeling.backbone module 10 | --------------------------------------- 11 | 12 | .. automodule:: adet.modeling.backbone 13 | :members: 14 | :undoc-members: 15 | :show-inheritance: 16 | 17 | adet.modeling.poolers module 18 | --------------------------------------- 19 | 20 | .. automodule:: adet.modeling.poolers 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | 26 | Model Registries 27 | ----------------- 28 | 29 | These are different registries provided in modeling. 30 | Each registry provide you the ability to replace it with your customized component, 31 | without having to modify detectron2's code. 32 | 33 | Note that it is impossible to allow users to customize any line of code directly. 34 | Even just to add one line at some place, 35 | you'll likely need to find out the smallest registry which contains that line, 36 | and register your component to that registry. 
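For example, a minimal sketch of registering a custom proposal generator (the class and config value below are made up for illustration) so that a config can select it via ``MODEL.PROPOSAL_GENERATOR.NAME``:

.. code-block:: python

    import torch.nn as nn
    from detectron2.modeling import PROPOSAL_GENERATOR_REGISTRY

    @PROPOSAL_GENERATOR_REGISTRY.register()
    class MyProposalGenerator(nn.Module):  # hypothetical name
        def __init__(self, cfg, input_shape):
            super().__init__()
            # build layers from cfg here

        def forward(self, images, features, gt_instances=None):
            # return (proposals, losses), mirroring the proposal generators
            # in this project (e.g. FCOS, MEInst, FCPose)
            return [], {}

Setting ``MODEL.PROPOSAL_GENERATOR.NAME: "MyProposalGenerator"`` in a config then makes detectron2 build this module in place of the default one.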
37 | 38 | 39 | * detectron2.modeling.META_ARCH_REGISTRY 40 | * detectron2.modeling.BACKBONE_REGISTRY 41 | * detectron2.modeling.PROPOSAL_GENERATOR_REGISTRY 42 | * detectron2.modeling.RPN_HEAD_REGISTRY 43 | * detectron2.modeling.ANCHOR_GENERATOR_REGISTRY 44 | * detectron2.modeling.ROI_HEADS_REGISTRY 45 | * detectron2.modeling.ROI_BOX_HEAD_REGISTRY 46 | * detectron2.modeling.ROI_MASK_HEAD_REGISTRY 47 | * detectron2.modeling.ROI_KEYPOINT_HEAD_REGISTRY -------------------------------------------------------------------------------- /docs/modules/utils.rst: -------------------------------------------------------------------------------- 1 | adet.utils package 2 | ======================== 3 | 4 | adet.utils.comm module 5 | -------------------------------- 6 | 7 | .. automodule:: adet.utils.comm 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: 11 | 12 | adet.utils.measures module 13 | ---------------------------- 14 | 15 | .. automodule:: adet.utils.measures 16 | :members: 17 | :undoc-members: 18 | :show-inheritance: 19 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | termcolor 2 | numpy 3 | tqdm 4 | docutils>=0.14 5 | Sphinx>=1.7 6 | recommonmark==0.4.0 7 | sphinx_rtd_theme 8 | mock 9 | matplotlib 10 | termcolor 11 | yacs 12 | tabulate 13 | cloudpickle 14 | Pillow==8.1.1 15 | future 16 | requests 17 | six 18 | https://download.pytorch.org/whl/nightly/cpu/torch-1.3.0.dev20191010%2Bcpu-cp37-cp37m-linux_x86_64.whl 19 | https://download.pytorch.org/whl/nightly/cpu/torchvision-0.5.0.dev20191008%2Bcpu-cp37-cp37m-linux_x86_64.whl 20 | git+git://github.com/facebookresearch/fvcore.git 21 | https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/detectron2-0.1.1%2Bcu101-cp37-cp37m-linux_x86_64.whl -------------------------------------------------------------------------------- /onnx/.gitignore: -------------------------------------------------------------------------------- 1 | ncnn 2 | -------------------------------------------------------------------------------- /onnx/README.md: -------------------------------------------------------------------------------- 1 | 2 | # update history 3 | 4 | 2020.05.07: 5 | 6 | 1. added an [onnxruntime](https://github.com/microsoft/onnxruntime) verification demo 7 | 8 | 2. added an all-in-one script for the RT model 9 | 10 | # Export to onnx/caffe/ncnn 11 | 12 | Refer to the all-in-one script [pytorch-onnx-caffe-ncnn.sh](pytorch-onnx-caffe-ncnn.sh) (BN instead of GN is used in the FCOS head). 13 | 14 | Refer to another all-in-one script, [pytorch-onnx-caffe-ncnn-rt.sh](pytorch-onnx-caffe-ncnn-rt.sh), for the RT model along with an onnxruntime verification demo. 15 | 16 | Note: converting the model to *Caffe* and *NCNN* requires BN in the FCOS head. 17 | 18 | # Normalization in the FCOS head 19 | The normalization in the FCOS head is GroupNorm (GN) by default, as in the original paper. Unlike BN, GN calculates the mean and variance of features online, which costs extra time and memory. 20 | On the other hand, as BN can be merged into the previous convolution layer, BN introduces no computation overhead during inference. The following steps give a simple way to measure the impact of GN on speed; a sketch of the timing code for step 2 is shown below.
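A rough version of that timing code, wrapped around the per-image prediction call (the variable and helper names are illustrative; demo/demo.py and demo/predictor.py may differ in detail):

```python
import time
from detectron2.data.detection_utils import read_image

total_time, num_images = 0.0, 0
for path in input_paths:                     # illustrative: the demo's list of input images
    img = read_image(path, format="BGR")
    start = time.perf_counter()
    predictions, _ = demo.run_on_image(img)  # illustrative: VisualizationDemo from demo/predictor.py
    total_time += time.perf_counter() - start
    num_images += 1
print("total {:.4f}s, average {:.4f}s per image".format(total_time, total_time / num_images))
```
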
21 | 22 | * prepare some images (for example 1000) in the folder output/test/input/ 23 | 24 | * add time-measurement code to demo/demo.py (a sketch is given above) 25 | 26 | * GN + GPU: total execution time 285.1398s, average 0.0696s per image 27 | 28 | ```sh 29 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth 30 | ``` 31 | * BN + GPU: total execution time 257.4333s, average 0.0628s per image 32 | ```sh 33 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth MODEL.FCOS.NORM BN 34 | ``` 35 | * GN + CPU: total execution time 1125.4375s, average 1.0112s per image 36 | ```sh 37 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth MODEL.DEVICE cpu 38 | ``` 39 | * BN + CPU: total execution time 1068.0550s, average 0.9596s per image 40 | ```sh 41 | python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input/ --output output/test/output/ --opts MODEL.WEIGHTS weights/fcos_R_50_1x.pth MODEL.DEVICE cpu MODEL.FCOS.NORM BN 42 | ``` 43 | 44 | Tested on a 2080 Ti. The results show that GN is 5~10% slower than BN. 45 | 46 | # Result comparison between PyTorch and NCNN 47 | 48 | * PyTorch version: run demo/demo.py 49 | * NCNN version: refer to https://github.com/blueardour/ncnn/blob/master/examples/fcos.cpp 50 | 51 | Example: take coco/test2017/000000144041.jpg as the test image 52 | 53 | ``` 54 | #> cd AdelaiDet 55 | 56 | #> mkdir -p output/test/ 57 | 58 | #> cp $COCO/test2017/000000144041.jpg output/test/input.jpg 59 | 60 | #> python demo/demo.py --config-file configs/FCOS-Detection/R_50_1x.yaml --input output/test/input.jpg --output output/test/output.jpg --opts MODEL.WEIGHTS /data/pretrained/pytorch/fcos/FCOS_R_50_1x_bn_head.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 61 | 62 | 63 | #> cd $NCNN_ROOT # (build the project ahead of time) 64 | 65 | #> cd build-host-gcc-linux/examples 66 | 67 | #> ln -s /data/pretrained/ncnn/fcos/FCOS_R_50_1x_bn_head-update-opt.bin net.bin # (refer to pytorch-onnx-caffe-ncnn.sh to generate the file) 68 | 69 | #> ln -s /data/pretrained/ncnn/fcos/FCOS_R_50_1x_bn_head-update-opt.param net.param # (refer to pytorch-onnx-caffe-ncnn.sh to generate the file) 70 | 71 | #> ./fcos /workspace/git/uofa-AdelaiDet/output/test/input.jpg net.param net.bin 800 1088 72 | ``` 73 | 74 | -------------------------------------------------------------------------------- /onnx/pytorch-onnx-caffe-ncnn-rt.sh: -------------------------------------------------------------------------------- 1 | 2 | if [ "$1" == "update" ]; 3 | then 4 | update='update' 5 | else 6 | update= 7 | fi 8 | 9 | if [ "$1" == "test" ]; 10 | then 11 | onnx_rt='test' 12 | else 13 | onnx_rt= 14 | fi 15 | 16 | caffe_repo=/data/pretrained/caffe/fcos/ 17 | onnx_repo=/data/pretrained/onnx/fcos/ 18 | pytorch_repo=/data/pretrained/pytorch/fcos/ 19 | ncnn_repo=/data/pretrained/ncnn/fcos/ 20 | 21 | config=configs/FCOS-Detection/FCOS_RT/MS_R_50_4x_syncbn_bn_head.yaml 22 | case=FCOS_RT_MS_R_50_4x_syncbn_bn_head 23 | 24 | if [ ! 
-e $onnx_repo/$case.onnx ] || [ "$update" != "" ]; 25 | then 26 | cd /workspace/git/uofa-AdelaiDet/ # folder of project https://github.com/aim-uofa/AdelaiDet 27 | pwd 28 | python -V # ensure python3.x 29 | python onnx/export_model_to_onnx.py \ 30 | --config-file $config \ 31 | --output $onnx_repo/$case.onnx \ 32 | --width 768 --height 640 \ 33 | --opts MODEL.WEIGHTS $pytorch_repo/$case.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 34 | if [ $? -ne 0 ]; then exit; fi 35 | fi 36 | 37 | if [ ! -e $onnx_repo/$case-update.onnx ] || [ "$update" != "" ]; 38 | then 39 | # advise version 1.3.0 40 | cd /workspace/git/onnx-simplifier # folder of project: https://github.com/daquexian/onnx-simplifier 41 | pwd 42 | python -V # ensure python3.x 43 | python -m onnxsim $onnx_repo/$case.onnx $onnx_repo/$case-update.onnx 44 | if [ $? -ne 0 ]; then exit; fi 45 | fi 46 | 47 | # optional 48 | if [ ! -e $caffe_repo/$case-update.caffemodel ]; 49 | then 50 | # switch to python2 and ensure caffe (with the upsample patch) ready 51 | # refer: https://github.com/blueardour/caffe.git for patched version 52 | cd /workspace/git/onnx2caffe # folder of project: https://github.com/MTlab/onnx2caffe 53 | pwd 54 | python -V 55 | python convertCaffe.py $onnx_repo/$case-update.onnx $caffe_repo/$case-update.prototxt $caffe_repo/$case-update.caffemodel 56 | if [ $? -ne 0 ]; then exit; fi 57 | fi 58 | 59 | # ncnn 60 | if [ ! -e $ncnn_repo/$case-update-opt.bin ] || [ "$update" != "" ] 61 | then 62 | cd /workspace/git/ncnn # folder of project: https://github.com/Tencent/ncnn 63 | pwd 64 | mkdir -p $ncnn_repo 65 | ./build-host-gcc-linux/tools/onnx/onnx2ncnn $onnx_repo/$case-update.onnx $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin 66 | if [ $? -eq 0 ]; then 67 | echo "Optimizing" 68 | ./build-host-gcc-linux/tools/ncnnoptimize $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin \ 69 | $ncnn_repo/$case-update-opt.param $ncnn_repo/$case-update-opt.bin \ 70 | 0 #data 640 512 3 71 | else 72 | echo "Convert failed" 73 | fi 74 | fi 75 | 76 | if [ "$onnx_rt" == "test" ]; 77 | then 78 | cd /workspace/git/uofa-AdelaiDet/ # folder of project https://github.com/aim-uofa/AdelaiDet 79 | pwd 80 | python -V # ensure python3.x 81 | python onnx/test_onnxruntime.py \ 82 | --config-file $config \ 83 | --output $onnx_repo/$case-update.onnx \ 84 | --width 768 --height 640 \ 85 | --opts MODEL.WEIGHTS $pytorch_repo/$case.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 86 | if [ $? -ne 0 ]; then exit; fi 87 | fi 88 | 89 | 90 | -------------------------------------------------------------------------------- /onnx/pytorch-onnx-caffe-ncnn.sh: -------------------------------------------------------------------------------- 1 | 2 | update=$1 # force update 3 | caffe_repo=/data/pretrained/caffe/fcos/ 4 | onnx_repo=/data/pretrained/onnx/fcos/ 5 | pytorch_repo=/data/pretrained/pytorch/fcos/ 6 | ncnn_repo=/data/pretrained/ncnn/fcos/ 7 | case=FCOS_R_50_1x_bn_head 8 | 9 | mkdir -p $caffe_repo $onnx_repo $pytorch_repo $ncnn_repo 10 | 11 | if [ ! -e $onnx_repo/$case.onnx ] || [ "$update" != "" ]; 12 | then 13 | cd /workspace/git/uofa-AdelaiDet/ # folder of project https://github.com/aim-uofa/AdelaiDet 14 | pwd 15 | python -V # ensure python3.x 16 | python onnx/export_model_to_onnx.py \ 17 | --config-file configs/FCOS-Detection/R_50_1x.yaml \ 18 | --output $onnx_repo/$case.onnx \ 19 | --opts MODEL.WEIGHTS $pytorch_repo/$case.pth MODEL.FCOS.NORM "BN" MODEL.DEVICE cpu 20 | fi 21 | 22 | if [ ! 
-e $onnx_repo/$case-update.onnx ] || [ "$update" != "" ]; 23 | then 24 | # advise version 1.3.0 25 | cd /workspace/git/onnx-simplifier # folder of project: https://github.com/daquexian/onnx-simplifier 26 | pwd 27 | python -V # ensure python3.x 28 | python -m onnxsim $onnx_repo/$case.onnx $onnx_repo/$case-update.onnx 29 | fi 30 | 31 | # optional 32 | if [ ! -e $caffe_repo/$case-update.caffemodel ]; 33 | then 34 | # switch to python2 and ensure caffe (with the upsample patch) ready 35 | # refer: https://github.com/blueardour/caffe.git for patched version 36 | cd /workspace/git/onnx2caffe # folder of project: https://github.com/MTlab/onnx2caffe 37 | pwd 38 | python -V 39 | python convertCaffe.py $onnx_repo/$case-update.onnx $caffe_repo/$case-update.prototxt $caffe_repo/$case-update.caffemodel 40 | fi 41 | 42 | # ncnn 43 | if [ ! -e $ncnn_repo/$case-opt.bin ] || [ "$update" != "" ] 44 | then 45 | cd /workspace/git/ncnn # folder of project: https://github.com/Tencent/ncnn 46 | pwd 47 | mkdir -p $ncnn_repo 48 | ./build-host-gcc-linux/tools/onnx/onnx2ncnn $onnx_repo/$case-update.onnx $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin 49 | if [ $? -eq 0 ]; then 50 | echo "Optimizing" 51 | ./build-host-gcc-linux/tools/ncnnoptimize $ncnn_repo/$case-update.param $ncnn_repo/$case-update.bin \ 52 | $ncnn_repo/$case-update-opt.param $ncnn_repo/$case-update-opt.bin \ 53 | 0 54 | else 55 | echo "Convert failed" 56 | fi 57 | fi 58 | 59 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | import glob 5 | import os 6 | from setuptools import find_packages, setup 7 | import torch 8 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 9 | 10 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 11 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 12 | 13 | 14 | def get_extensions(): 15 | this_dir = os.path.dirname(os.path.abspath(__file__)) 16 | extensions_dir = os.path.join(this_dir, "adet", "layers", "csrc") 17 | 18 | main_source = os.path.join(extensions_dir, "vision.cpp") 19 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(extensions_dir, "**", "*.cu")) + glob.glob( 21 | os.path.join(extensions_dir, "*.cu") 22 | ) 23 | 24 | sources = [main_source] + sources 25 | 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 32 | extension = CUDAExtension 33 | sources += source_cuda 34 | define_macros += [("WITH_CUDA", None)] 35 | extra_compile_args["nvcc"] = [ 36 | "-DCUDA_HAS_FP16=1", 37 | "-D__CUDA_NO_HALF_OPERATORS__", 38 | "-D__CUDA_NO_HALF_CONVERSIONS__", 39 | "-D__CUDA_NO_HALF2_OPERATORS__", 40 | ] 41 | 42 | if torch_ver < [1, 7]: 43 | # supported by https://github.com/pytorch/pytorch/pull/43931 44 | CC = os.environ.get("CC", None) 45 | if CC is not None: 46 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 47 | 48 | sources = [os.path.join(extensions_dir, s) for s in sources] 49 | 50 | include_dirs = [extensions_dir] 51 | 52 | ext_modules = [ 53 | extension( 54 | "adet._C", 55 | sources, 56 | include_dirs=include_dirs, 57 | define_macros=define_macros, 58 | extra_compile_args=extra_compile_args, 59 | ) 60 | ] 61 | 62 
| return ext_modules 63 | 64 | 65 | setup( 66 | name="AdelaiDet", 67 | version="0.2.0", 68 | author="Adelaide Intelligent Machines", 69 | url="https://github.com/stanstarks/AdelaiDet", 70 | description="AdelaiDet is AIM's research " 71 | "platform for instance-level detection tasks based on Detectron2.", 72 | packages=find_packages(exclude=("configs", "tests")), 73 | python_requires=">=3.6", 74 | install_requires=[ 75 | "termcolor>=1.1", 76 | "Pillow>=6.0", 77 | "yacs>=0.1.6", 78 | "tabulate", 79 | "cloudpickle", 80 | "matplotlib", 81 | "tqdm>4.29.0", 82 | "tensorboard", 83 | "rapidfuzz", 84 | "Polygon3", 85 | "shapely", 86 | "scikit-image", 87 | "editdistance" 88 | ], 89 | extras_require={"all": ["psutil"]}, 90 | ext_modules=get_extensions(), 91 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 92 | ) 93 | -------------------------------------------------------------------------------- /tools/convert_fcos_weight.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def get_parser(): 8 | parser = argparse.ArgumentParser(description="FCOS Detectron2 Converter") 9 | parser.add_argument( 10 | "--model", 11 | default="weights/fcos_R_50_1x_official.pth", 12 | metavar="FILE", 13 | help="path to model weights", 14 | ) 15 | parser.add_argument( 16 | "--output", 17 | default="weights/fcos_R_50_1x_converted.pth", 18 | metavar="FILE", 19 | help="path to model weights", 20 | ) 21 | return parser 22 | 23 | 24 | def rename_resnet_param_names(ckpt_state_dict): 25 | converted_state_dict = OrderedDict() 26 | for key in ckpt_state_dict.keys(): 27 | value = ckpt_state_dict[key] 28 | 29 | key = key.replace("module.", "") 30 | key = key.replace("body", "bottom_up") 31 | 32 | # adding a . 
ahead to avoid renaming the fpn modules 33 | # this can happen after fpn renaming 34 | key = key.replace(".layer1", ".res2") 35 | key = key.replace(".layer2", ".res3") 36 | key = key.replace(".layer3", ".res4") 37 | key = key.replace(".layer4", ".res5") 38 | key = key.replace("downsample.0", "shortcut") 39 | key = key.replace("downsample.1", "shortcut.norm") 40 | key = key.replace("bn1", "conv1.norm") 41 | key = key.replace("bn2", "conv2.norm") 42 | key = key.replace("bn3", "conv3.norm") 43 | key = key.replace("fpn_inner2", "fpn_lateral3") 44 | key = key.replace("fpn_inner3", "fpn_lateral4") 45 | key = key.replace("fpn_inner4", "fpn_lateral5") 46 | key = key.replace("fpn_layer2", "fpn_output3") 47 | key = key.replace("fpn_layer3", "fpn_output4") 48 | key = key.replace("fpn_layer4", "fpn_output5") 49 | key = key.replace("top_blocks", "top_block") 50 | key = key.replace("fpn.", "") 51 | key = key.replace("rpn", "proposal_generator") 52 | key = key.replace("head", "fcos_head") 53 | 54 | converted_state_dict[key] = value 55 | return converted_state_dict 56 | 57 | 58 | if __name__ == "__main__": 59 | args = get_parser().parse_args() 60 | ckpt = torch.load(args.model) 61 | model = rename_resnet_param_names(ckpt["model"]) 62 | torch.save(model, args.output) 63 | -------------------------------------------------------------------------------- /tools/remove_optim_from_ckpt.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | import torch 4 | 5 | 6 | def get_parser(): 7 | parser = argparse.ArgumentParser(description="Keep only model in ckpt") 8 | parser.add_argument( 9 | "--path", 10 | default="output/person/blendmask/R_50_1x/", 11 | help="path to model weights", 12 | ) 13 | parser.add_argument( 14 | "--name", 15 | default="R_50_1x.pth", 16 | help="name of output file", 17 | ) 18 | return parser 19 | 20 | 21 | if __name__ == "__main__": 22 | args = get_parser().parse_args() 23 | ckpt = torch.load(args.path + 'model_final.pth') 24 | model = ckpt["model"] 25 | torch.save(model, args.path + args.name) 26 | -------------------------------------------------------------------------------- /tools/rename_blendmask.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from collections import OrderedDict 3 | 4 | import torch 5 | 6 | 7 | def get_parser(): 8 | parser = argparse.ArgumentParser(description="FCOS Detectron2 Converter") 9 | parser.add_argument( 10 | "--model", 11 | default="weights/blendmask/person/R_50_1x.pth", 12 | metavar="FILE", 13 | help="path to model weights", 14 | ) 15 | parser.add_argument( 16 | "--output", 17 | default="weights/blendmask/person/R_50_1x.pth", 18 | metavar="FILE", 19 | help="path to model weights", 20 | ) 21 | return parser 22 | 23 | 24 | def rename_resnet_param_names(ckpt_state_dict): 25 | converted_state_dict = OrderedDict() 26 | for key in ckpt_state_dict.keys(): 27 | value = ckpt_state_dict[key] 28 | key = key.replace("centerness", "ctrness") 29 | 30 | converted_state_dict[key] = value 31 | return converted_state_dict 32 | 33 | 34 | if __name__ == "__main__": 35 | args = get_parser().parse_args() 36 | ckpt = torch.load(args.model) 37 | if "model" in ckpt: 38 | model = rename_resnet_param_names(ckpt["model"]) 39 | else: 40 | model = rename_resnet_param_names(ckpt) 41 | torch.save(model, args.output) 42 | -------------------------------------------------------------------------------- /tools/visualize_data.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | import argparse 4 | import numpy as np 5 | import os 6 | from itertools import chain 7 | import cv2 8 | import tqdm 9 | from PIL import Image 10 | 11 | from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_train_loader 12 | from detectron2.data import detection_utils as utils 13 | from detectron2.data.build import filter_images_with_few_keypoints 14 | from detectron2.utils.logger import setup_logger 15 | from detectron2.utils.visualizer import Visualizer 16 | 17 | from adet.config import get_cfg 18 | from adet.data.dataset_mapper import DatasetMapperWithBasis 19 | 20 | 21 | def setup(args): 22 | cfg = get_cfg() 23 | if args.config_file: 24 | cfg.merge_from_file(args.config_file) 25 | cfg.merge_from_list(args.opts) 26 | cfg.freeze() 27 | return cfg 28 | 29 | 30 | def parse_args(in_args=None): 31 | parser = argparse.ArgumentParser(description="Visualize ground-truth data") 32 | parser.add_argument( 33 | "--source", 34 | choices=["annotation", "dataloader"], 35 | required=True, 36 | help="visualize the annotations or the data loader (with pre-processing)", 37 | ) 38 | parser.add_argument("--config-file", metavar="FILE", help="path to config file") 39 | parser.add_argument("--output-dir", default="./", help="path to output directory") 40 | parser.add_argument("--show", action="store_true", help="show output in a window") 41 | parser.add_argument( 42 | "--opts", 43 | help="Modify config options using the command-line", 44 | default=[], 45 | nargs=argparse.REMAINDER, 46 | ) 47 | return parser.parse_args(in_args) 48 | 49 | 50 | if __name__ == "__main__": 51 | args = parse_args() 52 | logger = setup_logger() 53 | logger.info("Arguments: " + str(args)) 54 | cfg = setup(args) 55 | 56 | dirname = args.output_dir 57 | os.makedirs(dirname, exist_ok=True) 58 | metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0]) 59 | 60 | def output(vis, fname): 61 | if args.show: 62 | print(fname) 63 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 64 | cv2.waitKey() 65 | else: 66 | filepath = os.path.join(dirname, fname) 67 | print("Saving to {} ...".format(filepath)) 68 | vis.save(filepath) 69 | 70 | scale = 2.0 if args.show else 1.0 71 | if args.source == "dataloader": 72 | mapper = DatasetMapperWithBasis(cfg, True) 73 | train_data_loader = build_detection_train_loader(cfg, mapper) 74 | for batch in train_data_loader: 75 | for per_image in batch: 76 | # Pytorch tensor is in (C, H, W) format 77 | img = per_image["image"].permute(1, 2, 0) 78 | if cfg.INPUT.FORMAT == "BGR": 79 | img = img[:, :, [2, 1, 0]] 80 | else: 81 | img = np.asarray(Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 82 | 83 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 84 | target_fields = per_image["instances"].get_fields() 85 | labels = [metadata.thing_classes[i] for i in target_fields["gt_classes"]] 86 | vis = visualizer.overlay_instances( 87 | labels=labels, 88 | boxes=target_fields.get("gt_boxes", None), 89 | masks=target_fields.get("gt_masks", None), 90 | keypoints=target_fields.get("gt_keypoints", None), 91 | ) 92 | output(vis, str(per_image["image_id"]) + ".jpg") 93 | else: 94 | dicts = list(chain.from_iterable([DatasetCatalog.get(k) for k in cfg.DATASETS.TRAIN])) 95 | if cfg.MODEL.KEYPOINT_ON: 96 | dicts = filter_images_with_few_keypoints(dicts, 1) 97 | for dic in tqdm.tqdm(dicts): 98 | img = 
utils.read_image(dic["file_name"], "RGB") 99 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 100 | vis = visualizer.draw_dataset_dict(dic) 101 | output(vis, os.path.basename(dic["file_name"])) --------------------------------------------------------------------------------
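For reference, tools/visualize_data.py above is typically invoked with one of the repo's configs, either on the raw annotations or through the training dataloader:

```
python tools/visualize_data.py \
    --source dataloader \
    --config-file configs/FCOS-Detection/R_50_1x.yaml \
    --output-dir output/vis_data
```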