├── .gitignore ├── LICENSE ├── README.md ├── cvpods ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── catalog.py │ ├── checkpoint.py │ ├── detection_checkpoint.py │ └── model_loading.py ├── configs │ ├── base_classification_config.py │ ├── base_config.py │ ├── base_detection_config.py │ ├── blendmask_config.py │ ├── centernet2_config.py │ ├── centernet_config.py │ ├── centernet_det2_config.py │ ├── conditionInst_config.py │ ├── config_helper.py │ ├── deeplab_config.py │ ├── dynamic_routing_config.py │ ├── efficientdet_config.py │ ├── fcos_config.py │ ├── fcos_sepc_config.py │ ├── keypoint_config.py │ ├── ovr_cnn_config.py │ ├── panoptic_deeplab.py │ ├── panoptic_fcn.py │ ├── panoptic_fpn.py │ ├── pointrend_config.py │ ├── rcnn_config.py │ ├── rcnn_fpn_config.py │ ├── retinanet_config.py │ ├── retinanet_sepc_config.py │ ├── sipmask_config.py │ ├── solo_config.py │ ├── solov2_config.py │ ├── sparse_rcnn_config.py │ ├── ssd_config.py │ └── yolo_config.py ├── data │ ├── __init__.py │ ├── base_dataset.py │ ├── build.py │ ├── datasets │ │ ├── __init__.py │ │ ├── builtin_meta.py │ │ ├── citypersons.py │ │ ├── cityscapes.py │ │ ├── coco.py │ │ ├── coco_captions.py │ │ ├── crowdhuman.py │ │ ├── ext │ │ │ ├── __init__.py │ │ │ ├── mask.py │ │ │ └── ytvos.py │ │ ├── imagenet.py │ │ ├── lvis.py │ │ ├── ovis.py │ │ ├── paths_route.py │ │ ├── target_generator.py │ │ ├── voc.py │ │ ├── widerface.py │ │ └── youtubevis.py │ ├── detection_utils.py │ ├── registry.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ ├── grouped_batch_sampler.py │ │ └── sampler.py │ ├── transforms │ │ ├── __init__.py │ │ ├── transform.py │ │ ├── transform_gen.py │ │ └── transform_util.py │ └── wrapped_dataset.py ├── engine │ ├── __init__.py │ ├── hooks.py │ ├── launch.py │ ├── predictor.py │ ├── setup.py │ └── trainer.py ├── evaluation │ ├── __init__.py │ ├── citypersons_evaluation.py │ ├── cityscapes_evaluation.py │ ├── classification_evaluation.py │ ├── coco_evaluation.py │ ├── coco_evaluation_hack.py │ ├── crowdhuman_evaluation.py │ ├── crowdhumantools.py │ ├── eval_MR_multisetup.py │ ├── evaluator.py │ ├── evaluator_hack.py │ ├── lvis_evaluation.py │ ├── panoptic_evaluation.py │ ├── pascal_voc_evaluation.py │ ├── rotated_coco_evaluation.py │ ├── sem_seg_evaluation.py │ ├── testing.py │ ├── widerface_evaluation.py │ ├── widerfacetools.py │ └── youtubevis_evaluation.py ├── export │ ├── README.md │ ├── __init__.py │ ├── api.py │ ├── c10.py │ ├── caffe2_export.py │ ├── caffe2_inference.py │ ├── caffe2_modeling.py │ ├── patcher.py │ └── shared.py ├── layers │ ├── __init__.py │ ├── activation_funcs.py │ ├── aspp.py │ ├── batch_norm.py │ ├── blocks.py │ ├── border_align.py │ ├── box_ops.py │ ├── cond_conv.py │ ├── conv_with_kaiming_uniform.py │ ├── crop_split.py │ ├── crop_split_gt.py │ ├── csrc │ │ ├── README.md │ │ ├── ROIAlign │ │ │ ├── ROIAlign.h │ │ │ ├── ROIAlign_cpu.cpp │ │ │ └── ROIAlign_cuda.cu │ │ ├── ROIAlignRotated │ │ │ ├── ROIAlignRotated.h │ │ │ ├── ROIAlignRotated_cpu.cpp │ │ │ └── ROIAlignRotated_cuda.cu │ │ ├── SwapAlign2Nat │ │ │ ├── SwapAlign2Nat.h │ │ │ └── SwapAlign2Nat_cuda.cu │ │ ├── border_align │ │ │ ├── border_align.h │ │ │ └── border_align_kernel.cu │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ ├── box_iou_rotated_cpu.cpp │ │ │ ├── box_iou_rotated_cuda.cu │ │ │ └── box_iou_rotated_utils.h │ │ ├── correlation │ │ │ ├── correlation.h │ │ │ └── correlation_kernel.cu │ │ ├── crop_split │ │ │ ├── crop_split.h │ │ │ └── crop_split_kernel.cu │ │ ├── crop_split_gt │ │ │ ├── 
crop_split_gt.h │ │ │ └── crop_split_gt_kernel.cu │ │ ├── cuda_version.cu │ │ ├── deformable │ │ │ ├── deform_conv.h │ │ │ ├── deform_conv_cuda.cu │ │ │ └── deform_conv_cuda_kernel.cu │ │ ├── deformable_attn │ │ │ ├── ms_deform_attn.h │ │ │ ├── ms_deform_attn_cuda.cu │ │ │ └── ms_deform_im2col_cuda.cuh │ │ ├── masked_conv2d │ │ │ ├── masked_conv2d.h │ │ │ └── masked_conv2d_cuda.cu │ │ ├── ml_nms │ │ │ ├── ml_nms.cu │ │ │ └── ml_nms.h │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ ├── nms_rotated_cpu.cpp │ │ │ └── nms_rotated_cuda.cu │ │ ├── sigmoid_focal_loss │ │ │ ├── SigmoidFocalLoss.h │ │ │ └── SigmoidFocalLoss_cuda.cu │ │ └── vision.cpp │ ├── deform_conv.py │ ├── deform_conv_with_off.py │ ├── deform_unfold_module.py │ ├── deformable_pytorch.py │ ├── dynamic_conv.py │ ├── dynamic_conv_with_condition_dcn_atten.py │ ├── dynamic_conv_with_dcn.py │ ├── dynamic_weights.py │ ├── feature_align.py │ ├── fix_conv.py │ ├── mask_ops.py │ ├── masked_conv.py │ ├── misc.py │ ├── ms_deform_attn.py │ ├── naive_group_norm.py │ ├── nms.py │ ├── panopitc_deeplab.py │ ├── point_transformer.py │ ├── position_encoding.py │ ├── roi_align.py │ ├── roi_align_rotated.py │ ├── rotated_boxes.py │ ├── saconv.py │ ├── shape_spec.py │ ├── swap_align2nat.py │ ├── swtichable_conv.py │ └── wrappers.py ├── modeling │ ├── __init__.py │ ├── anchor_generator.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── bifpn.py │ │ ├── bifpn_fcos.py │ │ ├── centernet2_bifpn.py │ │ ├── darknet.py │ │ ├── deformable_transformer.py │ │ ├── dla.py │ │ ├── dlafpn.py │ │ ├── dynamic_arch │ │ │ ├── __init__.py │ │ │ ├── cal_op_flops.py │ │ │ ├── dynamic_backbone.py │ │ │ ├── dynamic_cell.py │ │ │ └── op_with_flops.py │ │ ├── efficientnet.py │ │ ├── fpn.py │ │ ├── res2net.py │ │ ├── resnet.py │ │ ├── sepc.py │ │ ├── sf_fpn.py │ │ ├── splat.py │ │ ├── swin.py │ │ ├── transformer.py │ │ └── vgg.py │ ├── basenet │ │ ├── __init__.py │ │ └── basenet.py │ ├── box_regression.py │ ├── losses │ │ ├── __init__.py │ │ ├── circle_loss.py │ │ ├── dice_loss.py │ │ ├── flow_loss.py │ │ ├── focal_loss.py │ │ ├── iou_loss.py │ │ ├── reg_l1_loss.py │ │ ├── sigmoid_focal_loss.py │ │ ├── smooth_l1_loss.py │ │ └── sparse_rcnn_loss.py │ ├── matcher.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── borderdet.py │ │ ├── boundary_mask_rcnn │ │ │ ├── __init__.py │ │ │ ├── boundary_mask_rcnn.py │ │ │ └── boundary_track_mask_rcnn.py │ │ ├── centernet.py │ │ ├── conditionalInst │ │ │ ├── __init__.py │ │ │ ├── conditionalInst.py │ │ │ ├── conditionalInst_boundary.py │ │ │ ├── conditionalInst_boundary_decouple.py │ │ │ ├── conditionalInst_boundary_decouplev2.py │ │ │ └── conditionalInst_sipmask_track.py │ │ ├── deeplab.py │ │ ├── deformable_detr.py │ │ ├── detr │ │ │ ├── __init__.py │ │ │ ├── detr.py │ │ │ ├── detr_tracking.py │ │ │ └── segmentation.py │ │ ├── dynamic4seg.py │ │ ├── efficientdet.py │ │ ├── fcn.py │ │ ├── fcos │ │ │ ├── __init__.py │ │ │ ├── fcos.py │ │ │ └── fcos_sepc.py │ │ ├── flownet │ │ │ ├── flownetc.py │ │ │ ├── flownets.py │ │ │ ├── two_stream.py │ │ │ └── util.py │ │ ├── imagenet.py │ │ ├── mask_scoring_rcnn.py │ │ ├── moco.py │ │ ├── panoptic_deeplab.py │ │ ├── panoptic_fpn.py │ │ ├── panopticfcn │ │ │ ├── __init__.py │ │ │ ├── gt_generate.py │ │ │ ├── heads.py │ │ │ ├── panoptic_fcn.py │ │ │ └── utils.py │ │ ├── pointrend.py │ │ ├── rcnn.py │ │ ├── reppoints.py │ │ ├── retinanet │ │ │ ├── __init__.py │ │ │ ├── retinanet.py │ │ │ └── retinanet_sepc.py │ │ ├── semantic_seg.py │ │ ├── sipmask.py │ │ ├── solo │ │ │ ├── __init__.py │ │ 
│ ├── solo.py │ │ │ ├── solo_decoupled.py │ │ │ ├── solov2.py │ │ │ └── utils.py │ │ ├── sparsercnn │ │ │ ├── __init__.py │ │ │ └── sparse_rcnn.py │ │ ├── ssd.py │ │ ├── tensormask.py │ │ └── yolov3.py │ ├── nn_utils │ │ ├── __init__.py │ │ ├── activation_count.py │ │ ├── feature_utils.py │ │ ├── flop_count.py │ │ ├── jit_handles.py │ │ ├── parameter_count.py │ │ ├── precise_bn.py │ │ ├── scale_grad.py │ │ └── weight_init.py │ ├── poolers.py │ ├── postprocessing.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── build.py │ │ ├── fcos.py │ │ ├── fcos_outputs.py │ │ ├── fcos_sip_mask.py │ │ ├── proposal_utils.py │ │ ├── rpn.py │ │ ├── rpn_outputs.py │ │ ├── rrpn.py │ │ └── rrpn_outputs.py │ ├── roi_heads │ │ ├── __init__.py │ │ ├── box_head.py │ │ ├── cascade_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── keypoint_head.py │ │ ├── mask_head.py │ │ ├── mask_iou_head.py │ │ ├── mask_scoring_roi_head.py │ │ ├── refine_mask_head.py │ │ ├── refine_roi_head.py │ │ ├── roi_heads.py │ │ ├── roi_heads_visua_hack.py │ │ ├── rotated_fast_rcnn.py │ │ └── track_heads.py │ ├── sampling.py │ └── test_time_augmentation.py ├── solver │ ├── __init__.py │ ├── build.py │ ├── lr_scheduler.py │ ├── optimizer_builder.py │ └── scheduler_builder.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ ├── masks.py │ └── rotated_boxes.py └── utils │ ├── README.md │ ├── __init__.py │ ├── analysis.py │ ├── benchmark │ ├── __init__.py │ ├── benchmark.py │ └── timer.py │ ├── distributed │ ├── __init__.py │ └── comm.py │ ├── dump │ ├── __init__.py │ ├── events.py │ ├── history_buffer.py │ └── logger.py │ ├── env │ ├── __init__.py │ ├── collect_env.py │ └── env.py │ ├── file │ ├── __init__.py │ ├── download.py │ ├── file_io.py │ └── serialize.py │ ├── imports.py │ ├── memory.py │ ├── metrics │ ├── __init__.py │ └── accuracy.py │ ├── registry.py │ └── visualizer │ ├── __init__.py │ ├── colormap.py │ ├── show.py │ ├── video_visualizer.py │ └── visualizer.py ├── datasets ├── README.md ├── components │ └── coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz ├── gen_coco_person.py ├── panoptic2detection_coco_format.py ├── prepare_cocofied_lvis.py ├── prepare_panoptic_fpn.py ├── prepare_thing_sem_from_instance.py └── prepare_thing_sem_from_lvis.py ├── docs ├── bugs.md ├── datasets.md ├── notes.md ├── overview.md ├── results.md └── tricks.md ├── fig └── test.jpg ├── playground └── detection │ ├── cityscapes │ ├── ceseg │ │ ├── boundary_refine_mask_rcnn_r101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn │ │ │ ├── boundary_mask_rcnn.py │ │ │ ├── box_head.py │ │ │ ├── config.py │ │ │ ├── net.py │ │ │ └── rcnn.py │ │ ├── boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn │ │ │ ├── boundary_mask_rcnn.py │ │ │ ├── box_head.py │ │ │ ├── config.py │ │ │ ├── net.py │ │ │ └── rcnn.py │ │ └── boundary_refine_mask_rcnn_rx101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn │ │ │ ├── boundary_mask_rcnn.py │ │ │ ├── box_head.py │ │ │ ├── config.py │ │ │ ├── net.py │ │ │ └── rcnn.py │ ├── pointrend │ │ ├── point_rend_mask_rcnn_R101X │ │ │ ├── config.py │ │ │ └── net.py │ │ ├── point_rend_mask_rcnn_r101 │ │ │ ├── config.py │ │ │ └── net.py │ │ └── point_rend_mask_rcnn_r50 │ │ │ ├── config.py │ │ │ └── net.py │ └── rcnn │ │ ├── mask_rcnn_res101_fpn_coco_ms │ │ ├── README.md │ │ ├── config.py │ │ └── net.py │ │ ├── mask_rcnn_res50_fpn_coco_ms │ │ ├── README.md │ │ ├── config.py │ │ └── net.py │ │ └── mask_rcnn_rx101_fpn_coco_ms │ │ ├── README.md │ │ ├── config.py │ 
│ └── net.py │ └── coco │ └── bs_mask │ ├── boundary_refine_mask_rcnn_r101_ms_1x_3_subgt_warpping_dice_erode_dilate_gn │ ├── boundary_mask_rcnn.py │ ├── box_head.py │ ├── config.py │ ├── net.py │ └── rcnn.py │ ├── boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn │ ├── boundary_mask_rcnn.py │ ├── box_head.py │ ├── config.py │ ├── net.py │ └── rcnn.py │ └── cascade │ ├── boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade │ ├── boundary_mask_rcnn.py │ ├── box_head.py │ ├── config.py │ ├── net.py │ └── rcnn.py │ ├── boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade │ ├── boundary_mask_rcnn.py │ ├── box_head.py │ ├── config.py │ ├── net.py │ └── rcnn.py │ └── boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi │ ├── boundary_mask_rcnn.py │ ├── box_head.py │ ├── config.py │ ├── net.py │ └── rcnn.py └── tools ├── cal_flops.py ├── cat_visualizer_results.py ├── convert_detr2cvpod.py ├── convert_to_d2.py ├── draw_teaser.py ├── image2gif.py ├── rm_files.py ├── test_dis.py ├── test_net.py ├── train_net.py ├── visualize_json_results.py └── visualize_vis_json_results.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/log 2 | 3 | *.jpg 4 | *.png 5 | *.txt 6 | 7 | # compilation and distribution 8 | __pycache__ 9 | _ext 10 | *.pyc 11 | *.so 12 | *.o 13 | cvpods.egg-info/ 14 | build/ 15 | dist/ 16 | wheels/ 17 | 18 | tools/pods_test 19 | tools/pods_train 20 | 21 | # pytorch/python/numpy formats 22 | *.pth 23 | *.pkl 24 | *.npy 25 | 26 | # ipython/jupyter notebooks 27 | *.ipynb 28 | **/.ipynb_checkpoints/ 29 | 30 | # Editor temporaries 31 | *.swn 32 | *.swo 33 | *.swp 34 | *~ 35 | 36 | # Pycharm editor settings 37 | .idea 38 | 39 | # project dirs 40 | /models 41 | 42 | 43 | # exclude 44 | !requirements*.txt 45 | 46 | # tools 47 | tools/visualize_vis_json_results_for_debug.py 48 | tools/visualize_vis_json_results_only_seg.py 49 | tools/useless_tools 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # BoundarySqueeze: Image Segmentation as Boundary Squeezing [[arXiv]](https://arxiv.org/pdf/2105.11668.pdf) 2 | Hao He, Xiangtai Li, Guangliang Cheng, Yunhai Tong, Lubin Weng 3 | 4 | This paper proposes a novel method for high-quality image segmentation of both objects and scenes. 5 | Inspired by the dilation and erosion operations of morphological image processing, it treats pixel-level image segmentation as squeezing object boundaries. 6 | 7 | ### Comparison with PointRend 8 | ![Figure](./fig/test.jpg) 9 | 10 | Our method is built on the [cvpods](https://github.com/Megvii-BaseDetection/cvpods) codebase.
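To make the boundary-squeezing idea concrete, here is a toy sketch (illustrative only, not the method's actual implementation) of how dilation and erosion bound an object mask, assuming only OpenCV and NumPy are available:

```python
# Toy sketch of the erode/dilate intuition behind boundary squeezing.
# Illustrative only: in BoundarySqueeze the squeezing itself is learned.
import cv2
import numpy as np

mask = np.zeros((64, 64), dtype=np.uint8)
cv2.circle(mask, (32, 32), 20, 1, -1)             # a coarse binary object mask

kernel = np.ones((3, 3), dtype=np.uint8)
dilated = cv2.dilate(mask, kernel, iterations=2)  # outer bound of the object
eroded = cv2.erode(mask, kernel, iterations=2)    # inner bound of the object

# The band between the two bounds covers the uncertain pixels near the contour;
# squeezing this band toward the true boundary refines the segmentation.
boundary_band = dilated - eroded
```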
11 | 12 | 13 | ## Install, Training and Testing 14 | 15 | ```shell 16 | 17 | # Install from a local clone: 18 | git clone https://github.com/lxtGH/BSSeg 19 | cd BSSeg 20 | 21 | pip install -r requirements.txt 22 | 23 | python setup.py build develop 24 | 25 | # Prepare the data path 26 | ln -s /path/to/your/coco/dataset datasets/coco 27 | 28 | # Enter a specific experiment dir 29 | cd playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn 30 | 31 | 32 | # Train 33 | pods_train --num-gpus 8 34 | # Test (the MODEL.WEIGHTS and OUTPUT_DIR overrides are optional) 35 | pods_test --num-gpus 8 \ 36 | MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth \ 37 | OUTPUT_DIR /path/to/your/save_dir 38 | 39 | # Multi-node training 40 | ## ifconfig is provided by net-tools: sudo apt install net-tools 41 | pods_train --num-gpus 8 --num-machines N --machine-rank 0/1/.../N-1 --dist-url "tcp://MASTER_IP:port" 42 | ``` 43 | 44 | If you find this codebase useful in your research, please consider citing the paper and the original codebase. 45 | 46 | ```BibTeX 47 | @misc{he2021boundarysqueeze, 48 | title={BoundarySqueeze: Image Segmentation as Boundary Squeezing}, 49 | author={Hao He and Xiangtai Li and Guangliang Cheng and Yunhai Tong and Lubin Weng}, 50 | year={2021}, 51 | eprint={2105.11668}, 52 | archivePrefix={arXiv}, 53 | primaryClass={cs.CV} 54 | } 55 | 56 | @misc{zhu2020cvpods, 57 | title={cvpods: All-in-one Toolbox for Computer Vision Research}, 58 | author={Zhu*, Benjin and Wang*, Feng and Wang, Jianfeng and Yang, Siwei and Chen, Jianhu and Li, Zeming}, 59 | year={2020} 60 | } 61 | ``` 62 | -------------------------------------------------------------------------------- /cvpods/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils import setup_environment 4 | 5 | setup_environment() 6 | 7 | # This line will be programmatically read/written by setup.py. 8 | # Leave it at the bottom of this file and don't touch it. 9 | __version__ = "0.1" 10 | -------------------------------------------------------------------------------- /cvpods/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | 6 | from .
import catalog as _UNUSED # register the handler 7 | from .checkpoint import Checkpointer, PeriodicCheckpointer 8 | from .detection_checkpoint import DetectionCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /cvpods/configs/base_classification_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : base_classification_config.py 5 | @Time : 2020/05/07 23:56:17 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:17 10 | ''' 11 | 12 | from cvpods.configs.base_config import BaseConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | WEIGHTS="", 17 | PIXEL_MEAN=[0.406, 0.456, 0.485], # BGR 18 | PIXEL_STD=[0.225, 0.224, 0.229], 19 | BACKBONE=dict(FREEZE_AT=-1, ), # do not freeze 20 | RESNETS=dict( 21 | NUM_CLASSES=None, 22 | DEPTH=None, 23 | OUT_FEATURES=["linear"], 24 | NUM_GROUPS=1, 25 | # Options: FrozenBN, GN, "SyncBN", "BN" 26 | NORM="BN", 27 | ACTIVATION=dict( 28 | NAME="ReLU", 29 | INPLACE=True, 30 | ), 31 | # Whether to init the last BN weight of each BasicBlock or BottleneckBlock to 0 32 | ZERO_INIT_RESIDUAL=True, 33 | WIDTH_PER_GROUP=64, 34 | # Use True only for the original MSRA ResNet; use False for C2 and Torch models 35 | STRIDE_IN_1X1=False, 36 | RES5_DILATION=1, 37 | RES2_OUT_CHANNELS=256, 38 | STEM_OUT_CHANNELS=64, 39 | DEFORM_ON_PER_STAGE=[False, False, False, False], 40 | DEFORM_MODULATED=False, 41 | DEFORM_NUM_GROUPS=1, 42 | 43 | # Deep Stem 44 | DEEP_STEM=False, 45 | # Apply avg after conv2 in the BottleneckBlock 46 | # When AVD=True, STRIDE_IN_1X1 should be False 47 | AVD=False, 48 | # Apply avg_down to the downsampling layer for the residual path 49 | AVG_DOWN=False, 50 | # Radix in ResNeSt 51 | RADIX=1, 52 | # Bottleneck_width in ResNeSt 53 | BOTTLENECK_WIDTH=64, 54 | ), 55 | ), 56 | SOLVER=dict( 57 | IMS_PER_DEVICE=32, # default: 8 GPUs x 32 = 256 58 | ), 59 | ) 60 | 61 | 62 | class BaseClassificationConfig(BaseConfig): 63 | def __init__(self): 64 | super(BaseClassificationConfig, self).__init__() 65 | self._register_configuration(_config_dict) 66 | 67 | 68 | config = BaseClassificationConfig() 69 | -------------------------------------------------------------------------------- /cvpods/configs/centernet_config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from .base_detection_config import BaseDetectionConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | # WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-18.pth", 8 | WEIGHTS="", 9 | MASK_ON=False, 10 | RESNETS=dict( 11 | DEPTH=50, 12 | OUT_FEATURES=["res5"] 13 | ), 14 | CENTERNET=dict( 15 | DECONV_CHANNEL=[2048, 256, 128, 64], 16 | DECONV_KERNEL=[4, 4, 4], 17 | NUM_CLASSES=80, 18 | MODULATE_DEFORM=True, 19 | BIAS_VALUE=-2.19, 20 | DOWN_SCALE=4, 21 | MIN_OVERLAP=0.7, 22 | TENSOR_DIM=128, 23 | ), 24 | LOSS=dict( 25 | CLS_WEIGHT=1, 26 | WH_WEIGHT=0.1, 27 | REG_WEIGHT=1, 28 | ), 29 | ), 30 | INPUT=dict( 31 | AUG=dict( 32 | TRAIN_PIPELINES=[ 33 | ('CenterAffine', dict( 34 | boarder=128, 35 | output_size=(512, 512), 36 | random_aug=True)), 37 | ('RandomFlip', dict()), 38 | ('RandomBrightness', dict(intensity_min=0.6, intensity_max=1.4)), 39 | ('RandomContrast', dict(intensity_min=0.6, intensity_max=1.4)), 40 | ('RandomSaturation',
dict(intensity_min=0.6, intensity_max=1.4)), 41 | ('RandomLighting', dict(scale=0.1)), 42 | ], 43 | TEST_PIPELINES=[ 44 | ], 45 | ), 46 | OUTPUT_SIZE=(128, 128), 47 | ), 48 | DATALOADER=dict( 49 | NUM_WORKERS=4, 50 | ), 51 | DATASETS=dict( 52 | TRAIN=("coco_2017_train",), 53 | TEST=("coco_2017_val",), 54 | ), 55 | SOLVER=dict( 56 | OPTIMIZER=dict( 57 | NAME="SGD", 58 | BASE_LR=0.02, 59 | WEIGHT_DECAY=1e-4, 60 | ), 61 | LR_SCHEDULER=dict( 62 | GAMMA=0.1, 63 | STEPS=(81000, 108000), 64 | MAX_ITER=126000, 65 | WARMUP_ITERS=1000, 66 | ), 67 | IMS_PER_BATCH=128, 68 | IMS_PER_DEVICE=16 69 | ), 70 | OUTPUT_DIR=osp.join( 71 | '/data/Outputs/model_logs/playground', 72 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 73 | ), 74 | GLOBAL=dict(DUMP_TEST=False), 75 | ) 76 | 77 | 78 | class CenterNetConfig(BaseDetectionConfig): 79 | def __init__(self): 80 | super(CenterNetConfig, self).__init__() 81 | self._register_configuration(_config_dict) 82 | 83 | 84 | config = CenterNetConfig() 85 | -------------------------------------------------------------------------------- /cvpods/configs/centernet_det2_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : centernet_det2_config.py 5 | @Author : Xiangtai Li 6 | ''' 7 | 8 | 9 | from .rcnn_config import RCNNConfig 10 | 11 | _config_dict = dict( 12 | DEBUG=False, 13 | SAVE_DEBUG=False, 14 | SAVE_PTH=False, 15 | VIS_THRESH=0.3, 16 | DEBUG_SHOW_NAME=False, 17 | 18 | MODEL=dict( 19 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"],), 20 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 21 | CENTERNET=dict( 22 | NUM_CLASSES=80, 23 | IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"], 24 | FPN_STRIDES = [8, 16, 32, 64, 128], 25 | PRIOR_PROB = 0.01, 26 | CENTER_NMS = False, 27 | NMS_TH_TRAIN = 0.6, 28 | 29 | NMS_TH_TEST = 0.6, 30 | INFERENCE_TH = 0.05, 31 | PRE_NMS_TOPK_TRAIN = 1000, 32 | POST_NMS_TOPK_TRAIN = 100, 33 | PRE_NMS_TOPK_TEST = 1000, 34 | POST_NMS_TOPK_TEST = 100, 35 | NORM = "GN", 36 | USE_DEFORMABLE = False, 37 | NUM_CLS_CONVS = 4, 38 | NUM_BOX_CONVS = 4, 39 | NUM_SHARE_CONVS = 0, 40 | LOC_LOSS_TYPE = 'giou', 41 | SIGMOID_CLAMP = 1e-4, 42 | HM_MIN_OVERLAP = 0.8, 43 | MIN_RADIUS = 4, 44 | SOI = [[0, 80], [64, 160], [128, 320], [256, 640], [512, 10000000]], 45 | POS_WEIGHT = 1., 46 | NEG_WEIGHT = 1., 47 | REG_WEIGHT = 2., 48 | HM_FOCAL_BETA = 4, 49 | HM_FOCAL_ALPHA = 0.25, 50 | LOSS_GAMMA = 2.0, 51 | WITH_AGN_HM = False, 52 | ONLY_PROPOSAL = False, 53 | AS_PROPOSAL = False, 54 | IGNORE_HIGH_FP = -1., 55 | MORE_POS = False, 56 | MORE_POS_THRESH = 0.2, 57 | MORE_POS_TOPK = 9, 58 | NOT_NORM_REG = True, 59 | NOT_NMS = False, 60 | ), 61 | 62 | BIFPN=dict( 63 | NUM_LEVELS=5, 64 | NUM_BIFPN=6, 65 | NORM='GN', 66 | OUT_CHANNELS=160, 67 | SEPARABLE_CONV=False, 68 | ), 69 | 70 | ), 71 | ) 72 | 73 | 74 | class CenterNetDet2Config(RCNNConfig): 75 | def __init__(self): 76 | super(CenterNetDet2Config, self).__init__() 77 | self._register_configuration(_config_dict) 78 | 79 | 80 | config = CenterNetDet2Config() 81 | -------------------------------------------------------------------------------- /cvpods/configs/deeplab_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | 4 | _config_dict = dict( 5 | BACKBONE=dict(FREEZE_AT=0,), 6 | MODEL=dict( 7 | MASK_ON=False, 8 | LOAD_PROPOSALS=False, 9 | RESNETS=dict( 10 | NORM="nnSyncBN",
11 | OUT_FEATURES=["res5"], 12 | RES4_DILATION=1, 13 | RES5_DILATION=2, 14 | RES5_MULTI_GRID = [1, 2, 4], 15 | STEM_TYPE="deeplabv3_r50" 16 | ), 17 | SEM_SEG_HEAD=dict( 18 | # NAME="Deeplabv3Head", 19 | IGNORE_VALUE=255, 20 | # Number of classes in the semantic segmentation head 21 | NUM_CLASSES=19, 22 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 23 | LOSS_TYPE="hard_pixel_mining", 24 | PROJECT_FEATURES=["res2"], 25 | PROJECT_CHANNELS=[48], 26 | ASPP_CHANNELS=256, 27 | CONVS_DIM=256, 28 | ASPP_DILATIONS=[6, 12, 18], 29 | ASPP_DROPOUT=0.1, 30 | USE_DEPTHWISE_SEPARABLE_CONV=False, 31 | COMMON_STRIDE=16, 32 | NORM="GN", 33 | LOSS_WEIGHT=1.0, 34 | ), 35 | ), 36 | 37 | SOLVER=dict( 38 | LR_SCHEDULER=dict( 39 | NAME="WarmupPolyLR", 40 | WARMUP_FACTOR=1.0 / 100, 41 | MAX_ITER=90000, 42 | ), 43 | POLY_LR_CONSTANT_ENDING=0.0, 44 | POLY_LR_POWER=0.9, 45 | ) 46 | 47 | ) 48 | 49 | 50 | class SegmentationConfig(BaseDetectionConfig): 51 | def __init__(self): 52 | super(SegmentationConfig, self).__init__() 53 | self._register_configuration(_config_dict) 54 | 55 | 56 | config = SegmentationConfig() 57 | -------------------------------------------------------------------------------- /cvpods/configs/dynamic_routing_config.py: -------------------------------------------------------------------------------- 1 | from .base_config import BaseConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | LOAD_PROPOSALS=False, 6 | MASK_ON=False, 7 | KEYPOINT_ON=False, 8 | BACKBONE=dict(FREEZE_AT=0,), 9 | RESNETS=dict( 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | NORM="nnSyncBN", 12 | NUM_GROUPS=1, 13 | WIDTH_PER_GROUP=64, 14 | STRIDE_IN_1X1=True, 15 | RES5_DILATION=1, 16 | RES2_OUT_CHANNELS=256, 17 | STEM_OUT_CHANNELS=64, 18 | DEFORM_ON_PER_STAGE=[False, False, False, False], 19 | DEFORM_MODULATED=False, 20 | DEFORM_NUM_GROUPS=1, 21 | ), 22 | FPN=dict( 23 | IN_FEATURES=[], 24 | OUT_CHANNELS=256, 25 | NORM="", 26 | FUSE_TYPE="sum", 27 | ), 28 | SEM_SEG_HEAD=dict( 29 | # NAME="SemSegFPNHead", 30 | IN_FEATURES=[], 31 | IGNORE_VALUE=255, 32 | NUM_CLASSES=(), 33 | CONVS_DIM=256, 34 | COMMON_STRIDE=(), 35 | NORM="GN", 36 | LOSS_WEIGHT=1.0, 37 | ), 38 | SOLVER=dict( 39 | LR_SCHEDULER=dict( 40 | NAME="PolyLR", 41 | POLY_POWER=0.9, 42 | MAX_ITER=40000, 43 | WARMUP_ITERS=1000, 44 | WARMUP_FACTOR=0.001, 45 | WARMUP_METHOD="linear", 46 | ), 47 | OPTIMIZER=dict(BASE_LR=0.01, ), 48 | IMS_PER_BATCH=16, 49 | CHECKPOINT_PERIOD=5000, 50 | ), 51 | TEST=dict(PRECISE_BN=dict(ENABLED=True), ), 52 | ), 53 | ) 54 | 55 | 56 | class SemanticSegmentationConfig(BaseConfig): 57 | def __init__(self): 58 | super(SemanticSegmentationConfig, self).__init__() 59 | self._register_configuration(_config_dict) 60 | 61 | 62 | config = SemanticSegmentationConfig() 63 | -------------------------------------------------------------------------------- /cvpods/configs/efficientdet_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | PIXEL_MEAN=[0.485, 0.456, 0.406], # mean value from ImageNet 6 | PIXEL_STD=[0.229, 0.224, 0.225], 7 | EFFICIENTNET=dict( 8 | MODEL_NAME="efficientnet-b0", # default setting for EfficientDet-D0 9 | NORM="BN", 10 | BN_MOMENTUM=1 - 0.99, 11 | BN_EPS=1e-3, 12 | DROP_CONNECT_RATE=1 - 0.8, # survival_prob = 0.8 13 | DEPTH_DIVISOR=8, 14 | MIN_DEPTH=None, 15 | NUM_CLASSES=None, 16 | FIX_HEAD_STEAM=False, 17 | MEMORY_EFFICIENT_SWISH=True, 18 | 
OUT_FEATURES=["stage4", "stage6", "stage8"], 19 | ), 20 | BIFPN=dict( 21 | IN_FEATURES=["stage4", "stage6", "stage8"], 22 | NORM="BN", 23 | BN_MOMENTUM=0.01, # 1 - 0.99 24 | BN_EPS=1e-3, 25 | MEMORY_EFFICIENT_SWISH=True, 26 | INPUT_SIZE=512, # default setting for EfficientDet-D0 27 | NUM_LAYERS=3, # default setting for EfficientDet-D0 28 | OUT_CHANNELS=60, # default setting for EfficientDet-D0 29 | FUSE_TYPE="fast", # select in ["softmax", "fast", "sum"] 30 | ), 31 | EFFICIENTDET=dict( 32 | IN_FEATURES=[f"p{i}" for i in range(3, 8)], # p3-p7 33 | NUM_CLASSES=80, 34 | FREEZE_BACKBONE=False, 35 | FREEZE_BN=False, 36 | HEAD=dict( 37 | NUM_CONV=3, # default setting for EfficientDet-D0 38 | NORM="BN", 39 | BN_MOMENTUM=1 - 0.99, 40 | BN_EPS=1e-3, 41 | PRIOR_PROB=0.01, 42 | MEMORY_EFFICIENT_SWISH=True, 43 | ), 44 | IOU_THRESHOLDS=[0.5, 0.5], 45 | IOU_LABELS=[0, -1, 1], 46 | SCORE_THRESH_TEST=0.05, 47 | TOPK_CANDIDATES_TEST=1000, 48 | NMS_THRESH_TEST=0.5, 49 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 50 | FOCAL_LOSS_GAMMA=1.5, 51 | FOCAL_LOSS_ALPHA=0.25, 52 | SMOOTH_L1_LOSS_BETA=0.1, 53 | REG_NORM=4.0, 54 | BOX_LOSS_WEIGHT=50.0, 55 | ), 56 | ANCHOR_GENERATOR=dict( 57 | SIZES=[ 58 | [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] 59 | for x in [4 * 2**i for i in range(3, 8)] 60 | ] 61 | ), 62 | ), 63 | ) 64 | 65 | 66 | class EfficientDetConfig(BaseDetectionConfig): 67 | def __init__(self): 68 | super(EfficientDetConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = EfficientDetConfig() 73 | -------------------------------------------------------------------------------- /cvpods/configs/fcos_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : fcos_config.py 5 | @Time : 2020/05/07 23:56:09 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:09 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # META_ARCHITECTURE="RetinaNet", 17 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 18 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 19 | FCOS=dict( 20 | NUM_CLASSES=80, 21 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 22 | NUM_CONVS=4, 23 | BUDGET_LOSS_LAMBDA=0.0, 24 | SHARE_CONVS=0, 25 | FPN_STRIDES=[8, 16, 32, 64, 128], 26 | PRIOR_PROB=0.01, 27 | CENTERNESS_ON_REG=False, 28 | NORM_REG_TARGETS=False, 29 | SCORE_THRESH_TEST=0.05, 30 | TOPK_CANDIDATES_TEST=1000, 31 | NMS_THRESH_TEST=0.6, 32 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 33 | FOCAL_LOSS_GAMMA=2.0, 34 | FOCAL_LOSS_ALPHA=0.25, 35 | IOU_LOSS_TYPE="iou", 36 | CENTER_SAMPLING_RADIUS=0.0, 37 | OBJECT_SIZES_OF_INTEREST=[ 38 | [-1, 64], 39 | [64, 128], 40 | [128, 256], 41 | [256, 512], 42 | [512, float("inf")], 43 | ], 44 | ), 45 | ), 46 | ) 47 | 48 | 49 | class FCOSConfig(BaseDetectionConfig): 50 | def __init__(self): 51 | super(FCOSConfig, self).__init__() 52 | self._register_configuration(_config_dict) 53 | 54 | 55 | config = FCOSConfig() 56 | -------------------------------------------------------------------------------- /cvpods/configs/fcos_sepc_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : fcos_config.py 5 | @Time : 2020/05/07 23:56:09 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified 
by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:09 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 17 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 18 | FCOS=dict( 19 | NUM_CLASSES=80, 20 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 21 | NUM_CONVS=0, 22 | FPN_STRIDES=[8, 16, 32, 64, 128], 23 | PRIOR_PROB=0.01, 24 | CENTERNESS_ON_REG=False, 25 | NORM_REG_TARGETS=False, 26 | SCORE_THRESH_TEST=0.05, 27 | TOPK_CANDIDATES_TEST=1000, 28 | NMS_THRESH_TEST=0.6, 29 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 30 | FOCAL_LOSS_GAMMA=2.0, 31 | FOCAL_LOSS_ALPHA=0.25, 32 | IOU_LOSS_TYPE="iou", 33 | CENTER_SAMPLING_RADIUS=0.0, 34 | OBJECT_SIZES_OF_INTEREST=[ 35 | [-1, 64], 36 | [64, 128], 37 | [128, 256], 38 | [256, 512], 39 | [512, float("inf")], 40 | ], 41 | ), 42 | SEPC=dict( 43 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 44 | IN_CHANNELS=[256, 256, 256, 256, 256], 45 | OUT_CHANNELS=256, 46 | NUM_OUTS=5, 47 | COMBINE_DEFORM=False, 48 | EXTRA_DEFORM=False, 49 | COMBINE_NUM=4, 50 | IBN=False, 51 | ) 52 | ), 53 | ) 54 | 55 | class FCOSSPECConfig(BaseDetectionConfig): 56 | def __init__(self): 57 | super(FCOSSPECConfig, self).__init__() 58 | self._register_configuration(_config_dict) 59 | 60 | 61 | config = FCOSSPECConfig() 62 | -------------------------------------------------------------------------------- /cvpods/configs/keypoint_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | ROI_KEYPOINT_HEAD=dict( 6 | NAME="KRCNNConvDeconvUpsampleHead", 7 | POOLER_RESOLUTION=14, 8 | POOLER_SAMPLING_RATIO=0, 9 | CONV_DIMS=tuple(512 for _ in range(8)), 10 | NUM_KEYPOINTS=17, # 17 is the number of keypoints in COCO 11 | # Images with too few (or no) keypoints are excluded from training. 12 | MIN_KEYPOINTS_PER_IMAGE=1, 13 | # Normalize by the total number of visible keypoints in the minibatch if True. 14 | # Otherwise, normalize by the total number of keypoints that could ever exist 15 | # in the minibatch. 16 | # The keypoint softmax loss is only calculated on visible keypoints. 17 | # Since the number of visible keypoints can vary significantly between 18 | # minibatches, this has the effect of up-weighting the importance of 19 | # minibatches with few visible keypoints. (Imagine the extreme case of 20 | # only one visible keypoint versus N: in the case of N, each one 21 | # contributes 1/N to the gradient compared to the single keypoint 22 | # determining the gradient direction). Instead, we can normalize the 23 | # loss by the total number of keypoints, if it were the case that all 24 | # keypoints were visible in a full minibatch. (Returning to the example, 25 | # this means that the one visible keypoint contributes as much as each 26 | # of the N keypoints.) 
27 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS=True, 28 | # Multi-task loss weight to use for keypoints 29 | # Recommended values: 30 | # - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True 31 | # - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False 32 | LOSS_WEIGHT=1.0, 33 | # Type of pooling operation applied to the incoming feature map for each RoI 34 | POOLER_TYPE="ROIAlignV2", 35 | ), 36 | ) 37 | ) 38 | 39 | 40 | class KeypointConfig(BaseDetectionConfig): 41 | def __init__(self): 42 | super(KeypointConfig, self).__init__() 43 | self._register_configuration(_config_dict) 44 | 45 | 46 | config = KeypointConfig() 47 | -------------------------------------------------------------------------------- /cvpods/configs/panoptic_fpn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | from .rcnn_config import RCNNConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],), 9 | FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]), 10 | ANCHOR_GENERATOR=dict( 11 | SIZES=[[32], [64], [128], [256], [512]], ASPECT_RATIOS=[[0.5, 1.0, 2.0]], 12 | ), 13 | RPN=dict( 14 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 15 | PRE_NMS_TOPK_TRAIN=2000, 16 | PRE_NMS_TOPK_TEST=1000, 17 | POST_NMS_TOPK_TRAIN=1000, 18 | POST_NMS_TOPK_TEST=1000, 19 | ), 20 | ROI_HEADS=dict( 21 | # NAME: "StandardROIHeads" 22 | IN_FEATURES=["p2", "p3", "p4", "p5"], 23 | ), 24 | ROI_BOX_HEAD=dict( 25 | # NAME: "FastRCNNConvFCHead" 26 | NUM_FC=2, 27 | POOLER_RESOLUTION=7, 28 | ), 29 | ROI_MASK_HEAD=dict( 30 | # NAME: "MaskRCNNConvUpsampleHead" 31 | NUM_CONV=4, 32 | POOLER_RESOLUTION=14, 33 | ), 34 | SEM_SEG_HEAD=dict( 35 | # NAME="SemSegFPNHead", 36 | IN_FEATURES=["p2", "p3", "p4", "p5"], 37 | # Label in the semantic segmentation ground truth that is ignored, 38 | # i.e., no loss is calculated for the corresponding pixel. 39 | IGNORE_VALUE=255, 40 | # Number of classes in the semantic segmentation head 41 | NUM_CLASSES=54, 42 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 43 | CONVS_DIM=128, 44 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 45 | COMMON_STRIDE=4, 46 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 47 | NORM="GN", 48 | LOSS_WEIGHT=1.0, 49 | ), 50 | PANOPTIC_FPN=dict( 51 | # Scaling of all losses from instance detection / segmentation head. 52 | INSTANCE_LOSS_WEIGHT=1.0, 53 | # options when combining instance & semantic segmentation outputs 54 | COMBINE=dict( 55 | ENABLED=True, 56 | OVERLAP_THRESH=0.5, 57 | STUFF_AREA_LIMIT=4096, 58 | INSTANCES_CONFIDENCE_THRESH=0.5, 59 | ), 60 | ), 61 | 62 | ), 63 | ) 64 | 65 | 66 | class PANFPNConfig(RCNNConfig): 67 | def __init__(self): 68 | super(PANFPNConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = PANFPNConfig() 73 | -------------------------------------------------------------------------------- /cvpods/configs/pointrend_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) BaseDetection, Inc. and its affiliates.
All Rights Reserved 4 | 5 | from .rcnn_fpn_config import RCNNFPNConfig 6 | 7 | _config_dict = dict( 8 | MODEL=dict( 9 | ROI_HEADS=dict( 10 | # NAME="PointRendROIHeads", 11 | IN_FEATURES=["p2", "p3", "p4", "p5"], 12 | ), 13 | ROI_BOX_HEAD=dict( 14 | TRAIN_ON_PRED_BOXES=True, 15 | ), 16 | ROI_MASK_HEAD=dict( 17 | # NAME="CoarseMaskHead", 18 | # Names of the input feature maps to be used by a coarse mask head. 19 | IN_FEATURES=["p2"], 20 | FC_DIM=1024, 21 | NUM_FC=2, 22 | # The side size of a coarse mask head prediction. 23 | OUTPUT_SIDE_RESOLUTION=7, 24 | # True if point head is used. 25 | POINT_HEAD_ON=True, 26 | ), 27 | POINT_HEAD=dict( 28 | # Names of the input feature maps to be used by a mask point head. 29 | IN_FEATURES=["p2"], 30 | NUM_CLASSES=80, 31 | FC_DIM=256, 32 | NUM_FC=3, 33 | # Number of points sampled during training for a mask point head. 34 | TRAIN_NUM_POINTS=14 * 14, 35 | # Oversampling parameter for PointRend point sampling during training. 36 | # Parameter `k` in the original paper. 37 | OVERSAMPLE_RATIO=3, 38 | # Importance sampling parameter for PointRend point sampling during training. 39 | # Parameter `beta` in the original paper. 40 | IMPORTANCE_SAMPLE_RATIO=0.75, 41 | # Number of subdivision steps during inference. 42 | SUBDIVISION_STEPS=5, 43 | # Maximum number of points selected at each subdivision step (N). 44 | SUBDIVISION_NUM_POINTS=28 * 28, 45 | CLS_AGNOSTIC_MASK=False, 46 | # If True, then coarse prediction features are used as input for each layer 47 | # in PointRend's MLP. 48 | COARSE_PRED_EACH_LAYER=True, 49 | # COARSE_SEM_SEG_HEAD_NAME="SemSegFPNHead" 50 | ), 51 | ), 52 | INPUT=dict( 53 | # PointRend for instance segmentation does not work with "polygon" mask_format 54 | MASK_FORMAT="bitmask", 55 | ), 56 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 57 | ) 58 | 59 | 60 | class PointRendRCNNFPNConfig(RCNNFPNConfig): 61 | def __init__(self): 62 | super(PointRendRCNNFPNConfig, self).__init__() 63 | self._register_configuration(_config_dict) 64 | 65 | 66 | config = PointRendRCNNFPNConfig() 67 | -------------------------------------------------------------------------------- /cvpods/configs/rcnn_fpn_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | 4 | from .rcnn_config import RCNNConfig 5 | 6 | _config_dict = dict( 7 | MODEL=dict( 8 | RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],), 9 | FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]), 10 | ANCHOR_GENERATOR=dict( 11 | SIZES=[[32], [64], [128], [256], [512]], ASPECT_RATIOS=[[0.5, 1.0, 2.0]], 12 | ), 13 | RPN=dict( 14 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 15 | PRE_NMS_TOPK_TRAIN=2000, 16 | PRE_NMS_TOPK_TEST=1000, 17 | POST_NMS_TOPK_TRAIN=1000, 18 | POST_NMS_TOPK_TEST=1000, 19 | ), 20 | ROI_HEADS=dict( 21 | # NAME: "StandardROIHeads" 22 | IN_FEATURES=["p2", "p3", "p4", "p5"], 23 | ), 24 | ROI_BOX_HEAD=dict( 25 | # NAME: "FastRCNNConvFCHead" 26 | NUM_FC=2, 27 | POOLER_RESOLUTION=7, 28 | ), 29 | ROI_MASK_HEAD=dict( 30 | # NAME: "MaskRCNNConvUpsampleHead" 31 | NUM_CONV=4, 32 | POOLER_RESOLUTION=14, 33 | ), 34 | ROI_TRACK_HEAD=dict( 35 | # NAME: "TrackHead" 36 | POOLER_RESOLUTION=7, 37 | PID_WEIGHT=-1, 38 | ), 39 | ), 40 | ) 41 | 42 | 43 | class RCNNFPNConfig(RCNNConfig): 44 | def __init__(self): 45 | super(RCNNFPNConfig, self).__init__() 46 | self._register_configuration(_config_dict) 47 | 48 | 49 | config = RCNNFPNConfig() 50 |
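All of the config files above share one pattern: a plain `_config_dict` of overrides registered onto a parent config class via `_register_configuration`. A playground experiment's `config.py` composes on top of them in exactly the same way before `pods_train` is run from the experiment directory (see the README). The sketch below is hypothetical, not a file from the repo:

```python
# Hypothetical playground config.py, following the pattern of the files above.
from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/path/to/your/pretrained.pth",  # assumed local checkpoint path
        MASK_ON=True,
        RESNETS=dict(DEPTH=50),
    ),
    DATASETS=dict(
        TRAIN=("coco_2017_train",),
        TEST=("coco_2017_val",),
    ),
)


class CustomRCNNFPNConfig(RCNNFPNConfig):
    def __init__(self):
        super(CustomRCNNFPNConfig, self).__init__()
        # Merge the experiment-level overrides into the inherited defaults.
        self._register_configuration(_config_dict)


config = CustomRCNNFPNConfig()
```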
-------------------------------------------------------------------------------- /cvpods/configs/retinanet_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : retinanet_config.py 5 | @Time : 2020/05/07 23:56:02 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:02 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # Backbone NAME: "build_retinanet_resnet_fpn_backbone" 17 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 18 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 19 | ANCHOR_GENERATOR=dict( 20 | SIZES=[ 21 | [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] 22 | for x in [32, 64, 128, 256, 512] 23 | ] 24 | ), 25 | RETINANET=dict( 26 | # This is the number of foreground classes. 27 | NUM_CLASSES=80, 28 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 29 | # Convolutions to use in the cls and bbox tower 30 | # NOTE: this doesn't include the last conv for logits 31 | NUM_CONVS=4, 32 | # IoU overlap ratio [bg, fg] for labeling anchors. 33 | # Anchors with < bg are labeled negative (0) 34 | # Anchors with >= bg and < fg are ignored (-1) 35 | # Anchors with >= fg are labeled positive (1) 36 | IOU_THRESHOLDS=[0.4, 0.5], 37 | IOU_LABELS=[0, -1, 1], 38 | # Prior prob for rare case (i.e. foreground) at the beginning of training. 39 | # This is used to set the bias for the logits layer of the classifier subnet. 40 | # This improves training stability in the case of heavy class imbalance. 41 | PRIOR_PROB=0.01, 42 | # Inference cls score threshold, only anchors with score > INFERENCE_TH are 43 | # considered for inference (to improve speed) 44 | SCORE_THRESH_TEST=0.05, 45 | TOPK_CANDIDATES_TEST=1000, 46 | NMS_THRESH_TEST=0.5, 47 | # Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets 48 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 49 | # Loss parameters 50 | FOCAL_LOSS_GAMMA=2.0, 51 | FOCAL_LOSS_ALPHA=0.25, 52 | SMOOTH_L1_LOSS_BETA=0.1, 53 | ), 54 | ), 55 | ) 56 | 57 | 58 | class RetinaNetConfig(BaseDetectionConfig): 59 | def __init__(self): 60 | super(RetinaNetConfig, self).__init__() 61 | self._register_configuration(_config_dict) 62 | 63 | 64 | config = RetinaNetConfig() 65 | -------------------------------------------------------------------------------- /cvpods/configs/solo_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | MASK_ON=True, 6 | PIXEL_MEAN=[103.530, 116.280, 123.675], # BGR FORMAT 7 | PIXEL_STD=[1.0, 1.0, 1.0], 8 | RESNETS=dict( 9 | DEPTH=50, 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | ), 12 | FPN=dict( 13 | IN_FEATURES=["res2", "res3", "res4", "res5"], 14 | OUT_CHANNELS=256, 15 | ), 16 | SOLO=dict( 17 | NUM_CLASSES=80, 18 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 19 | NUM_GRIDS=[40, 36, 24, 16, 12], # per level 20 | SCALE_RANGES=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), 21 | FEATURE_STRIDES=[8, 8, 16, 32, 32], 22 | # Given a gt: (cx, cy, w, h), the center region is controlled by 23 | # constant scale factors sigma: (cx, cy, sigma*w, sigma*h) 24 | SIGMA=0.2, 25 | HEAD=dict( 26 | TYPE="SOLOHead", # "SOLOHead", "DecoupledSOLOHead" 27 | SEG_FEAT_CHANNELS=256, 28 | STACKED_CONVS=7, 29 | PRIOR_PROB=0.01, 30 | 
NORM="GN", 31 | # The following two items are useful in the "DecoupledSOLOLightHead" 32 | USE_DCN_IN_TOWER=False, 33 | DCN_TYPE=None, 34 | ), 35 | # Loss parameters: 36 | LOSS_INS=dict( 37 | TYPE='DiceLoss', 38 | LOSS_WEIGHT=3.0 39 | ), 40 | LOSS_CAT=dict( 41 | TYPE='FocalLoss', 42 | GAMMA=2.0, 43 | ALPHA=0.25, 44 | LOSS_WEIGHT=1.0, 45 | ), 46 | # Inference parameters: 47 | SCORE_THRESH_TEST=0.1, 48 | MASK_THRESH_TEST=0.5, 49 | # NMS parameters: 50 | NMS_PER_IMAGE=500, 51 | NMS_KERNEL='gaussian', # gaussian/linear 52 | NMS_SIGMA=2.0, 53 | UPDATE_THRESH=0.05, 54 | DETECTIONS_PER_IMAGE=100, 55 | ), 56 | ), 57 | INPUT=dict( 58 | # SOLO for instance segmenation does not work with "polygon" mask_format 59 | MASK_FORMAT="bitmask", 60 | ) 61 | ) 62 | 63 | 64 | class SOLOConfig(BaseDetectionConfig): 65 | def __init__(self): 66 | super(SOLOConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = SOLOConfig() 71 | -------------------------------------------------------------------------------- /cvpods/configs/solov2_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | MASK_ON=True, 6 | PIXEL_MEAN=[103.530, 116.280, 123.675], # BGR FORMAT 7 | PIXEL_STD=[1.0, 1.0, 1.0], 8 | RESNETS=dict( 9 | DEPTH=50, 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | ), 12 | FPN=dict( 13 | IN_FEATURES=["res2", "res3", "res4", "res5"], 14 | OUT_CHANNELS=256, 15 | ), 16 | SOLOV2=dict( 17 | # Instance hyper-parameters 18 | INSTANCE_IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 19 | FPN_INSTANCE_STRIDES=[8, 8, 16, 32, 32], 20 | FPN_SCALE_RANGES=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), 21 | SIGMA=0.2, 22 | # Channel size for the instance head. 23 | INSTANCE_IN_CHANNELS=256, 24 | INSTANCE_CHANNELS=512, 25 | # Convolutions to use in the instance head. 26 | NUM_INSTANCE_CONVS=4, 27 | USE_DCN_IN_INSTANCE=False, 28 | TYPE_DCN='DCN', 29 | NUM_GRIDS=[40, 36, 24, 16, 12], 30 | # Number of foreground classes. 31 | NUM_CLASSES=80, 32 | NUM_KERNELS=256, 33 | NORM="GN", 34 | USE_COORD_CONV=True, 35 | PRIOR_PROB=0.01, 36 | # Mask hyper-parameters. 37 | # Channel size for the mask tower. 38 | MASK_IN_FEATURES=["p2", "p3", "p4", "p5"], 39 | MASK_IN_CHANNELS=256, 40 | MASK_CHANNELS=128, 41 | NUM_MASKS=256, # NUM_MASKS * kernel_size**2 = NUM_KERNELS 42 | # Test cfg. 43 | NMS_PRE=500, 44 | SCORE_THR=0.1, 45 | UPDATE_THR=0.05, 46 | MASK_THR=0.5, 47 | MAX_PER_IMG=100, 48 | # NMS type: matrix OR mask. 49 | NMS_TYPE="matrix", 50 | NMS_KERNEL="gaussian", 51 | NMS_SIGMA=2, 52 | # Loss cfg. 
53 | LOSS=dict( 54 | FOCAL_USE_SIGMOID=True, 55 | FOCAL_ALPHA=0.25, 56 | FOCAL_GAMMA=2.0, 57 | FOCAL_WEIGHT=1.0, 58 | DICE_WEIGHT=3.0 59 | ) 60 | ), 61 | ), 62 | INPUT=dict( 63 | # SOLO for instance segmentation does not work with "polygon" mask_format 64 | MASK_FORMAT="bitmask", 65 | ) 66 | ) 67 | 68 | 69 | class SOLOV2Config(BaseDetectionConfig): 70 | def __init__(self): 71 | super(SOLOV2Config, self).__init__() 72 | self._register_configuration(_config_dict) 73 | 74 | 75 | config = SOLOV2Config() 76 | -------------------------------------------------------------------------------- /cvpods/configs/sparse_rcnn_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | Xiangtai Li 5 | ''' 6 | 7 | from .rcnn_config import RCNNConfig 8 | 9 | _config_dict = dict( 10 | MODEL=dict( 11 | # BACKBONE=dict(NAME='build_resnet_backbone',), 12 | RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],), 13 | FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]), 14 | ROI_HEADS=dict( 15 | # NAME: "StandardROIHeads" 16 | IN_FEATURES=["p2", "p3", "p4", "p5"], 17 | ), 18 | ROI_BOX_HEAD=dict( 19 | POOLER_TYPE="ROIAlignV2", 20 | POOLER_SAMPLING_RATIO=2, 21 | POOLER_RESOLUTION=7, 22 | ), 23 | SparseRCNN=dict( 24 | NUM_PROPOSALS=100, 25 | NUM_CLASSES=80, 26 | NHEADS=8, 27 | DROPOUT=0.0, 28 | DIM_FEEDFORWARD=2048, 29 | ACTIVATION='relu', 30 | HIDDEN_DIM=256, 31 | NUM_CLS=1, 32 | NUM_REG=3, 33 | NUM_HEADS=6, 34 | 35 | # Dynamic Conv. 36 | NUM_DYNAMIC=2, 37 | DIM_DYNAMIC=64, 38 | 39 | # Loss. 40 | CLASS_WEIGHT=2.0, 41 | GIOU_WEIGHT=2.0, 42 | L1_WEIGHT=5.0, 43 | DEEP_SUPERVISION=True, 44 | NO_OBJECT_WEIGHT=0.1, 45 | USE_FOCAL=True, 46 | ALPHA=0.25, 47 | GAMMA=2.0, 48 | PRIOR_PROB=0.01 49 | ) 50 | ), 51 | ) 52 | 53 | 54 | class SparseRCNNFPNConfig(RCNNConfig): 55 | def __init__(self): 56 | super(SparseRCNNFPNConfig, self).__init__() 57 | self._register_configuration(_config_dict) 58 | 59 | 60 | config = SparseRCNNFPNConfig() 61 | -------------------------------------------------------------------------------- /cvpods/configs/ssd_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | PIXEL_MEAN=[123.675, 116.28, 103.53], # RGB FORMAT 6 | PIXEL_STD=[1.0, 1.0, 1.0], 7 | VGG=dict( 8 | ARCH='D', 9 | NORM="", 10 | NUM_CLASSES=None, 11 | OUT_FEATURES=["Conv4_3", "Conv7"], 12 | POOL_ARGS=dict( 13 | pool3=(2, 2, 0, True), # k, s, p, ceil_mode 14 | pool5=(3, 1, 1, False) # k, s, p, ceil_mode 15 | ), 16 | FC_TO_CONV=True, 17 | ), 18 | SSD=dict( 19 | NUM_CLASSES=80, 20 | IN_FEATURES=["Conv4_3", "Conv7"], 21 | EXTRA_LAYER_ARCH={ 22 | # the number after 'S' denotes the output channels of a conv layer with stride=2 23 | "300": [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 24 | "512": [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128, 256], 25 | }, 26 | IOU_THRESHOLDS=[0.5, 0.5], 27 | IOU_LABELS=[0, -1, 1], 28 | BBOX_REG_WEIGHTS=(10.0, 10.0, 5.0, 5.0), 29 | L2NORM_SCALE=20.0, 30 | # Loss parameters: 31 | LOSS_ALPHA=1.0, 32 | SMOOTH_L1_LOSS_BETA=1.0, 33 | NEGATIVE_POSITIVE_RATIO=3.0, 34 | # Inference parameters: 35 | SCORE_THRESH_TEST=0.02, 36 | NMS_THRESH_TEST=0.45, 37 | ), 38 | ) 39 | ) 40 | 41 | 42 | class SSDConfig(BaseDetectionConfig): 43 | def __init__(self): 44 | super(SSDConfig, self).__init__() 45 | self._register_configuration(_config_dict) 46 | 47 | 48 | config =
SSDConfig() 49 | -------------------------------------------------------------------------------- /cvpods/configs/yolo_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : yolo_config.py 5 | @Time : 2020/05/07 23:55:49 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:55:49 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | PIXEL_MEAN=(0.485, 0.456, 0.406), 17 | PIXEL_STD=(0.229, 0.224, 0.225), 18 | DARKNET=dict( 19 | DEPTH=53, 20 | STEM_OUT_CHANNELS=32, 21 | WEIGHTS="s3://generalDetection/cvpods/ImageNetPretrained/custom/darknet53.mix.pth", 22 | OUT_FEATURES=["dark3", "dark4", "dark5"] 23 | ), 24 | YOLO=dict( 25 | CLASSES=80, 26 | IN_FEATURES=["dark3", "dark4", "dark5"], 27 | ANCHORS=[ 28 | [[116, 90], [156, 198], [373, 326]], 29 | [[30, 61], [62, 45], [42, 119]], 30 | [[10, 13], [16, 30], [33, 23]], 31 | ], 32 | CONF_THRESHOLD=0.01, # TEST 33 | NMS_THRESHOLD=0.5, 34 | IGNORE_THRESHOLD=0.7, 35 | ), 36 | ), 37 | ) 38 | 39 | 40 | class YOLO3Config(BaseDetectionConfig): 41 | def __init__(self): 42 | super(YOLO3Config, self).__init__() 43 | self._register_configuration(_config_dict) 44 | 45 | 46 | config = YOLO3Config() 47 | -------------------------------------------------------------------------------- /cvpods/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import ( 3 | build_dataset, 4 | build_transform_gen, 5 | build_detection_test_loader, 6 | build_detection_train_loader, 7 | ) 8 | from .registry import DATASETS, TRANSFORMS, SAMPLERS 9 | 10 | from . import transforms # isort:skip 11 | # ensure the builtin datasets are registered 12 | from . import datasets, samplers # isort:skip 13 | 14 | 15 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 16 | -------------------------------------------------------------------------------- /cvpods/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Megvii, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | from .cityscapes import CityScapesDataset 4 | from .coco import COCODataset 5 | from .imagenet import ImageNetDataset 6 | from .voc import VOCDataset 7 | from .widerface import WiderFaceDataset 8 | from .lvis import LVISDataset 9 | from .citypersons import CityPersonsDataset 10 | from .crowdhuman import CrowdHumanDataset 11 | from .youtubevis import YTVisDataset 12 | from .ovis import OVisDataset 13 | from .coco_captions import COCOCaptionsDataset 14 | 15 | __all__ = [ 16 | "COCODataset", 17 | "VOCDataset", 18 | "CityScapesDataset", 19 | "ImageNetDataset", 20 | "WiderFaceDataset", 21 | "LVISDataset", 22 | "CityPersonsDataset", 23 | "CrowdHumanDataset", 24 | "YTVisDataset", 25 | "OVisDataset", 26 | "COCOCaptionsDataset" 27 | ] 28 | -------------------------------------------------------------------------------- /cvpods/data/datasets/ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/data/datasets/ext/__init__.py -------------------------------------------------------------------------------- /cvpods/data/registry.py: -------------------------------------------------------------------------------- 1 | from cvpods.utils import Registry 2 | 3 | DATASETS = Registry("datasets") 4 | TRANSFORMS = Registry("transforms") 5 | SAMPLERS = Registry("samplers") 6 | -------------------------------------------------------------------------------- /cvpods/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .sampler import DistributedSampler, GroupSampler, DistributedGroupSampler 5 | 6 | __all__ = [ 7 | "GroupedBatchSampler", 8 | "TrainingSampler", 9 | "InferenceSampler", 10 | "RepeatFactorTrainingSampler", 11 | "DistributedSampler", 12 | "GroupSampler", 13 | "DistributedGroupSampler", 14 | ] 15 | -------------------------------------------------------------------------------- /cvpods/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | from ..registry import SAMPLERS 6 | 7 | 8 | @SAMPLERS.register() 9 | class GroupedBatchSampler(BatchSampler): 10 | """ 11 | Wraps another sampler to yield a mini-batch of indices. 12 | It enforces that the batch only contain elements from the same group. 13 | It also tries to provide mini-batches which follows an ordering which is 14 | as close as possible to the ordering from the original sampler. 15 | """ 16 | 17 | def __init__(self, sampler, group_ids, batch_size): 18 | """ 19 | Args: 20 | sampler (Sampler): Base sampler. 21 | group_ids (list[int]): If the sampler produces indices in range [0, N), 22 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 23 | The group ids must be a set of integers in the range [0, num_groups). 24 | batch_size (int): Size of mini-batch. 
25 | """ 26 | if not isinstance(sampler, Sampler): 27 | raise ValueError( 28 | "sampler should be an instance of " 29 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 30 | ) 31 | self.sampler = sampler 32 | self.group_ids = np.asarray(group_ids) 33 | assert self.group_ids.ndim == 1 34 | self.batch_size = batch_size 35 | groups = np.unique(self.group_ids).tolist() 36 | 37 | # buffer the indices of each group until batch size is reached 38 | self.buffer_per_group = {k: [] for k in groups} 39 | 40 | def __iter__(self): 41 | for idx in self.sampler: 42 | group_id = self.group_ids[idx] 43 | group_buffer = self.buffer_per_group[group_id] 44 | group_buffer.append(idx) 45 | if len(group_buffer) == self.batch_size: 46 | yield group_buffer[:] # yield a copy of the list 47 | del group_buffer[:] 48 | 49 | def __len__(self): 50 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 51 | -------------------------------------------------------------------------------- /cvpods/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .transform import * 3 | from .transform_gen import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cvpods/data/wrapped_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : wrapped_dataset.py 5 | @Time : 2020/05/07 23:54:57 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:54:57 10 | ''' 11 | 12 | import numpy as np 13 | from types import SimpleNamespace 14 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 15 | 16 | from .registry import DATASETS 17 | 18 | 19 | @DATASETS.register() 20 | class ConcatDataset(_ConcatDataset): 21 | """A wrapper of concatenated datasets. 22 | Same as :obj:`torch.utils.data.datasets.ConcatDataset`, but 23 | concat the group flag for image aspect ratio. 24 | Args: 25 | datasets (list[:obj:`Dataset`]): A list of datasets. 26 | """ 27 | 28 | def __init__(self, datasets): 29 | super(ConcatDataset, self).__init__(datasets) 30 | if hasattr(self.datasets[0], 'aspect_ratios'): 31 | aspect_ratios = [d.aspect_ratios for d in self.datasets] 32 | self.aspect_ratios = np.concatenate(aspect_ratios) 33 | if hasattr(self.datasets[0], 'meta'): 34 | self.meta = {} 35 | for d in self.datasets: 36 | self.meta.update(d.meta) 37 | self.meta = SimpleNamespace(**self.meta) 38 | 39 | 40 | @DATASETS.register() 41 | class RepeatDataset(object): 42 | """A wrapper of repeated datasets. 43 | The length of repeated datasets will be `times` larger than the original 44 | datasets. This is useful when the data loading time is long but the datasets 45 | is small. Using RepeatDataset can reduce the data loading time between 46 | epochs. 47 | Args: 48 | dataset (:obj:`Dataset`): The datasets to be repeated. 49 | times (int): Repeat times. 
50 | """ 51 | 52 | def __init__(self, dataset, times): 53 | self.dataset = dataset 54 | self.times = times 55 | if hasattr(self.dataset, 'aspect_ratios'): 56 | self.aspect_ratios = np.tile(self.dataset.aspect_ratios, times) 57 | 58 | self._ori_len = len(self.dataset) 59 | 60 | def __getitem__(self, idx): 61 | return self.dataset[idx % self._ori_len] 62 | 63 | def __len__(self): 64 | return self.times * self._ori_len 65 | -------------------------------------------------------------------------------- /cvpods/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .hooks import * 4 | from .launch import * 5 | from .predictor import * 6 | from .setup import * 7 | from .trainer import * 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /cvpods/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .citypersons_evaluation import CityPersonsEvaluator 3 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator 4 | from .crowdhuman_evaluation import CrowdHumanEvaluator 5 | from .coco_evaluation import COCOEvaluator 6 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 7 | from .lvis_evaluation import LVISEvaluator 8 | from .panoptic_evaluation import COCOPanopticEvaluator 9 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 10 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 11 | from .sem_seg_evaluation import SemSegEvaluator 12 | from .testing import print_csv_format, verify_results 13 | from .widerface_evaluation import WiderFaceEvaluator 14 | from .classification_evaluation import ClassificationEvaluator 15 | from .youtubevis_evaluation import YouTubeVISEvaluator 16 | 17 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 18 | -------------------------------------------------------------------------------- /cvpods/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import numpy as np 4 | import pprint 5 | import sys 6 | from collections import Mapping, OrderedDict 7 | 8 | 9 | def print_csv_format(results): 10 | """ 11 | Print main metrics in a format similar to Detectron, 12 | so that they are easy to copypaste into a spreadsheet. 13 | 14 | Args: 15 | results (OrderedDict[dict]): task_name -> {metric -> score} 16 | """ 17 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 18 | logger = logging.getLogger(__name__) 19 | for task, res in results.items(): 20 | # Don't print "AP-category" metrics since they are usually not tracked. 
21 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 22 | logger.info("copypaste: Task: {}".format(task)) 23 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 24 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 25 | 26 | 27 | def verify_results(cfg, results): 28 | """ 29 | Args: 30 | results (OrderedDict[dict]): task_name -> {metric -> score} 31 | 32 | Returns: 33 | bool: whether the verification succeeds or not 34 | """ 35 | expected_results = cfg.TEST.EXPECTED_RESULTS 36 | if not len(expected_results): 37 | return True 38 | 39 | ok = True 40 | for task, metric, expected, tolerance in expected_results: 41 | actual = results[task][metric] 42 | if not np.isfinite(actual): 43 | ok = False 44 | diff = abs(actual - expected) 45 | if diff > tolerance: 46 | ok = False 47 | 48 | logger = logging.getLogger(__name__) 49 | if not ok: 50 | logger.error("Result verification failed!") 51 | logger.error("Expected Results: " + str(expected_results)) 52 | logger.error("Actual Results: " + pprint.pformat(results)) 53 | 54 | sys.exit(1) 55 | else: 56 | logger.info("Results verification passed.") 57 | return ok 58 | 59 | 60 | def flatten_results_dict(results): 61 | """ 62 | Expand a hierarchical dict of scalars into a flat dict of scalars. 63 | If results[k1][k2][k3] = v, the returned dict will have the entry 64 | {"k1/k2/k3": v}. 65 | 66 | Args: 67 | results (dict): 68 | """ 69 | r = {} 70 | for k, v in results.items(): 71 | if isinstance(v, Mapping): 72 | v = flatten_results_dict(v) 73 | for kk, vv in v.items(): 74 | r[k + "/" + kk] = vv 75 | else: 76 | r[k] = v 77 | return r 78 | -------------------------------------------------------------------------------- /cvpods/export/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains code to prepare a detectron2 model for deployment. 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. 4 | 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. 6 | 7 | 8 | ### Acknowledgements 9 | 10 | Thanks to Mobile Vision team at Facebook for developing the conversion tools. 11 | -------------------------------------------------------------------------------- /cvpods/export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cvpods/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm, get_activation, get_norm 3 | from .deform_conv import DeformConv, ModulatedDeformConv, DFConv2d 4 | from .deform_conv_with_off import DeformConvWithOff, ModulatedDeformConvWithOff 5 | from .mask_ops import paste_masks_in_image 6 | from .nms import (batched_nms, batched_softnms, generalized_batched_nms, batched_nms_rotated, 7 | ml_nms, nms_rotated, softnms, matrix_nms) 8 | 9 | from .position_encoding import position_encoding_dict 10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d 11 | from .aspp import ASPP 12 | from .roi_align import ROIAlign, roi_align 13 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 14 | from .shape_spec import ShapeSpec 15 | from .swap_align2nat import SwapAlign2Nat, swap_align2nat 16 | from .activation_funcs import Swish, MemoryEfficientSwish 17 | from .border_align import BorderAlign 18 | from .naive_group_norm import NaiveGroupNorm 19 | from .ms_deform_attn import MSDeformAttn 20 | from .crop_split import CropSplit 21 | from .crop_split_gt import CropSplitGT 22 | from .dynamic_weights import DynamicWeightsCat11 23 | from .saconv import ConvAWS2dLayer, SAConv2dLayer, SAConv2dNoGlobalContextLayer 24 | from .wrappers import ( 25 | cat, 26 | BatchNorm2d, 27 | Conv2d, 28 | Conv2dSamePadding, 29 | MaxPool2dSamePadding, 30 | SeparableConvBlock, 31 | ConvTranspose2d, 32 | interpolate, 33 | nonzero_tuple, 34 | cross_entropy 35 | ) 36 | 37 | 38 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 39 | -------------------------------------------------------------------------------- /cvpods/layers/activation_funcs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Ref: 6 | # https://medium.com/the-artificial-impostor/more-memory-efficient-swish-activation-function-e07c22c12a76 7 | class SwishImplementation(torch.autograd.Function): 8 | """ 9 | Swish activation function memory-efficient implementation. 10 | 11 | This implementation computes the gradient explicitly: it keeps a copy of the input tensor 12 | and uses it to calculate the gradient during the back-propagation phase. 13 | """ 14 | @staticmethod 15 | def forward(ctx, i): 16 | result = i * torch.sigmoid(i) 17 | ctx.save_for_backward(i) 18 | return result 19 | 20 | @staticmethod 21 | def backward(ctx, grad_output): 22 | i = ctx.saved_tensors[0] 23 | sigmoid_i = torch.sigmoid(i) 24 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 25 | 26 | 27 | class MemoryEfficientSwish(nn.Module): 28 | def forward(self, x): 29 | return SwishImplementation.apply(x) 30 | 31 | 32 | class Swish(nn.Module): 33 | """ 34 | Implement the Swish activation function. 35 | See: https://arxiv.org/abs/1710.05941 for more details. 36 | """ 37 | def forward(self, x): 38 | return x * torch.sigmoid(x) 39 | -------------------------------------------------------------------------------- /cvpods/layers/border_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
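# BorderAlign (from BorderDet) pools features along the four borders of each
# predicted box. A minimal usage sketch, with shapes assumed rather than taken
# from the original code (inferred from forward() below, where `boxes` is
# (B, K, 4) in (x1, y1, x2, y2) order, and the op is CUDA-only):
#
#     align = BorderAlign(pool_size=10)
#     feats = torch.randn(2, 256, 64, 64, device="cuda")  # (B, C, H, W), assumed layout
#     boxes = torch.rand(2, 100, 4, device="cuda") * 64
#     out = align(feats, boxes)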
2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from cvpods import _C 6 | 7 | 8 | class _BorderAlign(Function): 9 | @staticmethod 10 | def forward(ctx, input, boxes, wh, pool_size): 11 | output = _C.border_align_forward(input, boxes, wh, pool_size) 12 | ctx.pool_size = pool_size 13 | ctx.save_for_backward(input, boxes, wh) 14 | return output 15 | 16 | @staticmethod 17 | @once_differentiable 18 | def backward(ctx, grad_output): 19 | pool_size = ctx.pool_size 20 | input, boxes, wh = ctx.saved_tensors 21 | grad_input = _C.border_align_backward( 22 | grad_output, input, boxes, wh, pool_size) 23 | return grad_input, None, None, None 24 | 25 | 26 | border_align = _BorderAlign.apply 27 | 28 | 29 | class BorderAlign(nn.Module): 30 | def __init__(self, pool_size): 31 | super(BorderAlign, self).__init__() 32 | self.pool_size = pool_size 33 | 34 | def forward(self, feature, boxes): 35 | feature = feature.contiguous() 36 | boxes = boxes.contiguous() 37 | wh = (boxes[:, :, 2:] - boxes[:, :, :2]).contiguous() 38 | output = border_align(feature, boxes, wh, self.pool_size) 39 | return output 40 | 41 | def __repr__(self): 42 | tmpstr = self.__class__.__name__ 43 | return tmpstr 44 | -------------------------------------------------------------------------------- /cvpods/layers/conv_with_kaiming_uniform.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from cvpods.layers import Conv2d 4 | from .deform_conv import DFConv2d 5 | from cvpods.layers.batch_norm import get_norm 6 | 7 | 8 | def conv_with_kaiming_uniform( 9 | norm=None, activation=None, 10 | use_deformable=False, use_sep=False): 11 | def make_conv( 12 | in_channels, out_channels, kernel_size, stride=1, dilation=1 13 | ): 14 | if use_deformable: 15 | conv_func = DFConv2d 16 | else: 17 | conv_func = Conv2d 18 | if use_sep: 19 | assert in_channels == out_channels 20 | groups = in_channels 21 | else: 22 | groups = 1 23 | conv = conv_func( 24 | in_channels, 25 | out_channels, 26 | kernel_size=kernel_size, 27 | stride=stride, 28 | padding=dilation * (kernel_size - 1) // 2, 29 | dilation=dilation, 30 | groups=groups, 31 | bias=(norm is None) 32 | ) 33 | if not use_deformable: 34 | # Caffe2 implementation uses XavierFill, which in fact 35 | # corresponds to kaiming_uniform_ in PyTorch 36 | nn.init.kaiming_uniform_(conv.weight, a=1) 37 | if norm is None: 38 | nn.init.constant_(conv.bias, 0) 39 | module = [conv,] 40 | if norm is not None and len(norm) > 0: 41 | if norm == "GN": 42 | norm_module = nn.GroupNorm(32, out_channels) 43 | else: 44 | norm_module = get_norm(norm, out_channels) 45 | module.append(norm_module) 46 | if activation is not None: 47 | module.append(nn.ReLU(inplace=True)) 48 | if len(module) > 1: 49 | return nn.Sequential(*module) 50 | return conv 51 | 52 | return make_conv 53 | -------------------------------------------------------------------------------- /cvpods/layers/crop_split.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Function 5 | from torch.autograd.function import once_differentiable 6 | from cvpods import _C 7 | 8 | 9 | class _CropSplit(Function): 10 | @staticmethod 11 | def forward(ctx, data, rois, c): 12 | # data: (c*c, height, width, n) per-grid-cell feature maps for n instances; 13 | # rois: one box per instance; output: (height, width, n) assembled maps. 14 | height = data.shape[1] 15 | width = data.shape[2] 16 | n = data.shape[3] 17 | ctx.c = c 18 | ctx.height = height 19 | ctx.width = width 20 | ctx.n = n 21 | ctx.rois = rois 22 | output = data.new_zeros(height, width, n) 23 | _C.crop_split_forward(data, rois, output, height, width, c, n) 24 | return output 25 | 26 | @staticmethod 27 | @once_differentiable 28 | def backward(ctx, grad_output): 29 | c = ctx.c 30 | height = ctx.height 31 | width = ctx.width 32 | n = ctx.n 33 | rois = ctx.rois 34 | # scatter the incoming gradient back to the (c*c, height, width, n) input layout 35 | grad_input = torch.zeros((c * c, height, width, n), dtype=grad_output.dtype, device=grad_output.device) 36 | _C.crop_split_backward(grad_output, rois, grad_input, height, width, c, n) 37 | return grad_input, None, None 38 | 39 | 40 | crop_split = _CropSplit.apply 41 | 42 | 43 | class CropSplit(nn.Module): 44 | 45 | def __init__(self, c=2): 46 | super(CropSplit, self).__init__() 47 | self.c = c 48 | 49 | def forward(self, data, rois): 50 | return crop_split(data, rois, self.c) 51 | -------------------------------------------------------------------------------- /cvpods/layers/crop_split_gt.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | from torch.autograd import Function 5 | from cvpods import _C 6 | 7 | 8 | class _CropSplitGT(Function): 9 | # Forward-only op used to build ground-truth targets; no backward is defined. 10 | @staticmethod 11 | def forward(ctx, data, rois, c): 12 | height = data.shape[0] 13 | width = data.shape[1] 14 | n = data.shape[2] 15 | output = data.new_zeros(height, width, n) 16 | _C.crop_split_gt_forward(data, rois, output, height, width, c, n) 17 | return output 18 | 19 | 20 | crop_split_gt = _CropSplitGT.apply 21 | 22 | 23 | class CropSplitGT(nn.Module): 24 | 25 | def __init__(self, c=2): 26 | super(CropSplitGT, self).__init__() 27 | self.c = c 28 | 29 | def forward(self, data, rois): 30 | return crop_split_gt(data, rois, self.c) 31 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace cvpods { 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor SwapAlign2Nat_forward_cuda( 9 | const at::Tensor& X, 10 | const int lambda_val, 11 | const float pad_val); 12 | 13 | at::Tensor SwapAlign2Nat_backward_cuda( 14 | const at::Tensor& gY, 15 | const int lambda_val, 16 | const int batch_size, 17 | const int channel, 18 | const int height, 19 | const int width); 20 | #endif 21 | 22 | inline at::Tensor SwapAlign2Nat_forward( 23 | const at::Tensor& X, 24 | const int lambda_val, 25 | const float pad_val) { 26 | if (X.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val); 29 | #else 30 | AT_ERROR("Not compiled with GPU support"); 31 | #endif 32 | } 33 | AT_ERROR("Not implemented on the CPU"); 34 | } 35 | 36 | inline at::Tensor SwapAlign2Nat_backward( 37 | const at::Tensor& gY, 38 | const int lambda_val, 39 | const int batch_size, 40 | const int channel, 41 | const int height, 42 | const int width) { 43 | if (gY.type().is_cuda()) { 44 | #ifdef WITH_CUDA 45 | return SwapAlign2Nat_backward_cuda( 46 | gY, lambda_val, batch_size, channel, height, width); 47 | #else 48 | AT_ERROR("Not compiled with GPU support"); 49 | #endif 50 | } 51 | AT_ERROR("Not implemented on the CPU"); 52 | } 53 | 54 | } // namespace cvpods 55 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/border_align/border_align.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> // include targets lost in extraction; reconstructed 3 | #include <ATen/ATen.h> 4 | #include <ATen/cuda/CUDAContext.h> 5 | 6 | namespace cvpods { 7 | 8 | at::Tensor border_align_cuda_forward( 9 | const at::Tensor& feature, 10 | const at::Tensor& boxes, 11 | const at::Tensor& wh, 12 | const int pool_size); 13 | 14 | 15 | at::Tensor border_align_cuda_backward( 16 | const at::Tensor& gradOutput, 17 | const at::Tensor& feature, 18 | const at::Tensor& boxes, 19 | const at::Tensor& wh, 20 | const int pool_size); 21 | 22 | 23 | at::Tensor BorderAlign_Forward( 24 | const at::Tensor& feature, 25 | const at::Tensor& boxes, 26 | const at::Tensor& wh, 27 | const int pool_size) { 28 | return border_align_cuda_forward(feature, boxes, wh, pool_size); 29 | } 30 | 31 | 32 | at::Tensor BorderAlign_Backward( 33 | const at::Tensor& gradOutput, 34 | const at::Tensor& feature, 35 | const at::Tensor& boxes, 36 | const at::Tensor& wh, 37 | const int pool_size) { 38 | return border_align_cuda_backward(gradOutput, feature, boxes, wh, pool_size); 39 | } 40 | 41 | } // namespace cvpods -------------------------------------------------------------------------------- /cvpods/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1, boxes2); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1, boxes2); 33 | } 34 | 35 | } // namespace cvpods 36 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace cvpods { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto widths1 = boxes1.select(1, 2).contiguous(); 13 | auto heights1 = boxes1.select(1, 3).contiguous(); 14 | auto widths2 = boxes2.select(1, 2).contiguous(); 15 | auto heights2 = boxes2.select(1, 3).contiguous(); 16 | 17 | at::Tensor areas1 = widths1 * heights1; 18 | at::Tensor areas2 = widths2 * heights2; 19 | 20 | auto num_boxes1 = boxes1.size(0); 21 | auto num_boxes2 = boxes2.size(0); 22 | 23 | for (int i = 0; i < num_boxes1; i++) { 24 | for (int j = 0; j < num_boxes2; j++) { 25 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 26 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 27 | } 28 | } 29 | } 30 | 31 | at::Tensor box_iou_rotated_cpu( 32 | const at::Tensor& boxes1, 33 | const at::Tensor& boxes2) { 34 | auto num_boxes1 = boxes1.size(0); 35 | auto num_boxes2 = boxes2.size(0); 36 | at::Tensor ious = 37 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 38 | 39 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 40 | 41 | // reshape from 1d array to 2d array 42 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 43 | return ious.reshape(shape); 44 | } 45 | 46 | } // namespace cvpods 47 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/correlation/correlation.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | #include <vector> 4 | namespace cvpods { 5 | 6 | // CUDA forward declarations 7 | std::vector<torch::Tensor> corr_cuda_forward( 8 | torch::Tensor fmap1, 9 | torch::Tensor fmap2, 10 | torch::Tensor coords, 11 | int radius); 12 | 13 | std::vector<torch::Tensor> corr_cuda_backward( 14 | torch::Tensor fmap1, 15 | torch::Tensor fmap2, 16 | torch::Tensor coords, 17 | torch::Tensor corr_grad, 18 | int radius); 19 | 20 | // C++ interface 21 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor") 22 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous") 23 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 24 | 25 | std::vector<torch::Tensor> corr_forward( 26 | torch::Tensor fmap1, 27 | torch::Tensor fmap2, 28 | torch::Tensor coords, 29 | int radius) { 30 | CHECK_INPUT(fmap1); 31 | CHECK_INPUT(fmap2); 32 | CHECK_INPUT(coords); 33 | 34 | return corr_cuda_forward(fmap1, fmap2, coords, radius); 35 | } 36 | 37 | 38 | std::vector<torch::Tensor> corr_backward( 39 | torch::Tensor fmap1, 40 | torch::Tensor fmap2, 41 | torch::Tensor coords, 42 | torch::Tensor corr_grad, 43 | int radius) { 44 | CHECK_INPUT(fmap1); 45 | CHECK_INPUT(fmap2); 46 | CHECK_INPUT(coords); 47 | CHECK_INPUT(corr_grad); 48 | 49 | return corr_cuda_backward(fmap1, fmap2, coords, corr_grad, radius); 50 | } 51 | 52 | } -------------------------------------------------------------------------------- /cvpods/layers/csrc/crop_split/crop_split.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | namespace cvpods { 5 | 6 | void CropSplitForward(const at::Tensor data, 7 | const at::Tensor bbox, 8 | at::Tensor out, 9 | const int height, 10 | const int width, 11 | const int num_cell, 12 | const int num_bbox); 13 | 14 | void CropSplitBack(const at::Tensor top_grad, 15 | const at::Tensor bbox, 16 | at::Tensor bottom_grad, 17 | const int height, 18 | const int width, 19 | const int num_cell, 20 | const int num_bbox); 21 | 22 | 23 | void crop_split_cuda_forward(const at::Tensor input, 24 | const at::Tensor bbox, 25 | at::Tensor out, 26 | const int height, 27 | const int width, 28 | const int num_cell, 29 | const int num_bbox) 30 | { 31 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 32 | 33 | CropSplitForward(input, bbox, out, height, width, num_cell, num_bbox); 34 | } 35 | 36 | void crop_split_cuda_backward(const at::Tensor out_grad, 37 | const at::Tensor bbox, 38 | at::Tensor bottom_grad, 39 | const int height, 40 | const int width, 41 | const int num_cell, 42 | const int num_bbox) 43 | { 44 | TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 45 | 46 | CropSplitBack(out_grad, bbox, bottom_grad, height, width, num_cell, num_bbox); 47 | } 48 | } -------------------------------------------------------------------------------- /cvpods/layers/csrc/crop_split_gt/crop_split_gt.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | namespace cvpods { 5 | 6 | void CropSplitGtForward(const at::Tensor data, 7 | const at::Tensor bbox, 8 | at::Tensor out, 9 | const int height, 10 | const int width, 11 | const int num_cell, 12 | const int num_bbox); 13 | 14 | void CropSplitGtBack(const at::Tensor top_grad, 15 | const at::Tensor bbox, 16 | at::Tensor bottom_grad, 17 | const int height, 18 | const int width, 19 | const int num_cell, 20 | const int num_bbox); 21 | 22 | 23 | void crop_split_gt_cuda_forward(const at::Tensor input, 24 | const at::Tensor bbox, 25 | at::Tensor out, 26 | const int height, 27 | const int width, 28 | const int num_cell, 29 | const int num_bbox) 30 | { 31 | TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 32 | 33 | CropSplitGtForward(input, bbox, out, height, width, num_cell, num_bbox); 34 | } 35 | 36 | void crop_split_gt_cuda_backward(const at::Tensor out_grad, 37 | const at::Tensor bbox, 38 | at::Tensor bottom_grad, 39 | const int height, 40 | const int width, 41 | const int num_cell, 42 | const int num_bbox) 43 | { 44 | TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 45 | 46 | CropSplitGtBack(out_grad, bbox, bottom_grad, height, width, num_cell, num_bbox); 47 | } 48 | 49 | 50 | }
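A minimal sketch of how the CropSplit wrapper bound to the kernels above is driven from Python. The tensor layout is an assumption inferred from the kernel and autograd signatures (per-grid-cell feature maps of shape (c*c, H, W, N) for N instances, one box per instance), and the op is CUDA-only:

import torch
from cvpods.layers import CropSplit

c = 2
crop = CropSplit(c)
feats = torch.randn(c * c, 96, 96, 8, device="cuda", requires_grad=True)  # (c*c, H, W, N), assumed
rois = torch.rand(8, 4, device="cuda") * 96  # one box per instance (format assumed)
out = crop(feats, rois)       # -> (96, 96, 8): one assembled map per instance
out.sum().backward()          # gradients flow back through crop_split_backward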
-------------------------------------------------------------------------------- /cvpods/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | #include <cuda_runtime_api.h> 4 | 5 | namespace cvpods { 6 | int get_cudart_version() { 7 | return CUDART_VERSION; 8 | } 9 | } // namespace cvpods 10 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/deformable_attn/ms_deform_attn.h: -------------------------------------------------------------------------------- 1 | /*! 2 | ************************************************************************************************** 3 | * Deformable DETR 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved. 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details] 6 | ************************************************************************************************** 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 8 | ************************************************************************************************** 9 | */ 10 | 11 | #pragma once 12 | #include <torch/extension.h> 13 | #include <vector> 14 | 15 | 16 | namespace cvpods { 17 | 18 | at::Tensor ms_deform_attn_cuda_forward( 19 | const at::Tensor &value, 20 | const at::Tensor &spatial_shapes, 21 | const at::Tensor &level_start_index, 22 | const at::Tensor &sampling_loc, 23 | const at::Tensor &attn_weight, 24 | const int im2col_step); 25 | 26 | 27 | std::vector<at::Tensor> ms_deform_attn_cuda_backward( 28 | const at::Tensor &value, 29 | const at::Tensor &spatial_shapes, 30 | const at::Tensor &level_start_index, 31 | const at::Tensor &sampling_loc, 32 | const at::Tensor &attn_weight, 33 | const at::Tensor &grad_output, 34 | const int im2col_step); 35 | 36 | 37 | at::Tensor 38 | ms_deform_attn_forward( 39 | const at::Tensor &value, 40 | const at::Tensor &spatial_shapes, 41 | const at::Tensor &level_start_index, 42 | const at::Tensor &sampling_loc, 43 | const at::Tensor &attn_weight, 44 | const int im2col_step) 45 | { 46 | if (value.type().is_cuda()) 47 | { 48 | return ms_deform_attn_cuda_forward( 49 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step); 50 | } 51 | AT_ERROR("Not implemented on the CPU"); 52 | } 53 | 54 | std::vector<at::Tensor> 55 | ms_deform_attn_backward( 56 | const at::Tensor &value, 57 | const at::Tensor &spatial_shapes, 58 | const at::Tensor &level_start_index, 59 | const at::Tensor &sampling_loc, 60 | const at::Tensor &attn_weight, 61 | const at::Tensor &grad_output, 62 | const int im2col_step) 63 | { 64 | if (value.type().is_cuda()) 65 | { 66 | return ms_deform_attn_cuda_backward( 67 | value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step); 68 | } 69 | AT_ERROR("Not implemented on the CPU"); 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/masked_conv2d/masked_conv2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | namespace cvpods { 5 | 6 | void masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx, 7 | const at::Tensor mask_w_idx, at::Tensor col, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w); 10 | 11 | void masked_col2im_forward(const at::Tensor col, const at::Tensor mask_h_idx, 12 | const at::Tensor mask_w_idx, at::Tensor im, int height, 13 | int width, int channels); 14 | 15 | } // namespace cvpods 16 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | #pragma once 3 | #include <torch/extension.h> 4 | 5 | namespace cvpods { 6 | #ifdef WITH_CUDA 7 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 8 | #endif 9 | 10 | 11 | inline at::Tensor ml_nms(const at::Tensor& dets, 12 | const at::Tensor& scores, 13 | const at::Tensor& labels, 14 | const float threshold) { 15 | 16 | if (dets.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | // TODO raise error if not compiled with CUDA 19 | if (dets.numel() == 0) 20 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 21 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 22 | return ml_nms_cuda(b, threshold); 23 | #else 24 | AT_ERROR("Not compiled with GPU support"); 25 | #endif 26 | } 27 | AT_ERROR("CPU version not implemented"); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda(dets, scores, iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_rotated_cpu(dets, scores, iou_threshold); 36 | } 37 | 38 | } // namespace cvpods 39 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace cvpods { 6 | 7 | template <typename T> 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.type() == scores.type(), "dets should have the same type as scores"); 20 | 21 | if (dets.numel() == 0) { 22 | return at::empty({0}, dets.options().dtype(at::kLong)); 23 | } 24 | 25 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 26 | 27 | auto ndets = dets.size(0); 28 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 29 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 30 | 31 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 32 | auto keep = keep_t.data_ptr<int64_t>(); 33 | auto order = order_t.data_ptr<int64_t>(); 34 | 35 | int64_t num_to_keep = 0; 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) { 40 | continue; 41 | } 42 | 43 | keep[num_to_keep++] = i; 44 | 45 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 46 | auto j = order[_j]; 47 | if (suppressed[j] == 1) { 48 | continue; 49 | } 50 | 51 | auto ovr = single_box_iou_rotated<T>( 52 | dets[i].data_ptr<T>(), dets[j].data_ptr<T>()); 53 | if (ovr >= iou_threshold) { 54 | suppressed[j] = 1; 55 | } 56 | } 57 | } 58 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 59 | } 60 | 61 | at::Tensor nms_rotated_cpu( 62 | const at::Tensor& dets, 63 | const at::Tensor& scores, 64 | const float iou_threshold) { 65 | auto result = at::empty({0}, dets.options()); 66 | 67 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { 68 | result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold); 69 | }); 70 | return result; 71 | } 72 | 73 | } // namespace cvpods 74 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/sigmoid_focal_loss/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | namespace cvpods { 5 | #ifdef WITH_CUDA 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | #endif 21 | 22 | // 23 | // Interface for Python 24 | inline at::Tensor SigmoidFocalLoss_forward( 25 | const at::Tensor& logits, 26 | const at::Tensor& targets, 27 | const int num_classes, 28 | const float gamma, 29 | const float alpha) { 30 | if (logits.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | inline at::Tensor SigmoidFocalLoss_backward( 41 | const at::Tensor& logits, 42 | const at::Tensor& targets, 43 | const at::Tensor& d_losses, 44 | const int num_classes, 45 | const float gamma, 46 | const float alpha) { 47 | if (logits.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 50 | #else 51 | AT_ERROR("Not compiled with GPU support"); 52 | #endif 53 | } 54 | AT_ERROR("Not implemented on the CPU"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /cvpods/layers/deform_conv_with_off.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .deform_conv import DeformConv, ModulatedDeformConv 7 | 8 | 9 | class DeformConvWithOff(nn.Module): 10 | 11 | def __init__(self, in_channels, out_channels, 12 | kernel_size=3, stride=1, padding=1, 13 | dilation=1, deformable_groups=1): 14 | super(DeformConvWithOff, self).__init__() 15 | self.offset_conv = nn.Conv2d( 16 | in_channels, 17 | deformable_groups * 2 * kernel_size * kernel_size, 18 | kernel_size=kernel_size, 19 | stride=stride, 20 | padding=padding, 21 | ) 22 | self.dcn = DeformConv( 23 | in_channels, out_channels, kernel_size=kernel_size, 24 | stride=stride, padding=padding, dilation=dilation, 25 | deformable_groups=deformable_groups, 26 | ) 27 | 28 | def forward(self, input): 29 | offset = self.offset_conv(input) 30 | output = self.dcn(input, offset) 31 | return output 32 | 33 | 34 | class ModulatedDeformConvWithOff(nn.Module): 35 | 36 | def __init__(self, in_channels, out_channels, 37 | kernel_size=3, stride=1, padding=1, 38 | dilation=1, deformable_groups=1, bias=True, norm=None, activation=None,): 39 | super(ModulatedDeformConvWithOff, self).__init__() 40 | self.offset_mask_conv = nn.Conv2d( 41 | in_channels, 42 | deformable_groups * 3 * kernel_size * kernel_size, 43 | kernel_size=kernel_size, 44 | stride=stride, 45 | padding=padding, 46 | ) 47 | self.dcnv2 = ModulatedDeformConv( 48 | in_channels, out_channels, kernel_size=kernel_size, 49 | stride=stride, padding=padding, dilation=dilation, 50 | deformable_groups=deformable_groups, bias=bias, norm=norm, activation=activation 51 | ) 52 | 53 | def forward(self, input): 54 | x = self.offset_mask_conv(input) 55 | o1, o2, mask = torch.chunk(x, 3, dim=1) 56 | offset = torch.cat((o1, o2), dim=1) 57 | mask = torch.sigmoid(mask) 58 | output = self.dcnv2(input, offset, mask) 59 | return output 60 | -------------------------------------------------------------------------------- /cvpods/layers/point_transformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | 6 | 7 | def index_points(points, idx): 8 | """ 9 | Input: 10 | points: input points data, [B, N, C] 11 | idx: sample index data, [B, S, [K]] 12 | Return: 13 | new_points: indexed points data, [B, S, [K], C] 14 | """ 15 | raw_size = idx.size() 16 | idx = idx.reshape(raw_size[0], -1) 17 | res = torch.gather(points, 1, idx[..., None].expand(-1, -1, points.size(-1))) 18 | return res.reshape(*raw_size, -1) 19 | 20 | 21 | def square_distance(src, dst): 22 | """ 23 | Calculate the Euclidean distance between each pair of points.
24 | src^T * dst = xn * xm + yn * ym + zn * zm; 25 | sum(src^2, dim=-1) = xn*xn + yn*yn + zn*zn; 26 | sum(dst^2, dim=-1) = xm*xm + ym*ym + zm*zm; 27 | dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2 28 | = sum(src**2,dim=-1)+sum(dst**2,dim=-1)-2*src^T*dst 29 | Input: 30 | src: source points, [B, N, C] 31 | dst: target points, [B, M, C] 32 | Output: 33 | dist: per-point square distance, [B, N, M] 34 | """ 35 | return torch.sum((src[:, :, None] - dst[:, None]) ** 2, dim=-1) 36 | 37 | 38 | class TransformerBlock(nn.Module): 39 | def __init__(self, d_points, d_model, k) -> None: 40 | super().__init__() 41 | self.fc1 = nn.Linear(d_points, d_model) 42 | self.fc2 = nn.Linear(d_model, d_points) 43 | self.fc_delta = nn.Sequential( 44 | nn.Linear(3, d_model), 45 | nn.ReLU(), 46 | nn.Linear(d_model, d_model) 47 | ) 48 | self.fc_gamma = nn.Sequential( 49 | nn.Linear(d_model, d_model), 50 | nn.ReLU(), 51 | nn.Linear(d_model, d_model) 52 | ) 53 | self.w_qs = nn.Linear(d_model, d_model, bias=False) 54 | self.w_ks = nn.Linear(d_model, d_model, bias=False) 55 | self.w_vs = nn.Linear(d_model, d_model, bias=False) 56 | self.k = k 57 | 58 | # xyz: b x n x 3, features: b x n x f 59 | def forward(self, xyz, features): 60 | dists = square_distance(xyz, xyz) 61 | knn_idx = dists.argsort()[:, :, :self.k] # b x n x k 62 | knn_xyz = index_points(xyz, knn_idx) 63 | 64 | pre = features 65 | x = self.fc1(features) 66 | q, k, v = self.w_qs(x), index_points(self.w_ks(x), knn_idx), index_points(self.w_vs(x), knn_idx) 67 | 68 | pos_enc = self.fc_delta(xyz[:, :, None] - knn_xyz) # b x n x k x f 69 | 70 | attn = self.fc_gamma(q[:, :, None] - k + pos_enc) 71 | attn = F.softmax(attn / np.sqrt(k.size(-1)), dim=-2) # b x n x k x f 72 | 73 | res = torch.einsum('bmnf,bmnf->bmf', attn, v + pos_enc) 74 | res = self.fc2(res) + pre 75 | return res, attn 76 | 77 | -------------------------------------------------------------------------------- /cvpods/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | # import torch 5 | from cvpods import _C 6 | 7 | 8 | def pairwise_iou_rotated(boxes1, boxes2): 9 | """ 10 | Return intersection-over-union (Jaccard index) of boxes. 11 | 12 | Both sets of boxes are expected to be in 13 | (x_center, y_center, width, height, angle) format. 14 | 15 | Arguments: 16 | boxes1 (Tensor[N, 5]) 17 | boxes2 (Tensor[M, 5]) 18 | 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | return _C.box_iou_rotated(boxes1, boxes2) 24 | -------------------------------------------------------------------------------- /cvpods/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to obtain the shape inference ability among pytorch modules. 
11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /cvpods/layers/swap_align2nat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | class _SwapAlign2Nat(Function): 10 | @staticmethod 11 | def forward(ctx, X, lambda_val, pad_val): 12 | ctx.lambda_val = lambda_val 13 | ctx.input_shape = X.size() 14 | 15 | Y = _C.swap_align2nat_forward(X, lambda_val, pad_val) 16 | return Y 17 | 18 | @staticmethod 19 | @once_differentiable 20 | def backward(ctx, gY): 21 | lambda_val = ctx.lambda_val 22 | bs, ch, h, w = ctx.input_shape 23 | 24 | gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w) 25 | 26 | return gX, None, None 27 | 28 | 29 | swap_align2nat = _SwapAlign2Nat.apply 30 | 31 | 32 | class SwapAlign2Nat(nn.Module): 33 | """ 34 | The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174. 35 | Given an input tensor that predicts masks of shape (N, C=VxU, H, W), 36 | applying the op returns masks of shape (N, V'xU', H', W') where 37 | the unit lengths of (V, U) and (H, W) are swapped, and the mask representation 38 | is transformed from aligned to natural. 39 | Args: 40 | lambda_val (int): the relative unit length ratio between (V, U) and (H, W); 41 | as we always have larger unit lengths for (V, U) than (H, W), 42 | lambda_val is always >= 1. 43 | pad_val (float): padding value for the values falling outside of the input 44 | tensor; defaults to -6, as sigmoid(-6) is ~0, indicating 45 | that there are no masks outside of the tensor. 46 | """ 47 | 48 | def __init__(self, lambda_val, pad_val=-6.0): 49 | super(SwapAlign2Nat, self).__init__() 50 | self.lambda_val = lambda_val 51 | self.pad_val = pad_val 52 | 53 | def forward(self, X): 54 | return swap_align2nat(X, self.lambda_val, self.pad_val) 55 | 56 | def __repr__(self): 57 | tmpstr = self.__class__.__name__ + "(" 58 | tmpstr += "lambda_val=" + str(self.lambda_val) 59 | tmpstr += ", pad_val=" + str(self.pad_val) 60 | tmpstr += ")" 61 | return tmpstr 62 | -------------------------------------------------------------------------------- /cvpods/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | from cvpods.layers import ShapeSpec 5 | 6 | # from .anchor_generator import build_anchor_generator 7 | from .backbone import FPN, Backbone, ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 8 | from .meta_arch import GeneralizedRCNN, PanopticFPN, ProposalNetwork, RetinaNet, SemanticSegmentor 9 | from .postprocessing import detector_postprocess 10 | from .roi_heads import ROIHeads, StandardROIHeads 11 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA 12 | 13 | _EXCLUDE = {"torch", "ShapeSpec"} 14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 15 | 16 | assert ( 17 | torch.Tensor([1]) == torch.Tensor([2]) 18 | ).dtype == torch.bool, ("Your PyTorch is too old.
" 19 | "Please update to contain https://github.com/pytorch/pytorch/pull/21113") 20 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from .backbone import Backbone 4 | from .fpn import FPN, build_retinanet_resnet_fpn_p5_backbone 5 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage, build_resnet_deeplab_backbone 6 | from .darknet import Darknet, build_darknet_backbone 7 | from .efficientnet import EfficientNet, build_efficientnet_backbone 8 | from .bifpn import BiFPN, build_efficientnet_bifpn_backbone 9 | from .dynamic_arch import DynamicNetwork, build_dynamic_backbone 10 | from .sf_fpn import build_resnet_sf_fpn_backbone 11 | from .transformer import Transformer 12 | from .swin import build_swin_backbone, build_swin_fpn_backbone, build_retinanet_swin_fpn_backbone 13 | # TODO can expose more resnet blocks after careful consideration 14 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : backbone.py 5 | @Time : 2020/05/07 23:58:08 6 | @Author : Facebook, Inc. and its affiliates. 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:58:08 10 | ''' 11 | 12 | from abc import ABCMeta, abstractmethod 13 | 14 | import torch.nn as nn 15 | 16 | from cvpods.layers import ShapeSpec 17 | 18 | __all__ = ["Backbone"] 19 | 20 | 21 | class Backbone(nn.Module, metaclass=ABCMeta): 22 | """ 23 | Abstract base class for network backbones. 24 | """ 25 | def __init__(self): 26 | """ 27 | The `__init__` method of any subclass can specify its own set of arguments. 28 | """ 29 | super().__init__() 30 | 31 | @abstractmethod 32 | def forward(self): 33 | """ 34 | Subclasses must override this method, but adhere to the same return type. 35 | 36 | Returns: 37 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 38 | """ 39 | pass 40 | 41 | @property 42 | def size_divisibility(self): 43 | """ 44 | Some backbones require the input height and width to be divisible by a 45 | specific integer. This is typically true for encoder / decoder type networks 46 | with lateral connection (e.g., FPN) for which feature maps need to match 47 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 48 | input size divisibility is required. 
49 | """ 50 | return 0 51 | 52 | def output_shape(self): 53 | """ 54 | Returns: 55 | dict[str->ShapeSpec] 56 | """ 57 | # this is a backward-compatible default 58 | return { 59 | name: ShapeSpec(channels=self._out_feature_channels[name], 60 | stride=self._out_feature_strides[name]) 61 | for name in self._out_features 62 | } 63 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/dynamic_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # build for dynamic networks 3 | # @Author: yanwei.li 4 | 5 | from .dynamic_backbone import DynamicNetwork, build_dynamic_backbone -------------------------------------------------------------------------------- /cvpods/modeling/backbone/dynamic_arch/cal_op_flops.py: -------------------------------------------------------------------------------- 1 | # Count Operation MFLOPs when fix batch to 1 2 | # @author: yanwei.li 3 | 4 | 5 | def count_Conv_flop( 6 | in_h, in_w, in_channel, out_channel, 7 | kernel_size, is_bias=False, stride=1, groups=1 8 | ): 9 | out_h = in_h // stride 10 | out_w = in_w // stride 11 | bias_ops = 1 if is_bias else 0 12 | kernel_ops = kernel_size[0] * kernel_size[1] * (in_channel // groups) 13 | delta_ops = (kernel_ops + bias_ops) * out_channel * out_h * out_w 14 | return delta_ops / 1e6 15 | 16 | 17 | def count_Linear_flop(in_num, out_num, is_bias): 18 | weight_ops = in_num * out_num 19 | bias_ops = out_num if is_bias else 0 20 | delta_ops = weight_ops + bias_ops 21 | return delta_ops / 1e6 22 | 23 | 24 | def count_BN_flop(in_h, in_w, in_channel, is_affine): 25 | multi_affine = 2 if is_affine else 1 26 | delta_ops = multi_affine * in_h * in_w * in_channel 27 | return delta_ops / 1e6 28 | 29 | 30 | def count_ReLU_flop(in_h, in_w, in_channel): 31 | delta_ops = in_h * in_w * in_channel 32 | return delta_ops / 1e6 33 | 34 | 35 | def count_Pool2d_flop(in_h, in_w, out_channel, kernel_size, stride): 36 | out_h = in_h // stride 37 | out_w = in_w // stride 38 | kernel_ops = kernel_size[0] * kernel_size[1] 39 | delta_ops = kernel_ops * out_w * out_h * out_channel 40 | return delta_ops / 1e6 41 | 42 | 43 | def count_ConvBNReLU_flop( 44 | in_h, in_w, in_channel, out_channel, 45 | kernel_size, is_bias=False, stride=1, 46 | groups=1, is_affine=True 47 | ): 48 | flops = 0.0 49 | flops += count_Conv_flop( 50 | in_h, in_w, in_channel, out_channel, 51 | kernel_size, is_bias, stride, groups 52 | ) 53 | in_h = in_h // stride 54 | in_w = in_w // stride 55 | flops += count_BN_flop(in_h, in_w, out_channel, is_affine) 56 | flops += count_ReLU_flop(in_h, in_w, out_channel) 57 | return flops 58 | -------------------------------------------------------------------------------- /cvpods/modeling/basenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .basenet import basenet 2 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .focal_loss import ( 2 | sigmoid_focal_loss, sigmoid_focal_loss_jit, sigmoid_focal_loss_star, 3 | sigmoid_focal_loss_star_jit) 4 | from .iou_loss import IOULoss, iou_loss 5 | from .reg_l1_loss import reg_l1_loss 6 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss_cuda 7 | from .smooth_l1_loss import smooth_l1_loss 8 | from .dice_loss import dice_loss, weighted_dice_loss 
-------------------------------------------------------------------------------- /cvpods/modeling/losses/reg_l1_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from cvpods.modeling.nn_utils.feature_utils import gather_feature 7 | 8 | 9 | class reg_l1_loss(nn.Module): 10 | 11 | def __init__(self): 12 | super(reg_l1_loss, self).__init__() 13 | 14 | def forward(self, output, mask, index, target): 15 | pred = gather_feature(output, index, use_transform=True) 16 | mask = mask.unsqueeze(dim=2).expand_as(pred).float() 17 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 18 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 19 | loss = loss / (mask.sum() + 1e-4) 20 | return loss 21 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # import all the meta_arch, so they will be registered 5 | 6 | from .centernet import CenterNet 7 | from .borderdet import BorderDet 8 | from .panoptic_fpn import PanopticFPN 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork 10 | from .reppoints import RepPoints 11 | from .semantic_seg import SemanticSegmentor, SemSegFPNHead 12 | from .ssd import SSD 13 | from .tensormask import TensorMask 14 | from .yolov3 import YOLOv3 15 | 16 | from .solo.solo import SOLO 17 | from .solo.solov2 import SOLOv2 18 | from .solo.solo_decoupled import DecoupledSOLO 19 | from cvpods.modeling.meta_arch.conditionalInst.conditionalInst import CondInst 20 | from cvpods.modeling.meta_arch.sparsercnn.sparse_rcnn import SparseRCNN 21 | from cvpods.modeling.meta_arch.retinanet.retinanet_sepc import RetinaNetSEPC 22 | from cvpods.modeling.meta_arch.retinanet.retinanet import RetinaNet 23 | from cvpods.modeling.meta_arch.fcos.fcos import FCOS 24 | from cvpods.modeling.meta_arch.fcos.fcos_sepc import FCOSSEPC 25 | from cvpods.modeling.meta_arch.detr.detr import DETR 26 | 27 | 28 | from .efficientdet import EfficientDet 29 | from .pointrend import ( 30 | PointRendROIHeads, 31 | CoarseMaskHead, 32 | StandardPointHead, 33 | PointRendSemSegHead, 34 | ) 35 | from .dynamic4seg import DynamicNet4Seg 36 | from .fcn import FCNHead 37 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/boundary_mask_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/boundary_mask_rcnn/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/conditionalInst/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/conditionalInst/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/deformable_detr.py: -------------------------------------------------------------------------------- 1 | # To Do: implement the deformable detr 2 | # Xiangtai Li 3 | import torch 4 | import 
torch.nn.functional as F 5 | from torch import nn 6 | 7 | from cvpods.layers import ShapeSpec, position_encoding_dict 8 | from cvpods.modeling.backbone import Transformer 9 | from cvpods.modeling.matcher import HungarianMatcher 10 | from cvpods.structures import Boxes, ImageList, Instances 11 | from cvpods.structures import boxes as box_ops 12 | from cvpods.layers.box_ops import generalized_box_iou 13 | from cvpods.utils import comm 14 | from cvpods.layers.misc import accuracy 15 | 16 | 17 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/detr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/detr/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/fcos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/fcos/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/flownet/util.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | try: 5 | from spatial_correlation_sampler import spatial_correlation_sample 6 | except ImportError: 7 | import warnings 8 | with warnings.catch_warnings(): 9 | warnings.filterwarnings("default", category=ImportWarning) 10 | warnings.warn("failed to load custom correlation module, " 11 | "which is needed for FlowNetC", ImportWarning) 12 | 13 | 14 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1): 15 | if batchNorm: 16 | return nn.Sequential( 17 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False), 18 | nn.BatchNorm2d(out_planes), 19 | nn.LeakyReLU(0.1,inplace=True) 20 | ) 21 | else: 22 | return nn.Sequential( 23 | nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True), 24 | nn.LeakyReLU(0.1,inplace=True) 25 | ) 26 | 27 | 28 | def predict_flow(in_planes): 29 | return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=False) 30 | 31 | 32 | def deconv(in_planes, out_planes): 33 | return nn.Sequential( 34 | nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=False), 35 | nn.LeakyReLU(0.1,inplace=True) 36 | ) 37 | 38 | 39 | def correlate(input1, input2): 40 | out_corr = spatial_correlation_sample(input1, 41 | input2, 42 | kernel_size=1, 43 | patch_size=21, 44 | stride=1, 45 | padding=0, 46 | dilation_patch=2) 47 | # collate dimensions 1 and 2 in order to be treated as a 48 | # regular 4D tensor 49 | b, ph, pw, h, w = out_corr.size() 50 | out_corr = out_corr.view(b, ph * pw, h, w)/input1.size(1) 51 | return F.leaky_relu_(out_corr, 0.1) 52 | 53 | 54 | def crop_like(input, target): 55 | if input.size()[2:] == target.size()[2:]: 56 | return input 57 | else: 58 | return input[:, :, :target.size(2), :target.size(3)] 59 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch import nn 4 | 5 | from cvpods.layers import
ShapeSpec 6 | from cvpods.structures import ImageList 7 | 8 | 9 | def accuracy(output, target, topk=(1,)): 10 | """Computes the accuracy over the k top predictions for the specified values of k""" 11 | with torch.no_grad(): 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)  # reshape, not view: `correct` is non-contiguous after the transpose above 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | 25 | 26 | class Classification(nn.Module): 27 | """ 28 | ImageNet classification module. 29 | Weights of this model can be used as pretrained weights for any model in cvpods. 30 | """ 31 | def __init__(self, cfg): 32 | super(Classification, self).__init__() 33 | 34 | self.device = torch.device(cfg.MODEL.DEVICE) 35 | 36 | self.network = cfg.build_backbone( 37 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 38 | 39 | self.loss_evaluator = nn.CrossEntropyLoss() 40 | 41 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 42 | 3, 1, 1) 43 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 44 | 3, 1, 1) 45 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 46 | 47 | self.to(self.device) 48 | 49 | def forward(self, batched_inputs): 50 | images = self.preprocess_image(batched_inputs) 51 | 52 | preds = self.network(images.tensor)["linear"] 53 | 54 | if self.training: 55 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).to(self.device)  # use the configured device instead of hard-coding .cuda() 56 | losses = self.loss_evaluator(preds, labels) 57 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 58 | 59 | return { 60 | "loss_cls": losses, 61 | "Acc@1": acc1, 62 | "Acc@5": acc5, 63 | } 64 | else: 65 | return preds 66 | 67 | def preprocess_image(self, batched_inputs): 68 | """ 69 | Normalize, pad and batch the input images.
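Each element of `batched_inputs` is expected to carry an "image" tensor with values in [0, 255]; the values are scaled by 1/255 before mean/std normalization, so PIXEL_MEAN/PIXEL_STD are assumed to be given in the [0, 1] range here.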
70 | """ 71 | images = [x["image"].float().to(self.device) for x in batched_inputs] 72 | images = [self.normalizer(x.div(255)) for x in images] 73 | images = ImageList.from_tensors(images, self.network.size_divisibility) 74 | return images 75 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/panopticfcn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/panopticfcn/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/panopticfcn/utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch 4 | from functools import partial 5 | 6 | def topk_score(scores, K=40, score_shape=None): 7 | """ 8 | get top K point in score map 9 | """ 10 | batch, channel, height, width = score_shape 11 | 12 | # get topk score and its index in every H x W(channel dim) feature map 13 | topk_scores, topk_inds = torch.topk(scores.reshape(batch, channel, -1), K) 14 | 15 | topk_inds = topk_inds % (height * width) 16 | topk_ys = (topk_inds // width).float() 17 | topk_xs = (topk_inds % width).int().float() 18 | 19 | # get all topk in in a batch 20 | topk_score, index = torch.topk(topk_scores.reshape(batch, -1), K) 21 | # div by K because index is grouped by K(C x K shape) 22 | topk_clses = index // K 23 | topk_inds = gather_feature(topk_inds.view(batch, -1, 1), index).reshape(batch, K) 24 | topk_ys = gather_feature(topk_ys.reshape(batch, -1, 1), index).reshape(batch, K) 25 | topk_xs = gather_feature(topk_xs.reshape(batch, -1, 1), index).reshape(batch, K) 26 | 27 | return topk_score, topk_inds, topk_clses, topk_ys, topk_xs 28 | 29 | 30 | def gather_feature(fmap, index, mask=None, use_transform=False): 31 | if use_transform: 32 | # change a (N, C, H, W) tenor to (N, HxW, C) shape 33 | batch, channel = fmap.shape[:2] 34 | fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous() 35 | 36 | dim = fmap.size(-1) 37 | index = index.unsqueeze(len(index.shape)).expand(*index.shape, dim) 38 | fmap = fmap.gather(dim=1, index=index) 39 | if mask is not None: 40 | mask = mask.unsqueeze(2).expand_as(fmap) 41 | fmap = fmap[mask] 42 | fmap = fmap.reshape(-1, dim) 43 | return fmap 44 | 45 | 46 | def multi_apply(func, *args, **kwargs): 47 | pfunc = partial(func, **kwargs) if kwargs else func 48 | map_results = map(pfunc, *args) 49 | return tuple(map(list, zip(*map_results))) 50 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/retinanet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/retinanet/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/solo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/solo/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/sparsercnn/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/sparsercnn/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/nn_utils/__init__.py -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/activation_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | import torch.nn as nn 7 | 8 | from .jit_handles import generic_activation_jit, get_jit_model_analysis 9 | 10 | # A dictionary that maps supported operations to their activation count handles. 11 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 12 | "aten::_convolution": generic_activation_jit("conv"), 13 | "aten::addmm": generic_activation_jit("addmm"), 14 | } 15 | 16 | 17 | def activation_count( 18 | model: nn.Module, 19 | inputs: typing.Tuple[object, ...], 20 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None, 21 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 22 | """ 23 | Given a model and an input to the model, compute the total number of 24 | activations of the model. Note the input should have a batch size of 1. 25 | 26 | Args: 27 | model (nn.Module): The model to compute activation counts. 28 | inputs (tuple): Inputs that are passed to `model` to count activations. 29 | Inputs need to be in a tuple. 30 | supported_ops (dict(str,Callable) or None) : By default, we count 31 | activation for convolution and fully connected layers. Users can 32 | provide customized supported_ops if desired. 33 | 34 | Returns: 35 | tuple[defaultdict, Counter]: A dictionary that records the number of 36 | activation (mega) for each operation and a Counter that records the 37 | number of skipped operations. 38 | """ 39 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 40 | if not supported_ops: 41 | supported_ops = _SUPPORTED_OPS.copy() 42 | 43 | # Run activation count. 44 | total_activation_count, skipped_ops = get_jit_model_analysis( 45 | model, inputs, supported_ops 46 | ) 47 | 48 | # Log for skipped operations. 49 | if len(skipped_ops) > 0: 50 | for op, freq in skipped_ops.items(): 51 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 52 | 53 | # Convert activation count to mega count. 
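# (i.e. report each operator's activation count in units of 1e6, matching the "mega" in the docstring above)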
54 | final_count = defaultdict(float) 55 | for op in total_activation_count: 56 | final_count[op] = total_activation_count[op] / 1e6 57 | 58 | return final_count, skipped_ops 59 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/feature_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | 5 | def gather_feature(fmap, index, mask=None, use_transform=False): 6 | """ 7 | Used for CenterNet. 8 | """ 9 | if use_transform: 10 | # change a (N, C, H, W) tensor to (N, HxW, C) shape 11 | batch, channel = fmap.shape[:2] 12 | fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous() 13 | 14 | dim = fmap.size(-1) 15 | index = index.unsqueeze(len(index.shape)).expand(*index.shape, dim) 16 | fmap = fmap.gather(dim=1, index=index) 17 | if mask is not None: 18 | # this part is not called in Res18 dcn COCO 19 | mask = mask.unsqueeze(2).expand_as(fmap) 20 | fmap = fmap[mask] 21 | fmap = fmap.reshape(-1, dim) 22 | return fmap 23 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/flop_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | import torch.nn as nn 7 | 8 | from .jit_handles import ( 9 | addmm_flop_jit, conv_flop_jit, einsum_flop_jit, get_jit_model_analysis, matmul_flop_jit) 10 | 11 | # A dictionary that maps supported operations to their flop count jit handles. 12 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 13 | "aten::addmm": addmm_flop_jit, 14 | "aten::_convolution": conv_flop_jit, 15 | "aten::einsum": einsum_flop_jit, 16 | "aten::matmul": matmul_flop_jit, 17 | } 18 | 19 | 20 | def flop_count( 21 | model: nn.Module, 22 | inputs: typing.Tuple[object, ...], 23 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None, 24 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 25 | """ 26 | Given a model and an input to the model, compute the Gflops of the given 27 | model. Note the input should have a batch size of 1. 28 | 29 | Args: 30 | model (nn.Module): The model to compute flop counts. 31 | inputs (tuple): Inputs that are passed to `model` to count flops. 32 | Inputs need to be in a tuple. 33 | supported_ops (dict(str,Callable) or None): By default, we count flops 34 | for convolution layers, fully connected layers, torch.matmul and 35 | torch.einsum operations. We define a FLOP as a single atomic 36 | Multiply-Add. Users can provide customized supported_ops for 37 | counting flops if desired. 38 | 39 | Returns: 40 | tuple[defaultdict, Counter]: A dictionary that records the number of 41 | gflops for each operation and a Counter that records the number of 42 | skipped operations. 43 | """ 44 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 45 | if not supported_ops: 46 | supported_ops = _SUPPORTED_OPS.copy() 47 | 48 | # Run flop count. 49 | total_flop_counter, skipped_ops = get_jit_model_analysis( 50 | model, inputs, supported_ops 51 | ) 52 | 53 | # Log for skipped operations. 54 | if len(skipped_ops) > 0: 55 | for op, freq in skipped_ops.items(): 56 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 57 | 58 | # Convert flop count to gigaflops.
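# (i.e. divide the raw per-operator counts by 1e9, matching the Gflops described in the docstring above)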
59 | final_count = defaultdict(float) 60 | for op in total_flop_counter: 61 | final_count[op] = total_flop_counter[op] / 1e9 62 | 63 | return final_count, skipped_ops 64 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/parameter_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | import typing 4 | from collections import defaultdict 5 | import tabulate 6 | from torch import nn 7 | 8 | 9 | def parameter_count(model: nn.Module) -> typing.DefaultDict[str, int]: 10 | """ 11 | Count parameters of a model and its submodules. 12 | 13 | Args: 14 | model: a torch module 15 | 16 | Returns: 17 | dict (str-> int): the key is either a parameter name or a module name. 18 | The value is the number of elements in the parameter, or in all 19 | parameters of the module. The key "" corresponds to the total 20 | number of parameters of the model. 21 | """ 22 | r = defaultdict(int) 23 | for name, prm in model.named_parameters(): 24 | size = prm.numel() 25 | name = name.split(".") 26 | for k in range(0, len(name) + 1): 27 | prefix = ".".join(name[:k]) 28 | r[prefix] += size 29 | return r 30 | 31 | 32 | def parameter_count_table(model: nn.Module, max_depth: int = 3) -> str: 33 | """ 34 | Format the parameter count of the model (and its submodules or parameters) 35 | in a nice table. 36 | 37 | Args: 38 | model: a torch module 39 | max_depth (int): maximum depth to recursively print submodules or 40 | parameters 41 | 42 | Returns: 43 | str: the table to be printed 44 | """ 45 | count: typing.DefaultDict[str, int] = parameter_count(model) 46 | param_shape: typing.Dict[str, typing.Tuple] = { 47 | k: tuple(v.shape) for k, v in model.named_parameters() 48 | } 49 | 50 | table: typing.List[typing.Tuple] = [] 51 | 52 | def format_size(x: int) -> str: 53 | if x > 1e5: 54 | return "{:.1f}M".format(x / 1e6) 55 | if x > 1e2: 56 | return "{:.1f}K".format(x / 1e3) 57 | return str(x) 58 | 59 | def fill(lvl: int, prefix: str) -> None: 60 | if lvl >= max_depth: 61 | return 62 | for name, v in count.items(): 63 | if name.count(".") == lvl and name.startswith(prefix): 64 | indent = " " * (lvl + 1) 65 | if name in param_shape: 66 | table.append((indent + name, indent + str(param_shape[name]))) 67 | else: 68 | table.append((indent + name, indent + format_size(v))) 69 | fill(lvl + 1, name + ".") 70 | 71 | table.append(("model", format_size(count.pop("")))) 72 | fill(0, "") 73 | 74 | old_ws = tabulate.PRESERVE_WHITESPACE 75 | tabulate.PRESERVE_WHITESPACE = True 76 | tab = tabulate.tabulate( 77 | table, headers=["name", "#elements or shape"], tablefmt="pipe" 78 | ) 79 | tabulate.PRESERVE_WHITESPACE = old_ws 80 | return tab 81 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/scale_grad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from torch.autograd.function import Function 4 | 5 | 6 | class _ScaleGradient(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input, scale): 10 | ctx.scale = scale 11 | return input 12 | 13 | @staticmethod 14 | def backward(ctx, grad_output): 15 | return grad_output * ctx.scale, None 16 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/__init__.py: 
-------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .rpn import RPN -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from cvpods.utils.registry import Registry 3 | 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR") 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """ 6 | Registry for proposal generators, which produce object proposals from feature maps. 7 | 8 | The registered object will be called with `obj(cfg, input_shape)`. 9 | The call should return a `nn.Module` object. 10 | """ 11 | 12 | from . import rpn, rrpn # noqa F401 isort:skip 13 | 14 | 15 | def build_proposal_generator(cfg, input_shape): 16 | """ 17 | Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`. 18 | The name can be "PrecomputedProposals" to use no proposal generator. 19 | """ 20 | name = cfg.MODEL.PROPOSAL_GENERATOR.NAME 21 | if name == "PrecomputedProposals": 22 | return None 23 | 24 | return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape) 25 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/proposal_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | import torch 4 | 5 | from cvpods.structures import Instances 6 | 7 | 8 | def add_ground_truth_to_proposals(gt_boxes, proposals): 9 | """ 10 | Call `add_ground_truth_to_proposals_single_image` for all images. 11 | 12 | Args: 13 | gt_boxes (list[Boxes]): list of N elements. Element i is a Boxes 14 | representing the ground-truth for image i. 15 | proposals (list[Instances]): list of N elements. Element i is an Instances 16 | representing the proposals for image i. 17 | 18 | Returns: 19 | list[Instances]: list of N Instances. Each is the proposals for the image, 20 | with field "proposal_boxes" and "objectness_logits". 21 | """ 22 | assert gt_boxes is not None 23 | 24 | assert len(proposals) == len(gt_boxes) 25 | if len(proposals) == 0: 26 | return proposals 27 | 28 | return [ 29 | add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) 30 | for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) 31 | ] 32 | 33 | 34 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): 35 | """ 36 | Augment `proposals` with ground-truth boxes from `gt_boxes`. 37 | 38 | Args: 39 | Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals 40 | per image. 41 | 42 | Returns: 43 | Same as `add_ground_truth_to_proposals`, but for only one image. 44 | """ 45 | device = proposals.objectness_logits.device 46 | # Concatenating gt_boxes with proposals requires them to have the same fields 47 | # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1.
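# (arithmetic check: logit(p) = log(p / (1 - p)), so p = 1 - 1e-10 gives log(~1e10) ≈ 23.03 below)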
48 | gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) 49 | 50 | gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) 51 | gt_proposal = Instances(proposals.image_size) 52 | 53 | gt_proposal.proposal_boxes = gt_boxes 54 | gt_proposal.objectness_logits = gt_logits 55 | new_proposals = Instances.cat([proposals, gt_proposal]) 56 | 57 | return new_proposals 58 | -------------------------------------------------------------------------------- /cvpods/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .roi_heads import Res5ROIHeads, ROIHeads, StandardROIHeads, select_foreground_proposals 3 | from .rotated_fast_rcnn import RROIHeads 4 | 5 | from . import cascade_rcnn # isort:skip 6 | -------------------------------------------------------------------------------- /cvpods/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | __all__ = ["subsample_labels"] 5 | 6 | 7 | def subsample_labels(labels, num_samples, positive_fraction, bg_label): 8 | """ 9 | Return `num_samples` (or fewer, if not enough found) 10 | random samples from `labels` which is a mixture of positives & negatives. 11 | It will try to return as many positives as possible without 12 | exceeding `positive_fraction * num_samples`, and then try to 13 | fill the remaining slots with negatives. 14 | 15 | Args: 16 | labels (Tensor): (N, ) label vector with values: 17 | * -1: ignore 18 | * bg_label: background ("negative") class 19 | * otherwise: one or more foreground ("positive") classes 20 | num_samples (int): The total number of labels with value >= 0 to return. 21 | Values that are not sampled will be filled with -1 (ignore). 22 | positive_fraction (float): The number of subsampled labels with values > 0 23 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 24 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 25 | In other words, if there are not enough positives, the sample is filled with 26 | negatives. If there are also not enough negatives, then as many elements are 27 | sampled as is possible. 28 | bg_label (int): label index of background ("negative") class. 29 | 30 | Returns: 31 | pos_idx, neg_idx (Tensor): 32 | 1D vector of indices. The total length of both is `num_samples` or fewer.
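For example, with labels = [-1, 0, 2, 0, 1], num_samples = 4, positive_fraction = 0.5 and bg_label = 0, the positive candidates are indices {2, 4} and the negative candidates are indices {1, 3}, so pos_idx is a permutation of [2, 4] and neg_idx a permutation of [1, 3].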
33 | """ 34 | positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=False).squeeze(1) 35 | negative = torch.nonzero(labels == bg_label, as_tuple=False).squeeze(1) 36 | 37 | num_pos = int(num_samples * positive_fraction) 38 | # protect against not enough positive examples 39 | num_pos = min(positive.numel(), num_pos) 40 | num_neg = num_samples - num_pos 41 | # protect against not enough negative examples 42 | num_neg = min(negative.numel(), num_neg) 43 | 44 | # randomly select positive and negative examples 45 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 46 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 47 | 48 | pos_idx = positive[perm1] 49 | neg_idx = negative[perm2] 50 | return pos_idx, neg_idx 51 | -------------------------------------------------------------------------------- /cvpods/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .optimizer_builder import ( 4 | OPTIMIZER_BUILDER, 5 | AdamBuilder, 6 | AdamWBuilder, 7 | OptimizerBuilder, 8 | SGDBuilder, 9 | SGDGateLRBuilder 10 | ) 11 | from .scheduler_builder import ( 12 | SCHEDULER_BUILDER, 13 | BaseSchedulerBuilder, 14 | LambdaLRBuilder, 15 | OneCycleLRBuilder, 16 | PolyLRBuilder, 17 | WarmupCosineLR, 18 | WarmupCosineLRBuilder, 19 | WarmupMultiStepLR, 20 | WarmupMultiStepLRBuilder 21 | ) 22 | 23 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 24 | -------------------------------------------------------------------------------- /cvpods/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa, pairwise_iou_tensor 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, polygons_to_bitmask, rasterize_polygons_within_box 7 | from .rotated_boxes import RotatedBoxes 8 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /cvpods/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contain utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 
6 | -------------------------------------------------------------------------------- /cvpods/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .registry import Registry 5 | from .benchmark import timeit, benchmark, Timer 6 | from .distributed import comm 7 | from .env import collect_env_info, seed_all_rng, setup_environment, setup_custom_environment 8 | from .imports import dynamic_import 9 | from .file import download, PathHandler, PathManager, get_cache_dir, file_lock, PicklableWrapper 10 | from .memory import retry_if_cuda_oom 11 | from .visualizer import colormap, random_color, VideoVisualizer, ColorMode, VisImage, Visualizer 12 | from .dump import (get_event_storage, EventWriter, JSONWriter, TensorboardXWriter, 13 | CommonMetricPrinter, EventStorage, HistoryBuffer, setup_logger, log_first_n, 14 | log_every_n, log_every_n_seconds, create_small_table, create_table_with_header) 15 | 16 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 17 | -------------------------------------------------------------------------------- /cvpods/utils/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .benchmark import * 5 | from .timer import * 6 | -------------------------------------------------------------------------------- /cvpods/utils/benchmark/timer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # -*- coding: utf-8 -*- 4 | 5 | from time import perf_counter 6 | from typing import Optional 7 | 8 | 9 | class Timer: 10 | """ 11 | A timer which computes the time elapsed since the start/reset of the timer. 12 | """ 13 | 14 | def __init__(self): 15 | self.reset() 16 | 17 | def reset(self): 18 | """ 19 | Reset the timer. 20 | """ 21 | self._start = perf_counter() 22 | self._paused: Optional[float] = None 23 | self._total_paused = 0 24 | 25 | def pause(self): 26 | """ 27 | Pause the timer. 28 | """ 29 | if self._paused is not None: 30 | raise ValueError("Trying to pause a Timer that is already paused!") 31 | self._paused = perf_counter() 32 | 33 | def is_paused(self) -> bool: 34 | """ 35 | Returns: 36 | bool: whether the timer is currently paused 37 | """ 38 | return self._paused is not None 39 | 40 | def resume(self): 41 | """ 42 | Resume the timer. 43 | """ 44 | if self._paused is None: 45 | raise ValueError("Trying to resume a Timer that is not paused!") 46 | self._total_paused += perf_counter() - self._paused 47 | self._paused = None 48 | 49 | def seconds(self) -> float: 50 | """ 51 | Returns: 52 | (float): the total number of seconds since the start/reset of the 53 | timer, excluding the time when the timer is paused. 
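A minimal usage sketch: `t = Timer(); t.pause(); t.resume(); elapsed = t.seconds()`; any time spent between pause() and resume() is excluded from `elapsed`.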
54 | """ 55 | if self._paused is not None: 56 | end_time: float = self._paused # type: ignore 57 | else: 58 | end_time = perf_counter() 59 | return end_time - self._start - self._total_paused 60 | -------------------------------------------------------------------------------- /cvpods/utils/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .comm import * 5 | -------------------------------------------------------------------------------- /cvpods/utils/dump/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .events import * 5 | from .history_buffer import * 6 | from .logger import * 7 | -------------------------------------------------------------------------------- /cvpods/utils/dump/history_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import numpy as np 5 | from typing import List, Tuple 6 | 7 | 8 | class HistoryBuffer: 9 | """ 10 | Track a series of scalar values and provide access to smoothed values over a 11 | window or the global average of the series. 12 | """ 13 | 14 | def __init__(self, max_length: int = 1000000): 15 | """ 16 | Args: 17 | max_length: maximal number of values that can be stored in the 18 | buffer. When the capacity of the buffer is exhausted, old 19 | values will be removed. 20 | """ 21 | self._max_length: int = max_length 22 | self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs 23 | self._count: int = 0 24 | self._global_avg: float = 0 25 | 26 | def update(self, value: float, iteration: float = None): 27 | """ 28 | Add a new scalar value produced at certain iteration. If the length 29 | of the buffer exceeds self._max_length, the oldest element will be 30 | removed from the buffer. 31 | """ 32 | if iteration is None: 33 | iteration = self._count 34 | if len(self._data) == self._max_length: 35 | self._data.pop(0) 36 | self._data.append((value, iteration)) 37 | 38 | self._count += 1 39 | self._global_avg += (value - self._global_avg) / self._count 40 | 41 | def latest(self): 42 | """ 43 | Return the latest scalar value added to the buffer. 44 | """ 45 | return self._data[-1][0] 46 | 47 | def median(self, window_size: int): 48 | """ 49 | Return the median of the latest `window_size` values in the buffer. 50 | """ 51 | return np.median([x[0] for x in self._data[-window_size:]]) 52 | 53 | def avg(self, window_size: int): 54 | """ 55 | Return the mean of the latest `window_size` values in the buffer. 56 | """ 57 | return np.mean([x[0] for x in self._data[-window_size:]]) 58 | 59 | def global_avg(self): 60 | """ 61 | Return the mean of all the elements in the buffer. Note that this 62 | includes those getting removed due to limited buffer storage. 63 | """ 64 | return self._global_avg 65 | 66 | def values(self): 67 | """ 68 | Returns: 69 | list[(number, iteration)]: content of the current buffer. 
70 | """ 71 | return self._data 72 | -------------------------------------------------------------------------------- /cvpods/utils/env/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .collect_env import * 5 | from .env import * 6 | -------------------------------------------------------------------------------- /cvpods/utils/file/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .download import * 5 | from .file_io import * 6 | from .serialize import * 7 | -------------------------------------------------------------------------------- /cvpods/utils/file/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable, note that it uses 8 | heavy weight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /cvpods/utils/imports.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : imports.py 5 | @Time : 2020/05/07 23:59:19 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:59:19 10 | ''' 11 | 12 | import imp 13 | 14 | 15 | def dynamic_import(config_name, config_path): 16 | """ 17 | Dynamic import a project. 18 | 19 | Args: 20 | config_name (str): module name 21 | config_path (str): the dir that contains the .py with this module. 
22 | 23 | Examples:: 24 | >>> root = "/data/repos/cvpods_playground/zhubenjin/retinanet/" 25 | >>> project = root + "retinanet.res50.fpn.coco.800size.1x.mrcnn_sigmoid" 26 | >>> cfg = dynamic_import("config", project).config 27 | >>> net = dynamic_import("net", project) 28 | """ 29 | fp, pth, desc = imp.find_module(config_name, [config_path]) 30 | 31 | return imp.load_module(config_name, fp, pth, desc) 32 | -------------------------------------------------------------------------------- /cvpods/utils/metrics/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/utils/metrics/__init__.py -------------------------------------------------------------------------------- /cvpods/utils/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | # Copyright (c) BaseDetection, Inc. and its affiliates. 4 | import torch 5 | 6 | 7 | @torch.no_grad() 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the precision@k for the specified values of k""" 10 | if target.numel() == 0: 11 | return [torch.zeros([], device=output.device)] 12 | maxk = max(topk) 13 | batch_size = target.size(0) 14 | 15 | _, pred = output.topk(maxk, 1, True, True) 16 | pred = pred.t() 17 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 18 | 19 | res = [] 20 | for k in topk: 21 | correct_k = correct[:k].reshape(-1).float().sum(0)  # reshape, not view: `correct` is non-contiguous after the transpose 22 | res.append(correct_k.mul_(100.0 / batch_size)) 23 | return res 24 | -------------------------------------------------------------------------------- /cvpods/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from typing import Dict, Optional 4 | 5 | 6 | class Registry(object): 7 | """ 8 | The registry that provides name -> object mapping, to support third-party 9 | users' custom modules. 10 | To create a registry (e.g. a backbone registry): 11 | .. code-block:: python 12 | BACKBONE_REGISTRY = Registry('BACKBONE') 13 | To register an object: 14 | .. code-block:: python 15 | @BACKBONE_REGISTRY.register() 16 | class MyBackbone(): 17 | ... 18 | Or: 19 | .. code-block:: python 20 | BACKBONE_REGISTRY.register(MyBackbone) 21 | """ 22 | 23 | def __init__(self, name: str) -> None: 24 | """ 25 | Args: 26 | name (str): the name of this registry 27 | """ 28 | self._name: str = name 29 | self._obj_map: Dict[str, object] = {} 30 | 31 | def _do_register(self, name: str, obj: object) -> None: 32 | assert ( 33 | name not in self._obj_map 34 | ), "An object named '{}' was already registered in '{}' registry!".format( 35 | name, self._name 36 | ) 37 | self._obj_map[name] = obj 38 | 39 | def register(self, obj: object = None) -> Optional[object]: 40 | """ 41 | Register the given object under the name `obj.__name__`. 42 | Can be used as either a decorator or not. See docstring of this class for usage.
43 | """ 44 | if obj is None: 45 | # used as a decorator 46 | def deco(func_or_class: object) -> object: 47 | name = func_or_class.__name__ # pyre-ignore 48 | self._do_register(name, func_or_class) 49 | return func_or_class 50 | 51 | return deco 52 | 53 | # used as a function call 54 | name = obj.__name__ # pyre-ignore 55 | self._do_register(name, obj) 56 | 57 | def get(self, name: str) -> object: 58 | ret = self._obj_map.get(name) 59 | if ret is None: 60 | raise KeyError( 61 | "No object named '{}' found in '{}' registry!".format(name, self._name) 62 | ) 63 | return ret 64 | 65 | def __contains__(self, name: str) -> bool: 66 | return name in self._obj_map 67 | -------------------------------------------------------------------------------- /cvpods/utils/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .colormap import * 5 | from .video_visualizer import * 6 | from .visualizer import * 7 | -------------------------------------------------------------------------------- /cvpods/utils/visualizer/show.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : show.py 5 | @Time : 2020/05/07 23:58:35 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:58:35 10 | ''' 11 | 12 | import copy 13 | 14 | import numpy as np 15 | import pylab as plt 16 | 17 | 18 | def draw_box(ax, vertices, color='black'): 19 | """ 20 | Draw box with color. 21 | 22 | Args: 23 | ax (list): axes to draw box along 24 | vertices (ndarray): indices of shape (N x 2) 25 | color (str): plotted color 26 | """ 27 | connections = [ 28 | [0, 1], 29 | [1, 2], 30 | [2, 3], 31 | [3, 0], 32 | ] 33 | for connection in connections: 34 | ax.plot(*vertices[:, connection], c=color, lw=5) 35 | 36 | 37 | def visualize_feature_maps( 38 | fm, 39 | boxes=[], 40 | keypoints=[], 41 | stride=1, 42 | save_filename=None 43 | ): 44 | """ 45 | Visualize feature map with boxes or key points. 46 | 47 | Args: 48 | fm (torch.Tensor): feature map of shape H x W x c, c is channel 49 | boxes (ndarray): boxes to be visualized. 
50 | keypoints (ndarray): key points to be visualized 51 | stride (int): used to normalize boxes or keypoints 52 | save_filename (bool): whether save to disk 53 | """ 54 | nc = np.ceil(np.sqrt(fm.shape[2])) # column 55 | nr = np.ceil(fm.shape[2] / nc) # row 56 | nc = int(nc) 57 | nr = int(nr) 58 | plt.figure(figsize=(64, 64)) 59 | for i in range(fm.shape[2]): 60 | ax = plt.subplot(nr, nc, i + 1) 61 | ax.imshow(fm[:, :, i], cmap='jet') 62 | 63 | for obj in boxes: 64 | box = copy.deepcopy(obj) / stride 65 | draw_box(ax, box, color='g') 66 | 67 | for pts_score in keypoints: 68 | pts = pts_score[:8] 69 | pts = pts / stride 70 | for i in range(4): 71 | ax.plot(pts[2 * i + 1], pts[2 * i + 0], 'r*') 72 | ax.plot([pts[1], pts[3]], [pts[0], pts[2]], c='y', lw=5) 73 | ax.plot([pts[3], pts[5]], [pts[2], pts[4]], c='g', lw=5) 74 | ax.plot([pts[5], pts[7]], [pts[4], pts[6]], c='b', lw=5) 75 | ax.plot([pts[7], pts[1]], [pts[6], pts[0]], c='r', lw=5) 76 | 77 | # plt.colorbar() 78 | ax.axis('off') 79 | if save_filename: 80 | plt.savefig(save_filename) 81 | else: 82 | plt.show() 83 | plt.close() 84 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | ## BlendMask instance detection 2 | 3 | ``` 4 | coco/ 5 | thing_train2017/ 6 | # thing class label maps for auxiliary semantic loss 7 | lvis/ 8 | thing_train/ 9 | # semantic labels for LVIS 10 | ``` 11 | 12 | Run `python prepare_thing_sem_from_instance.py`, to extract semantic labels from instance annotations. 13 | 14 | Run `python prepare_thing_sem_from_lvis.py`, to extract semantic labels from LVIS annotations. 15 | 16 | ## Text Recognition 17 | 18 | - Totaltext training, testing images, and annotations [[link]](https://universityofadelaide.box.com/shared/static/3eq5ti7z45qfq5gu96gg5t1xwh1yrrt7.zip) [[paper]](https://ieeexplore.ieee.org/abstract/document/8270088/) [[code]](https://github.com/cs-chan/Total-Text-Dataset). 19 | - CTW1500 training, testing images, and annotations [[link]](https://universityofadelaide.box.com/s/yb9red8pi9eszuzqompo593b6zhz87qw) [[paper]](https://www.sciencedirect.com/science/article/pii/S0031320319300664) [[code]](https://github.com/Yuliang-Liu/Curve-Text-Detector). 20 | - MLT [[dataset]](https://universityofadelaide.box.com/s/tsiimvp65tkf7dw1nuh8l71cjcs0fyif) [[paper]](https://ieeexplore.ieee.org/abstract/document/8270168). 21 | - Syntext-150k: 22 | - Part1: 94,723 [[dataset]](https://universityofadelaide.box.com/s/alta996w4fym6arh977h3k3xv55clhg3) 23 | - Part2: 54,327 [[dataset]](https://universityofadelaide.box.com/s/7k7d6nvf951s4i01szs4udpu2yv5dlqe) 24 | 25 | ``` 26 | text/ 27 | totaltext/ 28 | annotations/ 29 | train_images/ 30 | test_images/ 31 | mlt2017/ 32 | annotations/train.json 33 | images/ 34 | ... 35 | syntext1/ 36 | syntext2/ 37 | ... 38 | evaluation/ 39 | gt_ctw1500.zip 40 | gt_totaltext.zip 41 | ``` 42 | 43 | To evaluate on Total Text and CTW1500, first download the zipped annotations with 44 | 45 | ``` 46 | mkdir evaluation 47 | cd evaluation 48 | wget -O gt_ctw1500.zip https://cloudstor.aarnet.edu.au/plus/s/uoeFl0pCN9BOCN5/download 49 | wget -O gt_totaltext.zip https://cloudstor.aarnet.edu.au/plus/s/pEMs0KjCocL2nvV/download 50 | ``` 51 | 52 | ## Person In Context instance detection 53 | 54 | ``` 55 | pic/ 56 | thing_train/ 57 | # thing class label maps for auxiliary semantic loss 58 | annotations/ 59 | train_person.json 60 | val_person.json 61 | image/ 62 | train/ 63 | ... 
64 | 65 | ``` 66 | 67 | First link the PIC_2.0 dataset to this folder with `ln -s /path/to/PIC_2.0 pic`. Then run `python gen_coco_person.py` to generate train and validation annotation jsons. 68 | 69 | Run `python prepare_thing_sem_from_instance.py --dataset-name pic` to extract semantic labels from instance annotations. 70 | -------------------------------------------------------------------------------- /datasets/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/datasets/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz -------------------------------------------------------------------------------- /docs/bugs.md: -------------------------------------------------------------------------------- 1 | ## Potential Bugs 2 | 3 | 4 | 1, The default pixel mean and variance assume RGB input, obtained by converting the FAIR pretrained models (R-50.pkl, R-101.pkl): 5 | Caffe pretrained models expect BGR input, while PyTorch pretrained models expect RGB; see the sketch after the docs section below. -------------------------------------------------------------------------------- /docs/datasets.md: -------------------------------------------------------------------------------- 1 | # Some Dataset Annotations 2 | 3 | ## YouTube VIS 4 | Train: 2238 videos (one clip in each folder) 5 | 6 | Validation: 302 videos (submit to the server) 7 | 8 | Test: 343 videos (not useful; designed for the competition) 9 | 10 | 11 | ## OVIS 12 | 13 | Train: 607 videos (one clip in each folder) 14 | 15 | Validation: 140 videos (submit to the server) 16 | 17 | Test: 343 videos (not useful; designed for the competition) 18 | 19 | 20 | 21 | ## MOTS Challenge 22 | 23 | 24 | 25 | ## MOTS KITTI 26 | 27 | -------------------------------------------------------------------------------- /docs/notes.md: -------------------------------------------------------------------------------- 1 | 2 | 1, YouTube-VIS dataset: 3 | upload result.json to the evaluation server to evaluate the results. 4 | 5 | 6 | 2, Cityscapes Panoptic dataset 7 | 8 | 9 | 3, Cityscapes Video Panoptic dataset -------------------------------------------------------------------------------- /docs/overview.md: -------------------------------------------------------------------------------- 1 | 2 | ## Overview 3 | cvpods is based on Detectron2 but with a more flexible design for research purposes. 4 | 5 | Currently, this codebase is used for object detection and instance segmentation research.
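To make the BGR/RGB pitfall noted in docs/bugs.md concrete, here is a minimal, hypothetical sketch (the values are the common Detectron-style BGR means, shown only for illustration; they are not claimed to be this repo's defaults):

```python
# Caffe-converted checkpoints (e.g. FAIR's R-50.pkl) expect BGR input and
# BGR-ordered pixel statistics in the 0-255 range.
caffe_bgr_mean = [103.530, 116.280, 123.675]  # B, G, R

# A torchvision-style (RGB) checkpoint expects the reversed channel order.
rgb_mean = caffe_bgr_mean[::-1]               # [123.675, 116.280, 103.530] = R, G, B

# Mixing the two conventions (BGR statistics with an RGB-pretrained backbone,
# or vice versa) does not crash; it only silently degrades accuracy, which is
# what makes this bug easy to miss.
```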
-------------------------------------------------------------------------------- /docs/results.md: -------------------------------------------------------------------------------- 1 | ## Benchmark of CVPODs 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/tricks.md: -------------------------------------------------------------------------------- 1 | 1, -------------------------------------------------------------------------------- /fig/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/fig/test.jpg -------------------------------------------------------------------------------- /playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/mnt/lustre/share_data/chengguangliang/hehao/CEInst_checkpoint/r101_3x.pth", 8 | MASK_ON=True, 9 | RESNETS=dict(DEPTH=101), 10 | ROI_HEADS=dict(NUM_CLASSES=8), 11 | BOUNDARY_MASK_HEAD=dict( 12 | OUTPUT_RATIO=1, 13 | POOLER_RESOLUTION=28, 14 | IN_FEATURES=["p2"], 15 | NUM_CONV=2), 16 | ROI_MASK_HEAD=dict( 17 | CEMODULE=dict( 18 | NUM_CONV=2, 19 | PLANES=256, 20 | DCN_ON=True, 21 | DCN_V2=True, 22 | NUM_EDGE_CONV=2, 23 | FUSE_MODE="Add", 24 | WITH_EDGE_REFINE=True, 25 | NORM='GN', 26 | KERNEL_SIZE=5 27 | ), 28 | LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0] 29 | ), 30 | ), 31 | DATASETS=dict( 32 | TRAIN=("cityscapes_fine_instance_seg_train",), 33 | TEST=("cityscapes_fine_instance_seg_val",), 34 | ), 35 | SOLVER=dict( 36 | IMS_PER_BATCH=8, 37 | IMS_PER_DEVICE=1, 38 | LR_SCHEDULER=dict( 39 | STEPS=(18000,), 40 | MAX_ITER=24000, 41 | ), 42 | OPTIMIZER=dict( 43 | BASE_LR=0.01, 44 | ), 45 | CHECKPOINT_PERIOD=8000, 46 | ), 47 | INPUT=dict( 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("ResizeShortestEdge", dict( 51 | short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024), 52 | max_size=2048, sample_style="choice")), 53 | ("RandomFlip", dict()), 54 | ], 55 | TEST_PIPELINES=[ 56 | ("ResizeShortestEdge", dict( 57 | short_edge_length=1024, max_size=2048, sample_style="choice")), 58 | ], 59 | ), 60 | ), 61 | TEST=dict( 62 | EVAL_PEROID=10000, 63 | ), 64 | OUTPUT_DIR="output" 65 | ) 66 | 67 | 68 | class FasterRCNNConfig(RCNNFPNConfig): 69 | def __init__(self): 70 | super(FasterRCNNConfig, self).__init__() 71 | self._register_configuration(_config_dict) 72 | 73 | 74 | config = FasterRCNNConfig() 75 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone 4 | from cvpods.modeling.proposal_generator import RPN 5 | from box_head import FastRCNNConvFCHead 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead 7 | from rcnn import GeneralizedRCNN 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | if input_shape is None: 12 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 13 | backbone = 
build_resnet_fpn_backbone(cfg, input_shape) 14 | assert isinstance(backbone, Backbone) 15 | return backbone 16 | 17 | 18 | def build_proposal_generator(cfg, input_shape): 19 | return RPN(cfg, input_shape) 20 | 21 | 22 | def build_roi_heads(cfg, input_shape): 23 | return BoundaryROIHeads(cfg, input_shape) 24 | 25 | 26 | def build_box_head(cfg, input_shape): 27 | return FastRCNNConvFCHead(cfg, input_shape) 28 | 29 | 30 | def build_mask_head(cfg, input_shape): 31 | return DecoupledBoundaryMaskHead(cfg, input_shape) 32 | 33 | 34 | def build_model(cfg): 35 | cfg.build_backbone = build_backbone 36 | cfg.build_proposal_generator = build_proposal_generator 37 | cfg.build_roi_heads = build_roi_heads 38 | cfg.build_box_head = build_box_head 39 | cfg.build_mask_head = build_mask_head 40 | 41 | model = GeneralizedRCNN(cfg) 42 | return model 43 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/mnt/lustre/share_data/chengguangliang/hehao/CEInst_checkpoint/r50_3x.pth", 8 | MASK_ON=True, 9 | RESNETS=dict(DEPTH=50), 10 | ROI_HEADS=dict(NUM_CLASSES=8), 11 | BOUNDARY_MASK_HEAD=dict( 12 | OUTPUT_RATIO=1, 13 | POOLER_RESOLUTION=28, 14 | IN_FEATURES=["p2"], 15 | NUM_CONV=2), 16 | ROI_MASK_HEAD=dict( 17 | CEMODULE=dict( 18 | NUM_CONV=2, 19 | PLANES=256, 20 | DCN_ON=True, 21 | DCN_V2=True, 22 | NUM_EDGE_CONV=2, 23 | FUSE_MODE="Add", 24 | WITH_EDGE_REFINE=True, 25 | NORM='GN', 26 | KERNEL_SIZE=5 27 | ), 28 | LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0] 29 | ), 30 | ), 31 | DATASETS=dict( 32 | TRAIN=("cityscapes_fine_instance_seg_train",), 33 | TEST=("cityscapes_fine_instance_seg_val",), 34 | ), 35 | SOLVER=dict( 36 | IMS_PER_BATCH=8, 37 | IMS_PER_DEVICE=1, 38 | LR_SCHEDULER=dict( 39 | STEPS=(18000,), 40 | MAX_ITER=24000, 41 | ), 42 | OPTIMIZER=dict( 43 | BASE_LR=0.01, 44 | ), 45 | CHECKPOINT_PERIOD=8000, 46 | ), 47 | INPUT=dict( 48 | AUG=dict( 49 | TRAIN_PIPELINES=[ 50 | ("ResizeShortestEdge", dict( 51 | short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024), 52 | max_size=2048, sample_style="choice")), 53 | ("RandomFlip", dict()), 54 | ], 55 | TEST_PIPELINES=[ 56 | ("ResizeShortestEdge", dict( 57 | short_edge_length=1024, max_size=2048, sample_style="choice")), 58 | ], 59 | ), 60 | ), 61 | TEST=dict( 62 | EVAL_PEROID=10000, 63 | ), 64 | OUTPUT_DIR="output" 65 | ) 66 | 67 | 68 | class FasterRCNNConfig(RCNNFPNConfig): 69 | def __init__(self): 70 | super(FasterRCNNConfig, self).__init__() 71 | self._register_configuration(_config_dict) 72 | 73 | 74 | config = FasterRCNNConfig() 75 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone 4 | from cvpods.modeling.proposal_generator import RPN 5 | from box_head import FastRCNNConvFCHead 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead 7 | from rcnn import GeneralizedRCNN 8 | 9 | 10 | 
def build_backbone(cfg, input_shape=None): 11 | if input_shape is None: 12 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 13 | backbone = build_resnet_fpn_backbone(cfg, input_shape) 14 | assert isinstance(backbone, Backbone) 15 | return backbone 16 | 17 | 18 | def build_proposal_generator(cfg, input_shape): 19 | return RPN(cfg, input_shape) 20 | 21 | 22 | def build_roi_heads(cfg, input_shape): 23 | return BoundaryROIHeads(cfg, input_shape) 24 | 25 | 26 | def build_box_head(cfg, input_shape): 27 | return FastRCNNConvFCHead(cfg, input_shape) 28 | 29 | 30 | def build_mask_head(cfg, input_shape): 31 | return DecoupledBoundaryMaskHead(cfg, input_shape) 32 | 33 | 34 | def build_model(cfg): 35 | cfg.build_backbone = build_backbone 36 | cfg.build_proposal_generator = build_proposal_generator 37 | cfg.build_roi_heads = build_roi_heads 38 | cfg.build_box_head = build_box_head 39 | cfg.build_mask_head = build_mask_head 40 | 41 | model = GeneralizedRCNN(cfg) 42 | return model 43 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_rx101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/mnt/lustre/share_data/chengguangliang/hehao/CEInst_checkpoint/rx101_3x.pth", 8 | MASK_ON=True, 9 | RESNETS=dict( 10 | DEPTH=101, 11 | NUM_GROUPS=64, 12 | WIDTH_PER_GROUP=4, 13 | STRIDE_IN_1X1=False), 14 | ROI_HEADS=dict(NUM_CLASSES=8), 15 | BOUNDARY_MASK_HEAD=dict( 16 | OUTPUT_RATIO=1, 17 | POOLER_RESOLUTION=28, 18 | IN_FEATURES=["p2"], 19 | NUM_CONV=2), 20 | ROI_MASK_HEAD=dict( 21 | CEMODULE=dict( 22 | NUM_CONV=2, 23 | PLANES=256, 24 | DCN_ON=True, 25 | DCN_V2=True, 26 | NUM_EDGE_CONV=2, 27 | FUSE_MODE="Add", 28 | WITH_EDGE_REFINE=True, 29 | NORM='GN', 30 | KERNEL_SIZE=5 31 | ), 32 | LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0] 33 | ), 34 | ), 35 | DATASETS=dict( 36 | TRAIN=("cityscapes_fine_instance_seg_train",), 37 | TEST=("cityscapes_fine_instance_seg_val",), 38 | ), 39 | SOLVER=dict( 40 | IMS_PER_BATCH=8, 41 | IMS_PER_DEVICE=1, 42 | LR_SCHEDULER=dict( 43 | STEPS=(18000,), 44 | MAX_ITER=24000, 45 | ), 46 | OPTIMIZER=dict( 47 | BASE_LR=0.01, 48 | ), 49 | CHECKPOINT_PERIOD=8000, 50 | ), 51 | INPUT=dict( 52 | AUG=dict( 53 | TRAIN_PIPELINES=[ 54 | ("ResizeShortestEdge", dict( 55 | short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024), 56 | max_size=2048, sample_style="choice")), 57 | ("RandomFlip", dict()), 58 | ], 59 | TEST_PIPELINES=[ 60 | ("ResizeShortestEdge", dict( 61 | short_edge_length=1024, max_size=2048, sample_style="choice")), 62 | ], 63 | ), 64 | ), 65 | TEST=dict( 66 | EVAL_PEROID=10000, 67 | ), 68 | OUTPUT_DIR="output" 69 | ) 70 | 71 | 72 | class FasterRCNNConfig(RCNNFPNConfig): 73 | def __init__(self): 74 | super(FasterRCNNConfig, self).__init__() 75 | self._register_configuration(_config_dict) 76 | 77 | 78 | config = FasterRCNNConfig() 79 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_rx101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone.fpn import 
build_resnet_fpn_backbone 4 | from cvpods.modeling.proposal_generator import RPN 5 | from box_head import FastRCNNConvFCHead 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead 7 | from rcnn import GeneralizedRCNN 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | if input_shape is None: 12 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 13 | backbone = build_resnet_fpn_backbone(cfg, input_shape) 14 | assert isinstance(backbone, Backbone) 15 | return backbone 16 | 17 | 18 | def build_proposal_generator(cfg, input_shape): 19 | return RPN(cfg, input_shape) 20 | 21 | 22 | def build_roi_heads(cfg, input_shape): 23 | return BoundaryROIHeads(cfg, input_shape) 24 | 25 | 26 | def build_box_head(cfg, input_shape): 27 | return FastRCNNConvFCHead(cfg, input_shape) 28 | 29 | 30 | def build_mask_head(cfg, input_shape): 31 | return DecoupledBoundaryMaskHead(cfg, input_shape) 32 | 33 | 34 | def build_model(cfg): 35 | cfg.build_backbone = build_backbone 36 | cfg.build_proposal_generator = build_proposal_generator 37 | cfg.build_roi_heads = build_roi_heads 38 | cfg.build_box_head = build_box_head 39 | cfg.build_mask_head = build_mask_head 40 | 41 | model = GeneralizedRCNN(cfg) 42 | return model 43 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_R101X/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.pointrend_config import PointRendRCNNFPNConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/X-101-64x4d.pkl", 8 | MASK_ON=True, 9 | RESNETS=dict( 10 | DEPTH=101, 11 | NUM_GROUPS=64, 12 | WIDTH_PER_GROUP=4, 13 | STRIDE_IN_1X1=False), 14 | ROI_HEADS=dict(NUM_CLASSES=8), 15 | POINT_HEAD=dict( 16 | NUM_CLASSES=8, 17 | ), 18 | ), 19 | DATASETS=dict( 20 | TRAIN=("cityscapes_fine_instance_seg_train",), 21 | TEST=("cityscapes_fine_instance_seg_test",), 22 | ), 23 | SOLVER=dict( 24 | IMS_PER_BATCH=8, 25 | IMS_PER_DEVICE=1, 26 | LR_SCHEDULER=dict( 27 | STEPS=(18000,), 28 | MAX_ITER=24000, 29 | ), 30 | OPTIMIZER=dict( 31 | BASE_LR=0.01, 32 | ), 33 | CHECKPOINT_PERIOD=8000, 34 | ), 35 | INPUT=dict( 36 | AUG=dict( 37 | TRAIN_PIPELINES=[ 38 | ("ResizeShortestEdge", dict( 39 | short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024), 40 | max_size=2048, sample_style="choice")), 41 | ("RandomFlip", dict()), 42 | ], 43 | TEST_PIPELINES=[ 44 | ("ResizeShortestEdge", dict( 45 | short_edge_length=1024, max_size=2048, sample_style="choice")), 46 | ], 47 | ), 48 | ), 49 | TEST=dict( 50 | EVAL_PEROID=10000, 51 | ), 52 | OUTPUT_DIR="output" 53 | ) 54 | 55 | 56 | class PointRendRCNNConfig(PointRendRCNNFPNConfig): 57 | def __init__(self): 58 | super(PointRendRCNNConfig, self).__init__() 59 | self._register_configuration(_config_dict) 60 | 61 | 62 | config = PointRendRCNNConfig() 63 | -------------------------------------------------------------------------------- /playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_R101X/net.py: -------------------------------------------------------------------------------- 1 | from cvpods.layers import ShapeSpec 2 | from cvpods.modeling.backbone import Backbone 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone 4 | from cvpods.modeling.proposal_generator import RPN 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead 6 | from 
--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_R101X/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
from cvpods.modeling.meta_arch.pointrend import PointRendROIHeads, CoarseMaskHead, StandardPointHead


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return PointRendROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_point_head(cfg, input_shape):
    return StandardPointHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return CoarseMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head
    cfg.build_point_head = build_point_head

    model = GeneralizedRCNN(cfg)

    return model
--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r101/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.pointrend_config import PointRendRCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-101.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=101),
        ROI_HEADS=dict(NUM_CLASSES=8),
        POINT_HEAD=dict(
            NUM_CLASSES=8,
        ),
    ),
    DATASETS=dict(
        TRAIN=("cityscapes_fine_instance_seg_train",),
        TEST=("cityscapes_fine_instance_seg_test",),
    ),
    SOLVER=dict(
        IMS_PER_BATCH=8,
        IMS_PER_DEVICE=1,
        LR_SCHEDULER=dict(
            STEPS=(18000,),
            MAX_ITER=24000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.01,
        ),
        CHECKPOINT_PERIOD=8000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
                    max_size=2048, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=1024, max_size=2048, sample_style="choice")),
            ],
        ),
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class PointRendRCNNConfig(PointRendRCNNFPNConfig):
    def __init__(self):
        super(PointRendRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = PointRendRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r101/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
from cvpods.modeling.meta_arch.pointrend import PointRendROIHeads, CoarseMaskHead, StandardPointHead


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return PointRendROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_point_head(cfg, input_shape):
    return StandardPointHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return CoarseMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head
    cfg.build_point_head = build_point_head

    model = GeneralizedRCNN(cfg)

    return model
--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r50/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.pointrend_config import PointRendRCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=50),
        ROI_HEADS=dict(NUM_CLASSES=8),
        POINT_HEAD=dict(
            NUM_CLASSES=8,
        ),
    ),
    DATASETS=dict(
        TRAIN=("cityscapes_fine_instance_seg_train",),
        TEST=("cityscapes_fine_instance_seg_test",),
    ),
    SOLVER=dict(
        IMS_PER_BATCH=8,
        IMS_PER_DEVICE=1,
        LR_SCHEDULER=dict(
            STEPS=(18000,),
            MAX_ITER=24000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.01,
        ),
        CHECKPOINT_PERIOD=8000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
                    max_size=2048, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=1024, max_size=2048, sample_style="choice")),
            ],
        ),
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class PointRendRCNNConfig(PointRendRCNNFPNConfig):
    def __init__(self):
        super(PointRendRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = PointRendRCNNConfig()
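All of these Cityscapes configs train with multi-scale augmentation: with `sample_style="choice"`, one short-edge length is drawn from the listed values for each training image, while the test pipeline pins the short edge to 1024. A rough illustration of what choice-style sampling amounts to (my paraphrase of the transform's behaviour, not cvpods code):

```
# Illustrative sketch only: "choice"-style multi-scale resizing.
import random

SHORT_EDGES = (800, 832, 864, 896, 928, 960, 992, 1024)

def sample_train_short_edge():
    # one scale is chosen uniformly at random per image;
    # the resized long edge is additionally capped at max_size=2048
    return random.choice(SHORT_EDGES)
```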
--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r50/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
from cvpods.modeling.meta_arch.pointrend import PointRendROIHeads, CoarseMaskHead, StandardPointHead


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return PointRendROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_point_head(cfg, input_shape):
    return StandardPointHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return CoarseMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head
    cfg.build_point_head = build_point_head

    model = GeneralizedRCNN(cfg)

    return model
--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res101_fpn_coco_ms/README.md:
--------------------------------------------------------------------------------
# Mask-RCNN
## Evaluation results for bbox:
```
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.655
```

--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res101_fpn_coco_ms/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-101.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=101),
        ROI_HEADS=dict(NUM_CLASSES=8),
    ),
    DATASETS=dict(
        TRAIN=("cityscapes_fine_instance_seg_train",),
        TEST=("cityscapes_fine_instance_seg_test",),
    ),
    SOLVER=dict(
        IMS_PER_BATCH=8,
        IMS_PER_DEVICE=1,
        LR_SCHEDULER=dict(
            STEPS=(18000,),
            MAX_ITER=24000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.01,
        ),
        CHECKPOINT_PERIOD=8000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
                    max_size=2048, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=1024, max_size=2048, sample_style="choice")),
            ],
        ),
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output",
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res101_fpn_coco_ms/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
from cvpods.modeling.roi_heads import StandardROIHeads
from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return StandardROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return MaskRCNNConvUpsampleHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res50_fpn_coco_ms/README.md:
--------------------------------------------------------------------------------
# Mask-RCNN
## Evaluation results for bbox:
```
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.655
```

--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res50_fpn_coco_ms/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=50),
        ROI_HEADS=dict(NUM_CLASSES=8),
    ),
    DATASETS=dict(
        TRAIN=("cityscapes_fine_instance_seg_train",),
        TEST=("cityscapes_fine_instance_seg_test",),
    ),
    SOLVER=dict(
        IMS_PER_BATCH=8,
        IMS_PER_DEVICE=1,
        LR_SCHEDULER=dict(
            STEPS=(18000,),
            MAX_ITER=24000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.01,
        ),
        CHECKPOINT_PERIOD=8000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
                    max_size=2048, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=1024, max_size=2048, sample_style="choice")),
            ],
        ),
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output",
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res50_fpn_coco_ms/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
from cvpods.modeling.roi_heads import StandardROIHeads
from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return StandardROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return MaskRCNNConvUpsampleHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_rx101_fpn_coco_ms/README.md:
--------------------------------------------------------------------------------
# Mask-RCNN
## Evaluation results for bbox:
```
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.655
```

--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_rx101_fpn_coco_ms/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/X-101-64x4d.pkl",
        MASK_ON=True,
        RESNETS=dict(
            DEPTH=101,
            NUM_GROUPS=64,
            WIDTH_PER_GROUP=4,
            STRIDE_IN_1X1=False),
        ROI_HEADS=dict(NUM_CLASSES=8),
    ),
    DATASETS=dict(
        TRAIN=("cityscapes_fine_instance_seg_train",),
        TEST=("cityscapes_fine_instance_seg_test",),
    ),
    SOLVER=dict(
        IMS_PER_BATCH=8,
        IMS_PER_DEVICE=1,
        LR_SCHEDULER=dict(
            STEPS=(18000,),
            MAX_ITER=24000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.01,
        ),
        CHECKPOINT_PERIOD=8000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
                    max_size=2048, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge", dict(
                    short_edge_length=1024, max_size=2048, sample_style="choice")),
            ],
        ),
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output",
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_rx101_fpn_coco_ms/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
from cvpods.modeling.roi_heads import StandardROIHeads
from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return StandardROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return MaskRCNNConvUpsampleHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r101_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-101.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=101),
        BOUNDARY_MASK_HEAD=dict(
            OUTPUT_RATIO=1,
            POOLER_RESOLUTION=28,
            IN_FEATURES=["p2"],
            NUM_CONV=2),
        ROI_MASK_HEAD=dict(
            CEMODULE=dict(
                NUM_CONV=2,
                PLANES=256,
                DCN_ON=True,
                DCN_V2=True,
                NUM_EDGE_CONV=2,
                FUSE_MODE="Add",
                WITH_EDGE_REFINE=True,
                NORM='GN',
                KERNEL_SIZE=5
            ),
            LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
        ),
    ),
    DATASETS=dict(
        TRAIN=("coco_2017_train",),
        TEST=("coco_2017_val",),
    ),
    SOLVER=dict(
        LR_SCHEDULER=dict(
            STEPS=(60000, 80000),
            MAX_ITER=90000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.02,
        ),
        IMS_PER_BATCH=16,
        IMS_PER_DEVICE=2,
        CHECKPOINT_PERIOD=30000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=(640, 672, 704, 736, 768, 800),
                      max_size=1333, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=800, max_size=1333, sample_style="choice")),
            ],
        )
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r101_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from box_head import FastRCNNConvFCHead
from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
from rcnn import GeneralizedRCNN


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return BoundaryROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return DecoupledBoundaryMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
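Every playground `net.py` follows the pattern above: attach `build_*` callbacks to the config object, then let `GeneralizedRCNN` pull its components from it. A hypothetical wiring check, assuming cvpods is installed and the interpreter runs inside this playground directory so the local `box_head`, `boundary_mask_rcnn`, and `rcnn` modules resolve:

```
# Sketch only: exercise the build_model() wiring defined above.
from config import config      # FasterRCNNConfig instance from config.py
from net import build_model

model = build_model(config)    # attaches builders, constructs GeneralizedRCNN
print(type(model).__name__)    # -> "GeneralizedRCNN"
```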
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/data/data1/pretrained/R-50.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=50),
        BOUNDARY_MASK_HEAD=dict(
            OUTPUT_RATIO=1,
            POOLER_RESOLUTION=28,
            IN_FEATURES=["p2"],
            NUM_CONV=2),
        ROI_MASK_HEAD=dict(
            CEMODULE=dict(
                NUM_CONV=2,
                PLANES=256,
                DCN_ON=True,
                DCN_V2=True,
                NUM_EDGE_CONV=2,
                FUSE_MODE="Add",
                WITH_EDGE_REFINE=True,
                NORM='GN',
                KERNEL_SIZE=5
            ),
            LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
        ),
    ),
    DATASETS=dict(
        TRAIN=("coco_2017_train",),
        TEST=("coco_2017_val",),
    ),
    SOLVER=dict(
        LR_SCHEDULER=dict(
            STEPS=(60000, 80000),
            MAX_ITER=90000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.02,
        ),
        IMS_PER_BATCH=16,
        IMS_PER_DEVICE=2,
        CHECKPOINT_PERIOD=30000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=(640, 672, 704, 736, 768, 800),
                      max_size=1333, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=800, max_size=1333, sample_style="choice")),
            ],
        )
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from box_head import FastRCNNConvFCHead
from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
from rcnn import GeneralizedRCNN


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return BoundaryROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return DecoupledBoundaryMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=50),
        BOUNDARY_MASK_HEAD=dict(
            OUTPUT_RATIO=1,
            POOLER_RESOLUTION=28,
            IN_FEATURES=["p2"],
            NUM_CONV=2),
        ROI_BOX_HEAD=dict(
            CLS_AGNOSTIC_BBOX_REG=True,
        ),
        ROI_MASK_HEAD=dict(
            CEMODULE=dict(
                NUM_CONV=2,
                PLANES=256,
                DCN_ON=True,
                DCN_V2=True,
                NUM_EDGE_CONV=2,
                FUSE_MODE="Add",
                WITH_EDGE_REFINE=True,
                NORM='GN',
                KERNEL_SIZE=5
            ),
            LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
        ),
    ),
    DATASETS=dict(
        TRAIN=("coco_2017_train",),
        TEST=("coco_2017_val",),
    ),
    SOLVER=dict(
        LR_SCHEDULER=dict(
            STEPS=(60000, 80000),
            MAX_ITER=90000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.02,
        ),
        IMS_PER_BATCH=16,
        IMS_PER_DEVICE=2,
        CHECKPOINT_PERIOD=30000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=(640, 672, 704, 736, 768, 800),
                      max_size=1333, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=800,
                      max_size=1333, sample_style="choice")),
            ],
        )
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from box_head import FastRCNNConvFCHead
from boundary_mask_rcnn import CascadeBoundaryROIHeads, DecoupledBoundaryMaskHead
from rcnn import GeneralizedRCNN


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return CascadeBoundaryROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return DecoupledBoundaryMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
        MASK_ON=True,
        RESNETS=dict(DEPTH=50),
        BOUNDARY_MASK_HEAD=dict(
            OUTPUT_RATIO=1,
            POOLER_RESOLUTION=28,
            IN_FEATURES=["p2"],
            NUM_CONV=2),
        ROI_BOX_HEAD=dict(
            CLS_AGNOSTIC_BBOX_REG=True,
        ),
        ROI_MASK_HEAD=dict(
            CEMODULE=dict(
                NUM_CONV=2,
                PLANES=256,
                DCN_ON=True,
                DCN_V2=True,
                NUM_EDGE_CONV=2,
                FUSE_MODE="Add",
                WITH_EDGE_REFINE=True,
                NORM='GN',
                KERNEL_SIZE=5
            ),
            LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
        ),
    ),
    DATASETS=dict(
        TRAIN=("coco_2017_train",),
        TEST=("coco_2017_val",),
    ),
    SOLVER=dict(
        LR_SCHEDULER=dict(
            STEPS=(210000, 250000),
            MAX_ITER=270000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.02,
        ),
        IMS_PER_BATCH=16,
        IMS_PER_DEVICE=2,
        CHECKPOINT_PERIOD=30000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=(640, 672, 704, 736, 768,
                                         800),
                      max_size=1333, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=800, max_size=1333, sample_style="choice")),
            ],
        )
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from box_head import FastRCNNConvFCHead
from boundary_mask_rcnn import CascadeBoundaryROIHeads, DecoupledBoundaryMaskHead
from rcnn import GeneralizedRCNN


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return CascadeBoundaryROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return DecoupledBoundaryMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
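A quick sanity check on the schedule lengths in these COCO configs, using the `..._ms_3x_...` cascade config above (arithmetic only; COCO train2017 has roughly 118k images):

```
# 1x configs:  90,000 iters × 16 images/batch ≈ 1.44M images ≈ 12 epochs,
#              with LR drops at 60k/80k (2/3 and 8/9 of the run)
# 3x config:  270,000 iters × 16 images/batch ≈ 4.32M images ≈ 36 epochs,
#              with LR drops at 210k/250k
```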
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi/config.py:
--------------------------------------------------------------------------------
import os.path as osp

from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig

_config_dict = dict(
    MODEL=dict(
        WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/X-101-64x4d.pkl",
        MASK_ON=True,
        RESNETS=dict(
            DEPTH=101,
            NUM_GROUPS=64,
            WIDTH_PER_GROUP=4,
            STRIDE_IN_1X1=False),
        BOUNDARY_MASK_HEAD=dict(
            OUTPUT_RATIO=1,
            POOLER_RESOLUTION=28,
            IN_FEATURES=["p2"],
            NUM_CONV=2),
        ROI_BOX_HEAD=dict(
            CLS_AGNOSTIC_BBOX_REG=True,
        ),
        ROI_MASK_HEAD=dict(
            CEMODULE=dict(
                NUM_CONV=2,
                PLANES=256,
                DCN_ON=True,
                DCN_V2=True,
                NUM_EDGE_CONV=2,
                FUSE_MODE="Add",
                WITH_EDGE_REFINE=True,
                NORM='GN',
                KERNEL_SIZE=5
            ),
            LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
        ),
    ),
    DATASETS=dict(
        TRAIN=("coco_2017_train",),
        TEST=("coco_2017_val",),
    ),
    SOLVER=dict(
        LR_SCHEDULER=dict(
            STEPS=(120000, 140000),
            MAX_ITER=150000,
        ),
        OPTIMIZER=dict(
            BASE_LR=0.02,
        ),
        IMS_PER_BATCH=16,
        IMS_PER_DEVICE=2,
        CHECKPOINT_PERIOD=50000,
    ),
    INPUT=dict(
        AUG=dict(
            TRAIN_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=(416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832, 864,
                                         896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248,
                                         1280, 1312, 1344, 1376),
                      max_size=1600, sample_style="choice")),
                ("RandomFlip", dict()),
            ],
            TEST_PIPELINES=[
                ("ResizeShortestEdge",
                 dict(short_edge_length=800, max_size=1333, sample_style="choice")),
            ],
        )
    ),
    TEST=dict(
        EVAL_PERIOD=10000,
    ),
    OUTPUT_DIR="output"
)


class FasterRCNNConfig(RCNNFPNConfig):
    def __init__(self):
        super(FasterRCNNConfig, self).__init__()
        self._register_configuration(_config_dict)


config = FasterRCNNConfig()
--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi/net.py:
--------------------------------------------------------------------------------
from cvpods.layers import ShapeSpec
from cvpods.modeling.backbone import Backbone
from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
from cvpods.modeling.proposal_generator import RPN
from box_head import FastRCNNConvFCHead
from boundary_mask_rcnn import CascadeBoundaryROIHeads, DecoupledBoundaryMaskHead
from rcnn import GeneralizedRCNN


def build_backbone(cfg, input_shape=None):
    if input_shape is None:
        input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
    backbone = build_resnet_fpn_backbone(cfg, input_shape)
    assert isinstance(backbone, Backbone)
    return backbone


def build_proposal_generator(cfg, input_shape):
    return RPN(cfg, input_shape)


def build_roi_heads(cfg, input_shape):
    return CascadeBoundaryROIHeads(cfg, input_shape)


def build_box_head(cfg, input_shape):
    return FastRCNNConvFCHead(cfg, input_shape)


def build_mask_head(cfg, input_shape):
    return DecoupledBoundaryMaskHead(cfg, input_shape)


def build_model(cfg):
    cfg.build_backbone = build_backbone
    cfg.build_proposal_generator = build_proposal_generator
    cfg.build_roi_heads = build_roi_heads
    cfg.build_box_head = build_box_head
    cfg.build_mask_head = build_mask_head

    model = GeneralizedRCNN(cfg)
    return model
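These config/net pairs are not run as standalone scripts; in the cvpods workflow, training is typically launched from inside a playground directory via cvpods' launcher. A hypothetical invocation (as I understand cvpods' tooling; the entry point and flags may differ by version):

```
cd playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi
pods_train --num-gpus 8
```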
--------------------------------------------------------------------------------
/tools/cat_visualizer_results.py:
--------------------------------------------------------------------------------
import os
import os.path as osp
import cv2
import argparse


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--file1', type=str, required=True)
    parser.add_argument('--file2', type=str, required=True)
    parser.add_argument('--save_file', type=str, required=True)

    args = parser.parse_args()

    return args


def main():

    args = get_args()
    sub_file_names = os.listdir(args.file1)
    os.makedirs(args.save_file, exist_ok=True)
    for sub_file in sub_file_names:
        if sub_file == '.DS_Store':
            continue
        images = os.listdir(osp.join(args.file1, sub_file))
        for image in images:
            if image == '.DS_Store':
                continue
            save_image_name = osp.join(args.save_file, sub_file+'_'+image)
            img1 = cv2.imread(osp.join(args.file1, sub_file, image))
            img2 = cv2.imread(osp.join(args.file2, sub_file, image))
            img = cv2.hconcat((img1, img2))
            cv2.imwrite(save_image_name, img)
            print(f'{image} done')
        print(f'{sub_file} done')


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/tools/convert_detr2cvpod.py:
--------------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
Helper script to convert models trained with the main version of DETR to be used with the cvpods version.
"""
import json
import argparse

import numpy as np
import torch


def parse_args():
    parser = argparse.ArgumentParser("D2 model converter")

    parser.add_argument("--source_model", default="", type=str, help="Path or url to the DETR model to convert")
    parser.add_argument("--output_model", default="", type=str, help="Path where to save the converted model")
    return parser.parse_args()


def main():
    args = parse_args()

    # D2 expects contiguous classes, so we need to remap the 92 classes from DETR
    # fmt: off
    coco_idx = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
                27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51,
                52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77,
                78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91]
    # fmt: on

    coco_idx = np.array(coco_idx)

    if args.source_model.startswith("https"):
        checkpoint = torch.hub.load_state_dict_from_url(args.source_model, map_location="cpu", check_hash=True)
    else:
        checkpoint = torch.load(args.source_model, map_location="cpu")
    model_to_convert = checkpoint["model"]

    model_converted = {}
    for k in model_to_convert.keys():
        old_k = k
        if "backbone" in k:
            k = k.replace("backbone.0.body.", "")
            if "layer" not in k:
                k = "stem." + k
            for t in [1, 2, 3, 4]:
                k = k.replace(f"layer{t}", f"res{t + 1}")
            for t in [1, 2, 3]:
                k = k.replace(f"bn{t}", f"conv{t}.norm")
            k = k.replace("downsample.0", "shortcut")
            k = k.replace("downsample.1", "shortcut.norm")
            k = "backbone.0.backbone." + k
        k = "detr."
 + k
        print(old_k, "->", k)
        if "class_embed" in old_k:
            v = model_to_convert[old_k].detach()
            if v.shape[0] == 92:
                shape_old = v.shape
                model_converted[k] = v[coco_idx]
                print("Head conversion: changing shape from {} to {}".format(shape_old, model_converted[k].shape))
                continue
        model_converted[k] = model_to_convert[old_k].detach()

    model_to_save = {"model": model_converted}
    torch.save(model_to_save, args.output_model)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/tools/convert_to_d2.py:
--------------------------------------------------------------------------------
import argparse

import torch


def parse_args():
    parser = argparse.ArgumentParser("D2 model converter")

    parser.add_argument("--source_model", default="", type=str, help="Path or url to the model to convert")
    parser.add_argument("--output_model", default="", type=str, help="Path where to save the converted model")
    return parser.parse_args()


def main():
    args = parse_args()

    source_weights = torch.load(args.source_model)["model"]
    converted_weights = {}
    keys = list(source_weights.keys())

    prefix = 'backbone.bottom_up.'
    for key in keys:
        converted_weights[prefix + key] = source_weights[key]

    torch.save(converted_weights, args.output_model)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/tools/draw_teaser.py:
--------------------------------------------------------------------------------
# if __name__ == '__main__':
import numpy as np
import cv2
import torch
import torch.nn.functional as F
from PIL import Image

img_path = '/Users/hhe/research/mm2021_cenet/teaser/test_teaser.jpg'

mask = Image.open(img_path)
mask = np.array(mask)
mask[mask > 127] = 255
mask[mask <= 127] = 0
mask[mask == 255] = 1
mask = mask.astype(np.float32)
laplacian_kernel = torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False)
mask = torch.from_numpy(mask)
mask = mask.unsqueeze(0)
boundary_masks = F.conv2d(mask.unsqueeze(1), laplacian_kernel, padding=1)
boundary_masks = boundary_masks.clamp(min=0)
boundary_masks[boundary_masks > 0.1] = 1
boundary_masks[boundary_masks <= 0.1] = 0

boundary_masks = boundary_masks.squeeze().cpu().numpy().astype('uint8')
mask = mask.squeeze()
mask = mask.cpu().numpy().astype('uint8')
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))

boundary_valid = boundary_masks == 1
dilate = cv2.dilate(mask, kernel=kernel)
contraction_mask = dilate - mask
contraction_mask[boundary_valid] = 1
contraction_mask_rgb = np.zeros([*contraction_mask.shape, 3])
contraction_mask_rgb[contraction_mask == 1] = [0, 0, 255]

erode = cv2.erode(mask, kernel=kernel)
expansion_mask = mask - erode
expansion_mask[boundary_valid] = 1
expansion_mask_rgb = np.zeros([*expansion_mask.shape, 3])
expansion_mask_rgb[expansion_mask == 1] = [255, 0, 0]

boundary_masks[boundary_masks == 1] = 255

contour, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
boundary = np.zeros_like(mask)
boundary = cv2.drawContours(boundary, contour, -1, 1, 2)
boundary = boundary.astype(np.float32)  # np.float was removed from modern NumPy
boundary[boundary == 1] = 255
cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/contraction.jpg', contraction_mask_rgb)
cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/expansion.jpg', expansion_mask_rgb)
cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/boundary2.jpg', boundary_masks)
cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/boundary.jpg', boundary)
--------------------------------------------------------------------------------
/tools/image2gif.py:
--------------------------------------------------------------------------------
import argparse
import os
import os.path as osp
import imageio
import cv2

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--image_path', required=True)
    parser.add_argument('--save_path', required=True)
    parser.add_argument('--img_height', type=int, default=360)
    parser.add_argument('--img_width', type=int, default=640)
    parser.add_argument('--fps', default=10, type=int)

    args = parser.parse_args()

    return args

def main():
    args = get_args()

    images = os.listdir(args.image_path)
    images = sorted(images)
    gif_images = []
    for image in images:
        if image == '.DS_Store':
            continue
        img = imageio.imread(osp.join(args.image_path, image))
        img = cv2.resize(img, (args.img_width, args.img_height))
        gif_images.append(img)
    imageio.mimsave(args.save_path, gif_images, fps=args.fps)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/tools/rm_files.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3
# -*- coding:utf-8 -*-

import argparse
import os
import re
from colorama import Fore, Style


def remove_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("--start-iter", "-s", type=int, default=0, help="start iter to remove")
    parser.add_argument("--end-iter", "-e", type=int, default=0, help="end iter to remove")
    parser.add_argument("--prefix", "-p", type=str, default="model_",
                        help="prefix of model to remove")
    parser.add_argument("--dir", "-d", type=str, default="/data/Outputs",
                        help="dir to remove pth model")
    parser.add_argument("--real", "-r", action="store_true",
                        help="really delete or just show what you will delete")
    return parser


def remove_files(args):
    start = args.start_iter
    end = args.end_iter
    prefix = args.prefix
    for folder, _, files in os.walk(args.dir):
        # l = [x for x in f if x.endswith(".pth")]
        models = [f for f in files if re.search(prefix + r"[0123456789]*\.pth", f)]
        delete = [os.path.join(folder, model) for model in models
                  if start <= int(model[len(prefix):-len(".pth")]) <= end]
        if delete:
            for f in delete:
                if args.real:
                    print(f"remove {f}")
                    os.remove(f)
                else:
                    print(f"you may remove {f}")
    if not args.real:
        print(Fore.RED + "use --real parameter to really delete models" + Style.RESET_ALL)


def main():
    args = remove_parser().parse_args()
    remove_files(args)


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
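For reference, hypothetical invocations of the utility scripts above, inferred from their argparse definitions (all paths are placeholders):

```
# side-by-side comparison grids from two visualization folders
python tools/cat_visualizer_results.py --file1 vis_a --file2 vis_b --save_file vis_cat

# turn a sorted folder of frames into a GIF
python tools/image2gif.py --image_path frames/ --save_path out.gif --fps 10

# dry-run, then actually delete model_*.pth checkpoints between iters 10000 and 80000
python tools/rm_files.py -s 10000 -e 80000 -d /data/Outputs
python tools/rm_files.py -s 10000 -e 80000 -d /data/Outputs --real
```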