├── .gitignore
├── LICENSE
├── README.md
├── cvpods
    ├── __init__.py
    ├── checkpoint
    │   ├── __init__.py
    │   ├── catalog.py
    │   ├── checkpoint.py
    │   ├── detection_checkpoint.py
    │   └── model_loading.py
    ├── configs
    │   ├── base_classification_config.py
    │   ├── base_config.py
    │   ├── base_detection_config.py
    │   ├── blendmask_config.py
    │   ├── centernet2_config.py
    │   ├── centernet_config.py
    │   ├── centernet_det2_config.py
    │   ├── conditionInst_config.py
    │   ├── config_helper.py
    │   ├── deeplab_config.py
    │   ├── dynamic_routing_config.py
    │   ├── efficientdet_config.py
    │   ├── fcos_config.py
    │   ├── fcos_sepc_config.py
    │   ├── keypoint_config.py
    │   ├── ovr_cnn_config.py
    │   ├── panoptic_deeplab.py
    │   ├── panoptic_fcn.py
    │   ├── panoptic_fpn.py
    │   ├── pointrend_config.py
    │   ├── rcnn_config.py
    │   ├── rcnn_fpn_config.py
    │   ├── retinanet_config.py
    │   ├── retinanet_sepc_config.py
    │   ├── sipmask_config.py
    │   ├── solo_config.py
    │   ├── solov2_config.py
    │   ├── sparse_rcnn_config.py
    │   ├── ssd_config.py
    │   └── yolo_config.py
    ├── data
    │   ├── __init__.py
    │   ├── base_dataset.py
    │   ├── build.py
    │   ├── datasets
    │   │   ├── __init__.py
    │   │   ├── builtin_meta.py
    │   │   ├── citypersons.py
    │   │   ├── cityscapes.py
    │   │   ├── coco.py
    │   │   ├── coco_captions.py
    │   │   ├── crowdhuman.py
    │   │   ├── ext
    │   │   │   ├── __init__.py
    │   │   │   ├── mask.py
    │   │   │   └── ytvos.py
    │   │   ├── imagenet.py
    │   │   ├── lvis.py
    │   │   ├── ovis.py
    │   │   ├── paths_route.py
    │   │   ├── target_generator.py
    │   │   ├── voc.py
    │   │   ├── widerface.py
    │   │   └── youtubevis.py
    │   ├── detection_utils.py
    │   ├── registry.py
    │   ├── samplers
    │   │   ├── __init__.py
    │   │   ├── distributed_sampler.py
    │   │   ├── grouped_batch_sampler.py
    │   │   └── sampler.py
    │   ├── transforms
    │   │   ├── __init__.py
    │   │   ├── transform.py
    │   │   ├── transform_gen.py
    │   │   └── transform_util.py
    │   └── wrapped_dataset.py
    ├── engine
    │   ├── __init__.py
    │   ├── hooks.py
    │   ├── launch.py
    │   ├── predictor.py
    │   ├── setup.py
    │   └── trainer.py
    ├── evaluation
    │   ├── __init__.py
    │   ├── citypersons_evaluation.py
    │   ├── cityscapes_evaluation.py
    │   ├── classification_evaluation.py
    │   ├── coco_evaluation.py
    │   ├── coco_evaluation_hack.py
    │   ├── crowdhuman_evaluation.py
    │   ├── crowdhumantools.py
    │   ├── eval_MR_multisetup.py
    │   ├── evaluator.py
    │   ├── evaluator_hack.py
    │   ├── lvis_evaluation.py
    │   ├── panoptic_evaluation.py
    │   ├── pascal_voc_evaluation.py
    │   ├── rotated_coco_evaluation.py
    │   ├── sem_seg_evaluation.py
    │   ├── testing.py
    │   ├── widerface_evaluation.py
    │   ├── widerfacetools.py
    │   └── youtubevis_evaluation.py
    ├── export
    │   ├── README.md
    │   ├── __init__.py
    │   ├── api.py
    │   ├── c10.py
    │   ├── caffe2_export.py
    │   ├── caffe2_inference.py
    │   ├── caffe2_modeling.py
    │   ├── patcher.py
    │   └── shared.py
    ├── layers
    │   ├── __init__.py
    │   ├── activation_funcs.py
    │   ├── aspp.py
    │   ├── batch_norm.py
    │   ├── blocks.py
    │   ├── border_align.py
    │   ├── box_ops.py
    │   ├── cond_conv.py
    │   ├── conv_with_kaiming_uniform.py
    │   ├── crop_split.py
    │   ├── crop_split_gt.py
    │   ├── csrc
    │   │   ├── README.md
    │   │   ├── ROIAlign
    │   │   │   ├── ROIAlign.h
    │   │   │   ├── ROIAlign_cpu.cpp
    │   │   │   └── ROIAlign_cuda.cu
    │   │   ├── ROIAlignRotated
    │   │   │   ├── ROIAlignRotated.h
    │   │   │   ├── ROIAlignRotated_cpu.cpp
    │   │   │   └── ROIAlignRotated_cuda.cu
    │   │   ├── SwapAlign2Nat
    │   │   │   ├── SwapAlign2Nat.h
    │   │   │   └── SwapAlign2Nat_cuda.cu
    │   │   ├── border_align
    │   │   │   ├── border_align.h
    │   │   │   └── border_align_kernel.cu
    │   │   ├── box_iou_rotated
    │   │   │   ├── box_iou_rotated.h
    │   │   │   ├── box_iou_rotated_cpu.cpp
    │   │   │   ├── box_iou_rotated_cuda.cu
    │   │   │   └── box_iou_rotated_utils.h
    │   │   ├── correlation
    │   │   │   ├── correlation.h
    │   │   │   └── correlation_kernel.cu
    │   │   ├── crop_split
    │   │   │   ├── crop_split.h
    │   │   │   └── crop_split_kernel.cu
    │   │   ├── crop_split_gt
    │   │   │   ├── crop_split_gt.h
    │   │   │   └── crop_split_gt_kernel.cu
    │   │   ├── cuda_version.cu
    │   │   ├── deformable
    │   │   │   ├── deform_conv.h
    │   │   │   ├── deform_conv_cuda.cu
    │   │   │   └── deform_conv_cuda_kernel.cu
    │   │   ├── deformable_attn
    │   │   │   ├── ms_deform_attn.h
    │   │   │   ├── ms_deform_attn_cuda.cu
    │   │   │   └── ms_deform_im2col_cuda.cuh
    │   │   ├── masked_conv2d
    │   │   │   ├── masked_conv2d.h
    │   │   │   └── masked_conv2d_cuda.cu
    │   │   ├── ml_nms
    │   │   │   ├── ml_nms.cu
    │   │   │   └── ml_nms.h
    │   │   ├── nms_rotated
    │   │   │   ├── nms_rotated.h
    │   │   │   ├── nms_rotated_cpu.cpp
    │   │   │   └── nms_rotated_cuda.cu
    │   │   ├── sigmoid_focal_loss
    │   │   │   ├── SigmoidFocalLoss.h
    │   │   │   └── SigmoidFocalLoss_cuda.cu
    │   │   └── vision.cpp
    │   ├── deform_conv.py
    │   ├── deform_conv_with_off.py
    │   ├── deform_unfold_module.py
    │   ├── deformable_pytorch.py
    │   ├── dynamic_conv.py
    │   ├── dynamic_conv_with_condition_dcn_atten.py
    │   ├── dynamic_conv_with_dcn.py
    │   ├── dynamic_weights.py
    │   ├── feature_align.py
    │   ├── fix_conv.py
    │   ├── mask_ops.py
    │   ├── masked_conv.py
    │   ├── misc.py
    │   ├── ms_deform_attn.py
    │   ├── naive_group_norm.py
    │   ├── nms.py
    │   ├── panopitc_deeplab.py
    │   ├── point_transformer.py
    │   ├── position_encoding.py
    │   ├── roi_align.py
    │   ├── roi_align_rotated.py
    │   ├── rotated_boxes.py
    │   ├── saconv.py
    │   ├── shape_spec.py
    │   ├── swap_align2nat.py
    │   ├── swtichable_conv.py
    │   └── wrappers.py
    ├── modeling
    │   ├── __init__.py
    │   ├── anchor_generator.py
    │   ├── backbone
    │   │   ├── __init__.py
    │   │   ├── backbone.py
    │   │   ├── bifpn.py
    │   │   ├── bifpn_fcos.py
    │   │   ├── centernet2_bifpn.py
    │   │   ├── darknet.py
    │   │   ├── deformable_transformer.py
    │   │   ├── dla.py
    │   │   ├── dlafpn.py
    │   │   ├── dynamic_arch
    │   │   │   ├── __init__.py
    │   │   │   ├── cal_op_flops.py
    │   │   │   ├── dynamic_backbone.py
    │   │   │   ├── dynamic_cell.py
    │   │   │   └── op_with_flops.py
    │   │   ├── efficientnet.py
    │   │   ├── fpn.py
    │   │   ├── res2net.py
    │   │   ├── resnet.py
    │   │   ├── sepc.py
    │   │   ├── sf_fpn.py
    │   │   ├── splat.py
    │   │   ├── swin.py
    │   │   ├── transformer.py
    │   │   └── vgg.py
    │   ├── basenet
    │   │   ├── __init__.py
    │   │   └── basenet.py
    │   ├── box_regression.py
    │   ├── losses
    │   │   ├── __init__.py
    │   │   ├── circle_loss.py
    │   │   ├── dice_loss.py
    │   │   ├── flow_loss.py
    │   │   ├── focal_loss.py
    │   │   ├── iou_loss.py
    │   │   ├── reg_l1_loss.py
    │   │   ├── sigmoid_focal_loss.py
    │   │   ├── smooth_l1_loss.py
    │   │   └── sparse_rcnn_loss.py
    │   ├── matcher.py
    │   ├── meta_arch
    │   │   ├── __init__.py
    │   │   ├── borderdet.py
    │   │   ├── boundary_mask_rcnn
    │   │   │   ├── __init__.py
    │   │   │   ├── boundary_mask_rcnn.py
    │   │   │   └── boundary_track_mask_rcnn.py
    │   │   ├── centernet.py
    │   │   ├── conditionalInst
    │   │   │   ├── __init__.py
    │   │   │   ├── conditionalInst.py
    │   │   │   ├── conditionalInst_boundary.py
    │   │   │   ├── conditionalInst_boundary_decouple.py
    │   │   │   ├── conditionalInst_boundary_decouplev2.py
    │   │   │   └── conditionalInst_sipmask_track.py
    │   │   ├── deeplab.py
    │   │   ├── deformable_detr.py
    │   │   ├── detr
    │   │   │   ├── __init__.py
    │   │   │   ├── detr.py
    │   │   │   ├── detr_tracking.py
    │   │   │   └── segmentation.py
    │   │   ├── dynamic4seg.py
    │   │   ├── efficientdet.py
    │   │   ├── fcn.py
    │   │   ├── fcos
    │   │   │   ├── __init__.py
    │   │   │   ├── fcos.py
    │   │   │   └── fcos_sepc.py
    │   │   ├── flownet
    │   │   │   ├── flownetc.py
    │   │   │   ├── flownets.py
    │   │   │   ├── two_stream.py
    │   │   │   └── util.py
    │   │   ├── imagenet.py
    │   │   ├── mask_scoring_rcnn.py
    │   │   ├── moco.py
    │   │   ├── panoptic_deeplab.py
    │   │   ├── panoptic_fpn.py
    │   │   ├── panopticfcn
    │   │   │   ├── __init__.py
    │   │   │   ├── gt_generate.py
    │   │   │   ├── heads.py
    │   │   │   ├── panoptic_fcn.py
    │   │   │   └── utils.py
    │   │   ├── pointrend.py
    │   │   ├── rcnn.py
    │   │   ├── reppoints.py
    │   │   ├── retinanet
    │   │   │   ├── __init__.py
    │   │   │   ├── retinanet.py
    │   │   │   └── retinanet_sepc.py
    │   │   ├── semantic_seg.py
    │   │   ├── sipmask.py
    │   │   ├── solo
    │   │   │   ├── __init__.py
    │   │   │   ├── solo.py
    │   │   │   ├── solo_decoupled.py
    │   │   │   ├── solov2.py
    │   │   │   └── utils.py
    │   │   ├── sparsercnn
    │   │   │   ├── __init__.py
    │   │   │   └── sparse_rcnn.py
    │   │   ├── ssd.py
    │   │   ├── tensormask.py
    │   │   └── yolov3.py
    │   ├── nn_utils
    │   │   ├── __init__.py
    │   │   ├── activation_count.py
    │   │   ├── feature_utils.py
    │   │   ├── flop_count.py
    │   │   ├── jit_handles.py
    │   │   ├── parameter_count.py
    │   │   ├── precise_bn.py
    │   │   ├── scale_grad.py
    │   │   └── weight_init.py
    │   ├── poolers.py
    │   ├── postprocessing.py
    │   ├── proposal_generator
    │   │   ├── __init__.py
    │   │   ├── build.py
    │   │   ├── fcos.py
    │   │   ├── fcos_outputs.py
    │   │   ├── fcos_sip_mask.py
    │   │   ├── proposal_utils.py
    │   │   ├── rpn.py
    │   │   ├── rpn_outputs.py
    │   │   ├── rrpn.py
    │   │   └── rrpn_outputs.py
    │   ├── roi_heads
    │   │   ├── __init__.py
    │   │   ├── box_head.py
    │   │   ├── cascade_rcnn.py
    │   │   ├── fast_rcnn.py
    │   │   ├── keypoint_head.py
    │   │   ├── mask_head.py
    │   │   ├── mask_iou_head.py
    │   │   ├── mask_scoring_roi_head.py
    │   │   ├── refine_mask_head.py
    │   │   ├── refine_roi_head.py
    │   │   ├── roi_heads.py
    │   │   ├── roi_heads_visua_hack.py
    │   │   ├── rotated_fast_rcnn.py
    │   │   └── track_heads.py
    │   ├── sampling.py
    │   └── test_time_augmentation.py
    ├── solver
    │   ├── __init__.py
    │   ├── build.py
    │   ├── lr_scheduler.py
    │   ├── optimizer_builder.py
    │   └── scheduler_builder.py
    ├── structures
    │   ├── __init__.py
    │   ├── boxes.py
    │   ├── image_list.py
    │   ├── instances.py
    │   ├── keypoints.py
    │   ├── masks.py
    │   └── rotated_boxes.py
    └── utils
    │   ├── README.md
    │   ├── __init__.py
    │   ├── analysis.py
    │   ├── benchmark
    │       ├── __init__.py
    │       ├── benchmark.py
    │       └── timer.py
    │   ├── distributed
    │       ├── __init__.py
    │       └── comm.py
    │   ├── dump
    │       ├── __init__.py
    │       ├── events.py
    │       ├── history_buffer.py
    │       └── logger.py
    │   ├── env
    │       ├── __init__.py
    │       ├── collect_env.py
    │       └── env.py
    │   ├── file
    │       ├── __init__.py
    │       ├── download.py
    │       ├── file_io.py
    │       └── serialize.py
    │   ├── imports.py
    │   ├── memory.py
    │   ├── metrics
    │       ├── __init__.py
    │       └── accuracy.py
    │   ├── registry.py
    │   └── visualizer
    │       ├── __init__.py
    │       ├── colormap.py
    │       ├── show.py
    │       ├── video_visualizer.py
    │       └── visualizer.py
├── datasets
    ├── README.md
    ├── components
    │   └── coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz
    ├── gen_coco_person.py
    ├── panoptic2detection_coco_format.py
    ├── prepare_cocofied_lvis.py
    ├── prepare_panoptic_fpn.py
    ├── prepare_thing_sem_from_instance.py
    └── prepare_thing_sem_from_lvis.py
├── docs
    ├── bugs.md
    ├── datasets.md
    ├── notes.md
    ├── overview.md
    ├── results.md
    └── tricks.md
├── fig
    └── test.jpg
├── playground
    └── detection
    │   ├── cityscapes
    │       ├── ceseg
    │       │   ├── boundary_refine_mask_rcnn_r101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn
    │       │   │   ├── boundary_mask_rcnn.py
    │       │   │   ├── box_head.py
    │       │   │   ├── config.py
    │       │   │   ├── net.py
    │       │   │   └── rcnn.py
    │       │   ├── boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn
    │       │   │   ├── boundary_mask_rcnn.py
    │       │   │   ├── box_head.py
    │       │   │   ├── config.py
    │       │   │   ├── net.py
    │       │   │   └── rcnn.py
    │       │   └── boundary_refine_mask_rcnn_rx101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn
    │       │   │   ├── boundary_mask_rcnn.py
    │       │   │   ├── box_head.py
    │       │   │   ├── config.py
    │       │   │   ├── net.py
    │       │   │   └── rcnn.py
    │       ├── pointrend
    │       │   ├── point_rend_mask_rcnn_R101X
    │       │   │   ├── config.py
    │       │   │   └── net.py
    │       │   ├── point_rend_mask_rcnn_r101
    │       │   │   ├── config.py
    │       │   │   └── net.py
    │       │   └── point_rend_mask_rcnn_r50
    │       │   │   ├── config.py
    │       │   │   └── net.py
    │       └── rcnn
    │       │   ├── mask_rcnn_res101_fpn_coco_ms
    │       │       ├── README.md
    │       │       ├── config.py
    │       │       └── net.py
    │       │   ├── mask_rcnn_res50_fpn_coco_ms
    │       │       ├── README.md
    │       │       ├── config.py
    │       │       └── net.py
    │       │   └── mask_rcnn_rx101_fpn_coco_ms
    │       │       ├── README.md
    │       │       ├── config.py
    │       │       └── net.py
    │   └── coco
    │       └── bs_mask
    │           ├── boundary_refine_mask_rcnn_r101_ms_1x_3_subgt_warpping_dice_erode_dilate_gn
    │               ├── boundary_mask_rcnn.py
    │               ├── box_head.py
    │               ├── config.py
    │               ├── net.py
    │               └── rcnn.py
    │           ├── boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn
    │               ├── boundary_mask_rcnn.py
    │               ├── box_head.py
    │               ├── config.py
    │               ├── net.py
    │               └── rcnn.py
    │           └── cascade
    │               ├── boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade
    │                   ├── boundary_mask_rcnn.py
    │                   ├── box_head.py
    │                   ├── config.py
    │                   ├── net.py
    │                   └── rcnn.py
    │               ├── boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade
    │                   ├── boundary_mask_rcnn.py
    │                   ├── box_head.py
    │                   ├── config.py
    │                   ├── net.py
    │                   └── rcnn.py
    │               └── boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi
    │                   ├── boundary_mask_rcnn.py
    │                   ├── box_head.py
    │                   ├── config.py
    │                   ├── net.py
    │                   └── rcnn.py
└── tools
    ├── cal_flops.py
    ├── cat_visualizer_results.py
    ├── convert_detr2cvpod.py
    ├── convert_to_d2.py
    ├── draw_teaser.py
    ├── image2gif.py
    ├── rm_files.py
    ├── test_dis.py
    ├── test_net.py
    ├── train_net.py
    ├── visualize_json_results.py
    └── visualize_vis_json_results.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | **/log
 2 | 
 3 | *.jpg
 4 | *.png
 5 | *.txt
 6 | 
 7 | # compilation and distribution
 8 | __pycache__
 9 | _ext
10 | *.pyc
11 | *.so
12 | *.o
13 | cvpods.egg-info/
14 | build/
15 | dist/
16 | wheels/
17 | 
18 | tools/pods_test
19 | tools/pods_train
20 | 
21 | # pytorch/python/numpy formats
22 | *.pth
23 | *.pkl
24 | *.npy
25 | 
26 | # ipython/jupyter notebooks
27 | *.ipynb
28 | **/.ipynb_checkpoints/
29 | 
30 | # Editor temporaries
31 | *.swn
32 | *.swo
33 | *.swp
34 | *~
35 | 
36 | # Pycharm editor settings
37 | .idea
38 | 
39 | # project dirs
40 | /models
41 | 
42 | 
43 | # exclude
44 | !requirements*.txt
45 | 
46 | # tools
47 | tools/visualize_vis_json_results_for_debug.py
48 | tools/visualize_vis_json_results_only_seg.py
49 | tools/useless_tools
50 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # BoundarySqueeze: Image Segmentation as Boundary Squeezing [[Arxiv]](https://arxiv.org/pdf/2105.11668.pdf)
 2 | Hao He, Xiangtai Li,  Guangliang Cheng, Yunhai Tong, Lubin Weng
 3 | 
 4 | This paper proposes a novel method for high-quality image segmentation of both objects and scenes. 
 5 | Inspired by the dilation and erosion operations in morphological image processing techniques, the pixel-level image segmentation problems are treated as squeezing object boundaries.
 6 | 
 7 | ### Comparison with Point Rend
 8 | ![Figure](./fig/test.jpg) 
 9 | 
10 | Our method is built on the codebase of [CVPOD](https://github.com/Megvii-BaseDetection/cvpods).
11 | 
12 | 
13 | ## Install, Training and Testing
14 | 
15 | ```shell
16 | 
17 | # Or, to install it from a local clone:
18 | git clone https://github.com/lxtGH/BSSeg
19 | cd BSSeg
20 | 
21 | pip install -r requirements.txt
22 | 
23 | python setup.py build develop
24 | 
25 | # Preprare data path
26 | ln -s /path/to/your/coco/dataset datasets/coco
27 | 
28 | # Enter a specific experiment dir 
29 | cd playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn
30 | 
31 | 
32 | # Train
33 | pods_train --num-gpus 8
34 | # Test
35 | pods_test --num-gpus 8 \
36 |     MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth # optional
37 |     OUTPUT_DIR /path/to/your/save_dir # optional
38 | 
39 | # Multi node training
40 | ## sudo apt install net-tools ifconfig
41 | pods_train --num-gpus 8 --num-machines N --machine-rank 0/1/.../N-1 --dist-url "tcp://MASTER_IP:port"
42 | ```
43 | 
44 | If you find this codebase is useful to your research, plese consider cite the paper and original codebase.
45 | 
46 | ```BibTeX
47 | @misc{he2021boundarysqueeze,
48 |     title={BoundarySqueeze: Image Segmentation as Boundary Squeezing},
49 |     author={Hao He and Xiangtai Li and Guangliang Cheng and Yunhai Tong and Lubin Weng},
50 |     year={2021},
51 |     eprint={2105.11668},
52 |     archivePrefix={arXiv},
53 |     primaryClass={cs.CV}
54 | }
55 | 
56 | @misc{zhu2020cvpods,
57 |   title={cvpods: All-in-one Toolbox for Computer Vision Research},
58 |   author={Zhu*, Benjin and Wang*, Feng and Wang, Jianfeng and Yang, Siwei and Chen, Jianhu and Li, Zeming},
59 |   year={2020}
60 | }
61 | ```
62 | 


--------------------------------------------------------------------------------
/cvpods/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | from .utils import setup_environment
 4 | 
 5 | setup_environment()
 6 | 
 7 | # This line will be programatically read/write by setup.py.
 8 | # Leave them at the bottom of this file and don't touch them.
 9 | __version__ = "0.1"
10 | 


--------------------------------------------------------------------------------
/cvpods/checkpoint/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 3 | # File:
 4 | 
 5 | 
 6 | from . import catalog as _UNUSED  # register the handler
 7 | from .checkpoint import Checkpointer, PeriodicCheckpointer
 8 | from .detection_checkpoint import DetectionCheckpointer
 9 | 
10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"]
11 | 


--------------------------------------------------------------------------------
/cvpods/configs/base_classification_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   base_classification_config.py
 5 | @Time               :   2020/05/07 23:56:17
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:56:17
10 | '''
11 | 
12 | from cvpods.configs.base_config import BaseConfig
13 | 
14 | _config_dict = dict(
15 |     MODEL=dict(
16 |         WEIGHTS="",
17 |         PIXEL_MEAN=[0.406, 0.456, 0.485],  # BGR
18 |         PIXEL_STD=[0.225, 0.224, 0.229],
19 |         BACKBONE=dict(FREEZE_AT=-1, ),  # do not freeze
20 |         RESNETS=dict(
21 |             NUM_CLASSES=None,
22 |             DEPTH=None,
23 |             OUT_FEATURES=["linear"],
24 |             NUM_GROUPS=1,
25 |             # Options: FrozenBN, GN, "SyncBN", "BN"
26 |             NORM="BN",
27 |             ACTIVATION=dict(
28 |                 NAME="ReLU",
29 |                 INPLACE=True,
30 |             ),
31 |             # Whether init last bn weight of each BasicBlock or BottleneckBlock to 0
32 |             ZERO_INIT_RESIDUAL=True,
33 |             WIDTH_PER_GROUP=64,
34 |             # Use True only for the original MSRA ResNet; use False for C2 and Torch models
35 |             STRIDE_IN_1X1=False,
36 |             RES5_DILATION=1,
37 |             RES2_OUT_CHANNELS=256,
38 |             STEM_OUT_CHANNELS=64,
39 |             DEFORM_ON_PER_STAGE=[False, False, False, False],
40 |             DEFORM_MODULATED=False,
41 |             DEFORM_NUM_GROUPS=1,
42 | 
43 |             # Deep Stem
44 |             DEEP_STEM=False,
45 |             # Apply avg after conv2 in the BottleBlock
46 |             # When AVD=True, the STRIDE_IN_1X1 should be Falss
47 |             AVD=False,
48 |             # Apply avg_down to the downsampling layer for residual path
49 |             AVG_DOWN=False,
50 |             # Radix in ResNeSt
51 |             RADIX=1,
52 |             # Bottleneck_width in ResNeSt
53 |             BOTTLENECK_WIDTH=64,
54 |         ),
55 |     ),
56 |     SOLVER=dict(
57 |         IMS_PER_DEVICE=32,  # defalut: 8 gpus x 32 = 256
58 |     ),
59 | )
60 | 
61 | 
62 | class BaseClassificationConfig(BaseConfig):
63 |     def __init__(self):
64 |         super(BaseClassificationConfig, self).__init__()
65 |         self._register_configuration(_config_dict)
66 | 
67 | 
68 | config = BaseClassificationConfig()
69 | 


--------------------------------------------------------------------------------
/cvpods/configs/centernet_config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from .base_detection_config import BaseDetectionConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         # WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-18.pth",
 8 |         WEIGHTS="",
 9 |         MASK_ON=False,
10 |         RESNETS=dict(
11 |             DEPTH=50,
12 |             OUT_FEATURES=["res5"]
13 |         ),
14 |         CENTERNET=dict(
15 |             DECONV_CHANNEL=[2048, 256, 128, 64],
16 |             DECONV_KERNEL=[4, 4, 4],
17 |             NUM_CLASSES=80,
18 |             MODULATE_DEFORM=True,
19 |             BIAS_VALUE=-2.19,
20 |             DOWN_SCALE=4,
21 |             MIN_OVERLAP=0.7,
22 |             TENSOR_DIM=128,
23 |         ),
24 |         LOSS=dict(
25 |             CLS_WEIGHT=1,
26 |             WH_WEIGHT=0.1,
27 |             REG_WEIGHT=1,
28 |         ),
29 |     ),
30 |     INPUT=dict(
31 |         AUG=dict(
32 |             TRAIN_PIPELINES=[
33 |                 ('CenterAffine', dict(
34 |                     boarder=128,
35 |                     output_size=(512, 512),
36 |                     random_aug=True)),
37 |                 ('RandomFlip', dict()),
38 |                 ('RandomBrightness', dict(intensity_min=0.6, intensity_max=1.4)),
39 |                 ('RandomContrast', dict(intensity_min=0.6, intensity_max=1.4)),
40 |                 ('RandomSaturation', dict(intensity_min=0.6, intensity_max=1.4)),
41 |                 ('RandomLighting', dict(scale=0.1)),
42 |             ],
43 |             TEST_PIPELINES=[
44 |             ],
45 |         ),
46 |         OUTPUT_SIZE=(128, 128),
47 |     ),
48 |     DATALOADER=dict(
49 |         NUM_WORKERS=4,
50 |     ),
51 |     DATASETS=dict(
52 |         TRAIN=("coco_2017_train",),
53 |         TEST=("coco_2017_val",),
54 |     ),
55 |     SOLVER=dict(
56 |         OPTIMIZER=dict(
57 |             NAME="SGD",
58 |             BASE_LR=0.02,
59 |             WEIGHT_DECAY=1e-4,
60 |         ),
61 |         LR_SCHEDULER=dict(
62 |             GAMMA=0.1,
63 |             STEPS=(81000, 108000),
64 |             MAX_ITER=126000,
65 |             WARMUP_ITERS=1000,
66 |         ),
67 |         IMS_PER_BATCH=128,
68 |         IMS_PER_DEVICE=16
69 |     ),
70 |     OUTPUT_DIR=osp.join(
71 |         '/data/Outputs/model_logs/playground',
72 |         osp.split(osp.realpath(__file__))[0].split("playground/")[-1]
73 |     ),
74 |     GLOBAL=dict(DUMP_TEST=False),
75 | )
76 | 
77 | 
78 | class CenterNetConfig(BaseDetectionConfig):
79 |     def __init__(self):
80 |         super(CenterNetConfig, self).__init__()
81 |         self._register_configuration(_config_dict)
82 | 
83 | 
84 | config = CenterNetConfig()
85 | 


--------------------------------------------------------------------------------
/cvpods/configs/centernet_det2_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   base-centernet2.py
 5 | @Author             :   Xiangtai Li
 6 | '''
 7 | 
 8 | 
 9 | from .rcnn_config import RCNNConfig
10 | 
11 | _config_dict = dict(
12 |     DEBUG=False,
13 |     SAVE_DEBUG=False,
14 |     SAVE_PTH=False,
15 |     VIS_THRESH=0.3,
16 |     DEBUG_SHOW_NAME=False,
17 | 
18 |     MODEL=dict(
19 |         RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"],),
20 |         FPN=dict(IN_FEATURES=["res3", "res4", "res5"]),
21 |         CENTERNET=dict(
22 |             NUM_CLASSES=80,
23 |             IN_FEATURES = ["p3", "p4", "p5", "p6", "p7"],
24 |             FPN_STRIDES = [8, 16, 32, 64, 128],
25 |             PRIOR_PROB = 0.01,
26 |             CENTER_NMS = False,
27 |             NMS_TH_TRAIN = 0.6,
28 |             NFERENCE_TH = 0.05,
29 |             NMS_TH_TEST = 0.6,
30 |             INFERENCE_TH = 0.05,
31 |             PRE_NMS_TOPK_TRAIN = 1000,
32 |             POST_NMS_TOPK_TRAIN = 100,
33 |             PRE_NMS_TOPK_TEST = 1000,
34 |             POST_NMS_TOPK_TEST = 100,
35 |             NORM = "GN",
36 |             USE_DEFORMABLE = False,
37 |             NUM_CLS_CONVS = 4,
38 |             NUM_BOX_CONVS = 4,
39 |             NUM_SHARE_CONVS = 0,
40 |             LOC_LOSS_TYPE = 'giou',
41 |             SIGMOID_CLAMP = 1e-4,
42 |             HM_MIN_OVERLAP = 0.8,
43 |             MIN_RADIUS = 4,
44 |             SOI = [[0, 80], [64, 160], [128, 320], [256, 640], [512, 10000000]],
45 |             POS_WEIGHT = 1.,
46 |             NEG_WEIGHT = 1.,
47 |             REG_WEIGHT = 2.,
48 |             HM_FOCAL_BETA = 4,
49 |             HM_FOCAL_ALPHA = 0.25,
50 |             LOSS_GAMMA = 2.0,
51 |             WITH_AGN_HM = False,
52 |             ONLY_PROPOSAL = False,
53 |             AS_PROPOSAL = False,
54 |             IGNORE_HIGH_FP = -1.,
55 |             MORE_POS = False,
56 |             MORE_POS_THRESH = 0.2,
57 |             MORE_POS_TOPK = 9,
58 |             NOT_NORM_REG = True,
59 |             NOT_NMS = False,
60 |         ),
61 | 
62 |         BIFPN=dict(
63 |             NUM_LEVELS=5,
64 |             NUM_BIFPN=6,
65 |             NORM='GN',
66 |             OUT_CHANNELS=160,
67 |             SEPARABLE_CONV=False,
68 |         ),
69 | 
70 |     ),
71 | )
72 | 
73 | 
74 | class CenterNetDet2Config(RCNNConfig):
75 |     def __init__(self):
76 |         super(CenterNetDet2Config, self).__init__()
77 |         self._register_configuration(_config_dict)
78 | 
79 | 
80 | config = CenterNetDet2Config()
81 | 


--------------------------------------------------------------------------------
/cvpods/configs/deeplab_config.py:
--------------------------------------------------------------------------------
 1 | from .base_detection_config import BaseDetectionConfig
 2 | 
 3 | 
 4 | _config_dict = dict(
 5 |     BACKBONE=dict(FREEZE_AT=0,),
 6 |     MODEL=dict(
 7 |         MASK_ON=False,
 8 |         LOAD_PROPOSALS=False,
 9 |         RESNETS=dict(
10 |         NORM="nnSyncBN",
11 |         OUT_FEATURES=["res5"],
12 |         RES4_DILATION=1,
13 |         RES5_DILATION=2,
14 |         RES5_MULTI_GRID = [1, 2, 4],
15 |         STEM_TYPE="deeplabv3_r50"
16 |         ),
17 |         SEM_SEG_HEAD=dict(
18 |             # NAME="Deeplabv3Head",
19 |             IGNORE_VALUE=255,
20 |             # Number of classes in the semantic segmentation head
21 |             NUM_CLASSES=19,
22 |             # Number of channels in the 3x3 convs inside semantic-FPN heads.
23 |             LOSS_TYPE="hard_pixel_mining",
24 |             PROJECT_FEATURES=["res2"],
25 |             PROJECT_CHANNELS=[48],
26 |             ASPP_CHANNELS=256,
27 |             CONVS_DIM=256,
28 |             ASPP_DILATIONS=[6, 12, 18],
29 |             ASPP_DROPOUT=0.1,
30 |             USE_DEPTHWISE_SEPARABLE_CONV=False,
31 |             COMMON_STRIDE=16,
32 |             NORM="GN",
33 |             LOSS_WEIGHT=1.0,
34 |         ),
35 |     ),
36 | 
37 |     SOLVER=dict(
38 |         LR_SCHEDULER=dict(
39 |             NAME="WarmupPolyLR",
40 |             WARMUP_FACTOR=1.0 / 100,
41 |             MAX_ITER=90000,
42 |         ),
43 |         POLY_LR_CONSTANT_ENDING=0.0,
44 |         POLY_LR_POWER=0.9,
45 |     )
46 | 
47 | )
48 | 
49 | 
50 | class SegmentationConfig(BaseDetectionConfig):
51 |     def __init__(self):
52 |         super(SegmentationConfig, self).__init__()
53 |         self._register_configuration(_config_dict)
54 | 
55 | 
56 | config = SegmentationConfig()
57 | 


--------------------------------------------------------------------------------
/cvpods/configs/dynamic_routing_config.py:
--------------------------------------------------------------------------------
 1 | from .base_config import BaseConfig
 2 | 
 3 | _config_dict = dict(
 4 |     MODEL=dict(
 5 |         LOAD_PROPOSALS=False,
 6 |         MASK_ON=False,
 7 |         KEYPOINT_ON=False,
 8 |         BACKBONE=dict(FREEZE_AT=0,),
 9 |         RESNETS=dict(
10 |             OUT_FEATURES=["res2", "res3", "res4", "res5"],
11 |             NORM="nnSyncBN",
12 |             NUM_GROUPS=1,
13 |             WIDTH_PER_GROUP=64,
14 |             STRIDE_IN_1X1=True,
15 |             RES5_DILATION=1,
16 |             RES2_OUT_CHANNELS=256,
17 |             STEM_OUT_CHANNELS=64,
18 |             DEFORM_ON_PER_STAGE=[False, False, False, False],
19 |             DEFORM_MODULATED=False,
20 |             DEFORM_NUM_GROUPS=1,
21 |         ),
22 |         FPN=dict(
23 |             IN_FEATURES=[],
24 |             OUT_CHANNELS=256,
25 |             NORM="",
26 |             FUSE_TYPE="sum",
27 |         ),
28 |         SEM_SEG_HEAD=dict(
29 |             # NAME="SemSegFPNHead",
30 |             IN_FEATURES=[],
31 |             IGNORE_VALUE=255,
32 |             NUM_CLASSES=(),
33 |             CONVS_DIM=256,
34 |             COMMON_STRIDE=(),
35 |             NORM="GN",
36 |             LOSS_WEIGHT=1.0,
37 |         ),
38 |         SOLVER=dict(
39 |             LR_SCHEDULER=dict(
40 |                 NAME="PolyLR",
41 |                 POLY_POWER=0.9,
42 |                 MAX_ITER=40000,
43 |                 WARMUP_ITERS=1000,
44 |                 WARMUP_FACTOR=0.001,
45 |                 WARMUP_METHOD="linear",
46 |             ),
47 |             OPTIMIZER=dict(BASE_LR=0.01, ),
48 |             IMS_PER_BATCH=16,
49 |             CHECKPOINT_PERIOD=5000,
50 |         ),
51 |         TEST=dict(PRECISE_BN=dict(ENABLED=True), ),
52 |     ),
53 | )
54 | 
55 | 
56 | class SemanticSegmentationConfig(BaseConfig):
57 |     def __init__(self):
58 |         super(SemanticSegmentationConfig, self).__init__()
59 |         self._register_configuration(_config_dict)
60 | 
61 | 
62 | config = SemanticSegmentationConfig()
63 | 


--------------------------------------------------------------------------------
/cvpods/configs/efficientdet_config.py:
--------------------------------------------------------------------------------
 1 | from .base_detection_config import BaseDetectionConfig
 2 | 
 3 | _config_dict = dict(
 4 |     MODEL=dict(
 5 |         PIXEL_MEAN=[0.485, 0.456, 0.406],  # mean value from ImageNet
 6 |         PIXEL_STD=[0.229, 0.224, 0.225],
 7 |         EFFICIENTNET=dict(
 8 |             MODEL_NAME="efficientnet-b0",  # default setting for EfficientDet-D0
 9 |             NORM="BN",
10 |             BN_MOMENTUM=1 - 0.99,
11 |             BN_EPS=1e-3,
12 |             DROP_CONNECT_RATE=1 - 0.8,  # survival_prob = 0.8
13 |             DEPTH_DIVISOR=8,
14 |             MIN_DEPTH=None,
15 |             NUM_CLASSES=None,
16 |             FIX_HEAD_STEAM=False,
17 |             MEMORY_EFFICIENT_SWISH=True,
18 |             OUT_FEATURES=["stage4", "stage6", "stage8"],
19 |         ),
20 |         BIFPN=dict(
21 |             IN_FEATURES=["stage4", "stage6", "stage8"],
22 |             NORM="BN",
23 |             BN_MOMENTUM=0.01,  # 1 - 0.99
24 |             BN_EPS=1e-3,
25 |             MEMORY_EFFICIENT_SWISH=True,
26 |             INPUT_SIZE=512,  # default setting for EfficientDet-D0
27 |             NUM_LAYERS=3,  # default setting for EfficientDet-D0
28 |             OUT_CHANNELS=60,  # default setting for EfficientDet-D0
29 |             FUSE_TYPE="fast",  # select in ["softmax", "fast", "sum"]
30 |         ),
31 |         EFFICIENTDET=dict(
32 |             IN_FEATURES=[f"p{i}" for i in range(3, 8)],  # p3-p7
33 |             NUM_CLASSES=80,
34 |             FREEZE_BACKBONE=False,
35 |             FREEZE_BN=False,
36 |             HEAD=dict(
37 |                 NUM_CONV=3,  # default setting for EfficientDet-D0
38 |                 NORM="BN",
39 |                 BN_MOMENTUM=1 - 0.99,
40 |                 BN_EPS=1e-3,
41 |                 PRIOR_PROB=0.01,
42 |                 MEMORY_EFFICIENT_SWISH=True,
43 |             ),
44 |             IOU_THRESHOLDS=[0.5, 0.5],
45 |             IOU_LABELS=[0, -1, 1],
46 |             SCORE_THRESH_TEST=0.05,
47 |             TOPK_CANDIDATES_TEST=1000,
48 |             NMS_THRESH_TEST=0.5,
49 |             BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0),
50 |             FOCAL_LOSS_GAMMA=1.5,
51 |             FOCAL_LOSS_ALPHA=0.25,
52 |             SMOOTH_L1_LOSS_BETA=0.1,
53 |             REG_NORM=4.0,
54 |             BOX_LOSS_WEIGHT=50.0,
55 |         ),
56 |         ANCHOR_GENERATOR=dict(
57 |             SIZES=[
58 |                 [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)]
59 |                 for x in [4 * 2**i for i in range(3, 8)]
60 |             ]
61 |         ),
62 |     ),
63 | )
64 | 
65 | 
66 | class EfficientDetConfig(BaseDetectionConfig):
67 |     def __init__(self):
68 |         super(EfficientDetConfig, self).__init__()
69 |         self._register_configuration(_config_dict)
70 | 
71 | 
72 | config = EfficientDetConfig()
73 | 


--------------------------------------------------------------------------------
/cvpods/configs/fcos_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   fcos_config.py
 5 | @Time               :   2020/05/07 23:56:09
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:56:09
10 | '''
11 | 
12 | from .base_detection_config import BaseDetectionConfig
13 | 
14 | _config_dict = dict(
15 |     MODEL=dict(
16 |         # META_ARCHITECTURE="RetinaNet",
17 |         RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]),
18 |         FPN=dict(IN_FEATURES=["res3", "res4", "res5"]),
19 |         FCOS=dict(
20 |             NUM_CLASSES=80,
21 |             IN_FEATURES=["p3", "p4", "p5", "p6", "p7"],
22 |             NUM_CONVS=4,
23 |             BUDGET_LOSS_LAMBDA=0.0,
24 |             SHARE_CONVS=0,
25 |             FPN_STRIDES=[8, 16, 32, 64, 128],
26 |             PRIOR_PROB=0.01,
27 |             CENTERNESS_ON_REG=False,
28 |             NORM_REG_TARGETS=False,
29 |             SCORE_THRESH_TEST=0.05,
30 |             TOPK_CANDIDATES_TEST=1000,
31 |             NMS_THRESH_TEST=0.6,
32 |             BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0),
33 |             FOCAL_LOSS_GAMMA=2.0,
34 |             FOCAL_LOSS_ALPHA=0.25,
35 |             IOU_LOSS_TYPE="iou",
36 |             CENTER_SAMPLING_RADIUS=0.0,
37 |             OBJECT_SIZES_OF_INTEREST=[
38 |                 [-1, 64],
39 |                 [64, 128],
40 |                 [128, 256],
41 |                 [256, 512],
42 |                 [512, float("inf")],
43 |             ],
44 |         ),
45 |     ),
46 | )
47 | 
48 | 
49 | class FCOSConfig(BaseDetectionConfig):
50 |     def __init__(self):
51 |         super(FCOSConfig, self).__init__()
52 |         self._register_configuration(_config_dict)
53 | 
54 | 
55 | config = FCOSConfig()
56 | 


--------------------------------------------------------------------------------
/cvpods/configs/fcos_sepc_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   fcos_config.py
 5 | @Time               :   2020/05/07 23:56:09
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:56:09
10 | '''
11 | 
12 | from .base_detection_config import BaseDetectionConfig
13 | 
14 | _config_dict = dict(
15 |     MODEL=dict(
16 |         RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]),
17 |         FPN=dict(IN_FEATURES=["res3", "res4", "res5"]),
18 |         FCOS=dict(
19 |             NUM_CLASSES=80,
20 |             IN_FEATURES=["p3", "p4", "p5", "p6", "p7"],
21 |             NUM_CONVS=0,
22 |             FPN_STRIDES=[8, 16, 32, 64, 128],
23 |             PRIOR_PROB=0.01,
24 |             CENTERNESS_ON_REG=False,
25 |             NORM_REG_TARGETS=False,
26 |             SCORE_THRESH_TEST=0.05,
27 |             TOPK_CANDIDATES_TEST=1000,
28 |             NMS_THRESH_TEST=0.6,
29 |             BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0),
30 |             FOCAL_LOSS_GAMMA=2.0,
31 |             FOCAL_LOSS_ALPHA=0.25,
32 |             IOU_LOSS_TYPE="iou",
33 |             CENTER_SAMPLING_RADIUS=0.0,
34 |             OBJECT_SIZES_OF_INTEREST=[
35 |                 [-1, 64],
36 |                 [64, 128],
37 |                 [128, 256],
38 |                 [256, 512],
39 |                 [512, float("inf")],
40 |             ],
41 |         ),
42 |         SEPC=dict(
43 |             IN_FEATURES=["p3", "p4", "p5", "p6", "p7"],
44 |             IN_CHANNELS=[256, 256, 256, 256, 256],
45 |             OUT_CHANNELS=256,
46 |             NUM_OUTS=5,
47 |             COMBINE_DEFORM=False,
48 |             EXTRA_DEFORM=False,
49 |             COMBINE_NUM=4,
50 |             IBN=False,
51 |         )
52 |     ),
53 | )
54 | 
55 | class FCOSSPECConfig(BaseDetectionConfig):
56 |     def __init__(self):
57 |         super(FCOSSPECConfig, self).__init__()
58 |         self._register_configuration(_config_dict)
59 | 
60 | 
61 | config = FCOSSPECConfig()
62 | 


--------------------------------------------------------------------------------
/cvpods/configs/keypoint_config.py:
--------------------------------------------------------------------------------
 1 | from .base_detection_config import BaseDetectionConfig
 2 | 
 3 | _config_dict = dict(
 4 |     MODEL=dict(
 5 |         ROI_KEYPOINT_HEAD=dict(
 6 |             NAME="KRCNNConvDeconvUpsampleHead",
 7 |             POOLER_RESOLUTION=14,
 8 |             POOLER_SAMPLING_RATIO=0,
 9 |             CONV_DIMS=tuple(512 for _ in range(8)),
10 |             NUM_KEYPOINTS=17,  # 17 is the number of keypoints in COCO
11 |             # Images with too few (or no) keypoints are excluded from training.
12 |             MIN_KEYPOINTS_PER_IMAGE=1,
13 |             # Normalize by the total number of visible keypoints in the minibatch if True.
14 |             # Otherwise, normalize by the total number of keypoints that could ever exist
15 |             # in the minibatch.
16 |             # The keypoint softmax loss is only calculated on visible keypoints.
17 |             # Since the number of visible keypoints can vary significantly between
18 |             # minibatches, this has the effect of up-weighting the importance of
19 |             # minibatches with few visible keypoints. (Imagine the extreme case of
20 |             # only one visible keypoint versus N: in the case of N, each one
21 |             # contributes 1/N to the gradient compared to the single keypoint
22 |             # determining the gradient direction). Instead, we can normalize the
23 |             # loss by the total number of keypoints, if it were the case that all
24 |             # keypoints were visible in a full minibatch. (Returning to the example,
25 |             # this means that the one visible keypoint contributes as much as each
26 |             # of the N keypoints.)
27 |             NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS=True,
28 |             # Multi-task loss weight to use for keypoints
29 |             # Recommended values:
30 |             #   - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True
31 |             #   - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False
32 |             LOSS_WEIGHT=1.0,
33 |             # Type of pooling operation applied to the incoming feature map for each RoI
34 |             POOLER_TYPE="ROIAlignV2",
35 |         ),
36 |     )
37 | )
38 | 
39 | 
40 | class KeypointConfig(BaseDetectionConfig):
41 |     def __init__(self):
42 |         super(KeypointConfig, self).__init__()
43 |         self._register_configuration(_config_dict)
44 | 
45 | 
46 | config = KeypointConfig()
47 | 


--------------------------------------------------------------------------------
/cvpods/configs/panoptic_fpn.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | 
 4 | from .rcnn_config import RCNNConfig
 5 | 
 6 | _config_dict = dict(
 7 |     MODEL=dict(
 8 |         RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],),
 9 |         FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]),
10 |         ANCHOR_GENERATOR=dict(
11 |             SIZES=[[32], [64], [128], [256], [512]], ASPECT_RATIOS=[[0.5, 1.0, 2.0]],
12 |         ),
13 |         RPN=dict(
14 |             IN_FEATURES=["p2", "p3", "p4", "p5", "p6"],
15 |             PRE_NMS_TOPK_TRAIN=2000,
16 |             PRE_NMS_TOPK_TEST=1000,
17 |             POST_NMS_TOPK_TRAIN=1000,
18 |             POST_NMS_TOPK_TEST=1000,
19 |         ),
20 |         ROI_HEADS=dict(
21 |             # NAME: "StandardROIHeads"
22 |             IN_FEATURES=["p2", "p3", "p4", "p5"],
23 |         ),
24 |         ROI_BOX_HEAD=dict(
25 |             # NAME: "FastRCNNConvFCHead"
26 |             NUM_FC=2,
27 |             POOLER_RESOLUTION=7,
28 |         ),
29 |         ROI_MASK_HEAD=dict(
30 |             # NAME: "MaskRCNNConvUpsampleHead"
31 |             NUM_CONV=4,
32 |             POOLER_RESOLUTION=14,
33 |         ),
34 |         SEM_SEG_HEAD=dict(
35 |             # NAME="SemSegFPNHead",
36 |             IN_FEATURES=["p2", "p3", "p4", "p5"],
37 |             # Label in the semantic segmentation ground truth that is ignored,
38 |             # i.e., no loss is calculated for the correposnding pixel.
39 |             IGNORE_VALUE=255,
40 |             # Number of classes in the semantic segmentation head
41 |             NUM_CLASSES=54,
42 |             # Number of channels in the 3x3 convs inside semantic-FPN heads.
43 |             CONVS_DIM=128,
44 |             # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
45 |             COMMON_STRIDE=4,
46 |             # Normalization method for the convolution layers. Options: "" (no norm), "GN".
47 |             NORM="GN",
48 |             LOSS_WEIGHT=1.0,
49 |         ),
50 |         PANOPTIC_FPN=dict(
51 |             # Scaling of all losses from instance detection / segmentation head.
52 |             INSTANCE_LOSS_WEIGHT=1.0,
53 |             # options when combining instance & semantic segmentation outputs
54 |             COMBINE=dict(
55 |                 ENABLED=True,
56 |                 OVERLAP_THRESH=0.5,
57 |                 STUFF_AREA_LIMIT=4096,
58 |                 INSTANCES_CONFIDENCE_THRESH=0.5,
59 |             ),
60 |         ),
61 | 
62 |     ),
63 | )
64 | 
65 | 
66 | class PANFPNConfig(RCNNConfig):
67 |     def __init__(self):
68 |         super(PANFPNConfig, self).__init__()
69 |         self._register_configuration(_config_dict)
70 | 
71 | 
72 | config = PANFPNConfig()
73 | 


--------------------------------------------------------------------------------
/cvpods/configs/pointrend_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # Copyright (c) BaseDetection, Inc. and its affiliates. All Rights Reserved
 4 | 
 5 | from .rcnn_fpn_config import RCNNFPNConfig
 6 | 
 7 | _config_dict = dict(
 8 |     MODEL=dict(
 9 |         ROI_HEADS=dict(
10 |             # NAME="PointRendROIHeads",
11 |             IN_FEATURES=["p2", "p3", "p4", "p5"],
12 |         ),
13 |         ROI_BOX_HEAD=dict(
14 |             TRAIN_ON_PRED_BOXES=True,
15 |         ),
16 |         ROI_MASK_HEAD=dict(
17 |             # NAME="CoarseMaskHead",
18 |             # Names of the input feature maps to be used by a coarse mask head.
19 |             IN_FEATURES=["p2"],
20 |             FC_DIM=1024,
21 |             NUM_FC=2,
22 |             # The side size of a coarse mask head prediction.
23 |             OUTPUT_SIDE_RESOLUTION=7,
24 |             # True if point head is used.
25 |             POINT_HEAD_ON=True,
26 |         ),
27 |         POINT_HEAD=dict(
28 |             # Names of the input feature maps to be used by a mask point head.
29 |             IN_FEATURES=["p2"],
30 |             NUM_CLASSES=80,
31 |             FC_DIM=256,
32 |             NUM_FC=3,
33 |             # Number of points sampled during training for a mask point head.
34 |             TRAIN_NUM_POINTS=14 * 14,
35 |             # Oversampling parameter for PointRend point sampling during training.
36 |             # Parameter `k` in the original paper.
37 |             OVERSAMPLE_RATIO=3,
38 |             # Importance sampling parameter for PointRend point sampling during training.
39 |             # Parametr `beta` in the original paper.
40 |             IMPORTANCE_SAMPLE_RATIO=0.75,
41 |             # Number of subdivision steps during inference.
42 |             SUBDIVISION_STEPS=5,
43 |             # Maximum number of points selected at each subdivision step (N).
44 |             SUBDIVISION_NUM_POINTS=28 * 28,
45 |             CLS_AGNOSTIC_MASK=False,
46 |             # If True, then coarse prediction features are used as input for each layer
47 |             # in PointRend's MLP.
48 |             COARSE_PRED_EACH_LAYER=True,
49 |             # COARSE_SEM_SEG_HEAD_NAME="SemSegFPNHead"
50 |         ),
51 |     ),
52 |     INPUT=dict(
53 |         # PointRend for instance segmenation does not work with "polygon" mask_format
54 |         MASK_FORMAT="bitmask",
55 |     ),
56 |     DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,),
57 | )
58 | 
59 | 
60 | class PointRendRCNNFPNConfig(RCNNFPNConfig):
61 |     def __init__(self):
62 |         super(PointRendRCNNFPNConfig, self).__init__()
63 |         self._register_configuration(_config_dict)
64 | 
65 | 
66 | config = PointRendRCNNFPNConfig()
67 | 


--------------------------------------------------------------------------------
/cvpods/configs/rcnn_fpn_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | 
 4 | from .rcnn_config import RCNNConfig
 5 | 
 6 | _config_dict = dict(
 7 |     MODEL=dict(
 8 |         RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],),
 9 |         FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]),
10 |         ANCHOR_GENERATOR=dict(
11 |             SIZES=[[32], [64], [128], [256], [512]], ASPECT_RATIOS=[[0.5, 1.0, 2.0]],
12 |         ),
13 |         RPN=dict(
14 |             IN_FEATURES=["p2", "p3", "p4", "p5", "p6"],
15 |             PRE_NMS_TOPK_TRAIN=2000,
16 |             PRE_NMS_TOPK_TEST=1000,
17 |             POST_NMS_TOPK_TRAIN=1000,
18 |             POST_NMS_TOPK_TEST=1000,
19 |         ),
20 |         ROI_HEADS=dict(
21 |             # NAME: "StandardROIHeads"
22 |             IN_FEATURES=["p2", "p3", "p4", "p5"],
23 |         ),
24 |         ROI_BOX_HEAD=dict(
25 |             # NAME: "FastRCNNConvFCHead"
26 |             NUM_FC=2,
27 |             POOLER_RESOLUTION=7,
28 |         ),
29 |         ROI_MASK_HEAD=dict(
30 |             # NAME: "MaskRCNNConvUpsampleHead"
31 |             NUM_CONV=4,
32 |             POOLER_RESOLUTION=14,
33 |         ),
34 |         ROI_TRACK_HEAD=dict(
35 |             # NAME: "TrackHead"
36 |             POOLER_RESOLUTION=7,
37 |             PID_WEIGHT=-1,
38 |         ),
39 |     ),
40 | )
41 | 
42 | 
43 | class RCNNFPNConfig(RCNNConfig):
44 |     def __init__(self):
45 |         super(RCNNFPNConfig, self).__init__()
46 |         self._register_configuration(_config_dict)
47 | 
48 | 
49 | config = RCNNFPNConfig()
50 | 


--------------------------------------------------------------------------------
/cvpods/configs/retinanet_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   retinanet_config.py
 5 | @Time               :   2020/05/07 23:56:02
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:56:02
10 | '''
11 | 
12 | from .base_detection_config import BaseDetectionConfig
13 | 
14 | _config_dict = dict(
15 |     MODEL=dict(
16 |         # Backbone NAME: "build_retinanet_resnet_fpn_backbone"
17 |         RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]),
18 |         FPN=dict(IN_FEATURES=["res3", "res4", "res5"]),
19 |         ANCHOR_GENERATOR=dict(
20 |             SIZES=[
21 |                 [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)]
22 |                 for x in [32, 64, 128, 256, 512]
23 |             ]
24 |         ),
25 |         RETINANET=dict(
26 |             # This is the number of foreground classes.
27 |             NUM_CLASSES=80,
28 |             IN_FEATURES=["p3", "p4", "p5", "p6", "p7"],
29 |             # Convolutions to use in the cls and bbox tower
30 |             # NOTE: this doesn't include the last conv for logits
31 |             NUM_CONVS=4,
32 |             # IoU overlap ratio [bg, fg] for labeling anchors.
33 |             # Anchors with < bg are labeled negative (0)
34 |             # Anchors  with >= bg and < fg are ignored (-1)
35 |             # Anchors with >= fg are labeled positive (1)
36 |             IOU_THRESHOLDS=[0.4, 0.5],
37 |             IOU_LABELS=[0, -1, 1],
38 |             # Prior prob for rare case (i.e. foreground) at the beginning of training.
39 |             # This is used to set the bias for the logits layer of the classifier subnet.
40 |             # This improves training stability in the case of heavy class imbalance.
41 |             PRIOR_PROB=0.01,
42 |             # Inference cls score threshold, only anchors with score > INFERENCE_TH are
43 |             # considered for inference (to improve speed)
44 |             SCORE_THRESH_TEST=0.05,
45 |             TOPK_CANDIDATES_TEST=1000,
46 |             NMS_THRESH_TEST=0.5,
47 |             # Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets
48 |             BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0),
49 |             # Loss parameters
50 |             FOCAL_LOSS_GAMMA=2.0,
51 |             FOCAL_LOSS_ALPHA=0.25,
52 |             SMOOTH_L1_LOSS_BETA=0.1,
53 |         ),
54 |     ),
55 | )
56 | 
57 | 
58 | class RetinaNetConfig(BaseDetectionConfig):
59 |     def __init__(self):
60 |         super(RetinaNetConfig, self).__init__()
61 |         self._register_configuration(_config_dict)
62 | 
63 | 
64 | config = RetinaNetConfig()
65 | 


--------------------------------------------------------------------------------
/cvpods/configs/solo_config.py:
--------------------------------------------------------------------------------
 1 | from .base_detection_config import BaseDetectionConfig
 2 | 
 3 | _config_dict = dict(
 4 |     MODEL=dict(
 5 |         MASK_ON=True,
 6 |         PIXEL_MEAN=[103.530, 116.280, 123.675],  # BGR FORMAT
 7 |         PIXEL_STD=[1.0, 1.0, 1.0],
 8 |         RESNETS=dict(
 9 |             DEPTH=50,
10 |             OUT_FEATURES=["res2", "res3", "res4", "res5"],
11 |         ),
12 |         FPN=dict(
13 |             IN_FEATURES=["res2", "res3", "res4", "res5"],
14 |             OUT_CHANNELS=256,
15 |         ),
16 |         SOLO=dict(
17 |             NUM_CLASSES=80,
18 |             IN_FEATURES=["p2", "p3", "p4", "p5", "p6"],
19 |             NUM_GRIDS=[40, 36, 24, 16, 12],  # per level
20 |             SCALE_RANGES=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),
21 |             FEATURE_STRIDES=[8, 8, 16, 32, 32],
22 |             # Given a gt: (cx, cy, w, h), the center region is controlled by
23 |             # constant scale factors sigma: (cx, cy, sigma*w, sigma*h)
24 |             SIGMA=0.2,
25 |             HEAD=dict(
26 |                 TYPE="SOLOHead",  # "SOLOHead", "DecoupledSOLOHead"
27 |                 SEG_FEAT_CHANNELS=256,
28 |                 STACKED_CONVS=7,
29 |                 PRIOR_PROB=0.01,
30 |                 NORM="GN",
31 |                 # The following two items are useful in the "DecoupledSOLOLightHead"
32 |                 USE_DCN_IN_TOWER=False,
33 |                 DCN_TYPE=None,
34 |             ),
35 |             # Loss parameters:
36 |             LOSS_INS=dict(
37 |                 TYPE='DiceLoss',
38 |                 LOSS_WEIGHT=3.0
39 |             ),
40 |             LOSS_CAT=dict(
41 |                 TYPE='FocalLoss',
42 |                 GAMMA=2.0,
43 |                 ALPHA=0.25,
44 |                 LOSS_WEIGHT=1.0,
45 |             ),
46 |             # Inference parameters:
47 |             SCORE_THRESH_TEST=0.1,
48 |             MASK_THRESH_TEST=0.5,
49 |             # NMS parameters:
50 |             NMS_PER_IMAGE=500,
51 |             NMS_KERNEL='gaussian',  # gaussian/linear
52 |             NMS_SIGMA=2.0,
53 |             UPDATE_THRESH=0.05,
54 |             DETECTIONS_PER_IMAGE=100,
55 |         ),
56 |     ),
57 |     INPUT=dict(
58 |         # SOLO for instance segmenation does not work with "polygon" mask_format
59 |         MASK_FORMAT="bitmask",
60 |     )
61 | )
62 | 
63 | 
64 | class SOLOConfig(BaseDetectionConfig):
65 |     def __init__(self):
66 |         super(SOLOConfig, self).__init__()
67 |         self._register_configuration(_config_dict)
68 | 
69 | 
70 | config = SOLOConfig()
71 | 


--------------------------------------------------------------------------------
/cvpods/configs/solov2_config.py:
--------------------------------------------------------------------------------
 1 | from .base_detection_config import BaseDetectionConfig
 2 | 
 3 | _config_dict = dict(
 4 |     MODEL=dict(
 5 |         MASK_ON=True,
 6 |         PIXEL_MEAN=[103.530, 116.280, 123.675],  # BGR FORMAT
 7 |         PIXEL_STD=[1.0, 1.0, 1.0],
 8 |         RESNETS=dict(
 9 |             DEPTH=50,
10 |             OUT_FEATURES=["res2", "res3", "res4", "res5"],
11 |         ),
12 |         FPN=dict(
13 |             IN_FEATURES=["res2", "res3", "res4", "res5"],
14 |             OUT_CHANNELS=256,
15 |         ),
16 |         SOLOV2=dict(
17 |             # Instance hyper-parameters
18 |             INSTANCE_IN_FEATURES=["p2", "p3", "p4", "p5", "p6"],
19 |             FPN_INSTANCE_STRIDES=[8, 8, 16, 32, 32],
20 |             FPN_SCALE_RANGES=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),
21 |             SIGMA=0.2,
22 |             # Channel size for the instance head.
23 |             INSTANCE_IN_CHANNELS=256,
24 |             INSTANCE_CHANNELS=512,
25 |             # Convolutions to use in the instance head.
26 |             NUM_INSTANCE_CONVS=4,
27 |             USE_DCN_IN_INSTANCE=False,
28 |             TYPE_DCN='DCN',
29 |             NUM_GRIDS=[40, 36, 24, 16, 12],
30 |             # Number of foreground classes.
31 |             NUM_CLASSES=80,
32 |             NUM_KERNELS=256,
33 |             NORM="GN",
34 |             USE_COORD_CONV=True,
35 |             PRIOR_PROB=0.01,
36 |             # Mask hyper-parameters.
37 |             # Channel size for the mask tower.
38 |             MASK_IN_FEATURES=["p2", "p3", "p4", "p5"],
39 |             MASK_IN_CHANNELS=256,
40 |             MASK_CHANNELS=128,
41 |             NUM_MASKS=256,     # NUM_MASKS * kernel_size**2 = NUM_KERNELS
42 |             # Test cfg.
43 |             NMS_PRE=500,
44 |             SCORE_THR=0.1,
45 |             UPDATE_THR=0.05,
46 |             MASK_THR=0.5,
47 |             MAX_PER_IMG=100,
48 |             # NMS type: matrix OR mask.
49 |             NMS_TYPE="matrix",
50 |             NMS_KERNEL="gaussian",
51 |             NMS_SIGMA=2,
52 |             # Loss cfg.
53 |             LOSS=dict(
54 |                 FOCAL_USE_SIGMOID=True,
55 |                 FOCAL_ALPHA=0.25,
56 |                 FOCAL_GAMMA=2.0,
57 |                 FOCAL_WEIGHT=1.0,
58 |                 DICE_WEIGHT=3.0
59 |             )
60 |         ),
61 |     ),
62 |     INPUT=dict(
63 |         # SOLO for instance segmenation does not work with "polygon" mask_format
64 |         MASK_FORMAT="bitmask",
65 |     )
66 | )
67 | 
68 | 
69 | class SOLOV2Config(BaseDetectionConfig):
70 |     def __init__(self):
71 |         super(SOLOV2Config, self).__init__()
72 |         self._register_configuration(_config_dict)
73 | 
74 | 
75 | config = SOLOV2Config()
76 | 


--------------------------------------------------------------------------------
/cvpods/configs/sparse_rcnn_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | Xiangtai Li
 5 | '''
 6 | 
 7 | from .rcnn_config import RCNNConfig
 8 | 
 9 | _config_dict = dict(
10 |     MODEL=dict(
11 |         # BACKBONE=dict(NAME='build_resnet_backbone',),
12 |         RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],),
13 |         FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]),
14 |         ROI_HEADS=dict(
15 |             # NAME: "StandardROIHeads"
16 |             IN_FEATURES=["p2", "p3", "p4", "p5"],
17 |         ),
18 |         ROI_BOX_HEAD=dict(
19 |             POOLER_TYPE="ROIAlignV2",
20 |             POOLER_SAMPLING_RATIO=2,
21 |             POOLER_RESOLUTION=7,
22 |         ),
23 |         SparseRCNN=dict(
24 |             NUM_PROPOSALS=100,
25 |             NUM_CLASSES=80,
26 |             NHEADS=8,
27 |             DROPOUT=0.0,
28 |             DIM_FEEDFORWARD=2048,
29 |             ACTIVATION='relu',
30 |             HIDDEN_DIM=256,
31 |             NUM_CLS=1,
32 |             NUM_REG=3,
33 |             NUM_HEADS=6,
34 | 
35 |             # Dynamic Conv.
36 |             NUM_DYNAMIC=2,
37 |             DIM_DYNAMIC=64,
38 | 
39 |             # Loss.
40 |             CLASS_WEIGHT=2.0,
41 |             GIOU_WEIGHT=2.0,
42 |             L1_WEIGHT=5.0,
43 |             DEEP_SUPERVISION=True,
44 |             NO_OBJECT_WEIGHT=0.1,
45 |             USE_FOCAL=True,
46 |             ALPHA=0.25,
47 |             GAMMA=2.0,
48 |             PRIOR_PROB=0.01
49 |         )
50 |     ),
51 | )
52 | 
53 | 
54 | class SparseRCNNFPNConfig(RCNNConfig):
55 |     def __init__(self):
56 |         super(SparseRCNNFPNConfig, self).__init__()
57 |         self._register_configuration(_config_dict)
58 | 
59 | 
60 | config = SparseRCNNFPNConfig()
61 | 


--------------------------------------------------------------------------------
/cvpods/configs/ssd_config.py:
--------------------------------------------------------------------------------
 1 | from .base_detection_config import BaseDetectionConfig
 2 | 
 3 | _config_dict = dict(
 4 |     MODEL=dict(
 5 |         PIXEL_MEAN=[123.675, 116.28, 103.53],  # RGB FORMAT
 6 |         PIXEL_STD=[1.0, 1.0, 1.0],
 7 |         VGG=dict(
 8 |             ARCH='D',
 9 |             NORM="",
10 |             NUM_CLASSES=None,
11 |             OUT_FEATURES=["Conv4_3", "Conv7"],
12 |             POOL_ARGS=dict(
13 |                 pool3=(2, 2, 0, True),  # k, s, p, ceil_model
14 |                 pool5=(3, 1, 1, False)  # k, s, p, ceil_model
15 |             ),
16 |             FC_TO_CONV=True,
17 |         ),
18 |         SSD=dict(
19 |             NUM_CLASSES=80,
20 |             IN_FEATURES=["Conv4_3", "Conv7"],
21 |             EXTRA_LAYER_ARCH={
22 |                 # the number after "S" and "S" to denote conv layer with stride=2
23 |                 "300": [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
24 |                 "512": [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128, 256],
25 |             },
26 |             IOU_THRESHOLDS=[0.5, 0.5],
27 |             IOU_LABELS=[0, -1, 1],
28 |             BBOX_REG_WEIGHTS=(10.0, 10.0, 5.0, 5.0),
29 |             L2NORM_SCALE=20.0,
30 |             # Loss parameters:
31 |             LOSS_ALPHA=1.0,
32 |             SMOOTH_L1_LOSS_BETA=1.0,
33 |             NEGATIVE_POSITIVE_RATIO=3.0,
34 |             # Inference parameters:
35 |             SCORE_THRESH_TEST=0.02,
36 |             NMS_THRESH_TEST=0.45,
37 |         ),
38 |     )
39 | )
40 | 
41 | 
42 | class SSDConfig(BaseDetectionConfig):
43 |     def __init__(self):
44 |         super(SSDConfig, self).__init__()
45 |         self._register_configuration(_config_dict)
46 | 
47 | 
48 | config = SSDConfig()
49 | 


--------------------------------------------------------------------------------
/cvpods/configs/yolo_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   yolo_config.py
 5 | @Time               :   2020/05/07 23:55:49
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:55:49
10 | '''
11 | 
12 | from .base_detection_config import BaseDetectionConfig
13 | 
14 | _config_dict = dict(
15 |     MODEL=dict(
16 |         PIXEL_MEAN=(0.485, 0.456, 0.406),
17 |         PIXEL_STD=(0.229, 0.224, 0.225),
18 |         DARKNET=dict(
19 |             DEPTH=53,
20 |             STEM_OUT_CHANNELS=32,
21 |             WEIGHTS="s3://generalDetection/cvpods/ImageNetPretrained/custom/darknet53.mix.pth",
22 |             OUT_FEATURES=["dark3", "dark4", "dark5"]
23 |         ),
24 |         YOLO=dict(
25 |             CLASSES=80,
26 |             IN_FEATURES=["dark3", "dark4", "dark5"],
27 |             ANCHORS=[
28 |                 [[116, 90], [156, 198], [373, 326]],
29 |                 [[30, 61], [62, 45], [42, 119]],
30 |                 [[10, 13], [16, 30], [33, 23]],
31 |             ],
32 |             CONF_THRESHOLD=0.01,  # TEST
33 |             NMS_THRESHOLD=0.5,
34 |             IGNORE_THRESHOLD=0.7,
35 |         ),
36 |     ),
37 | )
38 | 
39 | 
40 | class YOLO3Config(BaseDetectionConfig):
41 |     def __init__(self):
42 |         super(YOLO3Config, self).__init__()
43 |         self._register_configuration(_config_dict)
44 | 
45 | 
46 | config = YOLO3Config()
47 | 


--------------------------------------------------------------------------------
/cvpods/data/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from .build import (
 3 |     build_dataset,
 4 |     build_transform_gen,
 5 |     build_detection_test_loader,
 6 |     build_detection_train_loader,
 7 | )
 8 | from .registry import DATASETS, TRANSFORMS, SAMPLERS
 9 | 
10 | from . import transforms  # isort:skip
11 | # ensure the builtin datasets are registered
12 | from . import datasets, samplers  # isort:skip
13 | 
14 | 
15 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
16 | 


--------------------------------------------------------------------------------
/cvpods/data/datasets/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Megvii, Inc. and its affiliates. All Rights Reserved
 2 | 
 3 | from .cityscapes import CityScapesDataset
 4 | from .coco import COCODataset
 5 | from .imagenet import ImageNetDataset
 6 | from .voc import VOCDataset
 7 | from .widerface import WiderFaceDataset
 8 | from .lvis import LVISDataset
 9 | from .citypersons import CityPersonsDataset
10 | from .crowdhuman import CrowdHumanDataset
11 | from .youtubevis import YTVisDataset
12 | from .ovis import OVisDataset
13 | from .coco_captions import COCOCaptionsDataset
14 | 
15 | __all__ = [
16 |     "COCODataset",
17 |     "VOCDataset",
18 |     "CityScapesDataset",
19 |     "ImageNetDataset",
20 |     "WiderFaceDataset",
21 |     "LVISDataset",
22 |     "CityPersonsDataset",
23 |     "CrowdHumanDataset",
24 |     "YTVisDataset",
25 |     "OVisDataset",
26 |     "COCOCaptionsDataset"
27 | ]
28 | 


--------------------------------------------------------------------------------
/cvpods/data/datasets/ext/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/data/datasets/ext/__init__.py


--------------------------------------------------------------------------------
/cvpods/data/registry.py:
--------------------------------------------------------------------------------
1 | from cvpods.utils import Registry
2 | 
3 | DATASETS = Registry("datasets")
4 | TRANSFORMS = Registry("transforms")
5 | SAMPLERS = Registry("samplers")
6 | 


--------------------------------------------------------------------------------
/cvpods/data/samplers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler
 3 | from .grouped_batch_sampler import GroupedBatchSampler
 4 | from .sampler import DistributedSampler, GroupSampler, DistributedGroupSampler
 5 | 
 6 | __all__ = [
 7 |     "GroupedBatchSampler",
 8 |     "TrainingSampler",
 9 |     "InferenceSampler",
10 |     "RepeatFactorTrainingSampler",
11 |     "DistributedSampler",
12 |     "GroupSampler",
13 |     "DistributedGroupSampler",
14 | ]
15 | 


--------------------------------------------------------------------------------
/cvpods/data/samplers/grouped_batch_sampler.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import numpy as np
 3 | from torch.utils.data.sampler import BatchSampler, Sampler
 4 | 
 5 | from ..registry import SAMPLERS
 6 | 
 7 | 
 8 | @SAMPLERS.register()
 9 | class GroupedBatchSampler(BatchSampler):
10 |     """
11 |     Wraps another sampler to yield a mini-batch of indices.
12 |     It enforces that the batch only contain elements from the same group.
13 |     It also tries to provide mini-batches which follows an ordering which is
14 |     as close as possible to the ordering from the original sampler.
15 |     """
16 | 
17 |     def __init__(self, sampler, group_ids, batch_size):
18 |         """
19 |         Args:
20 |             sampler (Sampler): Base sampler.
21 |             group_ids (list[int]): If the sampler produces indices in range [0, N),
22 |                 `group_ids` must be a list of `N` ints which contains the group id of each sample.
23 |                 The group ids must be a set of integers in the range [0, num_groups).
24 |             batch_size (int): Size of mini-batch.
25 |         """
26 |         if not isinstance(sampler, Sampler):
27 |             raise ValueError(
28 |                 "sampler should be an instance of "
29 |                 "torch.utils.data.Sampler, but got sampler={}".format(sampler)
30 |             )
31 |         self.sampler = sampler
32 |         self.group_ids = np.asarray(group_ids)
33 |         assert self.group_ids.ndim == 1
34 |         self.batch_size = batch_size
35 |         groups = np.unique(self.group_ids).tolist()
36 | 
37 |         # buffer the indices of each group until batch size is reached
38 |         self.buffer_per_group = {k: [] for k in groups}
39 | 
40 |     def __iter__(self):
41 |         for idx in self.sampler:
42 |             group_id = self.group_ids[idx]
43 |             group_buffer = self.buffer_per_group[group_id]
44 |             group_buffer.append(idx)
45 |             if len(group_buffer) == self.batch_size:
46 |                 yield group_buffer[:]  # yield a copy of the list
47 |                 del group_buffer[:]
48 | 
49 |     def __len__(self):
50 |         raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.")
51 | 


--------------------------------------------------------------------------------
/cvpods/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .transform import *
3 | from .transform_gen import *
4 | 
5 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
6 | 


--------------------------------------------------------------------------------
/cvpods/data/wrapped_dataset.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   wrapped_dataset.py
 5 | @Time               :   2020/05/07 23:54:57
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:54:57
10 | '''
11 | 
12 | import numpy as np
13 | from types import SimpleNamespace
14 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
15 | 
16 | from .registry import DATASETS
17 | 
18 | 
19 | @DATASETS.register()
20 | class ConcatDataset(_ConcatDataset):
21 |     """A wrapper of concatenated datasets.
22 |     Same as :obj:`torch.utils.data.datasets.ConcatDataset`, but
23 |     concat the group flag for image aspect ratio.
24 |     Args:
25 |         datasets (list[:obj:`Dataset`]): A list of datasets.
26 |     """
27 | 
28 |     def __init__(self, datasets):
29 |         super(ConcatDataset, self).__init__(datasets)
30 |         if hasattr(self.datasets[0], 'aspect_ratios'):
31 |             aspect_ratios = [d.aspect_ratios for d in self.datasets]
32 |             self.aspect_ratios = np.concatenate(aspect_ratios)
33 |         if hasattr(self.datasets[0], 'meta'):
34 |             self.meta = {}
35 |             for d in self.datasets:
36 |                 self.meta.update(d.meta)
37 |             self.meta = SimpleNamespace(**self.meta)
38 | 
39 | 
40 | @DATASETS.register()
41 | class RepeatDataset(object):
42 |     """A wrapper of repeated datasets.
43 |     The length of repeated datasets will be `times` larger than the original
44 |     datasets. This is useful when the data loading time is long but the datasets
45 |     is small. Using RepeatDataset can reduce the data loading time between
46 |     epochs.
47 |     Args:
48 |         dataset (:obj:`Dataset`): The datasets to be repeated.
49 |         times (int): Repeat times.
50 |     """
51 | 
52 |     def __init__(self, dataset, times):
53 |         self.dataset = dataset
54 |         self.times = times
55 |         if hasattr(self.dataset, 'aspect_ratios'):
56 |             self.aspect_ratios = np.tile(self.dataset.aspect_ratios, times)
57 | 
58 |         self._ori_len = len(self.dataset)
59 | 
60 |     def __getitem__(self, idx):
61 |         return self.dataset[idx % self._ori_len]
62 | 
63 |     def __len__(self):
64 |         return self.times * self._ori_len
65 | 


--------------------------------------------------------------------------------
/cvpods/engine/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | from .hooks import *
 4 | from .launch import *
 5 | from .predictor import *
 6 | from .setup import *
 7 | from .trainer import *
 8 | 
 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
10 | 


--------------------------------------------------------------------------------
/cvpods/evaluation/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from .citypersons_evaluation import CityPersonsEvaluator
 3 | from .cityscapes_evaluation import CityscapesInstanceEvaluator, CityscapesSemSegEvaluator
 4 | from .crowdhuman_evaluation import CrowdHumanEvaluator
 5 | from .coco_evaluation import COCOEvaluator
 6 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset
 7 | from .lvis_evaluation import LVISEvaluator
 8 | from .panoptic_evaluation import COCOPanopticEvaluator
 9 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator
10 | from .rotated_coco_evaluation import RotatedCOCOEvaluator
11 | from .sem_seg_evaluation import SemSegEvaluator
12 | from .testing import print_csv_format, verify_results
13 | from .widerface_evaluation import WiderFaceEvaluator
14 | from .classification_evaluation import ClassificationEvaluator
15 | from .youtubevis_evaluation import YouTubeVISEvaluator
16 | 
17 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
18 | 


--------------------------------------------------------------------------------
/cvpods/evaluation/testing.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import logging
 3 | import numpy as np
 4 | import pprint
 5 | import sys
 6 | from collections import Mapping, OrderedDict
 7 | 
 8 | 
 9 | def print_csv_format(results):
10 |     """
11 |     Print main metrics in a format similar to Detectron,
12 |     so that they are easy to copypaste into a spreadsheet.
13 | 
14 |     Args:
15 |         results (OrderedDict[dict]): task_name -> {metric -> score}
16 |     """
17 |     assert isinstance(results, OrderedDict), results  # unordered results cannot be properly printed
18 |     logger = logging.getLogger(__name__)
19 |     for task, res in results.items():
20 |         # Don't print "AP-category" metrics since they are usually not tracked.
21 |         important_res = [(k, v) for k, v in res.items() if "-" not in k]
22 |         logger.info("copypaste: Task: {}".format(task))
23 |         logger.info("copypaste: " + ",".join([k[0] for k in important_res]))
24 |         logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res]))
25 | 
26 | 
27 | def verify_results(cfg, results):
28 |     """
29 |     Args:
30 |         results (OrderedDict[dict]): task_name -> {metric -> score}
31 | 
32 |     Returns:
33 |         bool: whether the verification succeeds or not
34 |     """
35 |     expected_results = cfg.TEST.EXPECTED_RESULTS
36 |     if not len(expected_results):
37 |         return True
38 | 
39 |     ok = True
40 |     for task, metric, expected, tolerance in expected_results:
41 |         actual = results[task][metric]
42 |         if not np.isfinite(actual):
43 |             ok = False
44 |         diff = abs(actual - expected)
45 |         if diff > tolerance:
46 |             ok = False
47 | 
48 |     logger = logging.getLogger(__name__)
49 |     if not ok:
50 |         logger.error("Result verification failed!")
51 |         logger.error("Expected Results: " + str(expected_results))
52 |         logger.error("Actual Results: " + pprint.pformat(results))
53 | 
54 |         sys.exit(1)
55 |     else:
56 |         logger.info("Results verification passed.")
57 |     return ok
58 | 
59 | 
60 | def flatten_results_dict(results):
61 |     """
62 |     Expand a hierarchical dict of scalars into a flat dict of scalars.
63 |     If results[k1][k2][k3] = v, the returned dict will have the entry
64 |     {"k1/k2/k3": v}.
65 | 
66 |     Args:
67 |         results (dict):
68 |     """
69 |     r = {}
70 |     for k, v in results.items():
71 |         if isinstance(v, Mapping):
72 |             v = flatten_results_dict(v)
73 |             for kk, vv in v.items():
74 |                 r[k + "/" + kk] = vv
75 |         else:
76 |             r[k] = v
77 |     return r
78 | 


--------------------------------------------------------------------------------
/cvpods/export/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | This directory contains code to prepare a detectron2 model for deployment.
 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX.
 4 | 
 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage.
 6 | 
 7 | 
 8 | ### Acknowledgements
 9 | 
10 | Thanks to Mobile Vision team at Facebook for developing the conversion tools.
11 | 


--------------------------------------------------------------------------------
/cvpods/export/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | 
3 | from .api import *
4 | 
5 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
6 | 


--------------------------------------------------------------------------------
/cvpods/layers/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm, get_activation, get_norm
 3 | from .deform_conv import DeformConv, ModulatedDeformConv, DFConv2d
 4 | from .deform_conv_with_off import DeformConvWithOff, ModulatedDeformConvWithOff
 5 | from .mask_ops import paste_masks_in_image
 6 | from .nms import (batched_nms, batched_softnms, generalized_batched_nms, batched_nms_rotated,
 7 |                   ml_nms, nms_rotated, softnms, matrix_nms)
 8 | 
 9 | from .position_encoding import position_encoding_dict
10 | from .blocks import CNNBlockBase, DepthwiseSeparableConv2d
11 | from .aspp import ASPP
12 | from .roi_align import ROIAlign, roi_align
13 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated
14 | from .shape_spec import ShapeSpec
15 | from .swap_align2nat import SwapAlign2Nat, swap_align2nat
16 | from .activation_funcs import Swish, MemoryEfficientSwish
17 | from .border_align import BorderAlign
18 | from .naive_group_norm import NaiveGroupNorm
19 | from .ms_deform_attn import MSDeformAttn
20 | from .crop_split import CropSplit
21 | from .crop_split_gt import CropSplitGT
22 | from .dynamic_weights import DynamicWeightsCat11
23 | from .saconv import ConvAWS2dLayer, SAConv2dLayer, SAConv2dNoGlobalContextLayer
24 | from .wrappers import (
25 |     cat,
26 |     BatchNorm2d,
27 |     Conv2d,
28 |     Conv2dSamePadding,
29 |     MaxPool2dSamePadding,
30 |     SeparableConvBlock,
31 |     ConvTranspose2d,
32 |     interpolate,
33 |     nonzero_tuple,
34 |     cross_entropy
35 | )
36 | 
37 | 
38 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
39 | 


--------------------------------------------------------------------------------
/cvpods/layers/activation_funcs.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | # Ref:
 6 | # https://medium.com/the-artificial-impostor/more-memory-efficient-swish-activation-function-e07c22c12a76
 7 | class SwishImplementation(torch.autograd.Function):
 8 |     """
 9 |     Swish activation function memory-efficient implementation.
10 | 
11 |     This implementation explicitly processes the gradient, it keeps a copy of the input tensor,
12 |     and uses it to calculate the gradient during the back-propagation phase.
13 |     """
14 |     @staticmethod
15 |     def forward(ctx, i):
16 |         result = i * torch.sigmoid(i)
17 |         ctx.save_for_backward(i)
18 |         return result
19 | 
20 |     @staticmethod
21 |     def backward(ctx, grad_output):
22 |         i = ctx.saved_variables[0]
23 |         sigmoid_i = torch.sigmoid(i)
24 |         return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i)))
25 | 
26 | 
27 | class MemoryEfficientSwish(nn.Module):
28 |     def forward(self, x):
29 |         return SwishImplementation.apply(x)
30 | 
31 | 
32 | class Swish(nn.Module):
33 |     """
34 |     Implement the Swish activation function.
35 |     See: https://arxiv.org/abs/1710.05941 for more details.
36 |     """
37 |     def forward(self, x):
38 |         return x * torch.sigmoid(x)
39 | 


--------------------------------------------------------------------------------
/cvpods/layers/border_align.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | from torch import nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | from cvpods import _C
 6 | 
 7 | 
 8 | class _BorderAlign(Function):
 9 |     @staticmethod
10 |     def forward(ctx, input, boxes, wh, pool_size):
11 |         output = _C.border_align_forward(input, boxes, wh, pool_size)
12 |         ctx.pool_size = pool_size
13 |         ctx.save_for_backward(input, boxes, wh)
14 |         return output
15 | 
16 |     @staticmethod
17 |     @once_differentiable
18 |     def backward(ctx, grad_output):
19 |         pool_size = ctx.pool_size
20 |         input, boxes, wh = ctx.saved_tensors
21 |         grad_input = _C.border_align_backward(
22 |             grad_output, input, boxes, wh, pool_size)
23 |         return grad_input, None, None, None
24 | 
25 | 
26 | border_align = _BorderAlign.apply
27 | 
28 | 
29 | class BorderAlign(nn.Module):
30 |     def __init__(self, pool_size):
31 |         super(BorderAlign, self).__init__()
32 |         self.pool_size = pool_size
33 | 
34 |     def forward(self, feature, boxes):
35 |         feature = feature.contiguous()
36 |         boxes = boxes.contiguous()
37 |         wh = (boxes[:, :, 2:] - boxes[:, :, :2]).contiguous()
38 |         output = border_align(feature, boxes, wh, self.pool_size)
39 |         return output
40 | 
41 |     def __repr__(self):
42 |         tmpstr = self.__class__.__name__
43 |         return tmpstr
44 | 


--------------------------------------------------------------------------------
/cvpods/layers/conv_with_kaiming_uniform.py:
--------------------------------------------------------------------------------
 1 | from torch import nn
 2 | 
 3 | from cvpods.layers import Conv2d
 4 | from .deform_conv import DFConv2d
 5 | from cvpods.layers.batch_norm import get_norm
 6 | 
 7 | 
 8 | def conv_with_kaiming_uniform(
 9 |         norm=None, activation=None,
10 |         use_deformable=False, use_sep=False):
11 |     def make_conv(
12 |         in_channels, out_channels, kernel_size, stride=1, dilation=1
13 |     ):
14 |         if use_deformable:
15 |             conv_func = DFConv2d
16 |         else:
17 |             conv_func = Conv2d
18 |         if use_sep:
19 |             assert in_channels == out_channels
20 |             groups = in_channels
21 |         else:
22 |             groups = 1
23 |         conv = conv_func(
24 |             in_channels,
25 |             out_channels,
26 |             kernel_size=kernel_size,
27 |             stride=stride,
28 |             padding=dilation * (kernel_size - 1) // 2,
29 |             dilation=dilation,
30 |             groups=groups,
31 |             bias=(norm is None)
32 |         )
33 |         if not use_deformable:
34 |             # Caffe2 implementation uses XavierFill, which in fact
35 |             # corresponds to kaiming_uniform_ in PyTorch
36 |             nn.init.kaiming_uniform_(conv.weight, a=1)
37 |             if norm is None:
38 |                 nn.init.constant_(conv.bias, 0)
39 |         module = [conv,]
40 |         if norm is not None and len(norm) > 0:
41 |             if norm == "GN":
42 |                 norm_module = nn.GroupNorm(32, out_channels)
43 |             else:
44 |                 norm_module = get_norm(norm, out_channels)
45 |             module.append(norm_module)
46 |         if activation is not None:
47 |             module.append(nn.ReLU(inplace=True))
48 |         if len(module) > 1:
49 |             return nn.Sequential(*module)
50 |         return conv
51 | 
52 |     return make_conv
53 | 


--------------------------------------------------------------------------------
/cvpods/layers/crop_split.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import torch
 3 | import torch.nn as nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | from cvpods import _C
 8 | 
 9 | class _CropSplit(Function):
10 |     @staticmethod
11 |     def forward(ctx, data, rois, c):
12 |         height = data.shape[1]
13 |         width = data.shape[2]
14 |         n = data.shape[3]
15 |         ctx.c = c
16 |         ctx.height = height
17 |         ctx.width = width
18 |         ctx.n = n
19 |         ctx.rois = _pair(rois)
20 |         # print(height*width*n)
21 |         output = data.new_zeros(height, width, n)
22 |         _C.crop_split_forward(data, rois, output, height, width, c, n)
23 |         # print('aa',rois[0])
24 | 
25 |         # if data.requires_grad:
26 |         #     ctx.save_for_backward(data,rois)
27 |         # print(rois.shape)
28 |         # print(data.requires_grad, rois.requires_grad)
29 |         return output
30 | 
31 |     @staticmethod
32 |     @once_differentiable
33 |     def backward(ctx, grad_output):
34 |         # dtata,_ = ctx.saved_tensors
35 | 
36 |         c = ctx.c
37 |         height = ctx.height
38 |         width = ctx.width
39 |         n = ctx.n
40 |         rois = ctx.rois
41 |         # print('bb', rois[0])
42 |         grad_input = torch.zeros((c*c, height, width, n), dtype=grad_output.dtype, device=grad_output.device)
43 |         # grad_input = torch.zeros_like(data)
44 |         _C.crop_split_backward(grad_output, rois, grad_input, height, width, c, n)
45 |         # print(grad_output.requires_grad,grad_input.requires_grad)
46 | 
47 |         return grad_input, None, None
48 | 
49 | crop_split = _CropSplit.apply
50 | 
51 | class CropSplit(nn.Module):
52 | 
53 |     def __init__(self, c=2):
54 |         super(CropSplit, self).__init__()
55 |         self.c = c
56 | 
57 |     def forward(self, data, rois):
58 |         return crop_split(data, rois, self.c)


--------------------------------------------------------------------------------
/cvpods/layers/crop_split_gt.py:
--------------------------------------------------------------------------------
 1 | 
 2 | import torch
 3 | import torch.nn as nn
 4 | from torch.autograd import Function
 5 | from torch.autograd.function import once_differentiable
 6 | from torch.nn.modules.utils import _pair
 7 | from cvpods import _C
 8 | 
 9 | class _CropSplitGT(Function):
10 | 
11 |     @staticmethod
12 |     def forward(ctx, data, rois, c):
13 |         height = data.shape[0]
14 |         width = data.shape[1]
15 |         n = data.shape[2]
16 |         ctx.c = _pair(c)
17 |         ctx.height = _pair(height)
18 |         ctx.width = _pair(width)
19 |         ctx.n = _pair(n)
20 |         # ctx.rois = rois
21 |         # print(height*width*n)
22 |         output = data.new_zeros(height, width, n)
23 |         _C.crop_split_gt_forward(data, rois, output, height, width, c, n)
24 |         # print('aa',rois[0])
25 | 
26 |         # ctx.save_for_backward(data,rois)
27 |         # print(torch.max(output_gt))
28 |         # print('aa',output_gt.shape)
29 |         # print(rois.shape)
30 |         # print(data.requires_grad, rois.requires_grad)
31 |         return output
32 | 
33 | crop_split_gt = _CropSplitGT.apply
34 | 
35 | class CropSplitGT(nn.Module):
36 | 
37 |     def __init__(self, c=2):
38 |         super(CropSplitGT, self).__init__()
39 |         self.c = c
40 | 
41 |     def forward(self, data, rois):
42 |         return crop_split_gt(data, rois, self.c)
43 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | To add a new Op:
4 | 
5 | 1. Create a new directory
6 | 2. Implement new ops there
7 | 3. Delcare its Python interface in `vision.cpp`.
8 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #pragma once
 3 | #include <torch/types.h>
 4 | 
 5 | namespace cvpods {
 6 | 
 7 | #ifdef WITH_CUDA
 8 | at::Tensor SwapAlign2Nat_forward_cuda(
 9 |     const at::Tensor& X,
10 |     const int lambda_val,
11 |     const float pad_val);
12 | 
13 | at::Tensor SwapAlign2Nat_backward_cuda(
14 |     const at::Tensor& gY,
15 |     const int lambda_val,
16 |     const int batch_size,
17 |     const int channel,
18 |     const int height,
19 |     const int width);
20 | #endif
21 | 
22 | inline at::Tensor SwapAlign2Nat_forward(
23 |     const at::Tensor& X,
24 |     const int lambda_val,
25 |     const float pad_val) {
26 |   if (X.type().is_cuda()) {
27 | #ifdef WITH_CUDA
28 |     return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val);
29 | #else
30 |     AT_ERROR("Not compiled with GPU support");
31 | #endif
32 |   }
33 |   AT_ERROR("Not implemented on the CPU");
34 | }
35 | 
36 | inline at::Tensor SwapAlign2Nat_backward(
37 |     const at::Tensor& gY,
38 |     const int lambda_val,
39 |     const int batch_size,
40 |     const int channel,
41 |     const int height,
42 |     const int width) {
43 |   if (gY.type().is_cuda()) {
44 | #ifdef WITH_CUDA
45 |     return SwapAlign2Nat_backward_cuda(
46 |         gY, lambda_val, batch_size, channel, height, width);
47 | #else
48 |     AT_ERROR("Not compiled with GPU support");
49 | #endif
50 |   }
51 |   AT_ERROR("Not implemented on the CPU");
52 | }
53 | 
54 | } // namespace cvpods 
55 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/border_align/border_align.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/types.h>
 3 | #include <torch/extension.h>
 4 | #include <ATen/ATen.h>
 5 | 
 6 | namespace cvpods {
 7 | 
 8 | at::Tensor border_align_cuda_forward(
 9 |     const at::Tensor& feature,
10 |     const at::Tensor& boxes,
11 |     const at::Tensor& wh,
12 |     const int pool_size);
13 | 
14 | 
15 | at::Tensor border_align_cuda_backward(
16 |     const at::Tensor& gradOutput,
17 |     const at::Tensor& feature,
18 |     const at::Tensor& boxes,
19 |     const at::Tensor& wh,
20 |     const int pool_size);
21 | 
22 | 
23 | at::Tensor BorderAlign_Forward(
24 |     const at::Tensor& feature,
25 |     const at::Tensor& boxes,
26 |     const at::Tensor& wh,
27 |     const int pool_size) {
28 |     return border_align_cuda_forward(feature, boxes, wh, pool_size);
29 | }
30 | 
31 | 
32 | at::Tensor BorderAlign_Backward(
33 |     const at::Tensor& gradOutput,
34 |     const at::Tensor& feature,
35 |     const at::Tensor& boxes,
36 |     const at::Tensor& wh,
37 |     const int pool_size) {
38 |     return border_align_cuda_backward(gradOutput, feature, boxes, wh, pool_size);
39 | }
40 | 
41 | } // namespace cvpods


--------------------------------------------------------------------------------
/cvpods/layers/csrc/box_iou_rotated/box_iou_rotated.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #pragma once
 3 | #include <torch/types.h>
 4 | 
 5 | namespace cvpods {
 6 | 
 7 | at::Tensor box_iou_rotated_cpu(
 8 |     const at::Tensor& boxes1,
 9 |     const at::Tensor& boxes2);
10 | 
11 | #ifdef WITH_CUDA
12 | at::Tensor box_iou_rotated_cuda(
13 |     const at::Tensor& boxes1,
14 |     const at::Tensor& boxes2);
15 | #endif
16 | 
17 | // Interface for Python
18 | // inline is needed to prevent multiple function definitions when this header is
19 | // included by different cpps
20 | inline at::Tensor box_iou_rotated(
21 |     const at::Tensor& boxes1,
22 |     const at::Tensor& boxes2) {
23 |   assert(boxes1.device().is_cuda() == boxes2.device().is_cuda());
24 |   if (boxes1.device().is_cuda()) {
25 | #ifdef WITH_CUDA
26 |     return box_iou_rotated_cuda(boxes1, boxes2);
27 | #else
28 |     AT_ERROR("Not compiled with GPU support");
29 | #endif
30 |   }
31 | 
32 |   return box_iou_rotated_cpu(boxes1, boxes2);
33 | }
34 | 
35 | } // namespace cvpods
36 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #include "box_iou_rotated.h"
 3 | #include "box_iou_rotated_utils.h"
 4 | 
 5 | namespace cvpods {
 6 | 
 7 | template <typename T>
 8 | void box_iou_rotated_cpu_kernel(
 9 |     const at::Tensor& boxes1,
10 |     const at::Tensor& boxes2,
11 |     at::Tensor& ious) {
12 |   auto widths1 = boxes1.select(1, 2).contiguous();
13 |   auto heights1 = boxes1.select(1, 3).contiguous();
14 |   auto widths2 = boxes2.select(1, 2).contiguous();
15 |   auto heights2 = boxes2.select(1, 3).contiguous();
16 | 
17 |   at::Tensor areas1 = widths1 * heights1;
18 |   at::Tensor areas2 = widths2 * heights2;
19 | 
20 |   auto num_boxes1 = boxes1.size(0);
21 |   auto num_boxes2 = boxes2.size(0);
22 | 
23 |   for (int i = 0; i < num_boxes1; i++) {
24 |     for (int j = 0; j < num_boxes2; j++) {
25 |       ious[i * num_boxes2 + j] = single_box_iou_rotated<T>(
26 |           boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>());
27 |     }
28 |   }
29 | }
30 | 
31 | at::Tensor box_iou_rotated_cpu(
32 |     const at::Tensor& boxes1,
33 |     const at::Tensor& boxes2) {
34 |   auto num_boxes1 = boxes1.size(0);
35 |   auto num_boxes2 = boxes2.size(0);
36 |   at::Tensor ious =
37 |       at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat));
38 | 
39 |   box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious);
40 | 
41 |   // reshape from 1d array to 2d array
42 |   auto shape = std::vector<int64_t>{num_boxes1, num_boxes2};
43 |   return ious.reshape(shape);
44 | }
45 | 
46 | } // namespace cvpods
47 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/correlation/correlation.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/extension.h>
 3 | #include <vector>
 4 | namespace cvpods {
 5 | 
 6 | // CUDA forward declarations
 7 | std::vector<torch::Tensor> corr_cuda_forward(
 8 |     torch::Tensor fmap1,
 9 |     torch::Tensor fmap2,
10 |     torch::Tensor coords,
11 |     int radius);
12 | 
13 | std::vector<torch::Tensor> corr_cuda_backward(
14 |   torch::Tensor fmap1,
15 |   torch::Tensor fmap2,
16 |   torch::Tensor coords,
17 |   torch::Tensor corr_grad,
18 |   int radius);
19 | 
20 | // C++ interface
21 | #define CHECK_CUDA(x) TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
22 | #define CHECK_CONTIGUOUS(x) TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
23 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
24 | 
25 | std::vector<torch::Tensor> corr_forward(
26 |     torch::Tensor fmap1,
27 |     torch::Tensor fmap2,
28 |     torch::Tensor coords,
29 |     int radius) {
30 |   CHECK_INPUT(fmap1);
31 |   CHECK_INPUT(fmap2);
32 |   CHECK_INPUT(coords);
33 | 
34 |   return corr_cuda_forward(fmap1, fmap2, coords, radius);
35 | }
36 | 
37 | 
38 | std::vector<torch::Tensor> corr_backward(
39 |     torch::Tensor fmap1,
40 |     torch::Tensor fmap2,
41 |     torch::Tensor coords,
42 |     torch::Tensor corr_grad,
43 |     int radius) {
44 |   CHECK_INPUT(fmap1);
45 |   CHECK_INPUT(fmap2);
46 |   CHECK_INPUT(coords);
47 |   CHECK_INPUT(corr_grad);
48 | 
49 |   return corr_cuda_backward(fmap1, fmap2, coords, corr_grad, radius);
50 | }
51 | 
52 | }


--------------------------------------------------------------------------------
/cvpods/layers/csrc/crop_split/crop_split.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/types.h>
 3 | 
 4 | namespace cvpods {
 5 | 
 6 | void CropSplitForward(const at::Tensor data,
 7 |                       const at::Tensor bbox,
 8 |                       at::Tensor out,
 9 |                       const int height,
10 |                       const int width,
11 |                       const int num_cell,
12 |                       const int num_bbox);
13 | 
14 | void CropSplitBack(const at::Tensor top_grad,
15 |                    const at::Tensor bbox,
16 |                    at::Tensor bottom_grad,
17 |                    const int height,
18 |                    const int width,
19 |                    const int num_cell,
20 |                    const int num_bbox);
21 | 
22 | 
23 | void crop_split_cuda_forward(const at::Tensor input,
24 |                              const at::Tensor bbox,
25 |                              at::Tensor out,
26 |                              const int height,
27 |                              const int width,
28 |                              const int num_cell,
29 |                              const int num_bbox)
30 | {
31 |   TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
32 | 
33 |   CropSplitForward(input, bbox, out, height, width, num_cell, num_bbox);
34 | }
35 | 
36 | void crop_split_cuda_backward(const at::Tensor out_grad,
37 |                               const at::Tensor bbox,
38 |                               at::Tensor bottom_grad,
39 |                               const int height,
40 |                               const int width,
41 |                               const int num_cell,
42 |                               const int num_bbox)
43 | {
44 |   TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
45 | 
46 |   CropSplitBack(out_grad, bbox, bottom_grad, height, width, num_cell, num_bbox);
47 | }
48 | }


--------------------------------------------------------------------------------
/cvpods/layers/csrc/crop_split_gt/crop_split_gt.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/types.h>
 3 | 
 4 | namespace cvpods {
 5 | 
 6 | void CropSplitGtForward(const at::Tensor data,
 7 |                         const at::Tensor bbox,
 8 |                         at::Tensor out,
 9 |                         const int height,
10 |                         const int width,
11 |                         const int num_cell,
12 |                         const int num_bbox);
13 | 
14 | void CropSplitGtBack(const at::Tensor top_grad,
15 |                      const at::Tensor bbox,
16 |                      at::Tensor bottom_grad,
17 |                      const int height,
18 |                      const int width,
19 |                      const int num_cell,
20 |                      const int num_bbox);
21 | 
22 | 
23 | void crop_split_gt_cuda_forward(const at::Tensor input,
24 |                                 const at::Tensor bbox,
25 |                                 at::Tensor out,
26 |                                 const int height,
27 |                                 const int width,
28 |                                 const int num_cell,
29 |                                 const int num_bbox)
30 | {
31 |   TORCH_CHECK(input.is_contiguous(), "input tensor has to be contiguous");
32 | 
33 |   CropSplitGtForward(input, bbox, out, height, width, num_cell, num_bbox);
34 | }
35 | 
36 | void crop_split_gt_cuda_backward(const at::Tensor out_grad,
37 |                                  const at::Tensor bbox,
38 |                                  at::Tensor bottom_grad,
39 |                                  const int height,
40 |                                  const int width,
41 |                                  const int num_cell,
42 |                                  const int num_bbox)
43 | {
44 |   TORCH_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous");
45 | 
46 |   CropSplitGtBack(out_grad, bbox, bottom_grad, height, width, num_cell, num_bbox);
47 | }
48 | 
49 | 
50 | }


--------------------------------------------------------------------------------
/cvpods/layers/csrc/cuda_version.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | #include <cuda_runtime_api.h>
 4 | 
 5 | namespace cvpods {
 6 | int get_cudart_version() {
 7 |   return CUDART_VERSION;
 8 | }
 9 | } // namespace cvpods
10 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/deformable_attn/ms_deform_attn.h:
--------------------------------------------------------------------------------
 1 | /*!
 2 | **************************************************************************************************
 3 | * Deformable DETR
 4 | * Copyright (c) 2020 SenseTime. All Rights Reserved.
 5 | * Licensed under the Apache License, Version 2.0 [see LICENSE for details]
 6 | **************************************************************************************************
 7 | * Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
 8 | **************************************************************************************************
 9 | */
10 | 
11 | #pragma once
12 | #include <torch/types.h>
13 | #include <torch/extension.h>
14 | 
15 | 
16 | namespace cvpods {
17 | 
18 | at::Tensor ms_deform_attn_cuda_forward(
19 |     const at::Tensor &value,
20 |     const at::Tensor &spatial_shapes,
21 |     const at::Tensor &level_start_index,
22 |     const at::Tensor &sampling_loc,
23 |     const at::Tensor &attn_weight,
24 |     const int im2col_step);
25 | 
26 | 
27 | std::vector<at::Tensor> ms_deform_attn_cuda_backward(
28 |     const at::Tensor &value,
29 |     const at::Tensor &spatial_shapes,
30 |     const at::Tensor &level_start_index,
31 |     const at::Tensor &sampling_loc,
32 |     const at::Tensor &attn_weight,
33 |     const at::Tensor &grad_output,
34 |     const int im2col_step);
35 | 
36 | 
37 | std::vector<at::Tensor> ms_deform_attn_cuda_backward(
38 |     const at::Tensor &value,
39 |     const at::Tensor &spatial_shapes,
40 |     const at::Tensor &level_start_index,
41 |     const at::Tensor &sampling_loc,
42 |     const at::Tensor &attn_weight,
43 |     const at::Tensor &grad_output,
44 |     const int im2col_step);
45 | 
46 | 
47 | at::Tensor
48 | ms_deform_attn_forward(
49 |     const at::Tensor &value, 
50 |     const at::Tensor &spatial_shapes,
51 |     const at::Tensor &level_start_index,
52 |     const at::Tensor &sampling_loc,
53 |     const at::Tensor &attn_weight,
54 |     const int im2col_step)
55 | {
56 |     if (value.type().is_cuda())
57 |     {
58 |         return ms_deform_attn_cuda_forward(
59 |             value, spatial_shapes, level_start_index, sampling_loc, attn_weight, im2col_step);
60 |     }
61 |     AT_ERROR("Not implemented on the CPU");
62 | }
63 | 
64 | std::vector<at::Tensor>
65 | ms_deform_attn_backward(
66 |     const at::Tensor &value, 
67 |     const at::Tensor &spatial_shapes,
68 |     const at::Tensor &level_start_index,
69 |     const at::Tensor &sampling_loc,
70 |     const at::Tensor &attn_weight,
71 |     const at::Tensor &grad_output,
72 |     const int im2col_step)
73 | {
74 |     if (value.type().is_cuda())
75 |     {
76 |         return ms_deform_attn_cuda_backward(
77 |             value, spatial_shapes, level_start_index, sampling_loc, attn_weight, grad_output, im2col_step);
78 |     }
79 |     AT_ERROR("Not implemented on the CPU");
80 | }
81 | }
82 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/masked_conv2d/masked_conv2d.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/types.h>
 3 | 
 4 | namespace cvpods {
 5 |     
 6 | void masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx,
 7 |                            const at::Tensor mask_w_idx, at::Tensor col,
 8 |                            const int kernel_h, const int kernel_w,
 9 |                            const int pad_h, const int pad_w);
10 | 
11 | void masked_col2im_forward(const at::Tensor col, const at::Tensor mask_h_idx,
12 |                            const at::Tensor mask_w_idx, at::Tensor im, int height,
13 |                            int width, int channels);
14 | 
15 | } // namespace cvpods
16 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/ml_nms/ml_nms.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | #pragma once
 3 | #include <torch/extension.h>
 4 | 
 5 | namespace cvpods {
 6 | #ifdef WITH_CUDA
 7 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh);
 8 | #endif
 9 | 
10 | 
11 | inline at::Tensor ml_nms(const at::Tensor& dets,
12 |                   const at::Tensor& scores,
13 |                   const at::Tensor& labels,
14 |                   const float threshold) {
15 | 
16 |   if (dets.type().is_cuda()) {
17 | #ifdef WITH_CUDA
18 |     // TODO raise error if not compiled with CUDA
19 |     if (dets.numel() == 0)
20 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
21 |     auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1);
22 |     return ml_nms_cuda(b, threshold);
23 | #else
24 |     AT_ERROR("Not compiled with GPU support");
25 | #endif
26 |   }
27 |   AT_ERROR("CPU version not implemented");
28 | }
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/nms_rotated/nms_rotated.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #pragma once
 3 | #include <torch/types.h>
 4 | 
 5 | namespace cvpods {
 6 | 
 7 | at::Tensor nms_rotated_cpu(
 8 |     const at::Tensor& dets,
 9 |     const at::Tensor& scores,
10 |     const float iou_threshold);
11 | 
12 | #ifdef WITH_CUDA
13 | at::Tensor nms_rotated_cuda(
14 |     const at::Tensor& dets,
15 |     const at::Tensor& scores,
16 |     const float iou_threshold);
17 | #endif
18 | 
19 | // Interface for Python
20 | // inline is needed to prevent multiple function definitions when this header is
21 | // included by different cpps
22 | inline at::Tensor nms_rotated(
23 |     const at::Tensor& dets,
24 |     const at::Tensor& scores,
25 |     const float iou_threshold) {
26 |   assert(dets.device().is_cuda() == scores.device().is_cuda());
27 |   if (dets.device().is_cuda()) {
28 | #ifdef WITH_CUDA
29 |     return nms_rotated_cuda(dets, scores, iou_threshold);
30 | #else
31 |     AT_ERROR("Not compiled with GPU support");
32 | #endif
33 |   }
34 | 
35 |   return nms_rotated_cpu(dets, scores, iou_threshold);
36 | }
37 | 
38 | } // namespace cvpods
39 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/nms_rotated/nms_rotated_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h"
 3 | #include "nms_rotated.h"
 4 | 
 5 | namespace cvpods {
 6 | 
 7 | template <typename scalar_t>
 8 | at::Tensor nms_rotated_cpu_kernel(
 9 |     const at::Tensor& dets,
10 |     const at::Tensor& scores,
11 |     const float iou_threshold) {
12 |   // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel,
13 |   // however, the code in this function is much shorter because
14 |   // we delegate the IoU computation for rotated boxes to
15 |   // the single_box_iou_rotated function in box_iou_rotated_utils.h
16 |   AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor");
17 |   AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor");
18 |   AT_ASSERTM(
19 |       dets.type() == scores.type(), "dets should have the same type as scores");
20 | 
21 |   if (dets.numel() == 0) {
22 |     return at::empty({0}, dets.options().dtype(at::kLong));
23 |   }
24 | 
25 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
26 | 
27 |   auto ndets = dets.size(0);
28 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte));
29 |   at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong));
30 | 
31 |   auto suppressed = suppressed_t.data_ptr<uint8_t>();
32 |   auto keep = keep_t.data_ptr<int64_t>();
33 |   auto order = order_t.data_ptr<int64_t>();
34 | 
35 |   int64_t num_to_keep = 0;
36 | 
37 |   for (int64_t _i = 0; _i < ndets; _i++) {
38 |     auto i = order[_i];
39 |     if (suppressed[i] == 1) {
40 |       continue;
41 |     }
42 | 
43 |     keep[num_to_keep++] = i;
44 | 
45 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
46 |       auto j = order[_j];
47 |       if (suppressed[j] == 1) {
48 |         continue;
49 |       }
50 | 
51 |       auto ovr = single_box_iou_rotated<scalar_t>(
52 |           dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>());
53 |       if (ovr >= iou_threshold) {
54 |         suppressed[j] = 1;
55 |       }
56 |     }
57 |   }
58 |   return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
59 | }
60 | 
61 | at::Tensor nms_rotated_cpu(
62 |     const at::Tensor& dets,
63 |     const at::Tensor& scores,
64 |     const float iou_threshold) {
65 |   auto result = at::empty({0}, dets.options());
66 | 
67 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] {
68 |     result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
69 |   });
70 |   return result;
71 | }
72 | 
73 | } // namespace cvpods
74 | 


--------------------------------------------------------------------------------
/cvpods/layers/csrc/sigmoid_focal_loss/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/extension.h>
 3 | 
 4 | namespace cvpods {
 5 | #ifdef WITH_CUDA
 6 | at::Tensor SigmoidFocalLoss_forward_cuda(
 7 | 		const at::Tensor& logits,
 8 |         const at::Tensor& targets,
 9 | 		const int num_classes, 
10 | 		const float gamma, 
11 | 		const float alpha); 
12 | 
13 | at::Tensor SigmoidFocalLoss_backward_cuda(
14 | 			     const at::Tensor& logits,
15 |                  const at::Tensor& targets,
16 | 			     const at::Tensor& d_losses,
17 | 			     const int num_classes,
18 | 			     const float gamma,
19 | 			     const float alpha);
20 | #endif
21 | 
22 | //
23 | // Interface for Python
24 | inline at::Tensor SigmoidFocalLoss_forward(
25 | 		const at::Tensor& logits,
26 |         const at::Tensor& targets,
27 | 		const int num_classes, 
28 | 		const float gamma, 
29 | 		const float alpha) {
30 |   if (logits.type().is_cuda()) {
31 | #ifdef WITH_CUDA
32 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
33 | #else
34 |     AT_ERROR("Not compiled with GPU support");
35 | #endif
36 |   }
37 |   AT_ERROR("Not implemented on the CPU");
38 | }
39 | 
40 | inline at::Tensor SigmoidFocalLoss_backward(
41 | 			     const at::Tensor& logits,
42 |                  const at::Tensor& targets,
43 | 			     const at::Tensor& d_losses,
44 | 			     const int num_classes,
45 | 			     const float gamma,
46 | 			     const float alpha) {
47 |   if (logits.type().is_cuda()) {
48 | #ifdef WITH_CUDA
49 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
50 | #else
51 |     AT_ERROR("Not compiled with GPU support");
52 | #endif
53 |   }
54 |   AT_ERROR("Not implemented on the CPU");
55 | }
56 | 
57 | }
58 | 


--------------------------------------------------------------------------------
/cvpods/layers/deform_conv_with_off.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | import torch
 4 | import torch.nn as nn
 5 | 
 6 | from .deform_conv import DeformConv, ModulatedDeformConv
 7 | 
 8 | 
 9 | class DeformConvWithOff(nn.Module):
10 | 
11 |     def __init__(self, in_channels, out_channels,
12 |                  kernel_size=3, stride=1, padding=1,
13 |                  dilation=1, deformable_groups=1):
14 |         super(DeformConvWithOff, self).__init__()
15 |         self.offset_conv = nn.Conv2d(
16 |             in_channels,
17 |             deformable_groups * 2 * kernel_size * kernel_size,
18 |             kernel_size=kernel_size,
19 |             stride=stride,
20 |             padding=padding,
21 |         )
22 |         self.dcn = DeformConv(
23 |             in_channels, out_channels, kernel_size=kernel_size,
24 |             stride=stride, padding=padding, dilation=dilation,
25 |             deformable_groups=deformable_groups,
26 |         )
27 | 
28 |     def forward(self, input):
29 |         offset = self.offset_conv(input)
30 |         output = self.dcn(input, offset)
31 |         return output
32 | 
33 | 
34 | class ModulatedDeformConvWithOff(nn.Module):
35 | 
36 |     def __init__(self, in_channels, out_channels,
37 |                  kernel_size=3, stride=1, padding=1,
38 |                  dilation=1, deformable_groups=1, bias=True, norm=None, activation=None,):
39 |         super(ModulatedDeformConvWithOff, self).__init__()
40 |         self.offset_mask_conv = nn.Conv2d(
41 |             in_channels,
42 |             deformable_groups * 3 * kernel_size * kernel_size,
43 |             kernel_size=kernel_size,
44 |             stride=stride,
45 |             padding=padding,
46 |         )
47 |         self.dcnv2 = ModulatedDeformConv(
48 |             in_channels, out_channels, kernel_size=kernel_size,
49 |             stride=stride, padding=padding, dilation=dilation,
50 |             deformable_groups=deformable_groups, bias=bias, norm=norm, activation=activation
51 |         )
52 | 
53 |     def forward(self, input):
54 |         x = self.offset_mask_conv(input)
55 |         o1, o2, mask = torch.chunk(x, 3, dim=1)
56 |         offset = torch.cat((o1, o2), dim=1)
57 |         mask = torch.sigmoid(mask)
58 |         output = self.dcnv2(input, offset, mask)
59 |         return output
60 | 


--------------------------------------------------------------------------------
/cvpods/layers/point_transformer.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | import numpy as np
 5 | 
 6 | 
 7 | def index_points(points, idx):
 8 |     """
 9 |     Input:
10 |         points: input points data, [B, N, C]
11 |         idx: sample index data, [B, S, [K]]
12 |     Return:
13 |         new_points:, indexed points data, [B, S, [K], C]
14 |     """
15 |     raw_size = idx.size()
16 |     idx = idx.reshape(raw_size[0], -1)
17 |     res = torch.gather(points, 1, idx[..., None].expand(-1, -1, points.size(-1)))
18 |     return res.reshape(*raw_size, -1)
19 | 
20 | 
21 | def square_distance(src, dst):
22 |     """
23 |     Calculate Euclid distance between each two points.
24 |     src^T * dst = xn * xm + yn * ym + zn * zm；
25 |     sum(src^2, dim=-1) = xn*xn + yn*yn + zn*zn;
26 |     sum(dst^2, dim=-1) = xm*xm + ym*ym + zm*zm;
27 |     dist = (xn - xm)^2 + (yn - ym)^2 + (zn - zm)^2
28 |          = sum(src**2,dim=-1)+sum(dst**2,dim=-1)-2*src^T*dst
29 |     Input:
30 |         src: source points, [B, N, C]
31 |         dst: target points, [B, M, C]
32 |     Output:
33 |         dist: per-point square distance, [B, N, M]
34 |     """
35 |     return torch.sum((src[:, :, None] - dst[:, None]) ** 2, dim=-1)
36 | 
37 | 
38 | class TransformerBlock(nn.Module):
39 |     def __init__(self, d_points, d_model, k) -> None:
40 |         super().__init__()
41 |         self.fc1 = nn.Linear(d_points, d_model)
42 |         self.fc2 = nn.Linear(d_model, d_points)
43 |         self.fc_delta = nn.Sequential(
44 |             nn.Linear(3, d_model),
45 |             nn.ReLU(),
46 |             nn.Linear(d_model, d_model)
47 |         )
48 |         self.fc_gamma = nn.Sequential(
49 |             nn.Linear(d_model, d_model),
50 |             nn.ReLU(),
51 |             nn.Linear(d_model, d_model)
52 |         )
53 |         self.w_qs = nn.Linear(d_model, d_model, bias=False)
54 |         self.w_ks = nn.Linear(d_model, d_model, bias=False)
55 |         self.w_vs = nn.Linear(d_model, d_model, bias=False)
56 |         self.k = k
57 | 
58 |     # xyz: b x n x 3, features: b x n x f
59 |     def forward(self, xyz, features):
60 |         dists = square_distance(xyz, xyz)
61 |         knn_idx = dists.argsort()[:, :, :self.k]  # b x n x k
62 |         knn_xyz = index_points(xyz, knn_idx)
63 | 
64 |         pre = features
65 |         x = self.fc1(features)
66 |         q, k, v = self.w_qs(x), index_points(self.w_ks(x), knn_idx), index_points(self.w_vs(x), knn_idx)
67 | 
68 |         pos_enc = self.fc_delta(xyz[:, :, None] - knn_xyz)  # b x n x k x f
69 | 
70 |         attn = self.fc_gamma(q[:, :, None] - k + pos_enc)
71 |         attn = F.softmax(attn / np.sqrt(k.size(-1)), dim=-2)  # b x n x k x f
72 | 
73 |         res = torch.einsum('bmnf,bmnf->bmf', attn, v + pos_enc)
74 |         res = self.fc2(res) + pre
75 |         return res, attn
76 | 
77 | 


--------------------------------------------------------------------------------
/cvpods/layers/rotated_boxes.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from __future__ import absolute_import, division, print_function, unicode_literals
 3 | 
 4 | # import torch
 5 | from cvpods import _C
 6 | 
 7 | 
 8 | def pairwise_iou_rotated(boxes1, boxes2):
 9 |     """
10 |     Return intersection-over-union (Jaccard index) of boxes.
11 | 
12 |     Both sets of boxes are expected to be in
13 |     (x_center, y_center, width, height, angle) format.
14 | 
15 |     Arguments:
16 |         boxes1 (Tensor[N, 5])
17 |         boxes2 (Tensor[M, 5])
18 | 
19 |     Returns:
20 |         iou (Tensor[N, M]): the NxM matrix containing the pairwise
21 |             IoU values for every element in boxes1 and boxes2
22 |     """
23 |     return _C.box_iou_rotated(boxes1, boxes2)
24 | 


--------------------------------------------------------------------------------
/cvpods/layers/shape_spec.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 3 | from collections import namedtuple
 4 | 
 5 | 
 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
 7 |     """
 8 |     A simple structure that contains basic shape specification about a tensor.
 9 |     It is often used as the auxiliary inputs/outputs of models,
10 |     to obtain the shape inference ability among pytorch modules.
11 | 
12 |     Attributes:
13 |         channels:
14 |         height:
15 |         width:
16 |         stride:
17 |     """
18 | 
19 |     def __new__(cls, *, channels=None, height=None, width=None, stride=None):
20 |         return super().__new__(cls, channels, height, width, stride)
21 | 


--------------------------------------------------------------------------------
/cvpods/layers/swap_align2nat.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from torch import nn
 3 | from torch.autograd import Function
 4 | from torch.autograd.function import once_differentiable
 5 | 
 6 | from cvpods import _C
 7 | 
 8 | 
 9 | class _SwapAlign2Nat(Function):
10 |     @staticmethod
11 |     def forward(ctx, X, lambda_val, pad_val):
12 |         ctx.lambda_val = lambda_val
13 |         ctx.input_shape = X.size()
14 | 
15 |         Y = _C.swap_align2nat_forward(X, lambda_val, pad_val)
16 |         return Y
17 | 
18 |     @staticmethod
19 |     @once_differentiable
20 |     def backward(ctx, gY):
21 |         lambda_val = ctx.lambda_val
22 |         bs, ch, h, w = ctx.input_shape
23 | 
24 |         gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w)
25 | 
26 |         return gX, None, None
27 | 
28 | 
29 | swap_align2nat = _SwapAlign2Nat.apply
30 | 
31 | 
32 | class SwapAlign2Nat(nn.Module):
33 |     """
34 |         The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174.
35 |         Given an input tensor that predicts masks of shape (N, C=VxU, H, W),
36 |         apply the op, it will return masks of shape (N, V'xU', H', W') where
37 |         the unit lengths of (V, U) and (H, W) are swapped, and the mask representation
38 |         is transformed from aligned to natural.
39 |         Args:
40 |             lambda_val (int): the relative unit length ratio between (V, U) and (H, W),
41 |             as we always have larger unit lengths for (V, U) than (H, W),
42 |             lambda_val is always >= 1.
43 |             pad_val (float): padding value for the values falling outside of the input
44 |             tensor, default set to -6 as sigmoid(-6) is ~0, indicating
45 |             that is no masks outside of the tensor.
46 |     """
47 | 
48 |     def __init__(self, lambda_val, pad_val=-6.0):
49 |         super(SwapAlign2Nat, self).__init__()
50 |         self.lambda_val = lambda_val
51 |         self.pad_val = pad_val
52 | 
53 |     def forward(self, X):
54 |         return swap_align2nat(X, self.lambda_val, self.pad_val)
55 | 
56 |     def __repr__(self):
57 |         tmpstr = self.__class__.__name__ + "("
58 |         tmpstr += "lambda_val=" + str(self.lambda_val)
59 |         tmpstr += ", pad_val=" + str(self.pad_val)
60 |         tmpstr += ")"
61 |         return tmpstr
62 | 


--------------------------------------------------------------------------------
/cvpods/modeling/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import torch
 3 | 
 4 | from cvpods.layers import ShapeSpec
 5 | 
 6 | # from .anchor_generator import build_anchor_generator
 7 | from .backbone import FPN, Backbone, ResNet, ResNetBlockBase, build_resnet_backbone, make_stage
 8 | from .meta_arch import GeneralizedRCNN, PanopticFPN, ProposalNetwork, RetinaNet, SemanticSegmentor
 9 | from .postprocessing import detector_postprocess
10 | from .roi_heads import ROIHeads, StandardROIHeads
11 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA
12 | 
13 | _EXCLUDE = {"torch", "ShapeSpec"}
14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
15 | 
16 | assert (
17 |     torch.Tensor([1]) == torch.Tensor([2])
18 | ).dtype == torch.bool, ("Your Pytorch is too old. "
19 |                         "Please update to contain https://github.com/pytorch/pytorch/pull/21113")
20 | 


--------------------------------------------------------------------------------
/cvpods/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | 
 3 | from .backbone import Backbone
 4 | from .fpn import FPN, build_retinanet_resnet_fpn_p5_backbone
 5 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage, build_resnet_deeplab_backbone
 6 | from .darknet import Darknet, build_darknet_backbone
 7 | from .efficientnet import EfficientNet, build_efficientnet_backbone
 8 | from .bifpn import BiFPN, build_efficientnet_bifpn_backbone
 9 | from .dynamic_arch import DynamicNetwork, build_dynamic_backbone
10 | from .sf_fpn import build_resnet_sf_fpn_backbone
11 | from .transformer import Transformer
12 | from .swin import build_swin_backbone, build_swin_fpn_backbone, build_retinanet_swin_fpn_backbone
13 | # TODO can expose more resnet blocks after careful consideration
14 | 


--------------------------------------------------------------------------------
/cvpods/modeling/backbone/backbone.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   backbone.py
 5 | @Time               :   2020/05/07 23:58:08
 6 | @Author             :   Facebook, Inc. and its affiliates.
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:58:08
10 | '''
11 | 
12 | from abc import ABCMeta, abstractmethod
13 | 
14 | import torch.nn as nn
15 | 
16 | from cvpods.layers import ShapeSpec
17 | 
18 | __all__ = ["Backbone"]
19 | 
20 | 
21 | class Backbone(nn.Module, metaclass=ABCMeta):
22 |     """
23 |     Abstract base class for network backbones.
24 |     """
25 |     def __init__(self):
26 |         """
27 |         The `__init__` method of any subclass can specify its own set of arguments.
28 |         """
29 |         super().__init__()
30 | 
31 |     @abstractmethod
32 |     def forward(self):
33 |         """
34 |         Subclasses must override this method, but adhere to the same return type.
35 | 
36 |         Returns:
37 |             dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor
38 |         """
39 |         pass
40 | 
41 |     @property
42 |     def size_divisibility(self):
43 |         """
44 |         Some backbones require the input height and width to be divisible by a
45 |         specific integer. This is typically true for encoder / decoder type networks
46 |         with lateral connection (e.g., FPN) for which feature maps need to match
47 |         dimension in the "bottom up" and "top down" paths. Set to 0 if no specific
48 |         input size divisibility is required.
49 |         """
50 |         return 0
51 | 
52 |     def output_shape(self):
53 |         """
54 |         Returns:
55 |             dict[str->ShapeSpec]
56 |         """
57 |         # this is a backward-compatible default
58 |         return {
59 |             name: ShapeSpec(channels=self._out_feature_channels[name],
60 |                             stride=self._out_feature_strides[name])
61 |             for name in self._out_features
62 |         }
63 | 


--------------------------------------------------------------------------------
/cvpods/modeling/backbone/dynamic_arch/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # build for dynamic networks
3 | # @Author: yanwei.li
4 | 
5 | from .dynamic_backbone import DynamicNetwork, build_dynamic_backbone


--------------------------------------------------------------------------------
/cvpods/modeling/backbone/dynamic_arch/cal_op_flops.py:
--------------------------------------------------------------------------------
 1 | # Count Operation MFLOPs when fix batch to 1
 2 | # @author: yanwei.li
 3 | 
 4 | 
 5 | def count_Conv_flop(
 6 |     in_h, in_w, in_channel, out_channel,
 7 |     kernel_size, is_bias=False, stride=1, groups=1
 8 | ):
 9 |     out_h = in_h // stride
10 |     out_w = in_w // stride
11 |     bias_ops = 1 if is_bias else 0
12 |     kernel_ops = kernel_size[0] * kernel_size[1] * (in_channel // groups)
13 |     delta_ops = (kernel_ops + bias_ops) * out_channel * out_h * out_w
14 |     return delta_ops / 1e6
15 | 
16 | 
17 | def count_Linear_flop(in_num, out_num, is_bias):
18 |     weight_ops = in_num * out_num
19 |     bias_ops = out_num if is_bias else 0
20 |     delta_ops = weight_ops + bias_ops
21 |     return delta_ops / 1e6
22 | 
23 | 
24 | def count_BN_flop(in_h, in_w, in_channel, is_affine):
25 |     multi_affine = 2 if is_affine else 1
26 |     delta_ops = multi_affine * in_h * in_w * in_channel
27 |     return delta_ops / 1e6
28 | 
29 | 
30 | def count_ReLU_flop(in_h, in_w, in_channel):
31 |     delta_ops = in_h * in_w * in_channel
32 |     return delta_ops / 1e6
33 | 
34 | 
35 | def count_Pool2d_flop(in_h, in_w, out_channel, kernel_size, stride):
36 |     out_h = in_h // stride
37 |     out_w = in_w // stride
38 |     kernel_ops = kernel_size[0] * kernel_size[1]
39 |     delta_ops = kernel_ops * out_w * out_h * out_channel
40 |     return delta_ops / 1e6
41 | 
42 | 
43 | def count_ConvBNReLU_flop(
44 |     in_h, in_w, in_channel, out_channel,
45 |     kernel_size, is_bias=False, stride=1,
46 |     groups=1, is_affine=True
47 | ):
48 |     flops = 0.0
49 |     flops += count_Conv_flop(
50 |         in_h, in_w, in_channel, out_channel,
51 |         kernel_size, is_bias, stride, groups
52 |     )
53 |     in_h = in_h // stride
54 |     in_w = in_w // stride
55 |     flops += count_BN_flop(in_h, in_w, out_channel, is_affine)
56 |     flops += count_ReLU_flop(in_h, in_w, out_channel)
57 |     return flops
58 | 


--------------------------------------------------------------------------------
/cvpods/modeling/basenet/__init__.py:
--------------------------------------------------------------------------------
1 | from .basenet import basenet
2 | 


--------------------------------------------------------------------------------
/cvpods/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | from .focal_loss import (
2 |     sigmoid_focal_loss, sigmoid_focal_loss_jit, sigmoid_focal_loss_star,
3 |     sigmoid_focal_loss_star_jit)
4 | from .iou_loss import IOULoss, iou_loss
5 | from .reg_l1_loss import reg_l1_loss
6 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss_cuda
7 | from .smooth_l1_loss import smooth_l1_loss
8 | from .dice_loss import dice_loss, weighted_dice_loss


--------------------------------------------------------------------------------
/cvpods/modeling/losses/reg_l1_loss.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | 
 6 | from cvpods.modeling.nn_utils.feature_utils import gather_feature
 7 | 
 8 | 
 9 | class reg_l1_loss(nn.Module):
10 | 
11 |     def __init__(self):
12 |         super(reg_l1_loss, self).__init__()
13 | 
14 |     def forward(self, output, mask, index, target):
15 |         pred = gather_feature(output, index, use_transform=True)
16 |         mask = mask.unsqueeze(dim=2).expand_as(pred).float()
17 |         # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean')
18 |         loss = F.l1_loss(pred * mask, target * mask, reduction='sum')
19 |         loss = loss / (mask.sum() + 1e-4)
20 |         return loss
21 | 


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/__init__.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 3 | 
 4 | # import all the meta_arch, so they will be registered
 5 | 
 6 | from .centernet import CenterNet
 7 | from .borderdet import BorderDet
 8 | from .panoptic_fpn import PanopticFPN
 9 | from .rcnn import GeneralizedRCNN, ProposalNetwork
10 | from .reppoints import RepPoints
11 | from .semantic_seg import SemanticSegmentor, SemSegFPNHead
12 | from .ssd import SSD
13 | from .tensormask import TensorMask
14 | from .yolov3 import YOLOv3
15 | 
16 | from .solo.solo import SOLO
17 | from .solo.solov2 import SOLOv2
18 | from .solo.solo_decoupled import DecoupledSOLO
19 | from cvpods.modeling.meta_arch.conditionalInst.conditionalInst import CondInst
20 | from cvpods.modeling.meta_arch.sparsercnn.sparse_rcnn import SparseRCNN
21 | from cvpods.modeling.meta_arch.retinanet.retinanet_sepc import RetinaNetSEPC
22 | from cvpods.modeling.meta_arch.retinanet.retinanet import RetinaNet
23 | from cvpods.modeling.meta_arch.fcos.fcos import FCOS
24 | from cvpods.modeling.meta_arch.fcos.fcos_sepc import FCOSSEPC
25 | from cvpods.modeling.meta_arch.detr.detr import DETR
26 | 
27 | 
28 | from .efficientdet import EfficientDet
29 | from .pointrend import (
30 |     PointRendROIHeads,
31 |     CoarseMaskHead,
32 |     StandardPointHead,
33 |     PointRendSemSegHead,
34 | )
35 | from .dynamic4seg import DynamicNet4Seg
36 | from .fcn import FCNHead
37 | 


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/boundary_mask_rcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/boundary_mask_rcnn/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/conditionalInst/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/conditionalInst/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/deformable_detr.py:
--------------------------------------------------------------------------------
 1 | # To Do: implement the deformable detr
 2 | # Xiangtai Li
 3 | import torch
 4 | import torch.nn.functional as F
 5 | from torch import nn
 6 | 
 7 | from cvpods.layers import ShapeSpec, position_encoding_dict
 8 | from cvpods.modeling.backbone import Transformer
 9 | from cvpods.modeling.matcher import HungarianMatcher
10 | from cvpods.structures import Boxes, ImageList, Instances
11 | from cvpods.structures import boxes as box_ops
12 | from cvpods.layers.box_ops import generalized_box_iou
13 | from cvpods.utils import comm
14 | from cvpods.layers.misc import accuracy
15 | 
16 | 
17 | 


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/detr/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/detr/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/fcos/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/fcos/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/flownet/util.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | import torch.nn.functional as F
 3 | 
 4 | try:
 5 |     from spatial_correlation_sampler import spatial_correlation_sample
 6 | except ImportError as e:
 7 |     import warnings
 8 |     with warnings.catch_warnings():
 9 |         warnings.filterwarnings("default", category=ImportWarning)
10 |         warnings.warn("failed to load custom correlation module"
11 |                       "which is needed for FlowNetC", ImportWarning)
12 | 
13 | 
14 | def conv(batchNorm, in_planes, out_planes, kernel_size=3, stride=1):
15 |     if batchNorm:
16 |         return nn.Sequential(
17 |             nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False),
18 |             nn.BatchNorm2d(out_planes),
19 |             nn.LeakyReLU(0.1,inplace=True)
20 |         )
21 |     else:
22 |         return nn.Sequential(
23 |             nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True),
24 |             nn.LeakyReLU(0.1,inplace=True)
25 |         )
26 | 
27 | 
28 | def predict_flow(in_planes):
29 |     return nn.Conv2d(in_planes,2,kernel_size=3,stride=1,padding=1,bias=False)
30 | 
31 | 
32 | def deconv(in_planes, out_planes):
33 |     return nn.Sequential(
34 |         nn.ConvTranspose2d(in_planes, out_planes, kernel_size=4, stride=2, padding=1, bias=False),
35 |         nn.LeakyReLU(0.1,inplace=True)
36 |     )
37 | 
38 | 
39 | def correlate(input1, input2):
40 |     out_corr = spatial_correlation_sample(input1,
41 |                                           input2,
42 |                                           kernel_size=1,
43 |                                           patch_size=21,
44 |                                           stride=1,
45 |                                           padding=0,
46 |                                           dilation_patch=2)
47 |     # collate dimensions 1 and 2 in order to be treated as a
48 |     # regular 4D tensor
49 |     b, ph, pw, h, w = out_corr.size()
50 |     out_corr = out_corr.view(b, ph * pw, h, w)/input1.size(1)
51 |     return F.leaky_relu_(out_corr, 0.1)
52 | 
53 | 
54 | def crop_like(input, target):
55 |     if input.size()[2:] == target.size()[2:]:
56 |         return input
57 |     else:
58 |         return input[:, :, :target.size(2), :target.size(3)]
59 | 


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/imagenet.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from torch import nn
 4 | 
 5 | from cvpods.layers import ShapeSpec
 6 | from cvpods.structures import ImageList
 7 | 
 8 | 
 9 | def accuracy(output, target, topk=(1,)):
10 |     """Computes the accuracy over the k top predictions for the specified values of k"""
11 |     with torch.no_grad():
12 |         maxk = max(topk)
13 |         batch_size = target.size(0)
14 | 
15 |         _, pred = output.topk(maxk, 1, True, True)
16 |         pred = pred.t()
17 |         correct = pred.eq(target.view(1, -1).expand_as(pred))
18 | 
19 |         res = []
20 |         for k in topk:
21 |             correct_k = correct[:k].view(-1).float().sum(0, keepdim=True)
22 |             res.append(correct_k.mul_(100.0 / batch_size))
23 |         return res
24 | 
25 | 
26 | class Classification(nn.Module):
27 |     """
28 |     ImageNet classification module.
29 |     Weights of this model can be used as pretrained weights of any models in cvpods.
30 |     """
31 |     def __init__(self, cfg):
32 |         super(Classification, self).__init__()
33 | 
34 |         self.device = torch.device(cfg.MODEL.DEVICE)
35 | 
36 |         self.network = cfg.build_backbone(
37 |             cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)))
38 | 
39 |         self.loss_evaluator = nn.CrossEntropyLoss()
40 | 
41 |         pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view(
42 |             3, 1, 1)
43 |         pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view(
44 |             3, 1, 1)
45 |         self.normalizer = lambda x: (x - pixel_mean) / pixel_std
46 | 
47 |         self.to(self.device)
48 | 
49 |     def forward(self, batched_inputs):
50 |         images = self.preprocess_image(batched_inputs)
51 | 
52 |         preds = self.network(images.tensor)["linear"]
53 | 
54 |         if self.training:
55 |             labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda()
56 |             losses = self.loss_evaluator(preds, labels)
57 |             acc1, acc5 = accuracy(preds, labels, topk=(1, 5))
58 | 
59 |             return {
60 |                 "loss_cls": losses,
61 |                 "Acc@1": acc1,
62 |                 "Acc@5": acc5,
63 |             }
64 |         else:
65 |             return preds
66 | 
67 |     def preprocess_image(self, batched_inputs):
68 |         """
69 |         Normalize, pad and batch the input images.
70 |         """
71 |         images = [x["image"].float().to(self.device) for x in batched_inputs]
72 |         images = [self.normalizer(x.div(255)) for x in images]
73 |         images = ImageList.from_tensors(images, self.network.size_divisibility)
74 |         return images
75 | 


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/panopticfcn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/panopticfcn/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/panopticfcn/utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | import torch
 4 | from functools import partial
 5 | 
 6 | def topk_score(scores, K=40, score_shape=None):
 7 |     """
 8 |     get top K point in score map
 9 |     """
10 |     batch, channel, height, width = score_shape
11 | 
12 |     # get topk score and its index in every H x W(channel dim) feature map
13 |     topk_scores, topk_inds = torch.topk(scores.reshape(batch, channel, -1), K)
14 | 
15 |     topk_inds = topk_inds % (height * width)
16 |     topk_ys = (topk_inds // width).float()
17 |     topk_xs = (topk_inds % width).int().float()
18 | 
19 |     # get all topk in in a batch
20 |     topk_score, index = torch.topk(topk_scores.reshape(batch, -1), K)
21 |     # div by K because index is grouped by K(C x K shape)
22 |     topk_clses = index // K
23 |     topk_inds = gather_feature(topk_inds.view(batch, -1, 1), index).reshape(batch, K)
24 |     topk_ys = gather_feature(topk_ys.reshape(batch, -1, 1), index).reshape(batch, K)
25 |     topk_xs = gather_feature(topk_xs.reshape(batch, -1, 1), index).reshape(batch, K)
26 | 
27 |     return topk_score, topk_inds, topk_clses, topk_ys, topk_xs
28 | 
29 | 
30 | def gather_feature(fmap, index, mask=None, use_transform=False):
31 |     if use_transform:
32 |         # change a (N, C, H, W) tenor to (N, HxW, C) shape
33 |         batch, channel = fmap.shape[:2]
34 |         fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous()
35 | 
36 |     dim = fmap.size(-1)
37 |     index  = index.unsqueeze(len(index.shape)).expand(*index.shape, dim)
38 |     fmap = fmap.gather(dim=1, index=index)
39 |     if mask is not None:
40 |         mask = mask.unsqueeze(2).expand_as(fmap)
41 |         fmap = fmap[mask]
42 |         fmap = fmap.reshape(-1, dim)
43 |     return fmap
44 | 
45 | 
46 | def multi_apply(func, *args, **kwargs):
47 |     pfunc = partial(func, **kwargs) if kwargs else func
48 |     map_results = map(pfunc, *args)
49 |     return tuple(map(list, zip(*map_results)))
50 | 


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/retinanet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/retinanet/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/solo/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/solo/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/meta_arch/sparsercnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/meta_arch/sparsercnn/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/nn_utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/modeling/nn_utils/__init__.py


--------------------------------------------------------------------------------
/cvpods/modeling/nn_utils/activation_count.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import logging
 4 | import typing
 5 | from collections import defaultdict
 6 | import torch.nn as nn
 7 | 
 8 | from .jit_handles import generic_activation_jit, get_jit_model_analysis
 9 | 
10 | # A dictionary that maps supported operations to their activation count handles.
11 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = {
12 |     "aten::_convolution": generic_activation_jit("conv"),
13 |     "aten::addmm": generic_activation_jit("addmm"),
14 | }
15 | 
16 | 
17 | def activation_count(
18 |     model: nn.Module,
19 |     inputs: typing.Tuple[object, ...],
20 |     supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None,
21 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]:
22 |     """
23 |     Given a model and an input to the model, compute the total number of
24 |     activations of the model. Note the input should have a batch size of 1.
25 | 
26 |     Args:
27 |         model (nn.Module): The model to compute activation counts.
28 |         inputs (tuple): Inputs that are passed to `model` to count activations.
29 |             Inputs need to be in a tuple.
30 |         supported_ops (dict(str,Callable) or None) : By default, we count
31 |             activation for convolution and fully connected layers. Users can
32 |             provide customized supported_ops if desired.
33 | 
34 |     Returns:
35 |         tuple[defaultdict, Counter]: A dictionary that records the number of
36 |             activation (mega) for each operation and a Counter that records the
37 |             number of skipped operations.
38 |     """
39 |     assert isinstance(inputs, tuple), "Inputs need to be in a tuple."
40 |     if not supported_ops:
41 |         supported_ops = _SUPPORTED_OPS.copy()
42 | 
43 |     # Run activation count.
44 |     total_activation_count, skipped_ops = get_jit_model_analysis(
45 |         model, inputs, supported_ops
46 |     )
47 | 
48 |     # Log for skipped operations.
49 |     if len(skipped_ops) > 0:
50 |         for op, freq in skipped_ops.items():
51 |             logging.warning("Skipped operation {} {} time(s)".format(op, freq))
52 | 
53 |     # Convert activation count to mega count.
54 |     final_count = defaultdict(float)
55 |     for op in total_activation_count:
56 |         final_count[op] = total_activation_count[op] / 1e6
57 | 
58 |     return final_count, skipped_ops
59 | 


--------------------------------------------------------------------------------
/cvpods/modeling/nn_utils/feature_utils.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | 
 4 | 
 5 | def gather_feature(fmap, index, mask=None, use_transform=False):
 6 |     """
 7 |     used for Centernet
 8 |     """
 9 |     if use_transform:
10 |         # change a (N, C, H, W) tenor to (N, HxW, C) shape
11 |         batch, channel = fmap.shape[:2]
12 |         fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous()
13 | 
14 |     dim = fmap.size(-1)
15 |     index  = index.unsqueeze(len(index.shape)).expand(*index.shape, dim)
16 |     fmap = fmap.gather(dim=1, index=index)
17 |     if mask is not None:
18 |         # this part is not called in Res18 dcn COCO
19 |         mask = mask.unsqueeze(2).expand_as(fmap)
20 |         fmap = fmap[mask]
21 |         fmap = fmap.reshape(-1, dim)
22 |     return fmap
23 | 


--------------------------------------------------------------------------------
/cvpods/modeling/nn_utils/flop_count.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | import logging
 4 | import typing
 5 | from collections import defaultdict
 6 | import torch.nn as nn
 7 | 
 8 | from .jit_handles import (
 9 |     addmm_flop_jit, conv_flop_jit, einsum_flop_jit, get_jit_model_analysis, matmul_flop_jit)
10 | 
11 | # A dictionary that maps supported operations to their flop count jit handles.
12 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = {
13 |     "aten::addmm": addmm_flop_jit,
14 |     "aten::_convolution": conv_flop_jit,
15 |     "aten::einsum": einsum_flop_jit,
16 |     "aten::matmul": matmul_flop_jit,
17 | }
18 | 
19 | 
20 | def flop_count(
21 |     model: nn.Module,
22 |     inputs: typing.Tuple[object, ...],
23 |     supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None,
24 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]:
25 |     """
26 |     Given a model and an input to the model, compute the Gflops of the given
27 |     model. Note the input should have a batch size of 1.
28 | 
29 |     Args:
30 |         model (nn.Module): The model to compute flop counts.
31 |         inputs (tuple): Inputs that are passed to `model` to count flops.
32 |             Inputs need to be in a tuple.
33 |         supported_ops (dict(str,Callable) or None) : By default, we count flops
34 |             for convolution layers, fully connected layers, torch.matmul and
35 |             torch.einsum operations. We define a FLOP as a single atomic
36 |             Multiply-Add. Users can provide customized supported_ops for
37 |             counting flops if desired.
38 | 
39 |     Returns:
40 |         tuple[defaultdict, Counter]: A dictionary that records the number of
41 |             gflops for each operation and a Counter that records the number of
42 |             skipped operations.
43 |     """
44 |     assert isinstance(inputs, tuple), "Inputs need to be in a tuple."
45 |     if not supported_ops:
46 |         supported_ops = _SUPPORTED_OPS.copy()
47 | 
48 |     # Run flop count.
49 |     total_flop_counter, skipped_ops = get_jit_model_analysis(
50 |         model, inputs, supported_ops
51 |     )
52 | 
53 |     # Log for skipped operations.
54 |     if len(skipped_ops) > 0:
55 |         for op, freq in skipped_ops.items():
56 |             logging.warning("Skipped operation {} {} time(s)".format(op, freq))
57 | 
58 |     # Convert flop count to gigaflops.
59 |     final_count = defaultdict(float)
60 |     for op in total_flop_counter:
61 |         final_count[op] = total_flop_counter[op] / 1e9
62 | 
63 |     return final_count, skipped_ops
64 | 


--------------------------------------------------------------------------------
/cvpods/modeling/nn_utils/parameter_count.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | 
 3 | import typing
 4 | from collections import defaultdict
 5 | import tabulate
 6 | from torch import nn
 7 | 
 8 | 
 9 | def parameter_count(model: nn.Module) -> typing.DefaultDict[str, int]:
10 |     """
11 |     Count parameters of a model and its submodules.
12 | 
13 |     Args:
14 |         model: a torch module
15 | 
16 |     Returns:
17 |         dict (str-> int): the key is either a parameter name or a module name.
18 |             The value is the number of elements in the parameter, or in all
19 |             parameters of the module. The key "" corresponds to the total
20 |             number of parameters of the model.
21 |     """
22 |     r = defaultdict(int)
23 |     for name, prm in model.named_parameters():
24 |         size = prm.numel()
25 |         name = name.split(".")
26 |         for k in range(0, len(name) + 1):
27 |             prefix = ".".join(name[:k])
28 |             r[prefix] += size
29 |     return r
30 | 
31 | 
32 | def parameter_count_table(model: nn.Module, max_depth: int = 3) -> str:
33 |     """
34 |     Format the parameter count of the model (and its submodules or parameters)
35 |     in a nice table.
36 | 
37 |     Args:
38 |         model: a torch module
39 |         max_depth (int): maximum depth to recursively print submodules or
40 |             parameters
41 | 
42 |     Returns:
43 |         str: the table to be printed
44 |     """
45 |     count: typing.DefaultDict[str, int] = parameter_count(model)
46 |     param_shape: typing.Dict[str, typing.Tuple] = {
47 |         k: tuple(v.shape) for k, v in model.named_parameters()
48 |     }
49 | 
50 |     table: typing.List[typing.Tuple] = []
51 | 
52 |     def format_size(x: int) -> str:
53 |         if x > 1e5:
54 |             return "{:.1f}M".format(x / 1e6)
55 |         if x > 1e2:
56 |             return "{:.1f}K".format(x / 1e3)
57 |         return str(x)
58 | 
59 |     def fill(lvl: int, prefix: str) -> None:
60 |         if lvl >= max_depth:
61 |             return
62 |         for name, v in count.items():
63 |             if name.count(".") == lvl and name.startswith(prefix):
64 |                 indent = " " * (lvl + 1)
65 |                 if name in param_shape:
66 |                     table.append((indent + name, indent + str(param_shape[name])))
67 |                 else:
68 |                     table.append((indent + name, indent + format_size(v)))
69 |                     fill(lvl + 1, name + ".")
70 | 
71 |     table.append(("model", format_size(count.pop(""))))
72 |     fill(0, "")
73 | 
74 |     old_ws = tabulate.PRESERVE_WHITESPACE
75 |     tabulate.PRESERVE_WHITESPACE = True
76 |     tab = tabulate.tabulate(
77 |         table, headers=["name", "#elements or shape"], tablefmt="pipe"
78 |     )
79 |     tabulate.PRESERVE_WHITESPACE = old_ws
80 |     return tab
81 | 


--------------------------------------------------------------------------------
/cvpods/modeling/nn_utils/scale_grad.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | from torch.autograd.function import Function
 4 | 
 5 | 
 6 | class _ScaleGradient(Function):
 7 | 
 8 |     @staticmethod
 9 |     def forward(ctx, input, scale):
10 |         ctx.scale = scale
11 |         return input
12 | 
13 |     @staticmethod
14 |     def backward(ctx, grad_output):
15 |         return grad_output * ctx.scale, None
16 | 


--------------------------------------------------------------------------------
/cvpods/modeling/proposal_generator/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .rpn import RPN


--------------------------------------------------------------------------------
/cvpods/modeling/proposal_generator/build.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from cvpods.utils.registry import Registry
 3 | 
 4 | PROPOSAL_GENERATOR_REGISTRY = Registry("PROPOSAL_GENERATOR")
 5 | PROPOSAL_GENERATOR_REGISTRY.__doc__ = """
 6 | Registry for proposal generator, which produces object proposals from feature maps.
 7 | 
 8 | The registered object will be called with `obj(cfg, input_shape)`.
 9 | The call should return a `nn.Module` object.
10 | """
11 | 
12 | from . import rpn, rrpn  # noqa F401 isort:skip
13 | 
14 | 
15 | def build_proposal_generator(cfg, input_shape):
16 |     """
17 |     Build a proposal generator from `cfg.MODEL.PROPOSAL_GENERATOR.NAME`.
18 |     The name can be "PrecomputedProposals" to use no proposal generator.
19 |     """
20 |     name = cfg.MODEL.PROPOSAL_GENERATOR.NAME
21 |     if name == "PrecomputedProposals":
22 |         return None
23 | 
24 |     return PROPOSAL_GENERATOR_REGISTRY.get(name)(cfg, input_shape)
25 | 


--------------------------------------------------------------------------------
/cvpods/modeling/proposal_generator/proposal_utils.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import math
 3 | import torch
 4 | 
 5 | from cvpods.structures import Instances
 6 | 
 7 | 
 8 | def add_ground_truth_to_proposals(gt_boxes, proposals):
 9 |     """
10 |     Call `add_ground_truth_to_proposals_single_image` for all images.
11 | 
12 |     Args:
13 |         gt_boxes(list[Boxes]): list of N elements. Element i is a Boxes
14 |             representing the gound-truth for image i.
15 |         proposals (list[Instances]): list of N elements. Element i is a Instances
16 |             representing the proposals for image i.
17 | 
18 |     Returns:
19 |         list[Instances]: list of N Instances. Each is the proposals for the image,
20 |             with field "proposal_boxes" and "objectness_logits".
21 |     """
22 |     assert gt_boxes is not None
23 | 
24 |     assert len(proposals) == len(gt_boxes)
25 |     if len(proposals) == 0:
26 |         return proposals
27 | 
28 |     return [
29 |         add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i)
30 |         for gt_boxes_i, proposals_i in zip(gt_boxes, proposals)
31 |     ]
32 | 
33 | 
34 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals):
35 |     """
36 |     Augment `proposals` with ground-truth boxes from `gt_boxes`.
37 | 
38 |     Args:
39 |         Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals
40 |         per image.
41 | 
42 |     Returns:
43 |         Same as `add_ground_truth_to_proposals`, but for only one image.
44 |     """
45 |     device = proposals.objectness_logits.device
46 |     # Concatenating gt_boxes with proposals requires them to have the same fields
47 |     # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1.
48 |     gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10)))
49 | 
50 |     gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device)
51 |     gt_proposal = Instances(proposals.image_size)
52 | 
53 |     gt_proposal.proposal_boxes = gt_boxes
54 |     gt_proposal.objectness_logits = gt_logits
55 |     new_proposals = Instances.cat([proposals, gt_proposal])
56 | 
57 |     return new_proposals
58 | 


--------------------------------------------------------------------------------
/cvpods/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
2 | from .roi_heads import Res5ROIHeads, ROIHeads, StandardROIHeads, select_foreground_proposals
3 | from .rotated_fast_rcnn import RROIHeads
4 | 
5 | from . import cascade_rcnn  # isort:skip
6 | 


--------------------------------------------------------------------------------
/cvpods/modeling/sampling.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import torch
 3 | 
 4 | __all__ = ["subsample_labels"]
 5 | 
 6 | 
 7 | def subsample_labels(labels, num_samples, positive_fraction, bg_label):
 8 |     """
 9 |     Return `num_samples` (or fewer, if not enough found)
10 |     random samples from `labels` which is a mixture of positives & negatives.
11 |     It will try to return as many positives as possible without
12 |     exceeding `positive_fraction * num_samples`, and then try to
13 |     fill the remaining slots with negatives.
14 | 
15 |     Args:
16 |         labels (Tensor): (N, ) label vector with values:
17 |             * -1: ignore
18 |             * bg_label: background ("negative") class
19 |             * otherwise: one or more foreground ("positive") classes
20 |         num_samples (int): The total number of labels with value >= 0 to return.
21 |             Values that are not sampled will be filled with -1 (ignore).
22 |         positive_fraction (float): The number of subsampled labels with values > 0
23 |             is `min(num_positives, int(positive_fraction * num_samples))`. The number
24 |             of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`.
25 |             In order words, if there are not enough positives, the sample is filled with
26 |             negatives. If there are also not enough negatives, then as many elements are
27 |             sampled as is possible.
28 |         bg_label (int): label index of background ("negative") class.
29 | 
30 |     Returns:
31 |         pos_idx, neg_idx (Tensor):
32 |             1D vector of indices. The total length of both is `num_samples` or fewer.
33 |     """
34 |     positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=False).squeeze(1)
35 |     negative = torch.nonzero(labels == bg_label, as_tuple=False).squeeze(1)
36 | 
37 |     num_pos = int(num_samples * positive_fraction)
38 |     # protect against not enough positive examples
39 |     num_pos = min(positive.numel(), num_pos)
40 |     num_neg = num_samples - num_pos
41 |     # protect against not enough negative examples
42 |     num_neg = min(negative.numel(), num_neg)
43 | 
44 |     # randomly select positive and negative examples
45 |     perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
46 |     perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
47 | 
48 |     pos_idx = positive[perm1]
49 |     neg_idx = negative[perm2]
50 |     return pos_idx, neg_idx
51 | 


--------------------------------------------------------------------------------
/cvpods/solver/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from .build import build_lr_scheduler, build_optimizer
 3 | from .optimizer_builder import (
 4 |     OPTIMIZER_BUILDER,
 5 |     AdamBuilder,
 6 |     AdamWBuilder,
 7 |     OptimizerBuilder,
 8 |     SGDBuilder,
 9 |     SGDGateLRBuilder
10 | )
11 | from .scheduler_builder import (
12 |     SCHEDULER_BUILDER,
13 |     BaseSchedulerBuilder,
14 |     LambdaLRBuilder,
15 |     OneCycleLRBuilder,
16 |     PolyLRBuilder,
17 |     WarmupCosineLR,
18 |     WarmupCosineLRBuilder,
19 |     WarmupMultiStepLR,
20 |     WarmupMultiStepLRBuilder
21 | )
22 | 
23 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
24 | 


--------------------------------------------------------------------------------
/cvpods/structures/__init__.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | from .boxes import Boxes, BoxMode, pairwise_iou, pairwise_ioa, pairwise_iou_tensor
 3 | from .image_list import ImageList
 4 | from .instances import Instances
 5 | from .keypoints import Keypoints, heatmaps_to_keypoints
 6 | from .masks import BitMasks, PolygonMasks, polygons_to_bitmask, rasterize_polygons_within_box
 7 | from .rotated_boxes import RotatedBoxes
 8 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated
 9 | 
10 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
11 | 


--------------------------------------------------------------------------------
/cvpods/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contain utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
6 | 


--------------------------------------------------------------------------------
/cvpods/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | 
 4 | from .registry import Registry
 5 | from .benchmark import timeit, benchmark, Timer
 6 | from .distributed import comm
 7 | from .env import collect_env_info, seed_all_rng, setup_environment, setup_custom_environment
 8 | from .imports import dynamic_import
 9 | from .file import download, PathHandler, PathManager, get_cache_dir, file_lock, PicklableWrapper
10 | from .memory import retry_if_cuda_oom
11 | from .visualizer import colormap, random_color, VideoVisualizer, ColorMode, VisImage, Visualizer
12 | from .dump import (get_event_storage, EventWriter, JSONWriter, TensorboardXWriter,
13 |                    CommonMetricPrinter, EventStorage, HistoryBuffer, setup_logger, log_first_n,
14 |                    log_every_n, log_every_n_seconds, create_small_table, create_table_with_header)
15 | 
16 | __all__ = [k for k in globals().keys() if not k.startswith("_")]
17 | 


--------------------------------------------------------------------------------
/cvpods/utils/benchmark/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from .benchmark import *
5 | from .timer import *
6 | 


--------------------------------------------------------------------------------
/cvpods/utils/benchmark/timer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | # -*- coding: utf-8 -*-
 4 | 
 5 | from time import perf_counter
 6 | from typing import Optional
 7 | 
 8 | 
 9 | class Timer:
10 |     """
11 |     A timer which computes the time elapsed since the start/reset of the timer.
12 |     """
13 | 
14 |     def __init__(self):
15 |         self.reset()
16 | 
17 |     def reset(self):
18 |         """
19 |         Reset the timer.
20 |         """
21 |         self._start = perf_counter()
22 |         self._paused: Optional[float] = None
23 |         self._total_paused = 0
24 | 
25 |     def pause(self):
26 |         """
27 |         Pause the timer.
28 |         """
29 |         if self._paused is not None:
30 |             raise ValueError("Trying to pause a Timer that is already paused!")
31 |         self._paused = perf_counter()
32 | 
33 |     def is_paused(self) -> bool:
34 |         """
35 |         Returns:
36 |             bool: whether the timer is currently paused
37 |         """
38 |         return self._paused is not None
39 | 
40 |     def resume(self):
41 |         """
42 |         Resume the timer.
43 |         """
44 |         if self._paused is None:
45 |             raise ValueError("Trying to resume a Timer that is not paused!")
46 |         self._total_paused += perf_counter() - self._paused
47 |         self._paused = None
48 | 
49 |     def seconds(self) -> float:
50 |         """
51 |         Returns:
52 |             (float): the total number of seconds since the start/reset of the
53 |                 timer, excluding the time when the timer is paused.
54 |         """
55 |         if self._paused is not None:
56 |             end_time: float = self._paused  # type: ignore
57 |         else:
58 |             end_time = perf_counter()
59 |         return end_time - self._start - self._total_paused
60 | 


--------------------------------------------------------------------------------
/cvpods/utils/distributed/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from .comm import *
5 | 


--------------------------------------------------------------------------------
/cvpods/utils/dump/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from .events import *
5 | from .history_buffer import *
6 | from .logger import *
7 | 


--------------------------------------------------------------------------------
/cvpods/utils/dump/history_buffer.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | 
 4 | import numpy as np
 5 | from typing import List, Tuple
 6 | 
 7 | 
 8 | class HistoryBuffer:
 9 |     """
10 |     Track a series of scalar values and provide access to smoothed values over a
11 |     window or the global average of the series.
12 |     """
13 | 
14 |     def __init__(self, max_length: int = 1000000):
15 |         """
16 |         Args:
17 |             max_length: maximal number of values that can be stored in the
18 |                 buffer. When the capacity of the buffer is exhausted, old
19 |                 values will be removed.
20 |         """
21 |         self._max_length: int = max_length
22 |         self._data: List[Tuple[float, float]] = []  # (value, iteration) pairs
23 |         self._count: int = 0
24 |         self._global_avg: float = 0
25 | 
26 |     def update(self, value: float, iteration: float = None):
27 |         """
28 |         Add a new scalar value produced at certain iteration. If the length
29 |         of the buffer exceeds self._max_length, the oldest element will be
30 |         removed from the buffer.
31 |         """
32 |         if iteration is None:
33 |             iteration = self._count
34 |         if len(self._data) == self._max_length:
35 |             self._data.pop(0)
36 |         self._data.append((value, iteration))
37 | 
38 |         self._count += 1
39 |         self._global_avg += (value - self._global_avg) / self._count
40 | 
41 |     def latest(self):
42 |         """
43 |         Return the latest scalar value added to the buffer.
44 |         """
45 |         return self._data[-1][0]
46 | 
47 |     def median(self, window_size: int):
48 |         """
49 |         Return the median of the latest `window_size` values in the buffer.
50 |         """
51 |         return np.median([x[0] for x in self._data[-window_size:]])
52 | 
53 |     def avg(self, window_size: int):
54 |         """
55 |         Return the mean of the latest `window_size` values in the buffer.
56 |         """
57 |         return np.mean([x[0] for x in self._data[-window_size:]])
58 | 
59 |     def global_avg(self):
60 |         """
61 |         Return the mean of all the elements in the buffer. Note that this
62 |         includes those getting removed due to limited buffer storage.
63 |         """
64 |         return self._global_avg
65 | 
66 |     def values(self):
67 |         """
68 |         Returns:
69 |             list[(number, iteration)]: content of the current buffer.
70 |         """
71 |         return self._data
72 | 


--------------------------------------------------------------------------------
/cvpods/utils/env/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from .collect_env import *
5 | from .env import *
6 | 


--------------------------------------------------------------------------------
/cvpods/utils/file/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from .download import *
5 | from .file_io import *
6 | from .serialize import *
7 | 


--------------------------------------------------------------------------------
/cvpods/utils/file/serialize.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | import cloudpickle
 3 | 
 4 | 
 5 | class PicklableWrapper(object):
 6 |     """
 7 |     Wrap an object to make it more picklable, note that it uses
 8 |     heavy weight serialization libraries that are slower than pickle.
 9 |     It's best to use it only on closures (which are usually not picklable).
10 | 
11 |     This is a simplified version of
12 |     https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py
13 |     """
14 | 
15 |     def __init__(self, obj):
16 |         self._obj = obj
17 | 
18 |     def __reduce__(self):
19 |         s = cloudpickle.dumps(self._obj)
20 |         return cloudpickle.loads, (s,)
21 | 
22 |     def __call__(self, *args, **kwargs):
23 |         return self._obj(*args, **kwargs)
24 | 
25 |     def __getattr__(self, attr):
26 |         # Ensure that the wrapped object can be used seamlessly as the previous object.
27 |         if attr not in ["_obj"]:
28 |             return getattr(self._obj, attr)
29 |         return getattr(self, attr)
30 | 


--------------------------------------------------------------------------------
/cvpods/utils/imports.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   imports.py
 5 | @Time               :   2020/05/07 23:59:19
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:59:19
10 | '''
11 | 
12 | import imp
13 | 
14 | 
15 | def dynamic_import(config_name, config_path):
16 |     """
17 |     Dynamic import a project.
18 | 
19 |     Args:
20 |         config_name (str): module name
21 |         config_path (str): the dir that contains the .py with this module.
22 | 
23 |     Examples::
24 |         >>> root = "/data/repos/cvpods_playground/zhubenjin/retinanet/"
25 |         >>> project = root + "retinanet.res50.fpn.coco.800size.1x.mrcnn_sigmoid"
26 |         >>> cfg = dynamic_import("config", project).config
27 |         >>> net = dynamic_import("net", project)
28 |     """
29 |     fp, pth, desc = imp.find_module(config_name, [config_path])
30 | 
31 |     return imp.load_module(config_name, fp, pth, desc)
32 | 


--------------------------------------------------------------------------------
/cvpods/utils/metrics/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/cvpods/utils/metrics/__init__.py


--------------------------------------------------------------------------------
/cvpods/utils/metrics/accuracy.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # -*- coding: utf-8 -*-
 3 | # Copyright (c) BaseDetection, Inc. and its affiliates.
 4 | import torch
 5 | 
 6 | 
 7 | @torch.no_grad()
 8 | def accuracy(output, target, topk=(1,)):
 9 |     """Computes the precision@k for the specified values of k"""
10 |     if target.numel() == 0:
11 |         return [torch.zeros([], device=output.device)]
12 |     maxk = max(topk)
13 |     batch_size = target.size(0)
14 | 
15 |     _, pred = output.topk(maxk, 1, True, True)
16 |     pred = pred.t()
17 |     correct = pred.eq(target.view(1, -1).expand_as(pred))
18 | 
19 |     res = []
20 |     for k in topk:
21 |         correct_k = correct[:k].view(-1).float().sum(0)
22 |         res.append(correct_k.mul_(100.0 / batch_size))
23 |     return res
24 | 


--------------------------------------------------------------------------------
/cvpods/utils/registry.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | 
 3 | from typing import Dict, Optional
 4 | 
 5 | 
 6 | class Registry(object):
 7 |     """
 8 |     The registry that provides name -> object mapping, to support third-party
 9 |     users' custom modules.
10 |     To create a registry (e.g. a backbone registry):
11 |     .. code-block:: python
12 |         BACKBONE_REGISTRY = Registry('BACKBONE')
13 |     To register an object:
14 |     .. code-block:: python
15 |         @BACKBONE_REGISTRY.register()
16 |         class MyBackbone():
17 |             ...
18 |     Or:
19 |     .. code-block:: python
20 |         BACKBONE_REGISTRY.register(MyBackbone)
21 |     """
22 | 
23 |     def __init__(self, name: str) -> None:
24 |         """
25 |         Args:
26 |             name (str): the name of this registry
27 |         """
28 |         self._name: str = name
29 |         self._obj_map: Dict[str, object] = {}
30 | 
31 |     def _do_register(self, name: str, obj: object) -> None:
32 |         assert (
33 |             name not in self._obj_map
34 |         ), "An object named '{}' was already registered in '{}' registry!".format(
35 |             name, self._name
36 |         )
37 |         self._obj_map[name] = obj
38 | 
39 |     def register(self, obj: object = None) -> Optional[object]:
40 |         """
41 |         Register the given object under the the name `obj.__name__`.
42 |         Can be used as either a decorator or not. See docstring of this class for usage.
43 |         """
44 |         if obj is None:
45 |             # used as a decorator
46 |             def deco(func_or_class: object) -> object:
47 |                 name = func_or_class.__name__  # pyre-ignore
48 |                 self._do_register(name, func_or_class)
49 |                 return func_or_class
50 | 
51 |             return deco
52 | 
53 |         # used as a function call
54 |         name = obj.__name__  # pyre-ignore
55 |         self._do_register(name, obj)
56 | 
57 |     def get(self, name: str) -> object:
58 |         ret = self._obj_map.get(name)
59 |         if ret is None:
60 |             raise KeyError(
61 |                 "No object named '{}' found in '{}' registry!".format(name, self._name)
62 |             )
63 |         return ret
64 | 
65 |     def __contains__(self, name: str) -> bool:
66 |         return name in self._obj_map
67 | 


--------------------------------------------------------------------------------
/cvpods/utils/visualizer/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python3
2 | # -*- coding:utf-8 -*-
3 | 
4 | from .colormap import *
5 | from .video_visualizer import *
6 | from .visualizer import *
7 | 


--------------------------------------------------------------------------------
/cvpods/utils/visualizer/show.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | # -*- encoding: utf-8 -*-
 3 | '''
 4 | @File               :   show.py
 5 | @Time               :   2020/05/07 23:58:35
 6 | @Author             :   Benjin Zhu
 7 | @Contact            :   poodarchu@gmail.com
 8 | @Last Modified by   :   Benjin Zhu
 9 | @Last Modified time :   2020/05/07 23:58:35
10 | '''
11 | 
12 | import copy
13 | 
14 | import numpy as np
15 | import pylab as plt
16 | 
17 | 
18 | def draw_box(ax, vertices, color='black'):
19 |     """
20 |     Draw box with color.
21 | 
22 |     Args:
23 |         ax (list): axes to draw box along
24 |         vertices (ndarray): indices of shape (N x 2)
25 |         color (str): plotted color
26 |     """
27 |     connections = [
28 |         [0, 1],
29 |         [1, 2],
30 |         [2, 3],
31 |         [3, 0],
32 |     ]
33 |     for connection in connections:
34 |         ax.plot(*vertices[:, connection], c=color, lw=5)
35 | 
36 | 
37 | def visualize_feature_maps(
38 |         fm,
39 |         boxes=[],
40 |         keypoints=[],
41 |         stride=1,
42 |         save_filename=None
43 | ):
44 |     """
45 |     Visualize feature map with boxes or key points.
46 | 
47 |     Args:
48 |         fm (torch.Tensor): feature map of shape H x W x c, c is channel
49 |         boxes (ndarray): boxes to be visualized.
50 |         keypoints (ndarray): key points to be visualized
51 |         stride (int): used to normalize boxes or keypoints
52 |         save_filename (bool): whether save to disk
53 |     """
54 |     nc = np.ceil(np.sqrt(fm.shape[2]))  # column
55 |     nr = np.ceil(fm.shape[2] / nc)  # row
56 |     nc = int(nc)
57 |     nr = int(nr)
58 |     plt.figure(figsize=(64, 64))
59 |     for i in range(fm.shape[2]):
60 |         ax = plt.subplot(nr, nc, i + 1)
61 |         ax.imshow(fm[:, :, i], cmap='jet')
62 | 
63 |         for obj in boxes:
64 |             box = copy.deepcopy(obj) / stride
65 |             draw_box(ax, box, color='g')
66 | 
67 |         for pts_score in keypoints:
68 |             pts = pts_score[:8]
69 |             pts = pts / stride
70 |             for i in range(4):
71 |                 ax.plot(pts[2 * i + 1], pts[2 * i + 0], 'r*')
72 |             ax.plot([pts[1], pts[3]], [pts[0], pts[2]], c='y', lw=5)
73 |             ax.plot([pts[3], pts[5]], [pts[2], pts[4]], c='g', lw=5)
74 |             ax.plot([pts[5], pts[7]], [pts[4], pts[6]], c='b', lw=5)
75 |             ax.plot([pts[7], pts[1]], [pts[6], pts[0]], c='r', lw=5)
76 | 
77 |         # plt.colorbar()
78 |         ax.axis('off')
79 |     if save_filename:
80 |         plt.savefig(save_filename)
81 |     else:
82 |         plt.show()
83 |     plt.close()
84 | 


--------------------------------------------------------------------------------
/datasets/README.md:
--------------------------------------------------------------------------------
 1 |  ## BlendMask instance detection
 2 | 
 3 | ```
 4 | coco/
 5 |   thing_train2017/
 6 |     # thing class label maps for auxiliary semantic loss
 7 | lvis/
 8 |   thing_train/
 9 |     # semantic labels for LVIS
10 | ```
11 | 
12 | Run `python prepare_thing_sem_from_instance.py`, to extract semantic labels from instance annotations.
13 | 
14 | Run `python prepare_thing_sem_from_lvis.py`, to extract semantic labels from LVIS annotations.
15 | 
16 | ## Text Recognition
17 | 
18 | - Totaltext training, testing images, and annotations [[link]](https://universityofadelaide.box.com/shared/static/3eq5ti7z45qfq5gu96gg5t1xwh1yrrt7.zip) [[paper]](https://ieeexplore.ieee.org/abstract/document/8270088/) [[code]](https://github.com/cs-chan/Total-Text-Dataset). 
19 | - CTW1500 training, testing images, and annotations [[link]](https://universityofadelaide.box.com/s/yb9red8pi9eszuzqompo593b6zhz87qw) [[paper]](https://www.sciencedirect.com/science/article/pii/S0031320319300664) [[code]](https://github.com/Yuliang-Liu/Curve-Text-Detector).
20 | - MLT [[dataset]](https://universityofadelaide.box.com/s/tsiimvp65tkf7dw1nuh8l71cjcs0fyif) [[paper]](https://ieeexplore.ieee.org/abstract/document/8270168).
21 | - Syntext-150k: 
22 |   - Part1: 94,723 [[dataset]](https://universityofadelaide.box.com/s/alta996w4fym6arh977h3k3xv55clhg3) 
23 |   - Part2: 54,327 [[dataset]](https://universityofadelaide.box.com/s/7k7d6nvf951s4i01szs4udpu2yv5dlqe)
24 | 
25 | ```
26 | text/
27 |   totaltext/
28 |     annotations/
29 |     train_images/
30 |     test_images/
31 |   mlt2017/
32 |     annotations/train.json
33 |     images/
34 |     ...
35 |   syntext1/
36 |   syntext2/
37 |   ...
38 |   evaluation/
39 |     gt_ctw1500.zip
40 |     gt_totaltext.zip
41 | ```
42 | 
43 | To evaluate on Total Text and CTW1500, first download the zipped annotations with
44 | 
45 | ```
46 | mkdir evaluation
47 | cd evaluation
48 | wget -O gt_ctw1500.zip https://cloudstor.aarnet.edu.au/plus/s/uoeFl0pCN9BOCN5/download
49 | wget -O gt_totaltext.zip https://cloudstor.aarnet.edu.au/plus/s/pEMs0KjCocL2nvV/download
50 | ```
51 | 
52 | ## Person In Context instance detection
53 | 
54 | ```
55 | pic/
56 |   thing_train/
57 |     # thing class label maps for auxiliary semantic loss
58 |   annotations/
59 |     train_person.json
60 |     val_person.json
61 |   image/
62 |     train/
63 |     ...
64 |   
65 | ```
66 | 
67 | First link the PIC_2.0 dataset to this folder with `ln -s \path\to\PIC_2.0 pic`. Then use the `python gen_coco_person.py` to generate train and validation annotation jsons.
68 | 
69 | Run `python prepare_thing_sem_from_instance.py --dataset-name pic`, to extract semantic labels from instance annotations.
70 | 


--------------------------------------------------------------------------------
/datasets/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/datasets/components/coco_2017_train_class_agnosticTrue_whitenTrue_sigmoidTrue_60_siz28.npz


--------------------------------------------------------------------------------
/docs/bugs.md:
--------------------------------------------------------------------------------
1 | ## Potential Bugs
2 | 
3 | 
4 | 1, The default Pixel Means and Variance is RGB input by changing FAIR pretrained models(R-50.pkl, R-101.pkl)
5 |     Caffe pretrained model(BGR, ) vs Pytorch pretrained model(RGB)


--------------------------------------------------------------------------------
/docs/datasets.md:
--------------------------------------------------------------------------------
 1 | # Some Dataset Annotations
 2 | 
 3 | ## YouTube VIS
 4 | Train: 2238 videos(one clip in each floder)
 5 | 
 6 | Validation: 302 videos(submit to the server)
 7 | 
 8 | Test: 343 videos(not useful, designed for competition)
 9 | 
10 | 
11 | ## OVIS
12 | 
13 | Train: 607 videos(one clip in each floder)
14 | 
15 | Validation: 140 videos(submit to the server)
16 | 
17 | Test: 343 videos(not useful, designed for competition)
18 | 
19 | 
20 | 
21 | ## MOTS Challenge 
22 | 
23 | 
24 | 
25 | ## MOTS KITTI
26 | 
27 | 


--------------------------------------------------------------------------------
/docs/notes.md:
--------------------------------------------------------------------------------
1 | 
2 | 1, YouTube-VIS dataset:
3 |  should upload the result.json to server to evaluate the results.
4 | 
5 | 
6 | 2, Cityscape Panoptic dataset
7 | 
8 | 
9 | 3, Cityscape Video Panoptic dataset 


--------------------------------------------------------------------------------
/docs/overview.md:
--------------------------------------------------------------------------------
1 | 
2 | ## OverView 
3 | cvpods is based Detectron2 but with more flexible design for research purpose.
4 | 
5 | Currently, this codebase is used for object detection and instance segmentation research.


--------------------------------------------------------------------------------
/docs/results.md:
--------------------------------------------------------------------------------
1 | ## Benchmark of CVPODs
2 | 
3 | 
4 | 
5 | 
6 | 


--------------------------------------------------------------------------------
/docs/tricks.md:
--------------------------------------------------------------------------------
1 | 1, 


--------------------------------------------------------------------------------
/fig/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/lxtGH/BSSeg/77a6cf15150cc65cf2f21b124381dc8d4573f21b/fig/test.jpg


--------------------------------------------------------------------------------
/playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustre/share_data/chengguangliang/hehao/CEInst_checkpoint/r101_3x.pth",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=101),
10 |     ROI_HEADS=dict(NUM_CLASSES=8),
11 |     BOUNDARY_MASK_HEAD=dict(
12 |         OUTPUT_RATIO=1,
13 |         POOLER_RESOLUTION=28,
14 |         IN_FEATURES=["p2"],
15 |         NUM_CONV=2),
16 |         ROI_MASK_HEAD=dict(
17 |             CEMODULE=dict(
18 |                 NUM_CONV=2,
19 |                 PLANES=256,
20 |                 DCN_ON=True,
21 |                 DCN_V2=True,
22 |                 NUM_EDGE_CONV=2,
23 |                 FUSE_MODE="Add",
24 |                 WITH_EDGE_REFINE=True,
25 |                 NORM='GN',
26 |                 KERNEL_SIZE=5
27 |             ),
28 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
29 |         ),
30 |     ),
31 |     DATASETS=dict(
32 |         TRAIN=("cityscapes_fine_instance_seg_train",),
33 |         TEST=("cityscapes_fine_instance_seg_val",),
34 |     ),
35 |     SOLVER=dict(
36 |         IMS_PER_BATCH=8,
37 |         IMS_PER_DEVICE=1,
38 |         LR_SCHEDULER=dict(
39 |             STEPS=(18000,),
40 |             MAX_ITER=24000,
41 |         ),
42 |         OPTIMIZER=dict(
43 |             BASE_LR=0.01,
44 |         ),
45 |         CHECKPOINT_PERIOD=8000,
46 |     ),
47 |     INPUT=dict(
48 |         AUG=dict(
49 |             TRAIN_PIPELINES=[
50 |                 ("ResizeShortestEdge", dict(
51 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
52 |                     max_size=2048, sample_style="choice")),
53 |                 ("RandomFlip", dict()),
54 |             ],
55 |             TEST_PIPELINES=[
56 |                 ("ResizeShortestEdge", dict(
57 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
58 |             ],
59 |         ),
60 |     ),
61 |     TEST=dict(
62 |         EVAL_PEROID=10000,
63 |     ),
64 |     OUTPUT_DIR="output"
65 | )
66 | 
67 | 
68 | class FasterRCNNConfig(RCNNFPNConfig):
69 |     def __init__(self):
70 |         super(FasterRCNNConfig, self).__init__()
71 |         self._register_configuration(_config_dict)
72 | 
73 | 
74 | config = FasterRCNNConfig()
75 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return BoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustre/share_data/chengguangliang/hehao/CEInst_checkpoint/r50_3x.pth",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=50),
10 |     ROI_HEADS=dict(NUM_CLASSES=8),
11 |     BOUNDARY_MASK_HEAD=dict(
12 |         OUTPUT_RATIO=1,
13 |         POOLER_RESOLUTION=28,
14 |         IN_FEATURES=["p2"],
15 |         NUM_CONV=2),
16 |         ROI_MASK_HEAD=dict(
17 |             CEMODULE=dict(
18 |                 NUM_CONV=2,
19 |                 PLANES=256,
20 |                 DCN_ON=True,
21 |                 DCN_V2=True,
22 |                 NUM_EDGE_CONV=2,
23 |                 FUSE_MODE="Add",
24 |                 WITH_EDGE_REFINE=True,
25 |                 NORM='GN',
26 |                 KERNEL_SIZE=5
27 |             ),
28 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
29 |         ),
30 |     ),
31 |     DATASETS=dict(
32 |         TRAIN=("cityscapes_fine_instance_seg_train",),
33 |         TEST=("cityscapes_fine_instance_seg_val",),
34 |     ),
35 |     SOLVER=dict(
36 |         IMS_PER_BATCH=8,
37 |         IMS_PER_DEVICE=1,
38 |         LR_SCHEDULER=dict(
39 |             STEPS=(18000,),
40 |             MAX_ITER=24000,
41 |         ),
42 |         OPTIMIZER=dict(
43 |             BASE_LR=0.01,
44 |         ),
45 |         CHECKPOINT_PERIOD=8000,
46 |     ),
47 |     INPUT=dict(
48 |         AUG=dict(
49 |             TRAIN_PIPELINES=[
50 |                 ("ResizeShortestEdge", dict(
51 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
52 |                     max_size=2048, sample_style="choice")),
53 |                 ("RandomFlip", dict()),
54 |             ],
55 |             TEST_PIPELINES=[
56 |                 ("ResizeShortestEdge", dict(
57 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
58 |             ],
59 |         ),
60 |     ),
61 |     TEST=dict(
62 |         EVAL_PEROID=10000,
63 |     ),
64 |     OUTPUT_DIR="output"
65 | )
66 | 
67 | 
68 | class FasterRCNNConfig(RCNNFPNConfig):
69 |     def __init__(self):
70 |         super(FasterRCNNConfig, self).__init__()
71 |         self._register_configuration(_config_dict)
72 | 
73 | 
74 | config = FasterRCNNConfig()
75 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return BoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_rx101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustre/share_data/chengguangliang/hehao/CEInst_checkpoint/rx101_3x.pth",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(
10 |                 DEPTH=101,
11 |                 NUM_GROUPS=64,
12 |                 WIDTH_PER_GROUP=4,
13 |                 STRIDE_IN_1X1=False),
14 |         ROI_HEADS=dict(NUM_CLASSES=8),
15 |     BOUNDARY_MASK_HEAD=dict(
16 |         OUTPUT_RATIO=1,
17 |         POOLER_RESOLUTION=28,
18 |         IN_FEATURES=["p2"],
19 |         NUM_CONV=2),
20 |         ROI_MASK_HEAD=dict(
21 |             CEMODULE=dict(
22 |                 NUM_CONV=2,
23 |                 PLANES=256,
24 |                 DCN_ON=True,
25 |                 DCN_V2=True,
26 |                 NUM_EDGE_CONV=2,
27 |                 FUSE_MODE="Add",
28 |                 WITH_EDGE_REFINE=True,
29 |                 NORM='GN',
30 |                 KERNEL_SIZE=5
31 |             ),
32 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
33 |         ),
34 |     ),
35 |     DATASETS=dict(
36 |         TRAIN=("cityscapes_fine_instance_seg_train",),
37 |         TEST=("cityscapes_fine_instance_seg_val",),
38 |     ),
39 |     SOLVER=dict(
40 |         IMS_PER_BATCH=8,
41 |         IMS_PER_DEVICE=1,
42 |         LR_SCHEDULER=dict(
43 |             STEPS=(18000,),
44 |             MAX_ITER=24000,
45 |         ),
46 |         OPTIMIZER=dict(
47 |             BASE_LR=0.01,
48 |         ),
49 |         CHECKPOINT_PERIOD=8000,
50 |     ),
51 |     INPUT=dict(
52 |         AUG=dict(
53 |             TRAIN_PIPELINES=[
54 |                 ("ResizeShortestEdge", dict(
55 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
56 |                     max_size=2048, sample_style="choice")),
57 |                 ("RandomFlip", dict()),
58 |             ],
59 |             TEST_PIPELINES=[
60 |                 ("ResizeShortestEdge", dict(
61 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
62 |             ],
63 |         ),
64 |     ),
65 |     TEST=dict(
66 |         EVAL_PEROID=10000,
67 |     ),
68 |     OUTPUT_DIR="output"
69 | )
70 | 
71 | 
72 | class FasterRCNNConfig(RCNNFPNConfig):
73 |     def __init__(self):
74 |         super(FasterRCNNConfig, self).__init__()
75 |         self._register_configuration(_config_dict)
76 | 
77 | 
78 | config = FasterRCNNConfig()
79 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/ceseg/boundary_refine_mask_rcnn_rx101_ms_3x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return BoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_R101X/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.pointrend_config import PointRendRCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/X-101-64x4d.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(
10 |                 DEPTH=101,
11 |                 NUM_GROUPS=64,
12 |                 WIDTH_PER_GROUP=4,
13 |                 STRIDE_IN_1X1=False),
14 |         ROI_HEADS=dict(NUM_CLASSES=8),
15 |         POINT_HEAD=dict(
16 |             NUM_CLASSES=8,
17 |         ),
18 |     ),
19 |     DATASETS=dict(
20 |         TRAIN=("cityscapes_fine_instance_seg_train",),
21 |         TEST=("cityscapes_fine_instance_seg_test",),
22 |     ),
23 |     SOLVER=dict(
24 |         IMS_PER_BATCH=8,
25 |         IMS_PER_DEVICE=1,
26 |         LR_SCHEDULER=dict(
27 |             STEPS=(18000,),
28 |             MAX_ITER=24000,
29 |         ),
30 |         OPTIMIZER=dict(
31 |             BASE_LR=0.01,
32 |         ),
33 |         CHECKPOINT_PERIOD=8000,
34 |     ),
35 |     INPUT=dict(
36 |         AUG=dict(
37 |             TRAIN_PIPELINES=[
38 |                 ("ResizeShortestEdge", dict(
39 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
40 |                     max_size=2048, sample_style="choice")),
41 |                 ("RandomFlip", dict()),
42 |             ],
43 |             TEST_PIPELINES=[
44 |                 ("ResizeShortestEdge", dict(
45 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
46 |             ],
47 |         ),
48 |     ),
49 |     TEST=dict(
50 |         EVAL_PEROID=10000,
51 |     ),
52 |     OUTPUT_DIR="output"
53 | )
54 | 
55 | 
56 | class PointRendRCNNConfig(PointRendRCNNFPNConfig):
57 |     def __init__(self):
58 |         super(PointRendRCNNConfig, self).__init__()
59 |         self._register_configuration(_config_dict)
60 | 
61 | 
62 | config = PointRendRCNNConfig()
63 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_R101X/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
 6 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
 7 | from cvpods.modeling.meta_arch.pointrend import PointRendROIHeads, CoarseMaskHead, StandardPointHead
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return PointRendROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_point_head(cfg, input_shape):
31 |     return StandardPointHead(cfg, input_shape)
32 | 
33 | 
34 | def build_mask_head(cfg, input_shape):
35 |     return CoarseMaskHead(cfg, input_shape)
36 | 
37 | 
38 | def build_model(cfg):
39 |     cfg.build_backbone = build_backbone
40 |     cfg.build_proposal_generator = build_proposal_generator
41 |     cfg.build_roi_heads = build_roi_heads
42 |     cfg.build_box_head = build_box_head
43 |     cfg.build_mask_head = build_mask_head
44 |     cfg.build_point_head = build_point_head
45 | 
46 |     model = GeneralizedRCNN(cfg)
47 | 
48 |     return model
49 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r101/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.pointrend_config import PointRendRCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-101.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=101),
10 |         ROI_HEADS=dict(NUM_CLASSES=8),
11 |         POINT_HEAD=dict(
12 |             NUM_CLASSES=8,
13 |         ),
14 |     ),
15 |     DATASETS=dict(
16 |         TRAIN=("cityscapes_fine_instance_seg_train",),
17 |         TEST=("cityscapes_fine_instance_seg_test",),
18 |     ),
19 |     SOLVER=dict(
20 |         IMS_PER_BATCH=8,
21 |         IMS_PER_DEVICE=1,
22 |         LR_SCHEDULER=dict(
23 |             STEPS=(18000,),
24 |             MAX_ITER=24000,
25 |         ),
26 |         OPTIMIZER=dict(
27 |             BASE_LR=0.01,
28 |         ),
29 |         CHECKPOINT_PERIOD=8000,
30 |     ),
31 |     INPUT=dict(
32 |         AUG=dict(
33 |             TRAIN_PIPELINES=[
34 |                 ("ResizeShortestEdge", dict(
35 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
36 |                     max_size=2048, sample_style="choice")),
37 |                 ("RandomFlip", dict()),
38 |             ],
39 |             TEST_PIPELINES=[
40 |                 ("ResizeShortestEdge", dict(
41 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
42 |             ],
43 |         ),
44 |     ),
45 |     TEST=dict(
46 |         EVAL_PEROID=10000,
47 |     ),
48 |     OUTPUT_DIR="output"
49 | )
50 | 
51 | 
52 | class PointRendRCNNConfig(PointRendRCNNFPNConfig):
53 |     def __init__(self):
54 |         super(PointRendRCNNConfig, self).__init__()
55 |         self._register_configuration(_config_dict)
56 | 
57 | 
58 | config = PointRendRCNNConfig()
59 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r101/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
 6 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
 7 | from cvpods.modeling.meta_arch.pointrend import PointRendROIHeads, CoarseMaskHead, StandardPointHead
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return PointRendROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_point_head(cfg, input_shape):
31 |     return StandardPointHead(cfg, input_shape)
32 | 
33 | 
34 | def build_mask_head(cfg, input_shape):
35 |     return CoarseMaskHead(cfg, input_shape)
36 | 
37 | 
38 | def build_model(cfg):
39 |     cfg.build_backbone = build_backbone
40 |     cfg.build_proposal_generator = build_proposal_generator
41 |     cfg.build_roi_heads = build_roi_heads
42 |     cfg.build_box_head = build_box_head
43 |     cfg.build_mask_head = build_mask_head
44 |     cfg.build_point_head = build_point_head
45 | 
46 |     model = GeneralizedRCNN(cfg)
47 | 
48 |     return model
49 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r50/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.pointrend_config import PointRendRCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=50),
10 |         ROI_HEADS=dict(NUM_CLASSES=8),
11 |         POINT_HEAD=dict(
12 |             NUM_CLASSES=8,
13 |         ),
14 |     ),
15 |     DATASETS=dict(
16 |         TRAIN=("cityscapes_fine_instance_seg_train",),
17 |         TEST=("cityscapes_fine_instance_seg_test",),
18 |     ),
19 |     SOLVER=dict(
20 |         IMS_PER_BATCH=8,
21 |         IMS_PER_DEVICE=1,
22 |         LR_SCHEDULER=dict(
23 |             STEPS=(18000,),
24 |             MAX_ITER=24000,
25 |         ),
26 |         OPTIMIZER=dict(
27 |             BASE_LR=0.01,
28 |         ),
29 |         CHECKPOINT_PERIOD=8000,
30 |     ),
31 |     INPUT=dict(
32 |         AUG=dict(
33 |             TRAIN_PIPELINES=[
34 |                 ("ResizeShortestEdge", dict(
35 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
36 |                     max_size=2048, sample_style="choice")),
37 |                 ("RandomFlip", dict()),
38 |             ],
39 |             TEST_PIPELINES=[
40 |                 ("ResizeShortestEdge", dict(
41 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
42 |             ],
43 |         ),
44 |     ),
45 |     TEST=dict(
46 |         EVAL_PEROID=10000,
47 |     ),
48 |     OUTPUT_DIR="output"
49 | )
50 | 
51 | 
52 | class PointRendRCNNConfig(PointRendRCNNFPNConfig):
53 |     def __init__(self):
54 |         super(PointRendRCNNConfig, self).__init__()
55 |         self._register_configuration(_config_dict)
56 | 
57 | 
58 | config = PointRendRCNNConfig()
59 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/pointrend/point_rend_mask_rcnn_r50/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
 6 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
 7 | from cvpods.modeling.meta_arch.pointrend import PointRendROIHeads, CoarseMaskHead, StandardPointHead
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return PointRendROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_point_head(cfg, input_shape):
31 |     return StandardPointHead(cfg, input_shape)
32 | 
33 | 
34 | def build_mask_head(cfg, input_shape):
35 |     return CoarseMaskHead(cfg, input_shape)
36 | 
37 | 
38 | def build_model(cfg):
39 |     cfg.build_backbone = build_backbone
40 |     cfg.build_proposal_generator = build_proposal_generator
41 |     cfg.build_roi_heads = build_roi_heads
42 |     cfg.build_box_head = build_box_head
43 |     cfg.build_mask_head = build_mask_head
44 |     cfg.build_point_head = build_point_head
45 | 
46 |     model = GeneralizedRCNN(cfg)
47 | 
48 |     return model
49 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res101_fpn_coco_ms/README.md:
--------------------------------------------------------------------------------
1 | # Mask-RCNN
2 | ## Evaluation results for bbox:  
3 | ```  
4 |  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.655
5 | ```  
6 | 
7 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res101_fpn_coco_ms/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-101.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=101),
10 |         ROI_HEADS=dict(NUM_CLASSES=8),
11 |     ),
12 |     DATASETS=dict(
13 |         TRAIN=("cityscapes_fine_instance_seg_train",),
14 |         TEST=("cityscapes_fine_instance_seg_test",),
15 |     ),
16 |     SOLVER=dict(
17 |         IMS_PER_BATCH=8,
18 |         IMS_PER_DEVICE=1,
19 |         LR_SCHEDULER=dict(
20 |             STEPS=(18000,),
21 |             MAX_ITER=24000,
22 |         ),
23 |         OPTIMIZER=dict(
24 |             BASE_LR=0.01,
25 |         ),
26 |         CHECKPOINT_PERIOD=8000,
27 |     ),
28 |     INPUT=dict(
29 |         AUG=dict(
30 |             TRAIN_PIPELINES=[
31 |                 ("ResizeShortestEdge", dict(
32 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
33 |                     max_size=2048, sample_style="choice")),
34 |                 ("RandomFlip", dict()),
35 |             ],
36 |             TEST_PIPELINES=[
37 |                 ("ResizeShortestEdge", dict(
38 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
39 |             ],
40 |         ),
41 |     ),
42 |     TEST=dict(
43 |         EVAL_PEROID=10000,
44 |     ),
45 |     OUTPUT_DIR="output",
46 | )
47 | 
48 | 
49 | class FasterRCNNConfig(RCNNFPNConfig):
50 |     def __init__(self):
51 |         super(FasterRCNNConfig, self).__init__()
52 |         self._register_configuration(_config_dict)
53 | 
54 | 
55 | config = FasterRCNNConfig()
56 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res101_fpn_coco_ms/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
 6 | from cvpods.modeling.roi_heads import StandardROIHeads
 7 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
 8 | from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead
 9 | 
10 | 
11 | def build_backbone(cfg, input_shape=None):
12 |     if input_shape is None:
13 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
14 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
15 |     assert isinstance(backbone, Backbone)
16 |     return backbone
17 | 
18 | 
19 | def build_proposal_generator(cfg, input_shape):
20 |     return RPN(cfg, input_shape)
21 | 
22 | 
23 | def build_roi_heads(cfg, input_shape):
24 |     return StandardROIHeads(cfg, input_shape)
25 | 
26 | 
27 | def build_box_head(cfg, input_shape):
28 |     return FastRCNNConvFCHead(cfg, input_shape)
29 | 
30 | 
31 | def build_mask_head(cfg, input_shape):
32 |     return MaskRCNNConvUpsampleHead(cfg, input_shape)
33 | 
34 | 
35 | def build_model(cfg):
36 |     cfg.build_backbone = build_backbone
37 |     cfg.build_proposal_generator = build_proposal_generator
38 |     cfg.build_roi_heads = build_roi_heads
39 |     cfg.build_box_head = build_box_head
40 |     cfg.build_mask_head = build_mask_head
41 | 
42 |     model = GeneralizedRCNN(cfg)
43 |     return model
44 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res50_fpn_coco_ms/README.md:
--------------------------------------------------------------------------------
1 | # Mask-RCNN
2 | ## Evaluation results for bbox:  
3 | ```  
4 |  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.655
5 | ```  
6 | 
7 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res50_fpn_coco_ms/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=50),
10 |         ROI_HEADS=dict(NUM_CLASSES=8),
11 |     ),
12 |     DATASETS=dict(
13 |         TRAIN=("cityscapes_fine_instance_seg_train",),
14 |         TEST=("cityscapes_fine_instance_seg_test",),
15 |     ),
16 |     SOLVER=dict(
17 |         IMS_PER_BATCH=8,
18 |         IMS_PER_DEVICE=1,
19 |         LR_SCHEDULER=dict(
20 |             STEPS=(18000,),
21 |             MAX_ITER=24000,
22 |         ),
23 |         OPTIMIZER=dict(
24 |             BASE_LR=0.01,
25 |         ),
26 |         CHECKPOINT_PERIOD=8000,
27 |     ),
28 |     INPUT=dict(
29 |         AUG=dict(
30 |             TRAIN_PIPELINES=[
31 |                 ("ResizeShortestEdge", dict(
32 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
33 |                     max_size=2048, sample_style="choice")),
34 |                 ("RandomFlip", dict()),
35 |             ],
36 |             TEST_PIPELINES=[
37 |                 ("ResizeShortestEdge", dict(
38 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
39 |             ],
40 |         ),
41 |     ),
42 |     TEST=dict(
43 |         EVAL_PEROID=10000,
44 |     ),
45 |     OUTPUT_DIR="output",
46 | )
47 | 
48 | 
49 | class FasterRCNNConfig(RCNNFPNConfig):
50 |     def __init__(self):
51 |         super(FasterRCNNConfig, self).__init__()
52 |         self._register_configuration(_config_dict)
53 | 
54 | 
55 | config = FasterRCNNConfig()
56 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_res50_fpn_coco_ms/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
 6 | from cvpods.modeling.roi_heads import StandardROIHeads
 7 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
 8 | from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead
 9 | 
10 | 
11 | def build_backbone(cfg, input_shape=None):
12 |     if input_shape is None:
13 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
14 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
15 |     assert isinstance(backbone, Backbone)
16 |     return backbone
17 | 
18 | 
19 | def build_proposal_generator(cfg, input_shape):
20 |     return RPN(cfg, input_shape)
21 | 
22 | 
23 | def build_roi_heads(cfg, input_shape):
24 |     return StandardROIHeads(cfg, input_shape)
25 | 
26 | 
27 | def build_box_head(cfg, input_shape):
28 |     return FastRCNNConvFCHead(cfg, input_shape)
29 | 
30 | 
31 | def build_mask_head(cfg, input_shape):
32 |     return MaskRCNNConvUpsampleHead(cfg, input_shape)
33 | 
34 | 
35 | def build_model(cfg):
36 |     cfg.build_backbone = build_backbone
37 |     cfg.build_proposal_generator = build_proposal_generator
38 |     cfg.build_roi_heads = build_roi_heads
39 |     cfg.build_box_head = build_box_head
40 |     cfg.build_mask_head = build_mask_head
41 | 
42 |     model = GeneralizedRCNN(cfg)
43 |     return model
44 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_rx101_fpn_coco_ms/README.md:
--------------------------------------------------------------------------------
1 | # Mask-RCNN
2 | ## Evaluation results for bbox:  
3 | ```  
4 |  Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.655
5 | ```  
6 | 
7 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_rx101_fpn_coco_ms/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/X-101-64x4d.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(
10 |                 DEPTH=101,
11 |                 NUM_GROUPS=64,
12 |                 WIDTH_PER_GROUP=4,
13 |                 STRIDE_IN_1X1=False),
14 |         ROI_HEADS=dict(NUM_CLASSES=8),
15 |     ),
16 |     DATASETS=dict(
17 |         TRAIN=("cityscapes_fine_instance_seg_train",),
18 |         TEST=("cityscapes_fine_instance_seg_test",),
19 |     ),
20 |     SOLVER=dict(
21 |         IMS_PER_BATCH=8,
22 |         IMS_PER_DEVICE=1,
23 |         LR_SCHEDULER=dict(
24 |             STEPS=(18000,),
25 |             MAX_ITER=24000,
26 |         ),
27 |         OPTIMIZER=dict(
28 |             BASE_LR=0.01,
29 |         ),
30 |         CHECKPOINT_PERIOD=8000,
31 |     ),
32 |     INPUT=dict(
33 |         AUG=dict(
34 |             TRAIN_PIPELINES=[
35 |                 ("ResizeShortestEdge", dict(
36 |                     short_edge_length=(800, 832, 864, 896, 928, 960, 992, 1024),
37 |                     max_size=2048, sample_style="choice")),
38 |                 ("RandomFlip", dict()),
39 |             ],
40 |             TEST_PIPELINES=[
41 |                 ("ResizeShortestEdge", dict(
42 |                     short_edge_length=1024, max_size=2048, sample_style="choice")),
43 |             ],
44 |         ),
45 |     ),
46 |     TEST=dict(
47 |         EVAL_PEROID=10000,
48 |     ),
49 |     OUTPUT_DIR="output",
50 | )
51 | 
52 | 
53 | class FasterRCNNConfig(RCNNFPNConfig):
54 |     def __init__(self):
55 |         super(FasterRCNNConfig, self).__init__()
56 |         self._register_configuration(_config_dict)
57 | 
58 | 
59 | config = FasterRCNNConfig()
60 | 


--------------------------------------------------------------------------------
/playground/detection/cityscapes/rcnn/mask_rcnn_rx101_fpn_coco_ms/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from cvpods.modeling.roi_heads.box_head import FastRCNNConvFCHead
 6 | from cvpods.modeling.roi_heads import StandardROIHeads
 7 | from cvpods.modeling.meta_arch.rcnn import GeneralizedRCNN
 8 | from cvpods.modeling.roi_heads.mask_head import MaskRCNNConvUpsampleHead
 9 | 
10 | 
11 | def build_backbone(cfg, input_shape=None):
12 |     if input_shape is None:
13 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
14 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
15 |     assert isinstance(backbone, Backbone)
16 |     return backbone
17 | 
18 | 
19 | def build_proposal_generator(cfg, input_shape):
20 |     return RPN(cfg, input_shape)
21 | 
22 | 
23 | def build_roi_heads(cfg, input_shape):
24 |     return StandardROIHeads(cfg, input_shape)
25 | 
26 | 
27 | def build_box_head(cfg, input_shape):
28 |     return FastRCNNConvFCHead(cfg, input_shape)
29 | 
30 | 
31 | def build_mask_head(cfg, input_shape):
32 |     return MaskRCNNConvUpsampleHead(cfg, input_shape)
33 | 
34 | 
35 | def build_model(cfg):
36 |     cfg.build_backbone = build_backbone
37 |     cfg.build_proposal_generator = build_proposal_generator
38 |     cfg.build_roi_heads = build_roi_heads
39 |     cfg.build_box_head = build_box_head
40 |     cfg.build_mask_head = build_mask_head
41 | 
42 |     model = GeneralizedRCNN(cfg)
43 |     return model
44 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r101_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-101.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=101),
10 |     BOUNDARY_MASK_HEAD=dict(
11 |         OUTPUT_RATIO=1,
12 |         POOLER_RESOLUTION=28,
13 |         IN_FEATURES=["p2"],
14 |         NUM_CONV=2),
15 |         ROI_MASK_HEAD=dict(
16 |             CEMODULE=dict(
17 |                 NUM_CONV=2,
18 |                 PLANES=256,
19 |                 DCN_ON=True,
20 |                 DCN_V2=True,
21 |                 NUM_EDGE_CONV=2,
22 |                 FUSE_MODE="Add",
23 |                 WITH_EDGE_REFINE=True,
24 |                 NORM='GN',
25 |                 KERNEL_SIZE=5
26 |             ),
27 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
28 |         ),
29 |     ),
30 |     DATASETS=dict(
31 |         TRAIN=("coco_2017_train",),
32 |         TEST=("coco_2017_val",),
33 |     ),
34 |     SOLVER=dict(
35 |         LR_SCHEDULER=dict(
36 |             STEPS=(60000, 80000),
37 |             MAX_ITER=90000,
38 |         ),
39 |         OPTIMIZER=dict(
40 |             BASE_LR=0.02,
41 |         ),
42 |         IMS_PER_BATCH=16,
43 |         IMS_PER_DEVICE=2,
44 |         CHECKPOINT_PERIOD=30000,
45 |     ),
46 |     INPUT=dict(
47 |         AUG=dict(
48 |             TRAIN_PIPELINES=[
49 |                 ("ResizeShortestEdge",
50 |                  dict(short_edge_length=(640, 672, 704, 736, 768, 800),
51 |                       max_size=1333, sample_style="choice")),
52 |                 ("RandomFlip", dict()),
53 |             ],
54 |             TEST_PIPELINES=[
55 |                 ("ResizeShortestEdge",
56 |                  dict(short_edge_length=800, max_size=1333, sample_style="choice")),
57 |             ],
58 |         )
59 |     ),
60 |     TEST=dict(
61 |         EVAL_PEROID=10000,
62 |     ),
63 |     OUTPUT_DIR="output"
64 | )
65 | 
66 | 
67 | class FasterRCNNConfig(RCNNFPNConfig):
68 |     def __init__(self):
69 |         super(FasterRCNNConfig, self).__init__()
70 |         self._register_configuration(_config_dict)
71 | 
72 | 
73 | config = FasterRCNNConfig()
74 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r101_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return BoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/data/data1/pretrained/R-50.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=50),
10 |     BOUNDARY_MASK_HEAD=dict(
11 |         OUTPUT_RATIO=1,
12 |         POOLER_RESOLUTION=28,
13 |         IN_FEATURES=["p2"],
14 |         NUM_CONV=2),
15 |         ROI_MASK_HEAD=dict(
16 |             CEMODULE=dict(
17 |                 NUM_CONV=2,
18 |                 PLANES=256,
19 |                 DCN_ON=True,
20 |                 DCN_V2=True,
21 |                 NUM_EDGE_CONV=2,
22 |                 FUSE_MODE="Add",
23 |                 WITH_EDGE_REFINE=True,
24 |                 NORM='GN',
25 |                 KERNEL_SIZE=5
26 |             ),
27 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
28 |         ),
29 |     ),
30 |     DATASETS=dict(
31 |         TRAIN=("coco_2017_train",),
32 |         TEST=("coco_2017_val",),
33 |     ),
34 |     SOLVER=dict(
35 |         LR_SCHEDULER=dict(
36 |             STEPS=(60000, 80000),
37 |             MAX_ITER=90000,
38 |         ),
39 |         OPTIMIZER=dict(
40 |             BASE_LR=0.02,
41 |         ),
42 |         IMS_PER_BATCH=16,
43 |         IMS_PER_DEVICE=2,
44 |         CHECKPOINT_PERIOD=30000,
45 |     ),
46 |     INPUT=dict(
47 |         AUG=dict(
48 |             TRAIN_PIPELINES=[
49 |                 ("ResizeShortestEdge",
50 |                  dict(short_edge_length=(640, 672, 704, 736, 768, 800),
51 |                       max_size=1333, sample_style="choice")),
52 |                 ("RandomFlip", dict()),
53 |             ],
54 |             TEST_PIPELINES=[
55 |                 ("ResizeShortestEdge",
56 |                  dict(short_edge_length=800, max_size=1333, sample_style="choice")),
57 |             ],
58 |         )
59 |     ),
60 |     TEST=dict(
61 |         EVAL_PEROID=10000,
62 |     ),
63 |     OUTPUT_DIR="output"
64 | )
65 | 
66 | 
67 | class FasterRCNNConfig(RCNNFPNConfig):
68 |     def __init__(self):
69 |         super(FasterRCNNConfig, self).__init__()
70 |         self._register_configuration(_config_dict)
71 | 
72 | 
73 | config = FasterRCNNConfig()
74 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import BoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return BoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=50),
10 |     BOUNDARY_MASK_HEAD=dict(
11 |         OUTPUT_RATIO=1,
12 |         POOLER_RESOLUTION=28,
13 |         IN_FEATURES=["p2"],
14 |         NUM_CONV=2),
15 |         ROI_BOX_HEAD=dict(
16 |             CLS_AGNOSTIC_BBOX_REG=True,
17 |         ),
18 |         ROI_MASK_HEAD=dict(
19 |             CEMODULE=dict(
20 |                 NUM_CONV=2,
21 |                 PLANES=256,
22 |                 DCN_ON=True,
23 |                 DCN_V2=True,
24 |                 NUM_EDGE_CONV=2,
25 |                 FUSE_MODE="Add",
26 |                 WITH_EDGE_REFINE=True,
27 |                 NORM='GN',
28 |                 KERNEL_SIZE=5
29 |             ),
30 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
31 |         ),
32 |     ),
33 |     DATASETS=dict(
34 |         TRAIN=("coco_2017_train",),
35 |         TEST=("coco_2017_val",),
36 |     ),
37 |     SOLVER=dict(
38 |         LR_SCHEDULER=dict(
39 |             STEPS=(60000, 80000),
40 |             MAX_ITER=90000,
41 |         ),
42 |         OPTIMIZER=dict(
43 |             BASE_LR=0.02,
44 |         ),
45 |         IMS_PER_BATCH=16,
46 |         IMS_PER_DEVICE=2,
47 |         CHECKPOINT_PERIOD=30000,
48 |     ),
49 |     INPUT=dict(
50 |         AUG=dict(
51 |             TRAIN_PIPELINES=[
52 |                 ("ResizeShortestEdge",
53 |                  dict(short_edge_length=(640, 672, 704, 736, 768, 800),
54 |                       max_size=1333, sample_style="choice")),
55 |                 ("RandomFlip", dict()),
56 |             ],
57 |             TEST_PIPELINES=[
58 |                 ("ResizeShortestEdge",
59 |                  dict(short_edge_length=800, max_size=1333, sample_style="choice")),
60 |             ],
61 |         )
62 |     ),
63 |     TEST=dict(
64 |         EVAL_PEROID=10000,
65 |     ),
66 |     OUTPUT_DIR="output"
67 | )
68 | 
69 | 
70 | class FasterRCNNConfig(RCNNFPNConfig):
71 |     def __init__(self):
72 |         super(FasterRCNNConfig, self).__init__()
73 |         self._register_configuration(_config_dict)
74 | 
75 | 
76 | config = FasterRCNNConfig()
77 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_1x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import CascadeBoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return CascadeBoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/R-50.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(DEPTH=50),
10 |     BOUNDARY_MASK_HEAD=dict(
11 |         OUTPUT_RATIO=1,
12 |         POOLER_RESOLUTION=28,
13 |         IN_FEATURES=["p2"],
14 |         NUM_CONV=2),
15 |         ROI_BOX_HEAD=dict(
16 |             CLS_AGNOSTIC_BBOX_REG=True,
17 |         ),
18 |         ROI_MASK_HEAD=dict(
19 |             CEMODULE=dict(
20 |                 NUM_CONV=2,
21 |                 PLANES=256,
22 |                 DCN_ON=True,
23 |                 DCN_V2=True,
24 |                 NUM_EDGE_CONV=2,
25 |                 FUSE_MODE="Add",
26 |                 WITH_EDGE_REFINE=True,
27 |                 NORM='GN',
28 |                 KERNEL_SIZE=5
29 |             ),
30 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
31 |         ),
32 |     ),
33 |     DATASETS=dict(
34 |         TRAIN=("coco_2017_train",),
35 |         TEST=("coco_2017_val",),
36 |     ),
37 |     SOLVER=dict(
38 |         LR_SCHEDULER=dict(
39 |             STEPS=(210000, 250000),
40 |             MAX_ITER=270000,
41 |         ),
42 |         OPTIMIZER=dict(
43 |             BASE_LR=0.02,
44 |         ),
45 |         IMS_PER_BATCH=16,
46 |         IMS_PER_DEVICE=2,
47 |         CHECKPOINT_PERIOD=30000,
48 |     ),
49 |     INPUT=dict(
50 |         AUG=dict(
51 |             TRAIN_PIPELINES=[
52 |                 ("ResizeShortestEdge",
53 |                  dict(short_edge_length=(640, 672, 704, 736, 768, 800),
54 |                       max_size=1333, sample_style="choice")),
55 |                 ("RandomFlip", dict()),
56 |             ],
57 |             TEST_PIPELINES=[
58 |                 ("ResizeShortestEdge",
59 |                  dict(short_edge_length=800, max_size=1333, sample_style="choice")),
60 |             ],
61 |         )
62 |     ),
63 |     TEST=dict(
64 |         EVAL_PEROID=10000,
65 |     ),
66 |     OUTPUT_DIR="output"
67 | )
68 | 
69 | 
70 | class FasterRCNNConfig(RCNNFPNConfig):
71 |     def __init__(self):
72 |         super(FasterRCNNConfig, self).__init__()
73 |         self._register_configuration(_config_dict)
74 | 
75 | 
76 | config = FasterRCNNConfig()
77 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_r50_ms_3x_3_subgt_warpping_dice_erode_dilate_gn_add_cascade/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import CascadeBoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return CascadeBoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi/config.py:
--------------------------------------------------------------------------------
 1 | import os.path as osp
 2 | 
 3 | from cvpods.configs.rcnn_fpn_config import RCNNFPNConfig
 4 | 
 5 | _config_dict = dict(
 6 |     MODEL=dict(
 7 |         WEIGHTS="/mnt/lustreold/share_data/lixiangtai/pretrained/X-101-64x4d.pkl",
 8 |         MASK_ON=True,
 9 |         RESNETS=dict(
10 |                 DEPTH=101,
11 |                 NUM_GROUPS=64,
12 |                 WIDTH_PER_GROUP=4,
13 |                 STRIDE_IN_1X1=False),
14 |     BOUNDARY_MASK_HEAD=dict(
15 |         OUTPUT_RATIO=1,
16 |         POOLER_RESOLUTION=28,
17 |         IN_FEATURES=["p2"],
18 |         NUM_CONV=2),
19 |         ROI_BOX_HEAD=dict(
20 |             CLS_AGNOSTIC_BBOX_REG=True,
21 |         ),
22 |         ROI_MASK_HEAD=dict(
23 |             CEMODULE=dict(
24 |                 NUM_CONV=2,
25 |                 PLANES=256,
26 |                 DCN_ON=True,
27 |                 DCN_V2=True,
28 |                 NUM_EDGE_CONV=2,
29 |                 FUSE_MODE="Add",
30 |                 WITH_EDGE_REFINE=True,
31 |                 NORM='GN',
32 |                 KERNEL_SIZE=5
33 |             ),
34 |             LOSS_WEIGHT=[1.0, 1.0, 1.0, 1.0]
35 |         ),
36 |     ),
37 |     DATASETS=dict(
38 |         TRAIN=("coco_2017_train",),
39 |         TEST=("coco_2017_val",),
40 |     ),
41 |     SOLVER=dict(
42 |         LR_SCHEDULER=dict(
43 |             STEPS=(120000, 140000),
44 |             MAX_ITER=150000,
45 |         ),
46 |         OPTIMIZER=dict(
47 |             BASE_LR=0.02,
48 |         ),
49 |         IMS_PER_BATCH=16,
50 |         IMS_PER_DEVICE=2,
51 |         CHECKPOINT_PERIOD=50000,
52 |     ),
53 |     INPUT=dict(
54 |         AUG=dict(
55 |             TRAIN_PIPELINES=[
56 |                 ("ResizeShortestEdge",
57 |                  dict(short_edge_length=(416, 448, 480, 512, 544, 576, 608, 640,  672, 704, 736, 768, 800, 832, 864,
58 |                                          896, 928, 960, 992, 1024, 1056, 1088, 1120, 1152, 1184, 1216, 1248,
59 |                                          1280, 1312, 1344, 1376),
60 |                       max_size=1600, sample_style="choice")),
61 |                 ("RandomFlip", dict()),
62 |             ],
63 |             TEST_PIPELINES=[
64 |                 ("ResizeShortestEdge",
65 |                  dict(short_edge_length=800, max_size=1333, sample_style="choice")),
66 |             ],
67 |         )
68 |     ),
69 |     TEST=dict(
70 |         EVAL_PEROID=10000,
71 |     ),
72 |     OUTPUT_DIR="output"
73 | )
74 | 
75 | 
76 | class FasterRCNNConfig(RCNNFPNConfig):
77 |     def __init__(self):
78 |         super(FasterRCNNConfig, self).__init__()
79 |         self._register_configuration(_config_dict)
80 | 
81 | 
82 | config = FasterRCNNConfig()
83 | 


--------------------------------------------------------------------------------
/playground/detection/coco/bs_mask/cascade/boundary_refine_mask_rcnn_rx101_ms_20e_3_subgt_warpping_dice_erode_dilate_gn_add_cascade_1600multi/net.py:
--------------------------------------------------------------------------------
 1 | from cvpods.layers import ShapeSpec
 2 | from cvpods.modeling.backbone import Backbone
 3 | from cvpods.modeling.backbone.fpn import build_resnet_fpn_backbone
 4 | from cvpods.modeling.proposal_generator import RPN
 5 | from box_head import FastRCNNConvFCHead
 6 | from boundary_mask_rcnn import CascadeBoundaryROIHeads, DecoupledBoundaryMaskHead
 7 | from rcnn import GeneralizedRCNN
 8 | 
 9 | 
10 | def build_backbone(cfg, input_shape=None):
11 |     if input_shape is None:
12 |         input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))
13 |     backbone = build_resnet_fpn_backbone(cfg, input_shape)
14 |     assert isinstance(backbone, Backbone)
15 |     return backbone
16 | 
17 | 
18 | def build_proposal_generator(cfg, input_shape):
19 |     return RPN(cfg, input_shape)
20 | 
21 | 
22 | def build_roi_heads(cfg, input_shape):
23 |     return CascadeBoundaryROIHeads(cfg, input_shape)
24 | 
25 | 
26 | def build_box_head(cfg, input_shape):
27 |     return FastRCNNConvFCHead(cfg, input_shape)
28 | 
29 | 
30 | def build_mask_head(cfg, input_shape):
31 |     return DecoupledBoundaryMaskHead(cfg, input_shape)
32 | 
33 | 
34 | def build_model(cfg):
35 |     cfg.build_backbone = build_backbone
36 |     cfg.build_proposal_generator = build_proposal_generator
37 |     cfg.build_roi_heads = build_roi_heads
38 |     cfg.build_box_head = build_box_head
39 |     cfg.build_mask_head = build_mask_head
40 | 
41 |     model = GeneralizedRCNN(cfg)
42 |     return model
43 | 


--------------------------------------------------------------------------------
/tools/cat_visualizer_results.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import os.path as osp
 3 | import cv2
 4 | import argparse
 5 | 
 6 | 
 7 | def get_args():
 8 |     parser = argparse.ArgumentParser()
 9 |     parser.add_argument('--file1', type=str, required=True)
10 |     parser.add_argument('--file2', type=str, required=True)
11 |     parser.add_argument('--save_file', type=str, required=True)
12 | 
13 |     args = parser.parse_args()
14 | 
15 |     return args
16 | 
17 | 
18 | def main():
19 | 
20 |     args = get_args()
21 |     sub_file_names = os.listdir(args.file1)
22 |     os.makedirs(args.save_file, exist_ok=True)
23 |     for sub_file in sub_file_names:
24 |         if sub_file == '.DS_Store':
25 |             continue
26 |         images = os.listdir(osp.join(args.file1, sub_file))
27 |         for image in images:
28 |             if image == '.DS_Store':
29 |                 continue
30 |             save_image_name = osp.join(args.save_file, sub_file+'_'+image)
31 |             img1 = cv2.imread(osp.join(args.file1, sub_file, image))
32 |             img2 = cv2.imread(osp.join(args.file2, sub_file, image))
33 |             img = cv2.hconcat((img1, img2))
34 |             cv2.imwrite(save_image_name, img)
35 |             print(f'{image} done')
36 |         print(f'{sub_file} done')
37 | 
38 | 
39 | if __name__ == '__main__':
40 |     main()


--------------------------------------------------------------------------------
/tools/convert_detr2cvpod.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | """
 3 | Helper script to convert models trained with the main version of DETR to be used with the Detectron2 version.
 4 | """
 5 | import json
 6 | import argparse
 7 | 
 8 | import numpy as np
 9 | import torch
10 | 
11 | 
12 | def parse_args():
13 |     parser = argparse.ArgumentParser("D2 model converter")
14 | 
15 |     parser.add_argument("--source_model", default="", type=str, help="Path or url to the DETR model to convert")
16 |     parser.add_argument("--output_model", default="", type=str, help="Path where to save the converted model")
17 |     return parser.parse_args()
18 | 
19 | 
20 | def main():
21 |     args = parse_args()
22 | 
23 |     # D2 expects contiguous classes, so we need to remap the 92 classes from DETR
24 |     # fmt: off
25 |     coco_idx = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
26 |                 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51,
27 |                 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77,
28 |                 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 91]
29 |     # fmt: on
30 | 
31 |     coco_idx = np.array(coco_idx)
32 | 
33 |     if args.source_model.startswith("https"):
34 |         checkpoint = torch.hub.load_state_dict_from_url(args.source_model, map_location="cpu", check_hash=True)
35 |     else:
36 |         checkpoint = torch.load(args.source_model, map_location="cpu")
37 |     model_to_convert = checkpoint["model"]
38 | 
39 |     model_converted = {}
40 |     for k in model_to_convert.keys():
41 |         old_k = k
42 |         if "backbone" in k:
43 |             k = k.replace("backbone.0.body.", "")
44 |             if "layer" not in k:
45 |                 k = "stem." + k
46 |             for t in [1, 2, 3, 4]:
47 |                 k = k.replace(f"layer{t}", f"res{t + 1}")
48 |             for t in [1, 2, 3]:
49 |                 k = k.replace(f"bn{t}", f"conv{t}.norm")
50 |             k = k.replace("downsample.0", "shortcut")
51 |             k = k.replace("downsample.1", "shortcut.norm")
52 |             k = "backbone.0.backbone." + k
53 |         k = "detr." + k
54 |         print(old_k, "->", k)
55 |         if "class_embed" in old_k:
56 |             v = model_to_convert[old_k].detach()
57 |             if v.shape[0] == 92:
58 |                 shape_old = v.shape
59 |                 model_converted[k] = v[coco_idx]
60 |                 print("Head conversion: changing shape from {} to {}".format(shape_old, model_converted[k].shape))
61 |                 continue
62 |         model_converted[k] = model_to_convert[old_k].detach()
63 | 
64 |     model_to_save = {"model": model_converted}
65 |     torch.save(model_to_save, args.output_model)
66 | 
67 | 
68 | if __name__ == "__main__":
69 |     main()
70 | 


--------------------------------------------------------------------------------
/tools/convert_to_d2.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | import torch
 4 | 
 5 | 
 6 | def parse_args():
 7 |     parser = argparse.ArgumentParser("D2 model converter")
 8 | 
 9 |     parser.add_argument("--source_model", default="", type=str, help="Path or url to the  model to convert")
10 |     parser.add_argument("--output_model", default="", type=str, help="Path where to save the converted model")
11 |     return parser.parse_args()
12 | 
13 | 
14 | def main():
15 |     args = parse_args()
16 | 
17 |     source_weights = torch.load(args.source_model)["model"]
18 |     converted_weights = {}
19 |     keys = list(source_weights.keys())
20 | 
21 |     prefix = 'backbone.bottom_up.'
22 |     for key in keys:
23 |         converted_weights[prefix + key] = source_weights[key]
24 | 
25 |     torch.save(converted_weights, args.output_model)
26 | 
27 | 
28 | if __name__ == "__main__":
29 |     main()
30 | 
31 | 
32 | 


--------------------------------------------------------------------------------
/tools/draw_teaser.py:
--------------------------------------------------------------------------------
 1 | # if __name__ == '__main__':
 2 | import numpy as np
 3 | import cv2
 4 | import torch
 5 | import torch.nn.functional as F
 6 | from PIL import Image
 7 | 
 8 | img_path = '/Users/hhe/research/mm2021_cenet/teaser/test_teaser.jpg'
 9 | 
10 | mask = Image.open(img_path)
11 | mask = np.array(mask)
12 | mask[mask > 127] = 255
13 | mask[mask <= 127] = 0
14 | mask[mask == 255] = 1
15 | mask = mask.astype(np.float32)
16 | laplacian_kernel = torch.tensor([-1, -1, -1, -1, 8, -1, -1, -1, -1], dtype=torch.float32).reshape(1, 1, 3, 3).requires_grad_(False)
17 | mask = torch.from_numpy(mask)
18 | mask = mask.unsqueeze(0)
19 | boundary_masks = F.conv2d(mask.unsqueeze(1), laplacian_kernel, padding=1)
20 | boundary_masks = boundary_masks.clamp(min=0)
21 | boundary_masks[boundary_masks > 0.1] = 1
22 | boundary_masks[boundary_masks <= 0.1] = 0
23 | 
24 | boundary_masks = boundary_masks.squeeze().cpu().numpy().astype('uint8')
25 | mask = mask.squeeze()
26 | mask = mask.cpu().numpy().astype('uint8')
27 | kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 11))
28 | 
29 | boundary_valid = boundary_masks == 1
30 | dilate = cv2.dilate(mask, kernel=kernel)
31 | contraction_mask = dilate - mask
32 | contraction_mask[boundary_valid] = 1
33 | contraction_mask_rgb = np.zeros([*contraction_mask.shape, 3])
34 | contraction_mask_rgb[contraction_mask == 1] = [0, 0, 255]
35 | 
36 | erode = cv2.erode(mask, kernel=kernel)
37 | expansion_mask = mask - erode
38 | expansion_mask[boundary_valid] = 1
39 | expansion_mask_rgb = np.zeros([*expansion_mask.shape, 3])
40 | expansion_mask_rgb[expansion_mask == 1] = [255, 0, 0]
41 | 
42 | boundary_masks[boundary_masks == 1] = 255
43 | 
44 | contour, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
45 | boundary = np.zeros_like(mask)
46 | boundary = cv2.drawContours(boundary, contour, -1, 1, 2)
47 | boundary = boundary.astype(np.float)
48 | boundary[boundary == 1] = 255
49 | cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/contraction.jpg', contraction_mask_rgb)
50 | cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/expansion.jpg', expansion_mask_rgb)
51 | cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/boundary2.jpg', boundary_masks)
52 | cv2.imwrite('/Users/hhe/research/mm2021_cenet/teaser/boundary.jpg', boundary)
53 | 


--------------------------------------------------------------------------------
/tools/image2gif.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import os.path as osp
 4 | import imageio
 5 | import cv2
 6 | 
 7 | def get_args():
 8 |     parser = argparse.ArgumentParser()
 9 |     parser.add_argument('--image_path', required=True)
10 |     parser.add_argument('--save_path', required=True)
11 |     parser.add_argument('--img_height', type=int, default=360)
12 |     parser.add_argument('--img_width', type=int, default=640)
13 |     parser.add_argument('--fps', default=10, type=int)
14 | 
15 |     args = parser.parse_args()
16 | 
17 |     return args
18 | 
19 | def main():
20 |     args = get_args()
21 | 
22 |     images = os.listdir(args.image_path)
23 |     images = sorted(images)
24 |     gif_images = []
25 |     for image in images:
26 |         if image == '.DS_Store':
27 |             continue
28 |         img = imageio.imread(osp.join(args.image_path, image))
29 |         img = cv2.resize(img, (args.img_width, args.img_height))
30 |         gif_images.append(img)
31 |     imageio.mimsave(args.save_path, gif_images, fps=args.fps)
32 | 
33 | 
34 | if __name__ == '__main__':
35 |     main()


--------------------------------------------------------------------------------
/tools/rm_files.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/python3
 2 | # -*- coding:utf-8 -*-
 3 | 
 4 | import argparse
 5 | import os
 6 | import re
 7 | from colorama import Fore, Style
 8 | 
 9 | 
10 | def remove_parser():
11 |     parser = argparse.ArgumentParser()
12 |     parser.add_argument("--start-iter", "-s", type=int, default=0, help="start iter to remove")
13 |     parser.add_argument("--end-iter", "-e", type=int, default=0, help="end iter to remove")
14 |     parser.add_argument("--prefix", "-p", type=str, default="model_",
15 |                         help="prefix of model to remove")
16 |     parser.add_argument("--dir", "-d", type=str, default="/data/Outputs",
17 |                         help="dir to remove pth model")
18 |     parser.add_argument("--real", "-r", action="store_true",
19 |                         help="really delete or just show what you will delete")
20 |     return parser
21 | 
22 | 
23 | def remove_files(args):
24 |     start = args.start_iter
25 |     end = args.end_iter
26 |     prefix = args.prefix
27 |     for folder, _, files in os.walk(args.dir):
28 |         # l = [x for x in f if x.endswith(".pth")]
29 |         models = [f for f in files if re.search(prefix + r"[0123456789]*\.pth", f)]
30 |         delete = [os.path.join(folder, model) for model in models
31 |                   if start <= int(model[len(prefix):-len(".pth")]) <= end]
32 |         if delete:
33 |             for f in delete:
34 |                 if args.real:
35 |                     print(f"remove {f}")
36 |                     os.remove(f)
37 |                 else:
38 |                     print(f"you may remove {f}")
39 |     if not args.real:
40 |         print(Fore.RED + "use --real parameter to really delete models" + Style.RESET_ALL)
41 | 
42 | 
43 | def main():
44 |     args = remove_parser().parse_args()
45 |     remove_files(args)
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     main()
50 | 


--------------------------------------------------------------------------------