├── .gitignore ├── LICENSE ├── Motion_Dataset_Download.md ├── Motion_Dataset_Preview.md ├── README.md ├── configs ├── data │ ├── cscapes.py │ ├── cscapesvps.py │ ├── cscapesvps_motion.py │ ├── cscapesvps_motion_supp.py │ ├── cscapesvps_motion_supp_2048.py │ ├── cscapesvps_repeat.py │ ├── davis.py │ ├── idd.py │ ├── idd_annots.py │ ├── idd_supp.py │ ├── kittimots.py │ ├── kittimots_motion.py │ └── kittimots_motion_supp.py ├── experiments │ └── general.py ├── infer_cscapesvps.py ├── infer_cscapesvps_pq.py ├── infer_kittimots.py ├── misc │ ├── debug_classagnostic_loaders.py │ ├── visualise_mod_cscapesvps.py │ └── visualise_mod_kittimots.py └── models │ ├── backbone_1stream.py │ ├── backbone_2stream.py │ ├── backbone_2stream_tfstyle.py │ ├── bbox_head.py │ ├── ca_appearance_mahalanobis_head.py │ ├── ca_appearance_map.py │ ├── ca_motion_head.py │ ├── neck.py │ └── panoptic_head.py ├── images └── VCA_Teaser.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ ├── guided_anchor_target.py │ │ ├── point_generator.py │ │ └── point_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ └── point_assigner.py │ │ ├── bbox_target.py │ │ ├── demodata.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── coco_utils.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ ├── matrix_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ ├── colormap.py │ │ ├── dist_utils.py │ │ ├── map.py │ │ ├── misc.py │ │ └── post_proc_utils.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── cityscapes.py │ ├── cityscapes_ps.py │ ├── cityscapes_vps.py │ ├── cityscapes_vps_segonly.py │ ├── coco.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── kittimots.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── motion_dataset.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_aug.py │ │ └── transforms.py │ ├── registry.py │ ├── utils.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── metrics.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── atss_head.py │ │ ├── decoupled_solo_head.py │ │ ├── decoupled_solo_light_head.py │ │ ├── fcos_head.py │ │ ├── fovea_head.py │ │ ├── free_anchor_retina_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── panoptic_head.py │ │ ├── reppoints_head.py │ │ ├── retina_head.py │ │ ├── retina_sepbn_head.py │ │ ├── rpn_head.py │ │ ├── solo_head.py │ │ ├── solov2_head.py │ │ ├── solov2_light_head.py │ │ └── ssd_head.py │ ├── 
backbones │ │ ├── __init__.py │ │ ├── hrnet.py │ │ ├── resnet.py │ │ ├── resnet_tfstyle.py │ │ ├── resnext.py │ │ └── ssd_vgg.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ ├── convfc_bbox_head.py │ │ └── double_bbox_head.py │ ├── builder.py │ ├── ca_heads │ │ ├── __init__.py │ │ ├── appearance_ca_abstract.py │ │ ├── appearance_ca_mahalanobis.py │ │ ├── appearance_ca_map.py │ │ └── compose_ca.py │ ├── detectors │ │ ├── __init__.py │ │ ├── atss.py │ │ ├── base.py │ │ ├── cascade_rcnn.py │ │ ├── double_head_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── fovea.py │ │ ├── grid_rcnn.py │ │ ├── htc.py │ │ ├── mask_rcnn.py │ │ ├── mask_scoring_rcnn.py │ │ ├── reppoints_detector.py │ │ ├── retinanet.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ ├── single_stage_ins.py │ │ ├── solo.py │ │ ├── solov2.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── fcn_mask_head.py │ │ ├── fused_semantic_head.py │ │ ├── grid_head.py │ │ ├── htc_mask_head.py │ │ ├── mask_feat_head.py │ │ └── maskiou_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── fpn.py │ │ ├── fpn_flo_warp.py │ │ ├── hrfpn.py │ │ └── nas_fpn.py │ ├── plugins │ │ ├── __init__.py │ │ ├── generalized_attention.py │ │ └── non_local.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ ├── track_heads │ │ ├── __init__.py │ │ └── track_head.py │ ├── tracker_inference.py │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── conv_ws.py │ │ ├── fpn_utils.py │ │ ├── functional.py │ │ ├── functions │ │ ├── __init__.py │ │ ├── aggregation_refpad.py │ │ ├── aggregation_zeropad.py │ │ └── utils.py │ │ ├── modules │ │ ├── __init__.py │ │ └── aggregation.py │ │ ├── norm.py │ │ ├── scale.py │ │ ├── sta_module.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── context_block.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ ├── deform_pool.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── masked_conv │ │ ├── __init__.py │ │ ├── masked_conv.py │ │ └── src │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ └── masked_conv2d_kernel.cu │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ ├── nms_kernel.cu │ │ │ ├── soft_nms_cpu.cpp │ │ │ └── soft_nms_cpu.pyx │ ├── roi_align │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_pool.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ ├── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ │ ├── sigmoid_focal_loss.cpp │ │ │ └── sigmoid_focal_loss_cuda.cu │ └── utils │ │ ├── __init__.py │ │ └── src │ │ └── compiling_info.cpp ├── utils │ ├── __init__.py │ ├── contextmanagers.py │ ├── flops_counter.py │ ├── logger.py │ ├── profiling.py │ ├── registry.py │ └── util_mixins.py └── version.py ├── requirements.txt ├── requirements ├── build.txt ├── optional.txt ├── runtime.txt └── tests.txt ├── setup.py ├── tests └── 
test_loader.py └── tools ├── __init__.py ├── dataset ├── __init__.py ├── base_dataset.py └── cityscapes_vps.py ├── test_eval_caq.py ├── test_eval_ipq.py ├── test_vis.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Motion_Dataset_Download.md: -------------------------------------------------------------------------------- 1 | # Dataset Preparation 2 | 3 | We build our motion annotations on KITTI-MOTS [1] and Cityscapes-VPS [2]. 4 | 5 | ## KITTI 6 | 7 | * Images: Download the original [KITTI-MOTS Dataset](http://www.cvlibs.net/datasets/kitti/eval_instance_seg.php?benchmark=instanceSeg2015). 8 | * Flow: Download the precomputed [Flow](https://drive.google.com/file/d/1tIyRKO5o9imAF3huUo0s-R-ys4znly5t/view?usp=sharing). 9 | * Annotations: Download the [motion annotations](https://drive.google.com/drive/folders/1whMm0NMzkz77jQRHkQeNNQsuAGEVekqk?usp=sharing). 10 | * Construct the dataset folder with the following structure: 11 | ``` 12 | . 
13 | +-- Images 14 | +-- Flow 15 | +-- Flow_Suppressed 16 | +-- Annotations 17 | ``` 18 | 19 | ## Cityscapes 20 | * Download motion annotations for Cityscapes 21 | * Images: Download the original [Cityscapes-VPS](https://www.dropbox.com/s/ecem4kq0fdkver4/cityscapes-vps-dataset-1.0.zip?dl=0). Follow full instructions [here](https://github.com/mcahny/vps/blob/master/docs/DATASET.md). 22 | * Flow: Download the precomputed [Flow](https://drive.google.com/file/d/1HE4WTIW7HvjpQPU2wZ-eD6CVxmlAwigb/view?usp=sharing). 23 | * Annotations: Download the [motion annotations](https://drive.google.com/drive/folders/1whMm0NMzkz77jQRHkQeNNQsuAGEVekqk?usp=sharing). 24 | * Construct the dataset folder with the following structure: 25 | ``` 26 | . 27 | +-- train 28 | | +-- images 29 | | +-- flow 30 | | +-- flow_suppressed 31 | +-- val 32 | | +-- images 33 | | +-- flow 34 | | +-- flow_suppressed 35 | +-- annotations 36 | ``` 37 | 38 | # References 39 | 40 | [1] Voigtlaender, Paul, et al. "MOTS: Multi-object tracking and segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019. 41 | [2] Kim, Dahun, et al. "Video panoptic segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020. 42 | -------------------------------------------------------------------------------- /Motion_Dataset_Preview.md: -------------------------------------------------------------------------------- 1 | # Motion Segmentation Datasets Preview 2 | 3 | We build our motion annotations on KITTI-MOTS [1] and Cityscapes-VPS [2] by annotating each instance at the trajectory level. 4 | 5 | ## KITTI-MOTS 6 | 7 | Preview of the 20 sequences annotated for moving and static cars and pedestrians: 8 | 9 | * [sequence 1](https://youtu.be/Oc7foi7XTEM) 10 | * [sequence 2](https://youtu.be/Z_mParrlYpU) 11 | * [sequence 3](https://youtu.be/PWk0jKMP0B8) 12 | * [sequence 4](https://youtu.be/2gfuRXWkWmY) 13 | * [sequence 5](https://youtu.be/RjTyhaiFe7c) 14 | * [sequence 6](https://youtu.be/TWyId0iuUSY) 15 | * [sequence 7](https://youtu.be/ig08tI8x3g4) 16 | * [sequence 8](https://youtu.be/TN8jebMbuds) 17 | * [sequence 9](https://youtu.be/T_vu9qvxCjI) 18 | * [sequence 10](https://youtu.be/sQNbSSdT2Wg) 19 | * [sequence 11](https://youtu.be/gP3mHmP1dmw) 20 | * [sequence 12](https://youtu.be/COvfmAYXpFc) 21 | * [sequence 13](https://youtu.be/X4tLTSclGKg) 22 | * [sequence 14](https://youtu.be/Stjq2sgiu8I) 23 | * [sequence 15](https://youtu.be/Gh5YYdWv_ic) 24 | * [sequence 16](https://youtu.be/w9kpmHX8vNo) 25 | * [sequence 17](https://youtu.be/osjucd3pbNg) 26 | * [sequence 18](https://youtu.be/IN9vH29qu4U) 27 | * [sequence 19](https://youtu.be/koApyZS0dus) 28 | * [sequence 20](https://youtu.be/PJvTPKu9eGs) 29 | 30 | ## Cityscapes-VPS 31 | 32 | Preview of the 500 sequences, with 6 frames per sequence, annotated for moving and static objects: 33 | 34 | [preview](https://youtu.be/rTQ1OIqF6Dw) 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video Class Agnostic Segmentation 2 | \[[Method Paper]()\] \[[Benchmark Paper](https://arxiv.org/pdf/2103.11015.pdf)\] \[[Project](https://msiam.github.io/vca/)\] \[[Demo](https://www.youtube.com/watch?v=c9hMFHdTs6M)\] 3 | 4 | Official datasets and implementation from our paper "Video Class Agnostic Segmentation Benchmark in Autonomous Driving", presented in the Workshop on Autonomous Driving, CVPR 2021. 5 | 6 |
7 | ![VCA Teaser](images/VCA_Teaser.png) 8 | 
9 | 10 | 11 | # Installation 12 | This repo is tested with Python 3.6 and PyTorch 1.4. 13 | 14 | * Install Required Packages 15 | ``` 16 | pip install -r requirements.txt 17 | pip install "git+https://github.com/cocodataset/panopticapi.git" 18 | ``` 19 | 20 | * Set up mmdet 21 | ``` 22 | python setup.py develop 23 | ``` 24 | 25 | # Motion Segmentation Track 26 | ## Dataset Preparation 27 | 28 | * Follow the Dataset Preparation [Instructions](https://github.com/MSiam/video_class_agnostic_segmentation/blob/main/Motion_Dataset_Download.md). 29 | * Low-resolution view of the [full dataset](https://www.youtube.com/playlist?list=PL4jKsHbreeuBhEmzcL94JxWzVear79r5z) 30 | 31 | ## Inference 32 | 33 | * Download [Trained Weights](https://drive.google.com/file/d/16qEH0WoFVt0n6Ooi6zl4ymWKZYv1YVJ8/view?usp=sharing), trained on Cityscapes and KITTI-MOTS with ego-flow-suppressed optical flow 34 | 35 | * Modify the configs below according to your dataset path and Image/Annotation/Flow prefixes 36 | ``` 37 | configs/data/kittimots_motion_supp.py 38 | configs/data/cscapesvps_motion_supp.py 39 | ``` 40 | 41 | * Evaluate CAQ: 42 | ``` 43 | python tools/test_eval_caq.py CONFIG_FILE WEIGHTS_FILE 44 | ``` 45 | CONFIG_FILE: configs/infer_kittimots.py or configs/infer_cscapesvps.py 46 | 47 | 48 | * Qualitative Results: 49 | ``` 50 | python tools/test_vis.py CONFIG_FILE WEIGHTS_FILE --vis_unknown --save_dir OUTS_DIR 51 | ``` 52 | 53 | * Evaluate Image Panoptic Quality. Note: evaluation is performed on 1024x2048 images 54 | ``` 55 | python tools/test_eval_ipq.py configs/infer_cscapesvps_pq.py WEIGHTS_FILE --out PKL_FILE 56 | ``` 57 | 58 | ## Training 59 | 60 | Coming Soon ... 61 | 62 | # Open-set Segmentation Track 63 | 64 | Coming soon ... 65 | 66 | # Acknowledgements 67 | 68 | The dataset and repository rely on these sources: 69 | 70 | * Voigtlaender, Paul, et al. "MOTS: Multi-object tracking and segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019. 71 | * Kim, Dahun, et al. "Video panoptic segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020. 72 | * Wang, Xinlong, et al. "SOLO: Segmenting objects by locations." European Conference on Computer Vision. Springer, Cham, 2020. 73 | * This repository is built upon the [SOLO Code](https://github.com/WXinlong/SOLO) 74 | 75 | # Citation 76 | 77 | ``` 78 | @article{siam2021video, 79 | title={Video Class Agnostic Segmentation Benchmark for Autonomous Driving}, 80 | author={Mennatullah Siam and Alex Kendall and Martin Jagersand}, 81 | year={2021}, 82 | eprint={2103.11015}, 83 | archivePrefix={arXiv}, 84 | primaryClass={cs.CV} 85 | } 86 | ``` 87 | 88 | # Contact 89 | If you have any questions regarding the dataset or repository, please contact menna.seyam@gmail.com. 
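As a quick check that the installation and weights are wired up, the exported mmdet helpers can also be driven from Python. This is a hedged sketch, not a documented entry point of this repo: it assumes the `init_detector`/`inference_detector`/`show_result_ins` helpers listed in `mmdet/apis/__init__.py` keep the upstream SOLO signatures, and that the configured test pipeline handles the flow input; all paths below are placeholders.

```
# Hedged sketch: programmatic inference via the helpers exported in
# mmdet/apis/__init__.py. Paths are placeholders; signatures are assumed to
# match the upstream SOLO code this repository builds on.
from mmdet.apis import inference_detector, init_detector, show_result_ins

config_file = 'configs/infer_kittimots.py'
checkpoint_file = 'ca_motion_weights.pth'  # placeholder name for the downloaded weights

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'path/to/frame.png')  # placeholder image path
show_result_ins('path/to/frame.png', result, model.CLASSES,
                score_thr=0.25, out_file='frame_out.png')
```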
90 | -------------------------------------------------------------------------------- /configs/data/cscapes.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=[(2048, 1024), (2048, 992), (2048, 960),(2048, 928), (2048, 896), (2048, 864)], 9 | multiscale_mode='value', 10 | keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=[(2048, 1024)], 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | imgs_per_gpu=2, 34 | workers_per_gpu=1, 35 | train=dict( 36 | type=dataset_type, 37 | ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_train.json', 38 | img_prefix=data_root + 'train/', 39 | pipeline=train_pipeline), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', 43 | img_prefix=data_root + 'val/', 44 | pipeline=test_pipeline), 45 | test=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', 48 | img_prefix=data_root + 'val/', 49 | pipeline=test_pipeline)) 50 | 51 | -------------------------------------------------------------------------------- /configs/data/cscapesvps.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesVPSDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadRefImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 8 | with_seg=True, with_pid=True, 9 | # Cityscapes specific class mapping 10 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 11 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 12 | 17:17, 18:18, -1:255, 255:255},), 13 | dict(type='Resize', img_scale=[(2048, 1024)], keep_ratio=True, 14 | multiscale_mode='value', ratio_range=(0.8, 1.5)), 15 | dict(type='RandomFlip', flip_ratio=0.5), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='RandomCrop', crop_size=(800, 1600)), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 22 | 'gt_obj_ids', 'gt_masks', 'gt_semantic_seg', 23 | 'gt_semantic_seg_Nx', 'ref_img', 'ref_bboxes', 24 | 'ref_labels', 'ref_obj_ids', 'ref_masks']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadRefImageFromFile'), 28 | 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=[(2048, 1024)], 32 | flip=False, 33 | transforms=[ 34 | 
dict(type='Resize', keep_ratio=True), 35 | dict(type='RandomFlip'), 36 | dict(type='Normalize', **img_norm_cfg), 37 | dict(type='Pad', size_divisor=32), 38 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 39 | dict(type='Collect', keys=['img', 'ref_img']), 40 | ]) 41 | ] 42 | data = dict( 43 | imgs_per_gpu=1, 44 | workers_per_gpu=0, 45 | train=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 48 | 'instances_train_city_vps_rle.json', 49 | img_prefix=data_root + 'train/img/', 50 | ref_prefix=data_root + 'train/img/', 51 | seg_prefix=data_root + 'train/labelmap/', 52 | pipeline=train_pipeline, 53 | ref_ann_file=data_root + 54 | 'instances_train_city_vps_rle.json', 55 | offsets=[-1,+1]), 56 | val=dict( 57 | type=dataset_type, 58 | ann_file=data_root + 59 | 'instances_val_city_vps_rle.json', 60 | img_prefix=data_root + 'val/img/', 61 | pipeline=test_pipeline), 62 | test=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 65 | #'im_all_info_val_city_vps.json', 66 | 'instances_val_city_vps_rle.json', 67 | #img_prefix=data_root + 'val/img_all/', 68 | img_prefix=data_root + 'val/img/', 69 | ref_prefix=data_root + 'val/img/', 70 | seg_prefix=data_root + 'val/labelmap/', 71 | #nframes_span_test=30, 72 | nframes_span_test=6, 73 | pipeline=test_pipeline)) 74 | -------------------------------------------------------------------------------- /configs/data/cscapesvps_motion.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadFlowFromFile'), 10 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 11 | dict(type='Resize', img_scale=[(1024, 512), (1024, 496), (1024, 480),(1024, 464), (1024, 448), (1024, 432)], 12 | multiscale_mode='value', 13 | keep_ratio=True), 14 | dict(type='RandomFlip', flip_ratio=0.5), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='RandomCrop', crop_size=(300, 800)), 17 | dict(type='Pad', size_divisor=32), 18 | dict(type='DefaultFormatBundle'), 19 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict(type='LoadFlowFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(1024, 512), 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='Pad', size_divisor=32), 33 | dict(type='ImageToTensor', keys=['img', 'flow']), 34 | dict(type='Collect', keys=['img', 'flow']), 35 | ]) 36 | ] 37 | data = dict( 38 | imgs_per_gpu=2, 39 | workers_per_gpu=0, 40 | train=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'CityscapesVPS_MOSeg_train_Annotations.json', 43 | img_prefix=data_root + 'train/img/', 44 | flow_prefix=data_root + 'train/flow/', 45 | pipeline=train_pipeline), 46 | val=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 49 | img_prefix=data_root + 'val/img/', 50 | flow_prefix=data_root + 'val/flow/', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 55 | img_prefix=data_root + 'val/img/', 56 | flow_prefix=data_root + 'val/flow/', 57 | pipeline=test_pipeline)) 58 | 59 | 
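The config above and its siblings in configs/data/ are plain mmdetection-1.x-style Python files; the cscapesvps_motion_supp.py variant that follows overrides only the `data` dict (switching `flow/` to `flow_suppressed/`) via a star-import of this file. A minimal sketch of loading and building one of these datasets follows, under assumptions: it is run from the repository root, the data folders from Motion_Dataset_Download.md exist on disk, and `build_dataset` keeps the mmdetection 1.x behavior of mmdet/datasets/builder.py shown later in this dump.

```
# Minimal sketch (assumptions above): load a dataset config and build the
# train split; type='MotionDataset' is resolved through the dataset registry.
from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('configs/data/cscapesvps_motion_supp.py')
print(cfg.data.train.ann_file)     # data/cityscapes_vps/CityscapesVPS_MOSeg_train_Annotations.json
print(cfg.data.train.flow_prefix)  # data/cityscapes_vps/train/flow_suppressed/

dataset = build_dataset(cfg.data.train)
print(len(dataset))
```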
-------------------------------------------------------------------------------- /configs/data/cscapesvps_motion_supp.py: -------------------------------------------------------------------------------- 1 | from configs.data.cscapesvps_motion import * 2 | 3 | data = dict( 4 | imgs_per_gpu=2, 5 | workers_per_gpu=0, 6 | train=dict( 7 | type=dataset_type, 8 | ann_file=data_root + 'CityscapesVPS_MOSeg_train_Annotations.json', 9 | img_prefix=data_root + 'train/img/', 10 | flow_prefix=data_root + 'train/flow_suppressed/', 11 | pipeline=train_pipeline), 12 | val=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 15 | img_prefix=data_root + 'val/img/', 16 | flow_prefix=data_root + 'val/flow_suppressed/', 17 | pipeline=test_pipeline), 18 | test=dict( 19 | type=dataset_type, 20 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 21 | img_prefix=data_root + 'val/img/', 22 | flow_prefix=data_root + 'val/flow_suppressed/', 23 | pipeline=test_pipeline)) 24 | 25 | -------------------------------------------------------------------------------- /configs/data/cscapesvps_motion_supp_2048.py: -------------------------------------------------------------------------------- 1 | from configs.data.cscapesvps_motion import * 2 | 3 | test_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadFlowFromFile'), 6 | dict( 7 | type='MultiScaleFlipAug', 8 | img_scale=(2048, 1024), 9 | flip=False, 10 | transforms=[ 11 | dict(type='Resize', keep_ratio=True), 12 | dict(type='RandomFlip'), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='ImageToTensor', keys=['img', 'flow']), 16 | dict(type='Collect', keys=['img', 'flow']), 17 | ]) 18 | ] 19 | 20 | data = dict( 21 | imgs_per_gpu=2, 22 | workers_per_gpu=0, 23 | train=dict( 24 | type=dataset_type, 25 | ann_file=data_root + 'CityscapesVPS_MOSeg_train_Annotations.json', 26 | img_prefix=data_root + 'train/img/', 27 | flow_prefix=data_root + 'train/flow_suppressed/', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 32 | img_prefix=data_root + 'val/img/', 33 | flow_prefix=data_root + 'val/flow_suppressed/', 34 | pipeline=test_pipeline), 35 | test=dict( 36 | type=dataset_type, 37 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 38 | img_prefix=data_root + 'val/img/', 39 | flow_prefix=data_root + 'val/flow_suppressed/', 40 | pipeline=test_pipeline)) 41 | 42 | -------------------------------------------------------------------------------- /configs/data/cscapesvps_repeat.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesVPSDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadRefImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 8 | with_seg=True, with_pid=True, 9 | # Cityscapes specific class mapping 10 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 11 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 12 | 17:17, 18:18, -1:255, 255:255},), 13 | dict(type='Resize', img_scale=[(2048, 1024)], keep_ratio=True, 14 | multiscale_mode='value', ratio_range=(0.8, 1.5)), 15 | dict(type='RandomFlip', flip_ratio=0.5), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='RandomCrop', crop_size=(800, 
1600)), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 22 | 'gt_obj_ids', 'gt_masks', 'gt_semantic_seg', 23 | 'gt_semantic_seg_Nx', 'ref_img', 'ref_bboxes', 24 | 'ref_labels', 'ref_obj_ids', 'ref_masks']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadRefImageFromFile'), 28 | 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=[(2048, 1024)], 32 | flip=False, 33 | transforms=[ 34 | dict(type='Resize', keep_ratio=True), 35 | dict(type='RandomFlip'), 36 | dict(type='Normalize', **img_norm_cfg), 37 | dict(type='Pad', size_divisor=32), 38 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 39 | dict(type='Collect', keys=['img', 'ref_img']), 40 | ]) 41 | ] 42 | data = dict( 43 | imgs_per_gpu=1, 44 | workers_per_gpu=0, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=8, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 51 | 'instances_train_city_vps_rle.json', 52 | img_prefix=data_root + 'train/img/', 53 | ref_prefix=data_root + 'train/img/', 54 | seg_prefix=data_root + 'train/labelmap/', 55 | pipeline=train_pipeline, 56 | ref_ann_file=data_root + 57 | 'instances_train_city_vps_rle.json', 58 | offsets=[-1,+1])), 59 | val=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 62 | 'instances_val_city_vps_rle.json', 63 | img_prefix=data_root + 'val/img/', 64 | pipeline=test_pipeline), 65 | test=dict( 66 | type=dataset_type, 67 | ann_file=data_root + 68 | #'im_all_info_val_city_vps.json', 69 | 'instances_val_city_vps_rle.json', 70 | #img_prefix=data_root + 'val/img_all/', 71 | img_prefix=data_root + 'val/img/', 72 | ref_prefix=data_root + 'val/img/', 73 | #nframes_span_test=30, 74 | nframes_span_test=6, 75 | pipeline=test_pipeline)) 76 | -------------------------------------------------------------------------------- /configs/data/davis.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/DAVIS/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadFlowFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=[(854, 480), (854, 448), (854, 416), 10 | (854, 384), (854, 352), (854, 320)], 11 | multiscale_mode='value', 12 | keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='LoadFlowFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(854, 480), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img', 'flow']), 32 | dict(type='Collect', keys=['img', 'flow']), 33 | ]) 34 | ] 35 | data = dict( 36 | imgs_per_gpu=2, 37 | workers_per_gpu=0, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'Annotations_json/DAVIS_Unsupervised_train_Annotations.json', 41 | img_prefix=data_root + 'JPEGImages_480/', 42 | flow_prefix=data_root + 
'OpticalFlow_480/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'Annotations_json/DAVIS_Unsupervised_val_Annotations.json', 47 | img_prefix=data_root + 'JPEGImages_480/', 48 | flow_prefix=data_root + 'OpticalFlow_480/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'Annotations_json/DAVIS_Unsupervised_val_Annotations.json', 53 | img_prefix=data_root + 'JPEGImages_480/', 54 | flow_prefix=data_root + 'OpticalFlow_480/', 55 | pipeline=test_pipeline)) 56 | 57 | -------------------------------------------------------------------------------- /configs/data/idd.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/debug_ca_idd_data/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | 6 | test_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadFlowFromFile'), 9 | 10 | dict( 11 | type='MultiScaleFlipAug', 12 | img_scale=[(1280, 720)], 13 | flip=False, 14 | transforms=[ 15 | dict(type='Resize', keep_ratio=True), 16 | dict(type='RandomFlip'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='ImageToTensor', keys=['img', 'flow']), 20 | dict(type='Collect', keys=['img', 'flow']), 21 | ]) 22 | ] 23 | data = dict( 24 | test=dict( 25 | type=dataset_type, 26 | ann_file=data_root + 27 | 'IDD_Test.json', 28 | img_prefix=data_root+'/JPEGImages_480/', 29 | flow_prefix=data_root+'OpticalFlow_480_0/', 30 | pipeline=test_pipeline)) 31 | 32 | -------------------------------------------------------------------------------- /configs/data/idd_annots.py: -------------------------------------------------------------------------------- 1 | from configs.data.idd import * 2 | 3 | data = dict( 4 | test=dict( 5 | type=dataset_type, 6 | ann_file=data_root + 7 | 'IDD_Annotated_Annotations.json', 8 | img_prefix=data_root+'/images/', 9 | flow_prefix=data_root+'/flow/', 10 | pipeline=test_pipeline)) 11 | 12 | -------------------------------------------------------------------------------- /configs/data/idd_supp.py: -------------------------------------------------------------------------------- 1 | from configs.data.idd import * 2 | 3 | data = dict( 4 | test=dict( 5 | type=dataset_type, 6 | ann_file=data_root + 7 | 'IDD_Test.json', 8 | img_prefix=data_root+'/images/', 9 | flow_prefix=data_root+'flow_suppressed/', 10 | pipeline=test_pipeline)) 11 | 12 | -------------------------------------------------------------------------------- /configs/data/kittimots.py: -------------------------------------------------------------------------------- 1 | 2 | dataset_type = 'KITTIMOTSDataset' 3 | data_root = 'data/kitti_mots/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=[(1242, 375), (1242, 343), (1242, 311), 10 | (1242, 279), (1242, 247), (1242, 215)], 11 | multiscale_mode='value', 12 | keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | 
dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1242, 375), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip'), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | imgs_per_gpu=1, 36 | workers_per_gpu=0, 37 | train=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 'annotations/KITTIMOTS_train_Annotations.json', 40 | img_prefix=data_root + 'images/', 41 | pipeline=train_pipeline), 42 | val=dict( 43 | type=dataset_type, 44 | ann_file=data_root + 'annotations/KITTIMOTS_val_Annotations.json', 45 | img_prefix=data_root + 'images/', 46 | pipeline=test_pipeline), 47 | test=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/KITTIMOTS_val_Annotations.json', 50 | img_prefix=data_root + 'images/', 51 | pipeline=test_pipeline)) 52 | -------------------------------------------------------------------------------- /configs/data/kittimots_motion.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/kittimots_moseg/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadFlowFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=[(1242, 375), (1242, 343), (1242, 311), 10 | (1242, 279), (1242, 247), (1242, 215)], 11 | multiscale_mode='value', 12 | keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='LoadFlowFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(1242, 375), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img', 'flow']), 32 | dict(type='Collect', keys=['img', 'flow']), 33 | ]) 34 | ] 35 | data = dict( 36 | imgs_per_gpu=2, 37 | workers_per_gpu=0, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_train.json', 41 | img_prefix=data_root + 'images/', 42 | flow_prefix=data_root + 'flow/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 47 | img_prefix=data_root + 'images/', 48 | flow_prefix=data_root + 'flow/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 53 | img_prefix=data_root + 'images/', 54 | flow_prefix=data_root + 'flow/', 55 | pipeline=test_pipeline)) 56 | -------------------------------------------------------------------------------- /configs/data/kittimots_motion_supp.py: -------------------------------------------------------------------------------- 1 | from configs.data.kittimots_motion import * 2 | 3 | data = dict( 4 | imgs_per_gpu=2, 5 | workers_per_gpu=0, 6 | train=dict( 7 | type=dataset_type, 8 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_train.json', 
9 | img_prefix=data_root + 'images/', 10 | flow_prefix=data_root + 'flow_suppressed/', 11 | pipeline=train_pipeline), 12 | val=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 15 | img_prefix=data_root + 'images/', 16 | flow_prefix=data_root + 'flow_suppressed/', 17 | pipeline=test_pipeline), 18 | test=dict( 19 | type=dataset_type, 20 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 21 | img_prefix=data_root + 'images/', 22 | flow_prefix=data_root + 'flow_suppressed/', 23 | pipeline=test_pipeline)) 24 | -------------------------------------------------------------------------------- /configs/experiments/general.py: -------------------------------------------------------------------------------- 1 | train_cfg = dict(endtoend=False, train_tracker=False, train_ca=False, 2 | train_inst_seg=False, train_panoptic=False) 3 | 4 | test_cfg = dict( 5 | nms_pre=500, 6 | score_thr=0.1, 7 | mask_thr=0.5, 8 | update_thr=0.05, 9 | kernel='gaussian', # gaussian/linear 10 | sigma=2.0, 11 | max_per_img=100) 12 | 13 | # yapf:disable 14 | log_config = dict( 15 | interval=50, 16 | hooks=[ 17 | dict(type='TextLoggerHook'), 18 | dict(type='TensorboardLoggerHook') 19 | ]) 20 | device_ids = range(4) 21 | dist_params = dict(backend='nccl') 22 | log_level = 'INFO' 23 | load_from = None 24 | resume_from = None 25 | workflow = [('train', 1)] 26 | -------------------------------------------------------------------------------- /configs/infer_cscapesvps.py: -------------------------------------------------------------------------------- 1 | from configs.models.backbone_2stream import backbone 2 | from configs.models.neck import neck 3 | from configs.models.bbox_head import set_num_classes 4 | from configs.models.ca_motion_head import set_params 5 | from configs.models.panoptic_head import panoptic_head 6 | from configs.experiments.general import * 7 | from configs.data.cscapesvps_motion_supp import data as cscapesvps_data 8 | from configs.data.kittimots_motion_supp import data as kittimots_data 9 | from configs.data.cscapesvps_motion_supp import * 10 | 11 | 12 | # model settings 13 | bbox_head = set_num_classes(num_classes=9) 14 | ca_head = set_params(num_classes=3) 15 | 16 | # model settings 17 | model = dict( 18 | type='SOLO', 19 | pretrained='torchvision://resnet50', 20 | backbone=backbone, 21 | neck=neck, 22 | panoptic_head=panoptic_head, 23 | bbox_head=bbox_head, 24 | ca_head=ca_head, 25 | ) 26 | 27 | data = dict( 28 | imgs_per_gpu=2, 29 | workers_per_gpu=0, 30 | train=[kittimots_data['train'], cscapesvps_data['train']], 31 | val=cscapesvps_data['val'], 32 | test=cscapesvps_data['test'],) 33 | 34 | # optimizer 35 | total_epochs = 15 36 | optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) 37 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 38 | 39 | # learning policy 40 | lr_config = dict( 41 | policy='step', 42 | warmup='linear', 43 | warmup_iters=500, 44 | warmup_ratio=1.0 / 3, 45 | step=[6, 8]) 46 | checkpoint_config = dict(interval=5) 47 | 48 | # yapf:enable 49 | work_dir = './work_dirs/ca_motion/' 50 | pretrain_weights = './trained_models/panopticseg_cscapesvps.pth' 51 | ignore_clf = False 52 | same_nclasses = True 53 | freeze_vars={'backbone.appearance_stream':True, 'neck':True, 'bbox_head':True, 'panoptic_head':True} 54 | -------------------------------------------------------------------------------- /configs/infer_cscapesvps_pq.py: 
-------------------------------------------------------------------------------- 1 | from configs.models.backbone_2stream import backbone 2 | from configs.models.neck import neck 3 | from configs.models.bbox_head import set_num_classes 4 | from configs.models.ca_motion_head import set_params 5 | from configs.models.panoptic_head import panoptic_head 6 | from configs.experiments.general import * 7 | from configs.data.cscapesvps_motion_supp_2048 import data as cscapesvps_data 8 | from configs.data.kittimots_motion_supp import data as kittimots_data 9 | from configs.data.cscapesvps_motion_supp_2048 import * 10 | 11 | 12 | # model settings 13 | bbox_head = set_num_classes(num_classes=9) 14 | ca_head = set_params(num_classes=3) 15 | 16 | # model settings 17 | model = dict( 18 | type='SOLO', 19 | pretrained='torchvision://resnet50', 20 | backbone=backbone, 21 | neck=neck, 22 | panoptic_head=panoptic_head, 23 | bbox_head=bbox_head, 24 | ca_head=ca_head, 25 | ) 26 | 27 | data = dict( 28 | imgs_per_gpu=2, 29 | workers_per_gpu=0, 30 | train=[kittimots_data['train'], cscapesvps_data['train']], 31 | val=cscapesvps_data['val'], 32 | test=cscapesvps_data['test'],) 33 | 34 | # optimizer 35 | total_epochs = 15 36 | optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) 37 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 38 | 39 | # learning policy 40 | lr_config = dict( 41 | policy='step', 42 | warmup='linear', 43 | warmup_iters=500, 44 | warmup_ratio=1.0 / 3, 45 | step=[6, 8]) 46 | checkpoint_config = dict(interval=5) 47 | 48 | # yapf:enable 49 | work_dir = './work_dirs/ca_motion/' 50 | pretrain_weights = './trained_models/panopticseg_cscapesvps.pth' 51 | ignore_clf = False 52 | same_nclasses = True 53 | freeze_vars={'backbone.appearance_stream':True, 'neck':True, 'bbox_head':True, 'panoptic_head':True} 54 | -------------------------------------------------------------------------------- /configs/infer_kittimots.py: -------------------------------------------------------------------------------- 1 | from configs.models.backbone_2stream import backbone 2 | from configs.models.neck import neck 3 | from configs.models.bbox_head import set_num_classes 4 | from configs.models.ca_motion_head import set_params 5 | from configs.models.panoptic_head import panoptic_head 6 | from configs.experiments.general import * 7 | from configs.data.cscapesvps_motion_supp import data as cscapesvps_data 8 | from configs.data.kittimots_motion_supp import data as kittimots_data 9 | from configs.data.kittimots_motion_supp import * 10 | 11 | # model settings 12 | bbox_head = set_num_classes(num_classes=9) 13 | ca_head = set_params(num_classes=3) 14 | 15 | # model settings 16 | model = dict( 17 | type='SOLO', 18 | pretrained='torchvision://resnet50', 19 | backbone=backbone, 20 | neck=neck, 21 | panoptic_head=panoptic_head, 22 | bbox_head=bbox_head, 23 | ca_head=ca_head, 24 | max_nottrack=5 25 | ) 26 | 27 | data = dict( 28 | imgs_per_gpu=2, 29 | workers_per_gpu=0, 30 | train=[kittimots_data['train'], cscapesvps_data['train']], 31 | val=kittimots_data['val'], 32 | test=kittimots_data['test'],) 33 | 34 | # optimizer 35 | total_epochs = 15 36 | optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) 37 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 38 | 39 | # learning policy 40 | lr_config = dict( 41 | policy='step', 42 | warmup='linear', 43 | warmup_iters=500, 44 | warmup_ratio=1.0 / 3, 45 | step=[6, 8]) 46 | checkpoint_config = dict(interval=5) 47 | 48 | # 
yapf:enable 49 | work_dir = './work_dirs/ca_motion/' 50 | pretrain_weights = './trained_models/panopticseg_cscapesvps.pth' 51 | ignore_clf = False 52 | same_nclasses = True 53 | freeze_vars={'backbone.appearance_stream':True, 'neck':True, 'bbox_head':True, 'panoptic_head':True} 54 | -------------------------------------------------------------------------------- /configs/misc/debug_classagnostic_loaders.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | from configs.data.kittimots_motion_supp import data as data_kittimots_motion 3 | from configs.data.cscapesvps_motion_supp import data as data_cscapesvps_motion 4 | 5 | img_norm_cfg = dict( 6 | mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=True) 7 | 8 | for idx, pipeline in enumerate(data_kittimots_motion['train']['pipeline']): 9 | if pipeline['type'] == 'Normalize': 10 | data_kittimots_motion['train']['pipeline'][idx]['mean'] = img_norm_cfg['mean'] 11 | data_kittimots_motion['train']['pipeline'][idx]['std'] = img_norm_cfg['std'] 12 | data_kittimots_motion['train']['pipeline'][idx]['to_rgb'] = img_norm_cfg['to_rgb'] 13 | 14 | for idx, pipeline in enumerate(data_cscapesvps_motion['train']['pipeline']): 15 | if pipeline['type'] == 'Normalize': 16 | data_cscapesvps_motion['train']['pipeline'][idx]['mean'] = img_norm_cfg['mean'] 17 | data_cscapesvps_motion['train']['pipeline'][idx]['std'] = img_norm_cfg['std'] 18 | data_cscapesvps_motion['train']['pipeline'][idx]['to_rgb'] = img_norm_cfg['to_rgb'] 19 | 20 | data = dict( 21 | imgs_per_gpu=1, 22 | workers_per_gpu=0, 23 | train=[data_kittimots_motion['train'], data_cscapesvps_motion['train']], 24 | val=data_kittimots_motion['val'], 25 | test=data_kittimots_motion['test'],) 26 | -------------------------------------------------------------------------------- /configs/misc/visualise_mod_cscapesvps.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesVPSDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | img_norm_cfg = dict( 4 | mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadRefImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 8 | with_seg=True, with_pid=True, 9 | # Cityscapes specific class mapping 10 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 11 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 12 | 17:17, 18:18, -1:255, 255:255},), 13 | dict(type='Resize', img_scale=[(2048, 1024)], keep_ratio=True), 14 | dict(type='RandomFlip'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 19 | 'gt_obj_ids', 'gt_masks', 'gt_semantic_seg', 20 | 'gt_semantic_seg_Nx', 'ref_img', 'ref_bboxes', 21 | 'ref_labels', 'ref_obj_ids', 'ref_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadRefImageFromFile'), 25 | 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=[(2048, 1024)], 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 36 | dict(type='Collect', keys=['img', 'ref_img']), 37 | ]) 38 | ] 39 | data = dict( 40 | imgs_per_gpu=1, 41 | workers_per_gpu=0, 42 | train=dict( 43 | type=dataset_type, 44 | 
ann_file=data_root + 45 | 'instances_train_city_vps_rle.json', 46 | img_prefix=data_root + 'train/img/', 47 | ref_prefix=data_root + 'train/img/', 48 | seg_prefix=data_root + 'train/labelmap/', 49 | pipeline=train_pipeline, 50 | ref_ann_file=data_root + 51 | 'instances_train_city_vps_rle.json', 52 | offsets=[-1,+1]), 53 | val=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 56 | 'instances_val_city_vps_rle.json', 57 | img_prefix=data_root + 'val/img/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 62 | #'im_all_info_val_city_vps.json', 63 | 'instances_val_city_vps_rle.json', 64 | #img_prefix=data_root + 'val/img_all/', 65 | img_prefix=data_root + 'val/img/', 66 | ref_prefix=data_root + 'val/img/', 67 | seg_prefix=data_root + 'val/labelmap/', 68 | #nframes_span_test=30, 69 | nframes_span_test=6, 70 | pipeline=test_pipeline)) 71 | -------------------------------------------------------------------------------- /configs/misc/visualise_mod_kittimots.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/kittimots_moseg/' 3 | img_norm_cfg = dict( 4 | mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadFlowFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 9 | with_seg=True, with_pid=True, 10 | # Cityscapes specific class mapping 11 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 12 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 13 | 17:17, 18:18, -1:255, 255:255},), 14 | dict(type='Resize', img_scale=[(1242, 375)], keep_ratio=True), 15 | dict(type='RandomFlip'), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 18 | dict(type='DefaultFormatBundle'), 19 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadRefImageFromFile'), 23 | 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=[(1242, 375)], 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='Pad', size_divisor=32), 33 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 34 | dict(type='Collect', keys=['img', 'ref_img']), 35 | ]) 36 | ] 37 | data = dict( 38 | imgs_per_gpu=2, 39 | workers_per_gpu=0, 40 | train=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_train_3classes_Annotations.json', 43 | img_prefix=data_root + 'JPEGImages_480/', 44 | flow_prefix=data_root + 'OpticalFlow_480/', 45 | pipeline=train_pipeline), 46 | val=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val_3classes_Annotations.json', 49 | img_prefix=data_root + 'JPEGImages_480/', 50 | flow_prefix=data_root + 'OpticalFlow_480/', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val_3classes_Annotations.json', 55 | img_prefix=data_root + 'JPEGImages_480/', 56 | flow_prefix=data_root + 'OpticalFlow_480/', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /configs/models/backbone_1stream.py: -------------------------------------------------------------------------------- 1 | backbone=dict( 2 | type='ResNet', 3 | depth=50, 4 | 
num_stages=4, 5 | out_indices=(0, 1, 2, 3), # C2, C3, C4, C5 6 | frozen_stages=1, 7 | style='pytorch') 8 | 9 | -------------------------------------------------------------------------------- /configs/models/backbone_2stream.py: -------------------------------------------------------------------------------- 1 | backbone=dict( 2 | type='TwoStreamResNet', 3 | depth=50, 4 | num_stages=4, 5 | out_indices=(0, 1, 2, 3), # C2, C3, C4, C5 6 | frozen_stages=1, 7 | style='pytorch') 8 | -------------------------------------------------------------------------------- /configs/models/backbone_2stream_tfstyle.py: -------------------------------------------------------------------------------- 1 | backbone=dict( 2 | type='TwoStreamResNetTFStyle', 3 | layers=[3, 4, 6, 3], 4 | width_multiplier=1, 5 | sk_ratio=0, 6 | out_indices=(0, 1, 2, 3), # C2, C3, C4, C5 7 | frozen_stages=3 8 | ) 9 | 10 | def set_frozen_stages(frozen_stages=1): 11 | backbone['frozen_stages'] = frozen_stages 12 | return backbone 13 | -------------------------------------------------------------------------------- /configs/models/bbox_head.py: -------------------------------------------------------------------------------- 1 | bbox_head=dict( 2 | type='DecoupledSOLOHead', 3 | num_classes=9, 4 | in_channels=256, 5 | stacked_convs=7, 6 | seg_feat_channels=256, 7 | strides=[8, 8, 16, 32, 32], 8 | scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), 9 | sigma=0.2, 10 | num_grids=[80, 72, 64, 48, 32], 11 | cate_down_pos=0, 12 | with_deform=False, 13 | loss_ins=dict( 14 | type='DiceLoss', 15 | use_sigmoid=True, 16 | loss_weight=3.0), 17 | loss_cate=dict( 18 | type='FocalLoss', 19 | use_sigmoid=True, 20 | gamma=2.0, 21 | alpha=0.25, 22 | loss_weight=1.0), 23 | ) 24 | 25 | def set_num_classes(num_classes): 26 | bbox_head['num_classes'] = num_classes 27 | return bbox_head 28 | -------------------------------------------------------------------------------- /configs/models/ca_appearance_mahalanobis_head.py: -------------------------------------------------------------------------------- 1 | ca_head=dict(type='MahalanobisAppearanceBasedClassAgnosticHead', 2 | n_convs=4, 3 | clustering_type='dbscan', 4 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True), 5 | num_classes=19, 6 | interm_channels=256, 7 | merge_fpn=True 8 | ) 9 | 10 | def set_params(num_classes, ca_label, merge_fpn=True, merge_average=True): 11 | ca_head['num_classes'] = num_classes 12 | ca_head['ca_label'] = ca_label 13 | ca_head['merge_fpn'] = merge_fpn 14 | ca_head['merge_average'] = merge_average 15 | return ca_head 16 | -------------------------------------------------------------------------------- /configs/models/ca_appearance_map.py: -------------------------------------------------------------------------------- 1 | ca_head=dict(type='MAPClassAgnosticHead', 2 | num_classes=11) 3 | -------------------------------------------------------------------------------- /configs/models/ca_motion_head.py: -------------------------------------------------------------------------------- 1 | ca_head=dict( 2 | type='DecoupledSOLOHead', 3 | num_classes=3, 4 | in_channels=256, 5 | stacked_convs=7, 6 | seg_feat_channels=256, 7 | strides=[8, 8, 16, 32, 32], 8 | scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), 9 | sigma=0.2, 10 | num_grids=[80, 72, 64, 48, 32], 11 | cate_down_pos=0, 12 | with_deform=False, 13 | loss_ins=dict( 14 | type='DiceLoss', 15 | use_sigmoid=True, 16 | loss_weight=3.0), 17 | loss_cate=dict( 18 | type='FocalLoss', 19 | 
use_sigmoid=True, 20 | gamma=2.0, 21 | alpha=0.25, 22 | loss_weight=1.0, 23 | cate_loss_weight=[1.0, 1.0] 24 | ) 25 | ) 26 | 27 | def set_params(num_classes, loss_weights=[1.0, 1.0]): 28 | ca_head['num_classes'] = num_classes 29 | ca_head['loss_cate']['cate_loss_weight'] = loss_weights 30 | return ca_head 31 | -------------------------------------------------------------------------------- /configs/models/neck.py: -------------------------------------------------------------------------------- 1 | neck=dict( 2 | type='FPN', 3 | in_channels=[256, 512, 1024, 2048], 4 | out_channels=256, 5 | start_level=0, 6 | num_outs=5) 7 | -------------------------------------------------------------------------------- /configs/models/panoptic_head.py: -------------------------------------------------------------------------------- 1 | panoptic_head=dict(type='SimpleSegHead', 2 | num_classes=19, 3 | in_channels=256, 4 | seg_feats_channel=256, 5 | stacked_convs=5, 6 | original_image_size=(1600, 800)) 7 | 8 | def set_params(num_classes=19): 9 | panoptic_head['num_classes'] = num_classes 10 | return panoptic_head 11 | -------------------------------------------------------------------------------- /images/VCA_Teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSiam/video_class_agnostic_segmentation/59fd84485e87c0f5895110240837b76325dde657/images/VCA_Teaser.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSiam/video_class_agnostic_segmentation/59fd84485e87c0f5895110240837b76325dde657/mmdet/__init__.py -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result, show_result_pyplot, show_result_ins) 3 | from .train import get_root_logger, set_random_seed, train_detector 4 | 5 | __all__ = [ 6 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 7 | 'async_inference_detector', 'inference_detector', 'show_result', 8 | 'show_result_pyplot', 'show_result_ins' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import (anchor_inside_flags, anchor_target, 3 | images_to_levels, unmap) 4 | from .guided_anchor_target import ga_loc_target, ga_shape_target 5 | from .point_generator import PointGenerator 6 | from .point_target import point_target 7 | 8 | __all__ = [ 9 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 10 | 'ga_shape_target', 'PointGenerator', 'point_target', 
'images_to_levels', 11 | 'unmap' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PointGenerator(object): 5 | 6 | def _meshgrid(self, x, y, row_major=True): 7 | xx = x.repeat(len(y)) 8 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 9 | if row_major: 10 | return xx, yy 11 | else: 12 | return yy, xx 13 | 14 | def grid_points(self, featmap_size, stride=16, device='cuda'): 15 | feat_h, feat_w = featmap_size 16 | shift_x = torch.arange(0., feat_w, device=device) * stride 17 | shift_y = torch.arange(0., feat_h, device=device) * stride 18 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 19 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 20 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 21 | all_points = shifts.to(device) 22 | return all_points 23 | 24 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 25 | feat_h, feat_w = featmap_size 26 | valid_h, valid_w = valid_size 27 | assert valid_h <= feat_h and valid_w <= feat_w 28 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 29 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 30 | valid_x[:valid_w] = 1 31 | valid_y[:valid_h] = 1 32 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 33 | valid = valid_xx & valid_yy 34 | return valid 35 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target 3 | from .geometry import bbox_overlaps 4 | from .samplers import (BaseSampler, CombinedSampler, 5 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 6 | PseudoSampler, RandomSampler, SamplingResult) 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, 8 | bbox_mapping, bbox_mapping_back, delta2bbox, 9 | distance2bbox, roi2bbox) 10 | 11 | from .assign_sampling import ( # isort:skip, avoid recursive imports 12 | assign_and_sample, build_assigner, build_sampler) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 16 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 19 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 20 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 21 | 'distance2bbox', 'bbox_target' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building an assigner'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .max_iou_assigner import MaxIoUAssigner 6 | from .point_assigner import PointAssigner 7 | 8 | __all__ = [ 9 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 10 | 'PointAssigner', 'ATSSAssigner' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..utils import multi_apply 4 | from .transforms import bbox2delta 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 |
labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | 61 | return labels, label_weights, bbox_targets, bbox_weights 62 | 63 | 64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 65 | bbox_targets_expand = bbox_targets.new_zeros( 66 | (bbox_targets.size(0), 4 * num_classes)) 67 | bbox_weights_expand = bbox_weights.new_zeros( 68 | (bbox_weights.size(0), 4 * num_classes)) 69 | for i in torch.nonzero(labels > 0).squeeze(-1): 70 | start, end = labels[i] * 4, (labels[i] + 1) * 4 71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 73 | return bbox_targets_expand, bbox_weights_expand 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """ 7 | Simple version of the ``kwarray.ensure_rng`` 8 | 9 | Args: 10 | rng (int | numpy.random.RandomState | None): 11 | if None, then defaults to the global rng. Otherwise this can be an 12 | integer or a RandomState class 13 | Returns: 14 | (numpy.random.RandomState) : rng - 15 | a numpy random number generator 16 | 17 | References: 18 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 19 | """ 20 | 21 | if rng is None: 22 | rng = np.random.mtrand._rand 23 | elif isinstance(rng, int): 24 | rng = np.random.RandomState(rng) 25 | else: 26 | rng = rng 27 | return rng 28 | 29 | 30 | def random_boxes(num=1, scale=1, rng=None): 31 | """ 32 | Simple version of ``kwimage.Boxes.random`` 33 | 34 | Returns: 35 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 
36 | 37 | References: 38 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 39 | 40 | Example: 41 | >>> num = 3 42 | >>> scale = 512 43 | >>> rng = 0 44 | >>> boxes = random_boxes(num, scale, rng) 45 | >>> print(boxes) 46 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 47 | [216.9113, 330.6978, 224.0446, 456.5878], 48 | [405.3632, 196.3221, 493.3953, 270.7942]]) 49 | """ 50 | rng = ensure_rng(rng) 51 | 52 | tlbr = rng.rand(num, 4).astype(np.float32) 53 | 54 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 55 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 56 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 57 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 58 | 59 | tlbr[:, 0] = tl_x * scale 60 | tlbr[:, 1] = tl_y * scale 61 | tlbr[:, 2] = br_x * scale 62 | tlbr[:, 3] = br_y * scale 63 | 64 | boxes = torch.from_numpy(tlbr) 65 | return boxes 66 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two sets of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format. 13 | bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format. 14 | If is_aligned is ``True``, then m and n must be equal. 15 | mode (str): "iou" (intersection over union) or "iof" (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | 21 | Example: 22 | >>> bboxes1 = torch.FloatTensor([ 23 | >>> [0, 0, 10, 10], 24 | >>> [10, 10, 20, 20], 25 | >>> [32, 32, 38, 42], 26 | >>> ]) 27 | >>> bboxes2 = torch.FloatTensor([ 28 | >>> [0, 0, 10, 20], 29 | >>> [0, 10, 10, 19], 30 | >>> [10, 10, 20, 20], 31 | >>> ]) 32 | >>> bbox_overlaps(bboxes1, bboxes2) 33 | tensor([[0.5238, 0.0500, 0.0041], 34 | [0.0323, 0.0452, 1.0000], 35 | [0.0000, 0.0000, 0.0000]]) 36 | 37 | Example: 38 | >>> empty = torch.FloatTensor([]) 39 | >>> nonempty = torch.FloatTensor([ 40 | >>> [0, 0, 10, 9], 41 | >>> ]) 42 | >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) 43 | >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) 44 | >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) 45 | """ 46 | 47 | assert mode in ['iou', 'iof'] 48 | 49 | rows = bboxes1.size(0) 50 | cols = bboxes2.size(0) 51 | if is_aligned: 52 | assert rows == cols 53 | 54 | if rows * cols == 0: 55 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 56 | 57 | if is_aligned: 58 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 59 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 60 | 61 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 62 | overlap = wh[:, 0] * wh[:, 1] 63 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 64 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 65 | 66 | if mode == 'iou': 67 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 68 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 69 | ious = overlap / (area1 + area2 - overlap) 70 | else: 71 | ious = overlap / area1 72 | else: 73 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 74 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 75 | 76 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols,
2] 77 | overlap = wh[:, :, 0] * wh[:, :, 1] 78 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 79 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 80 | 81 | if mode == 'iou': 82 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 83 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 84 | ious = overlap / (area1[:, None] + area2 - overlap) 85 | else: 86 | ious = overlap / (area1[:, None]) 87 | 88 | return ious 89 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > 
num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | """ 9 | Online Hard Example Mining Sampler described in [1]_. 10 | 11 | References: 12 | .. [1] https://arxiv.org/pdf/1604.03540.pdf 13 | """ 14 | 15 | def __init__(self, 16 | num, 17 | pos_fraction, 18 | context, 19 | neg_pos_ub=-1, 20 | add_gt_as_proposals=True, 21 | **kwargs): 22 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 23 | add_gt_as_proposals) 24 | if not hasattr(context, 'num_stages'): 25 | self.bbox_roi_extractor = context.bbox_roi_extractor 26 | self.bbox_head = context.bbox_head 27 | else: 28 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 29 | context.current_stage] 30 | self.bbox_head = context.bbox_head[context.current_stage] 31 | 32 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 33 | with torch.no_grad(): 34 | rois = bbox2roi([bboxes]) 35 | bbox_feats = self.bbox_roi_extractor( 36 | feats[:self.bbox_roi_extractor.num_inputs], rois) 37 | cls_score, _ = self.bbox_head(bbox_feats) 38 | loss = self.bbox_head.loss( 39 | cls_score=cls_score, 40 | bbox_pred=None, 41 | labels=labels, 42 | label_weights=cls_score.new_ones(cls_score.size(0)), 43 | bbox_targets=None, 44 | bbox_weights=None, 45 | reduction_override='none')['loss_cls'] 46 | _, topk_loss_inds = loss.topk(num_expected) 47 | return inds[topk_loss_inds] 48 | 49 | def _sample_pos(self, 50 | assign_result, 51 | num_expected, 52 | bboxes=None, 53 | feats=None, 54 | **kwargs): 55 | # Sample some hard positive samples 56 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 57 | if pos_inds.numel() != 0: 58 | pos_inds = pos_inds.squeeze(1) 59 | if pos_inds.numel() <= num_expected: 60 | return pos_inds 61 | else: 62 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 63 | assign_result.labels[pos_inds], feats) 64 | 65 | def _sample_neg(self, 66 | assign_result, 67 | num_expected, 68 | bboxes=None, 69 | feats=None, 70 | **kwargs): 71 | # Sample some hard negative samples 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 79 | assign_result.labels[neg_inds], feats) 80 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = 
bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | from mmdet.core.bbox import demodata 16 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | self.rng = demodata.ensure_rng(kwargs.get('rng', None)) 19 | 20 | def random_choice(self, gallery, num): 21 | """Randomly select some elements from the gallery. 22 | 23 | It seems that PyTorch's implementation is slower than numpy, so we use 24 | numpy to randperm the indices. 25 | """ 26 | assert len(gallery) >= num 27 | if isinstance(gallery, list): 28 | gallery = np.array(gallery) 29 | cands = np.arange(len(gallery)) 30 | self.rng.shuffle(cands) 31 | rand_inds = cands[:num] 32 | if not isinstance(gallery, np.ndarray): 33 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 34 | return gallery[rand_inds] 35 | 36 | def _sample_pos(self, assign_result, num_expected, **kwargs): 37 | """Randomly sample some positive samples.""" 38 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 39 | if pos_inds.numel() != 0: 40 | pos_inds = pos_inds.squeeze(1) 41 | if pos_inds.numel() <= num_expected: 42 | return pos_inds 43 | else: 44 | return self.random_choice(pos_inds, num_expected) 45 | 46 | def _sample_neg(self, assign_result, num_expected, **kwargs): 47 | """Randomly sample some negative samples.""" 48 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 49 | if neg_inds.numel() != 0: 50 | neg_inds = neg_inds.squeeze(1) 51 | if len(neg_inds) <= num_expected: 52 | return neg_inds 53 | else: 54 | return self.random_choice(neg_inds, num_expected) 55 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json, results2json_segm 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 6 | DistEvalHook, DistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 9 | print_recall_summary) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall', 'results2json_segm' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 |
import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, 
gt_masks_list, 8 | cfg): 9 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 10 | mask_targets = map(mask_target_single, pos_proposals_list, 11 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 12 | mask_targets = torch.cat(list(mask_targets)) 13 | return mask_targets 14 | 15 | 16 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 17 | mask_size = _pair(cfg.mask_size) 18 | num_pos = pos_proposals.size(0) 19 | mask_targets = [] 20 | if num_pos > 0: 21 | proposals_np = pos_proposals.cpu().numpy() 22 | _, maxh, maxw = gt_masks.shape 23 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1) 24 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1) 25 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 26 | for i in range(num_pos): 27 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 28 | bbox = proposals_np[i, :].astype(np.int32) 29 | x1, y1, x2, y2 = bbox 30 | w = np.maximum(x2 - x1 + 1, 1) 31 | h = np.maximum(y2 - y1 + 1, 1) 32 | # mask is uint8 both before and after resizing 33 | # mask_size (h, w) to (w, h) 34 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 35 | mask_size[::-1]) 36 | mask_targets.append(target) 37 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 38 | pos_proposals.device) 39 | else: 40 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 41 | return mask_targets 42 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 
10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .matrix_nms import matrix_nms 3 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 4 | merge_aug_proposals, merge_aug_scores) 5 | 6 | __all__ = [ 7 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 8 | 'merge_aug_scores', 'merge_aug_masks', 'matrix_nms' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class), where the 0th column 17 | contains scores of the background class, but this will be ignored. 18 | score_thr (float): bbox threshold, bboxes with scores lower than it 19 | will not be considered. 20 | nms_cfg (dict): NMS config; the 'type' key selects the NMS op from 21 | nms_wrapper and the remaining keys (e.g. the IoU threshold) are 22 | passed to that op. 23 | max_num (int): if there are more than max_num bboxes after NMS, 24 | only top max_num will be kept. 25 | score_factors (Tensor): The factors multiplied to scores before 26 | applying NMS 27 | 28 | Returns: 29 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels 30 | are 0-based.
29 | """ 30 | num_classes = multi_scores.shape[1] 31 | bboxes, labels = [], [] 32 | nms_cfg_ = nms_cfg.copy() 33 | nms_type = nms_cfg_.pop('type', 'nms') 34 | nms_op = getattr(nms_wrapper, nms_type) 35 | for i in range(1, num_classes): 36 | cls_inds = multi_scores[:, i] > score_thr 37 | if not cls_inds.any(): 38 | continue 39 | # get bboxes and scores of this class 40 | if multi_bboxes.shape[1] == 4: 41 | _bboxes = multi_bboxes[cls_inds, :] 42 | else: 43 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 44 | _scores = multi_scores[cls_inds, i] 45 | if score_factors is not None: 46 | _scores *= score_factors[cls_inds] 47 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 48 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 49 | cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ), 50 | i - 1, 51 | dtype=torch.long) 52 | bboxes.append(cls_dets) 53 | labels.append(cls_labels) 54 | if bboxes: 55 | bboxes = torch.cat(bboxes) 56 | labels = torch.cat(labels) 57 | if bboxes.shape[0] > max_num: 58 | _, inds = bboxes[:, -1].sort(descending=True) 59 | inds = inds[:max_num] 60 | bboxes = bboxes[inds] 61 | labels = labels[inds] 62 | else: 63 | bboxes = multi_bboxes.new_zeros((0, 5)) 64 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 65 | 66 | return bboxes, labels 67 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap, \ 3 | partial_load, masked_avg_pool, \ 4 | freeze_model_partially, vis_seg, \ 5 | compute_mask_ious, compute_box_ious, \ 6 | convert_and_load_checkpoint, process_gt_masks, process_seg_masks, \ 7 | compute_gaussian, compute_ood_scores 8 | from .colormap import get_color_map 9 | 10 | __all__ = [ 11 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 12 | 'multi_apply', 'partial_load', 'masked_avg_pool', 'freeze_model_partially', 13 | 'vis_seg', 'compute_mask_ious', 'compute_box_ious', 'convert_and_load_checkpoint', 14 | 'get_color_map', 'process_gt_masks', 'process_seg_masks', 'compute_gaussian', 15 | 'compute_ood_scores' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad 
is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model.parameters(), self.coalesce, 55 | self.bucket_size_mb) 56 | if self.grad_clip is not None: 57 | self.clip_grads(runner.model.parameters()) 58 | runner.optimizer.step() 59 | -------------------------------------------------------------------------------- /mmdet/core/utils/map.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | def compute_map(mask, x): 4 | h, w = x.shape[-2:] 5 | if mask.ndim < 4: 6 | mask = mask.unsqueeze(1) 7 | 8 | masked_embedding = mask * x.unsqueeze(0) 9 | area = F.avg_pool2d(mask, x.shape[-2:]) * h * w + 0.0005 10 | map_embedding = F.avg_pool2d(input=masked_embedding, kernel_size=x.shape[-2:]) * h * w / area 11 | map_embedding = map_embedding.squeeze() 12 | if map_embedding.ndim < 2: 13 | map_embedding = map_embedding.unsqueeze(0) 14 | return map_embedding 15 | 16 | -------------------------------------------------------------------------------- /mmdet/core/utils/post_proc_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | def post_process_seg_result(seg_result, img, train=False): 4 | score_thr = 0.3 5 | empty = True 6 | 7 | for i in range(len(seg_result)): 8 | if seg_result[i] is None or 'scores' not in seg_result[i]: # Happens when only embeddings are there 9 | continue 10 | vis_inds = seg_result[i]['scores'] > score_thr 11 | seg_result[i]['masks'] = seg_result[i]['masks'][vis_inds] 12 | seg_result[i]['labels'] = seg_result[i]['labels'][vis_inds] 13 | seg_result[i]['scores'] = seg_result[i]['scores'][vis_inds] 14 | 15 | if seg_result[i]['masks'].shape[0] != 0: 16 | empty = False 17 | if train: 18 | seg_result[i]['masks'] = F.interpolate(seg_result[i]['masks'].unsqueeze(0).float(), \ 19 | img[i].shape[-2:], mode='nearest').squeeze() 20 | if len(seg_result[i]['masks'].shape) < 3: 21 | seg_result[i]['masks'] = seg_result[i]['masks'].unsqueeze(0) 22 | 23 | return seg_result, empty 24 | 25 | def process_bbox_outputs(outs, bbox_head, img_meta, rescale, pred_semantic_seg=None, cfg=None): 26 | if 'eval_tensors' in outs: # BBox or Ca Head is using decoupled SOLO 27 | seg_inputs = outs['eval_tensors'] 28 | seg_inputs.update({'img_metas': img_meta, 'cfg': cfg, 'rescale': rescale}) 29 | seg_result = bbox_head.get_seg(**seg_inputs) 30 | else: 31 | seg_inputs = {'class_agnostic_embeddings': outs['class_agnostic_embeddings'], 32 | 'merged_fpn_embeddings': outs['merged_fpn_embeddings'], 33 | 'pred_semantic_seg': pred_semantic_seg} 34 | seg_result = bbox_head.get_seg(**seg_inputs) 35 | 36 | extra_keys = ['class_agnostic_embeddings', 'merged_fpn_embeddings'] 37 | for key in extra_keys: 38 | if key in outs: 39 | seg_result[0][key] = outs[key] 40 | return seg_result 41 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .builder import build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset 6 | from .loader import DistributedGroupSampler, GroupSampler, build_dataloader 7 | from .registry import DATASETS 8 | from .voc import VOCDataset 9 | from .wider_face import WIDERFaceDataset 10 | from .xml_style import XMLDataset 11 | from .kittimots import KITTIMOTSDataset 12 | from .motion_dataset import MotionDataset 13 | from .cityscapes_vps import CityscapesVPSDataset 14 | from .cityscapes_ps import CityscapesPanopticDataset 15 | from .cityscapes_vps_segonly import CityscapesVPSSegDataset 16 | 17 | __all__ = [ 18 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'CityscapesVPSDataset', 19 | 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler', 20 | 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset', 21 | 'DATASETS', 'build_dataset', 'KITTIMOTSDataset', 22 | 'MotionDataset', 'CityscapesPanopticDataset', 23 | 'CityscapesVPSSegDataset' 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset 5 | from .registry import DATASETS 6 | 7 | 8 | def _concat_dataset(cfg, default_args=None): 9 | ann_files = cfg['ann_file'] 10 | img_prefixes = cfg.get('img_prefix', None) 11 | seg_prefixes = cfg.get('seg_prefix', None) 12 | proposal_files = cfg.get('proposal_file', None) 13 | 14 | datasets = [] 15 | num_dset = len(ann_files) 16 | for i in range(num_dset): 17 | data_cfg = copy.deepcopy(cfg) 18 | data_cfg['ann_file'] = ann_files[i] 19 | if isinstance(img_prefixes, (list, tuple)): 20 | data_cfg['img_prefix'] = img_prefixes[i] 21 | if isinstance(seg_prefixes, (list, tuple)): 22 | data_cfg['seg_prefix'] = seg_prefixes[i] 23 | if isinstance(proposal_files, (list, tuple)): 24 | data_cfg['proposal_file'] = proposal_files[i] 25 | datasets.append(build_dataset(data_cfg, default_args)) 26 | 27 | return ConcatDataset(datasets) 28 | 29 | 30 | def build_dataset(cfg, default_args=None): 31 | if isinstance(cfg, (list, tuple)): 32 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif isinstance(cfg['ann_file'], (list, tuple)): 37 | dataset = _concat_dataset(cfg, default_args) 38 | else: 39 | dataset = build_from_cfg(cfg, DATASETS, default_args) 40 | 41 | return dataset 42 | -------------------------------------------------------------------------------- /mmdet/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class CityscapesDataset(CocoDataset): 7 | 8 | CLASSES = ("person", "rider", "car", "truck", "bicycle", "motorcycle", "bus", "train") 9 | -------------------------------------------------------------------------------- /mmdet/datasets/cityscapes_ps.py: -------------------------------------------------------------------------------- 1 | from .cityscapes import CityscapesDataset 2 | 
from .registry import DATASETS 3 | import os.path as osp 4 | 5 | @DATASETS.register_module 6 | class CityscapesPanopticDataset(CityscapesDataset): 7 | """ 8 | Cityscapes/Carla Dataset loading semantic segmentation 9 | without Instance support 10 | """ 11 | 12 | def prepare_train_img(self, idx): 13 | img_info = self.img_infos[idx] 14 | ann_info = self.get_ann_info(idx) 15 | results = dict(img_info=img_info, ann_info=ann_info) 16 | 17 | # Add reading of semantic segmentation labels 18 | seg_filename = osp.join( 19 | self.seg_prefix, 20 | results['ann_info']['seg_map'].replace( 21 | 'leftImg8bit', 'gtFine_labelTrainIds')) 22 | results['ann_info']['seg_filename'] = seg_filename 23 | 24 | if self.proposals is not None: 25 | results['proposals'] = self.proposals[idx] 26 | self.pre_pipeline(results) 27 | 28 | results = self.pipeline(results) 29 | 30 | if results is not None and 'gt_labels' not in results: 31 | results['gt_labels'] = [] 32 | results['gt_bboxes'] = [] 33 | return results 34 | -------------------------------------------------------------------------------- /mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | import bisect 6 | 7 | 8 | @DATASETS.register_module 9 | class ConcatDataset(_ConcatDataset): 10 | """A wrapper of concatenated dataset. 11 | 12 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 13 | concat the group flag for image aspect ratio. 14 | 15 | Args: 16 | datasets (list[:obj:`Dataset`]): A list of datasets. 17 | """ 18 | 19 | def __init__(self, datasets): 20 | super(ConcatDataset, self).__init__(datasets) 21 | self.CLASSES = datasets[0].CLASSES 22 | if hasattr(datasets[0], 'flag'): 23 | flags = [] 24 | for i in range(0, len(datasets)): 25 | flags.append(datasets[i].flag) 26 | self.flag = np.concatenate(flags) 27 | self.get_ann_info = None # Callable 28 | 29 | def __getitem__(self, idx): 30 | if idx < 0: 31 | if -idx > len(self): 32 | raise ValueError("absolute value of index should not exceed dataset length") 33 | idx = len(self) + idx 34 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 35 | if dataset_idx == 0: 36 | sample_idx = idx 37 | else: 38 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 39 | 40 | self.get_ann_info = self.datasets[dataset_idx].get_ann_info 41 | return self.datasets[dataset_idx][sample_idx] 42 | 43 | @DATASETS.register_module 44 | class RepeatDataset(object): 45 | """A wrapper of repeated dataset. 46 | 47 | The length of repeated dataset will be `times` larger than the original 48 | dataset. This is useful when the data loading time is long but the dataset 49 | is small. Using RepeatDataset can reduce the data loading time between 50 | epochs. 51 | 52 | Args: 53 | dataset (:obj:`Dataset`): The dataset to be repeated. 54 | times (int): Repeat times. 
55 | """ 56 | 57 | def __init__(self, dataset, times): 58 | self.dataset = dataset 59 | self.times = times 60 | self.CLASSES = dataset.CLASSES 61 | if hasattr(self.dataset, 'flag'): 62 | self.flag = np.tile(self.dataset.flag, times) 63 | 64 | self._ori_len = len(self.dataset) 65 | self.get_ann_info = self.dataset.get_ann_info 66 | 67 | def __getitem__(self, idx): 68 | return self.dataset[idx % self._ori_len] 69 | 70 | def __len__(self): 71 | return self.times * self._ori_len 72 | -------------------------------------------------------------------------------- /mmdet/datasets/kittimots.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class KITTIMOTSDataset(CocoDataset): 7 | """ 8 | KITTI Dataset for Instance Segmentation 9 | """ 10 | CLASSES = ("car", "person") 11 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from functools import partial 3 | 4 | from mmcv.parallel import collate 5 | from mmcv.runner import get_dist_info 6 | from torch.utils.data import DataLoader 7 | 8 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler 9 | 10 | if platform.system() != 'Windows': 11 | # https://github.com/pytorch/pytorch/issues/973 12 | import resource 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def full_collate(batch, samples_per_gpu=1): 18 | # Collate CL Augmentations with Batch Dim 19 | if type(batch[0]) == list: 20 | modified_batch = [] 21 | for batch_element in batch: 22 | modified_batch += batch_element 23 | return collate(modified_batch, samples_per_gpu*2) 24 | else: 25 | return collate(batch, samples_per_gpu) 26 | 27 | 28 | def build_dataloader(dataset, 29 | imgs_per_gpu, 30 | workers_per_gpu, 31 | num_gpus=1, 32 | dist=True, 33 | shuffle=True, 34 | **kwargs): 35 | """Build PyTorch DataLoader. 36 | 37 | In distributed training, each GPU/process has a dataloader. 38 | In non-distributed training, there is only one dataloader for all GPUs. 39 | 40 | Args: 41 | dataset (Dataset): A PyTorch dataset. 42 | imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of 43 | each GPU. 44 | workers_per_gpu (int): How many subprocesses to use for data loading 45 | for each GPU. 46 | num_gpus (int): Number of GPUs. Only used in non-distributed training. 47 | dist (bool): Distributed training/test or not. Default: True. 48 | shuffle (bool): Whether to shuffle the data at every epoch. 49 | Default: True. 50 | kwargs: any keyword argument to be used to initialize DataLoader 51 | 52 | Returns: 53 | DataLoader: A PyTorch dataloader. 
54 | """ 55 | if dist: 56 | rank, world_size = get_dist_info() 57 | # DistributedGroupSampler will definitely shuffle the data to satisfy 58 | # that images on each GPU are in the same group 59 | if shuffle: 60 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 61 | world_size, rank) 62 | else: 63 | sampler = DistributedSampler( 64 | dataset, world_size, rank, shuffle=False) 65 | batch_size = imgs_per_gpu 66 | num_workers = workers_per_gpu 67 | else: 68 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 69 | batch_size = num_gpus * imgs_per_gpu 70 | num_workers = num_gpus * workers_per_gpu 71 | 72 | collate_fn = partial(full_collate, samples_per_gpu=imgs_per_gpu) 73 | 74 | data_loader = DataLoader( 75 | dataset, 76 | batch_size=batch_size, 77 | sampler=sampler, 78 | num_workers=num_workers, 79 | collate_fn=collate_fn, 80 | pin_memory=False, 81 | **kwargs) 82 | 83 | return data_loader 84 | -------------------------------------------------------------------------------- /mmdet/datasets/motion_dataset.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class MotionDataset(CocoDataset): 7 | """ 8 | Motion Dataset for Motion Instance Segmentation 9 | """ 10 | 11 | CLASSES = ("moving", "static") 12 | def prepare_test_img(self, idx): 13 | results = super().prepare_test_img(idx) 14 | 15 | # TODO: Use more generic way to work with cityscapes as well not only kitti 16 | if 'kitti' in results['img_meta'][0].data['filename']: 17 | fileno = int(results['img_meta'][0].data['filename'].split('/')[-1].split('.')[0]) 18 | is_first = fileno==0 19 | else: 20 | nframes_span_test = 6 21 | is_first = (idx % nframes_span_test == 0) 22 | 23 | results['img_meta'][0].data['is_first'] = is_first 24 | return results 25 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, 3 | ToTensor, Transpose, to_tensor) 4 | from .loading import LoadAnnotations, LoadImageFromFile, LoadRefImageFromFile, LoadProposals 5 | from .test_aug import MultiScaleFlipAug 6 | from .transforms import (Expand, MinIoURandomCrop, Normalize, Pad, 7 | PhotoMetricDistortion, RandomCrop, 8 | RandomFlip, Resize, 9 | SegResizeFlipCropPadRescale, 10 | ImgResizeFlipNormCropPad, ColorJitter, 11 | GaussianBlur, RandGrayscale) 12 | 13 | __all__ = [ 14 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 15 | 'ToDataContainer', 'Transpose', 'Collect', 'LoadAnnotations', 16 | 'LoadImageFromFile', 'LoadProposals', 'MultiScaleFlipAug', 17 | 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 18 | 'SegResizeFlipCropPadRescale', 'ImgResizeFlipNormCropPad', 19 | 'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 20 | 'LoadRefImageFromFile', 'ColorJitter', 'GaussianBlur', 'RandGrayscale', 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from mmdet.utils import build_from_cfg 3 | from ..registry import PIPELINES 4 | 5 | @PIPELINES.register_module 6 | class Compose(object): 7 | 8 | def __init__(self, transforms): 9 | assert isinstance(transforms, 
collections.abc.Sequence) 10 | self.transforms = [] 11 | for transform in transforms: 12 | if isinstance(transform, dict): 13 | transform = build_from_cfg(transform, PIPELINES) 14 | self.transforms.append(transform) 15 | elif callable(transform): 16 | self.transforms.append(transform) 17 | else: 18 | raise TypeError('transform must be callable or a dict') 19 | 20 | def __call__(self, data): 21 | for t in self.transforms: 22 | data = t(data) 23 | if data is None: 24 | return None 25 | return data 26 | 27 | def __repr__(self): 28 | format_string = self.__class__.__name__ + '(' 29 | for t in self.transforms: 30 | format_string += '\n' 31 | format_string += ' {0}'.format(t) 32 | format_string += '\n)' 33 | return format_string 34 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/test_aug.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from ..registry import PIPELINES 4 | from .compose import Compose 5 | 6 | 7 | @PIPELINES.register_module 8 | class MultiScaleFlipAug(object): 9 | 10 | def __init__(self, transforms, img_scale, flip=False): 11 | self.transforms = Compose(transforms) 12 | self.img_scale = img_scale if isinstance(img_scale, 13 | list) else [img_scale] 14 | assert mmcv.is_list_of(self.img_scale, tuple) 15 | self.flip = flip 16 | 17 | def __call__(self, results): 18 | aug_data = [] 19 | flip_aug = [False, True] if self.flip else [False] 20 | for scale in self.img_scale: 21 | for flip in flip_aug: 22 | _results = results.copy() 23 | _results['scale'] = scale 24 | _results['flip'] = flip 25 | data = self.transforms(_results) 26 | aug_data.append(data) 27 | # list of dict to dict of list 28 | aug_data_dict = {key: [] for key in aug_data[0]} 29 | for data in aug_data: 30 | for key, val in data.items(): 31 | aug_data_dict[key].append(val) 32 | return aug_data_dict 33 | 34 | def __repr__(self): 35 | repr_str = self.__class__.__name__ 36 | repr_str += '(transforms={}, img_scale={}, flip={})'.format( 37 | self.transforms, self.img_scale, self.flip) 38 | return repr_str 39 | -------------------------------------------------------------------------------- /mmdet/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | PIPELINES = Registry('pipeline') 5 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .registry import DATASETS 2 | from .xml_style import XMLDataset 3 | 4 | 5 | @DATASETS.register_module 6 | class VOCDataset(XMLDataset): 7 | 8 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 9 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 10 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 11 | 'tvmonitor') 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | 
import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | from .registry import DATASETS 9 | 10 | 11 | @DATASETS.register_module 12 | class XMLDataset(CustomDataset): 13 | 14 | def __init__(self, min_size=None, **kwargs): 15 | super(XMLDataset, self).__init__(**kwargs) 16 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 17 | self.min_size = min_size 18 | 19 | def load_annotations(self, ann_file): 20 | img_infos = [] 21 | img_ids = mmcv.list_from_file(ann_file) 22 | for img_id in img_ids: 23 | filename = 'JPEGImages/{}.jpg'.format(img_id) 24 | xml_path = osp.join(self.img_prefix, 'Annotations', 25 | '{}.xml'.format(img_id)) 26 | tree = ET.parse(xml_path) 27 | root = tree.getroot() 28 | size = root.find('size') 29 | width = int(size.find('width').text) 30 | height = int(size.find('height').text) 31 | img_infos.append( 32 | dict(id=img_id, filename=filename, width=width, height=height)) 33 | return img_infos 34 | 35 | def get_ann_info(self, idx): 36 | img_id = self.img_infos[idx]['id'] 37 | xml_path = osp.join(self.img_prefix, 'Annotations', 38 | '{}.xml'.format(img_id)) 39 | tree = ET.parse(xml_path) 40 | root = tree.getroot() 41 | bboxes = [] 42 | labels = [] 43 | bboxes_ignore = [] 44 | labels_ignore = [] 45 | for obj in root.findall('object'): 46 | name = obj.find('name').text 47 | label = self.cat2label[name] 48 | difficult = int(obj.find('difficult').text) 49 | bnd_box = obj.find('bndbox') 50 | bbox = [ 51 | int(bnd_box.find('xmin').text), 52 | int(bnd_box.find('ymin').text), 53 | int(bnd_box.find('xmax').text), 54 | int(bnd_box.find('ymax').text) 55 | ] 56 | ignore = False 57 | if self.min_size: 58 | assert not self.test_mode 59 | w = bbox[2] - bbox[0] 60 | h = bbox[3] - bbox[1] 61 | if w < self.min_size or h < self.min_size: 62 | ignore = True 63 | if difficult or ignore: 64 | bboxes_ignore.append(bbox) 65 | labels_ignore.append(label) 66 | else: 67 | bboxes.append(bbox) 68 | labels.append(label) 69 | if not bboxes: 70 | bboxes = np.zeros((0, 4)) 71 | labels = np.zeros((0, )) 72 | else: 73 | bboxes = np.array(bboxes, ndmin=2) - 1 74 | labels = np.array(labels) 75 | if not 
bboxes_ignore: 76 | bboxes_ignore = np.zeros((0, 4)) 77 | labels_ignore = np.zeros((0, )) 78 | else: 79 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 80 | labels_ignore = np.array(labels_ignore) 81 | ann = dict( 82 | bboxes=bboxes.astype(np.float32), 83 | labels=labels.astype(np.int64), 84 | bboxes_ignore=bboxes_ignore.astype(np.float32), 85 | labels_ignore=labels_ignore.astype(np.int64)) 86 | return ann 87 | -------------------------------------------------------------------------------- /mmdet/metrics.py: -------------------------------------------------------------------------------- 1 | # Adapted from score written by wkentaro 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py 3 | 4 | import numpy as np 5 | 6 | 7 | class RunningScore(object): 8 | def __init__(self, n_classes): 9 | self.n_classes = n_classes 10 | self.confusion_matrix = np.zeros((n_classes, n_classes)) 11 | 12 | def _fast_hist(self, label_true, label_pred, n_class): 13 | mask = (label_true >= 0) & (label_true < n_class) 14 | hist = np.bincount( 15 | n_class * label_true[mask].astype(int) + label_pred[mask], minlength=n_class ** 2 16 | ).reshape(n_class, n_class) 17 | return hist 18 | 19 | def update(self, label_trues, label_preds): 20 | for lt, lp in zip(label_trues, label_preds): 21 | self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) 22 | 23 | def get_scores(self): 24 | """Returns accuracy score evaluation result. 25 | - overall accuracy 26 | - mean accuracy 27 | - mean IU 28 | - fwavacc 29 | """ 30 | hist = self.confusion_matrix 31 | acc = np.diag(hist).sum() / hist.sum() 32 | acc_cls = np.diag(hist) / hist.sum(axis=1) 33 | acc_cls = np.nanmean(acc_cls) 34 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 35 | mean_iu = np.nanmean(iu) 36 | freq = hist.sum(axis=1) / hist.sum() 37 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 38 | cls_iu = dict(zip(range(self.n_classes), iu)) 39 | 40 | return ( 41 | { 42 | "Overall Acc: \t": acc, 43 | "Mean Acc : \t": acc_cls, 44 | "FreqW Acc : \t": fwavacc, 45 | "Mean IoU : \t": mean_iu, 46 | }, 47 | cls_iu, 48 | self.confusion_matrix 49 | ) 50 | 51 | def reset(self): 52 | self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) 53 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import (build_backbone, build_detector, build_head, build_loss, 5 | build_neck, build_roi_extractor, build_shared_head) 6 | from .detectors import * # noqa: F401,F403 7 | from .losses import * # noqa: F401,F403 8 | from .mask_heads import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from .roi_extractors import * # noqa: F401,F403 13 | from .shared_heads import * # noqa: F401,F403 14 | from .track_heads import * 15 | from .ca_heads import * 16 | 17 | __all__ = [ 18 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 19 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 20 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 21 | ] 22 | -------------------------------------------------------------------------------- 
/mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .atss_head import ATSSHead 3 | from .fcos_head import FCOSHead 4 | from .fovea_head import FoveaHead 5 | from .free_anchor_retina_head import FreeAnchorRetinaHead 6 | from .ga_retina_head import GARetinaHead 7 | from .ga_rpn_head import GARPNHead 8 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 9 | from .reppoints_head import RepPointsHead 10 | from .retina_head import RetinaHead 11 | from .retina_sepbn_head import RetinaSepBNHead 12 | from .rpn_head import RPNHead 13 | from .ssd_head import SSDHead 14 | from .solo_head import SOLOHead 15 | from .solov2_head import SOLOv2Head 16 | from .solov2_light_head import SOLOv2LightHead 17 | from .decoupled_solo_head import DecoupledSOLOHead 18 | from .decoupled_solo_light_head import DecoupledSOLOLightHead 19 | from .panoptic_head import SimpleSegHead 20 | 21 | __all__ = [ 22 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 23 | 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 'SSDHead', 24 | 'FCOSHead', 'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead', 25 | 'ATSSHead', 'SOLOHead', 'SOLOv2Head', 'SOLOv2LightHead', 'DecoupledSOLOHead', 'DecoupledSOLOLightHead', 26 | 'SimpleSegHead' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/panoptic_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | from ..utils import bias_init_with_prob, ConvModule, merge_fpn 4 | from ..registry import HEADS 5 | import torch 6 | 7 | @HEADS.register_module 8 | class SimpleSegHead(nn.Module): 9 | def __init__(self, num_classes, in_channels, seg_feats_channel, stacked_convs, original_image_size, 10 | merge_fpn=True): 11 | 12 | super().__init__() 13 | self.num_classes = num_classes 14 | self.original_image_size = original_image_size 15 | self.fcn = nn.ModuleList() 16 | self.stacked_convs = stacked_convs 17 | self.merge_fpn = merge_fpn 18 | 19 | chn = in_channels 20 | for i in range(stacked_convs): 21 | self.fcn.append( 22 | ConvModule( 23 | chn, 24 | seg_feats_channel, 25 | 3, 26 | stride=1, 27 | padding=1, 28 | norm_cfg=None, 29 | bias=True)) 30 | chn = seg_feats_channel 31 | 32 | self.upsample_conv = nn.Conv2d(chn, chn, 1) 33 | self.classifier = nn.Conv2d(chn, num_classes, 1) 34 | 35 | def forward(self, x): 36 | x = merge_fpn(x, average=self.merge_fpn) 37 | for i in range(self.stacked_convs): 38 | x = self.fcn[i](x) 39 | intermediate_feats = x 40 | x = F.interpolate(x, (x.shape[2]*2, x.shape[3]*2)) 41 | x = F.relu(self.upsample_conv(x)) 42 | x = self.classifier(x) 43 | x = F.interpolate(x, self.original_image_size[::-1]) 44 | return x, intermediate_feats 45 | 46 | def loss(self, seg_map, gt_semantic_seg): 47 | #TODO: Add loss using two logits from instance and semantic seg 48 | gt_semantic_seg_up = F.interpolate(gt_semantic_seg.float(), seg_map.shape[-2:], mode='nearest') 49 | gt_semantic_seg_up = gt_semantic_seg_up.long().squeeze(1) 50 | loss_seg = F.cross_entropy(seg_map, gt_semantic_seg_up, ignore_index=255) 51 | return loss_seg 52 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import HRNet 2 | from .resnet import 
ResNet, make_res_layer, TwoStreamResNet 3 | from .resnext import ResNeXt 4 | from .ssd_vgg import SSDVGG 5 | from .resnet_tfstyle import ResNetTFStyle, TwoStreamResNetTFStyle 6 | 7 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet', 'TwoStreamResNet', 8 | 'ResNetTFStyle', 'TwoStreamResNetTFStyle'] 9 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | 5 | __all__ = [ 6 | 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 5 | ROI_EXTRACTORS, SHARED_HEADS) 6 | 7 | 8 | def build(cfg, registry, default_args=None): 9 | if isinstance(cfg, list): 10 | modules = [ 11 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 12 | ] 13 | return nn.Sequential(*modules) 14 | else: 15 | return build_from_cfg(cfg, registry, default_args) 16 | 17 | 18 | def build_backbone(cfg): 19 | return build(cfg, BACKBONES) 20 | 21 | 22 | def build_neck(cfg): 23 | return build(cfg, NECKS) 24 | 25 | 26 | def build_roi_extractor(cfg): 27 | return build(cfg, ROI_EXTRACTORS) 28 | 29 | 30 | def build_shared_head(cfg): 31 | return build(cfg, SHARED_HEADS) 32 | 33 | 34 | def build_head(cfg): 35 | return build(cfg, HEADS) 36 | 37 | 38 | def build_loss(cfg): 39 | return build(cfg, LOSSES) 40 | 41 | 42 | def build_detector(cfg, train_cfg=None, test_cfg=None): 43 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 44 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .compose_ca import ComposedClassAgnosticHead 3 | from .appearance_ca_mahalanobis import MahalanobisAppearanceBasedClassAgnosticHead 4 | from .appearance_ca_map import MAPClassAgnosticHead 5 | 6 | __all__ = ['ComposedClassAgnosticHead', 7 | 'MahalanobisAppearanceBasedClassAgnosticHead', 8 | 'MAPClassAgnosticHead', 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/appearance_ca_abstract.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class Agnostic Head Abstract Class 3 | """ 4 | 5 | import torch.nn as nn 6 | import torch 7 | from mmcv.cnn import normal_init 8 | import torch.nn.functional as F 9 | 10 | from ..utils import ConvModule 11 | from ..utils import merge_fpn 12 | import random 13 | import numpy as np 14 | 15 | class AppearanceBasedClassAgnosticAbstract(nn.Module): 16 | def __init__(self, clustering_type='dbscan', in_channels=256, interm_channels=256, 17 | n_convs=7, norm_cfg=None, num_classes=19, merge_fpn=False): 18 | 19 | super().__init__() 20 | self.clustering_type = clustering_type 21 | self.stuff_idx = 11 22 | self.interm_channels = interm_channels 23 | self.conv_modules = nn.ModuleList() 24 | self.merge_fpn = merge_fpn 25 | self.num_classes = num_classes 26 | self.norm_cfg 
= norm_cfg 27 | 28 | self.conv_modules = self.init_layers( 29 | n_convs, in_channels, interm_channels, norm_cfg, self.conv_modules 30 | ) 31 | self.conv_modules = self.init_weights_module(self.conv_modules) 32 | 33 | def init_layers(self, n_convs, in_channels, interm_channels, norm_cfg, conv_modules): 34 | for idx in range(n_convs): 35 | if idx == 0: 36 | chn = in_channels 37 | else: 38 | chn = interm_channels 39 | 40 | conv_modules.append( 41 | ConvModule( 42 | chn, 43 | interm_channels, 44 | 3, 45 | stride=1, 46 | padding=1, 47 | norm_cfg=norm_cfg, 48 | bias=norm_cfg is None)) 49 | return conv_modules 50 | 51 | def init_weights(self): 52 | self.conv_modules = self.init_weights_module(self.conv_modules) 53 | 54 | def init_weights_module(self, conv_modules): 55 | for m in conv_modules: 56 | normal_init(m.conv, std=0.01) 57 | return conv_modules 58 | 59 | def forward(self, feats, eval=0): 60 | if self.merge_fpn: 61 | feats = merge_fpn(feats) 62 | else: 63 | feats = feats[0] 64 | 65 | merged_fpn_feats = feats 66 | 67 | for conv_layer in self.conv_modules: 68 | feats = conv_layer(feats) 69 | 70 | out = {'class_agnostic_embeddings': feats} 71 | if not self.training: 72 | out['merged_fpn_embeddings'] = merged_fpn_feats 73 | return out 74 | 75 | def loss(self, **kwargs): 76 | pass 77 | 78 | def get_seg(self, **kwargs): 79 | pass 80 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/appearance_ca_map.py: -------------------------------------------------------------------------------- 1 | """ 2 | Masked Average Pooling Head for Feature Analysis 3 | """ 4 | 5 | import torch.nn as nn 6 | import torch 7 | from mmcv.cnn import normal_init 8 | import torch.nn.functional as F 9 | 10 | from ..registry import HEADS 11 | from ..utils import ConvModule 12 | from ..utils import merge_fpn 13 | from mmdet.core import compute_ood_scores 14 | import random 15 | import numpy as np 16 | import time 17 | 18 | 19 | @HEADS.register_module 20 | class MAPClassAgnosticHead(nn.Module): 21 | def __init__(self, num_classes=9): 22 | super().__init__() 23 | 24 | def init_weights(self): 25 | pass 26 | 27 | def forward(self, feats, eval=0): 28 | out = {'class_agnostic_embeddings': feats, 29 | 'merged_fpn_embeddings': feats} 30 | return out 31 | 32 | def get_seg(self, **kwargs): 33 | return [{}] 34 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/compose_ca.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..registry import HEADS 4 | from .. 
import builder 5 | 6 | @HEADS.register_module 7 | class ComposedClassAgnosticHead(nn.Module): 8 | def __init__(self, ca_heads): 9 | super().__init__() 10 | self.heads_list = nn.ModuleDict() 11 | for ca_name, ca_head in ca_heads.items(): 12 | self.heads_list[ca_name] = builder.build_head(ca_head) 13 | 14 | def init_weights(self): 15 | for _, ca_head in self.heads_list.items(): 16 | ca_head.init_weights() 17 | 18 | def forward(self, **kwargs): 19 | outs = {} 20 | for _, ca_head in self.heads_list.items(): 21 | outs.update(ca_head(**kwargs)) 22 | return outs 23 | 24 | def loss(self, **kwargs): 25 | losses = {} 26 | for _, ca_head in self.heads_list.items(): 27 | losses.update(ca_head.loss(**kwargs)) 28 | return losses 29 | 30 | def get_seg(self, **kwargs): 31 | seg_out = [{}] 32 | for _, ca_head in self.heads_list.items(): 33 | ca_out = ca_head.get_seg(**kwargs) 34 | if ca_out[0] is not None: 35 | seg_out[0].update(ca_out[0]) 36 | return seg_out 37 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 2 | from .base import BaseDetector 3 | from .cascade_rcnn import CascadeRCNN 4 | from .double_head_rcnn import DoubleHeadRCNN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .fcos import FCOS 8 | from .fovea import FOVEA 9 | from .grid_rcnn import GridRCNN 10 | from .htc import HybridTaskCascade 11 | from .mask_rcnn import MaskRCNN 12 | from .mask_scoring_rcnn import MaskScoringRCNN 13 | from .reppoints_detector import RepPointsDetector 14 | from .retinanet import RetinaNet 15 | from .rpn import RPN 16 | from .single_stage import SingleStageDetector 17 | from .single_stage_ins import SingleStageInsDetector 18 | from .two_stage import TwoStageDetector 19 | from .solo import SOLO 20 | from .solov2 import SOLOv2 21 | 22 | __all__ = [ 23 | 'ATSS', 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 24 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 25 | 'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN', 26 | 'RepPointsDetector', 'FOVEA', 'SingleStageInsDetector', 'SOLO', 'SOLOv2' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/atss.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class ATSS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | 
shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | """ 33 | Args: 34 | imgs (List[Tensor]): the outer list indicates test-time 35 | augmentations and inner Tensor should have a shape NxCxHxW, 36 | which contains all images in the batch. 37 | img_metas (List[List[dict]]): the outer list indicates test-time 38 | augs (multiscale, flip, etc.) and the inner list indicates 39 | images in a batch 40 | proposals (List[List[Tensor | None]]): predefined proposals for 41 | each test-time augmentation and each item. 42 | """ 43 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 44 | if not isinstance(var, list): 45 | raise TypeError('{} must be a list, but got {}'.format( 46 | name, type(var))) 47 | 48 | num_augs = len(imgs) 49 | if num_augs != len(img_metas): 50 | raise ValueError( 51 | 'num of augmentations ({}) != num of image meta ({})'.format( 52 | len(imgs), len(img_metas))) 53 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 54 | imgs_per_gpu = imgs[0].size(0) 55 | assert imgs_per_gpu == 1 56 | 57 | if num_augs == 1: 58 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 59 | **kwargs) 60 | else: 61 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 62 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fovea.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FOVEA(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 |
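A note on the pattern above: every detector in this directory is a thin wrapper whose only job is to register a constructor signature with the `DETECTORS` registry; instances are created from config dicts by `build_detector` (see `mmdet/models/builder.py` earlier in this dump). A minimal sketch of that flow, using a hypothetical `ToyDetector` stand-in rather than a full, validated detector config:

import torch.nn as nn
from mmdet.models import DETECTORS, build_detector

@DETECTORS.register_module
class ToyDetector(nn.Module):
    # Hypothetical stand-in: real configs name a registered class such as
    # 'FasterRCNN' and pass its full set of constructor arguments.
    def __init__(self, width, train_cfg=None, test_cfg=None):
        super().__init__()
        self.body = nn.Conv2d(3, width, 3, padding=1)

model = build_detector(dict(type='ToyDetector', width=16),
                       train_cfg=None, test_cfg=None)
assert isinstance(model, ToyDetector)

`build_detector` injects `train_cfg`/`test_cfg` as default kwargs through `build_from_cfg`, which is why every registered detector accepts those two arguments.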
-------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/reppoints_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms 4 | from ..registry import DETECTORS 5 | from .single_stage import SingleStageDetector 6 | 7 | 8 | @DETECTORS.register_module 9 | class RepPointsDetector(SingleStageDetector): 10 | """RepPoints: Point Set Representation for Object Detection. 11 | 12 | This detector is the implementation of: 13 | - RepPoints detector (https://arxiv.org/pdf/1904.11490) 14 | """ 15 | 16 | def __init__(self, 17 | backbone, 18 | neck, 19 | bbox_head, 20 | train_cfg=None, 21 | test_cfg=None, 22 | pretrained=None): 23 | super(RepPointsDetector, 24 | self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg, 25 | pretrained) 26 | 27 | def merge_aug_results(self, aug_bboxes, aug_scores, img_metas): 28 | """Merge augmented detection bboxes and scores. 29 | 30 | Args: 31 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 32 | aug_scores (list[Tensor] or None): shape (n, #class) 33 | img_metas (list[list[dict]]): meta information of each image, one list per test-time augmentation.
34 | 35 | Returns: 36 | tuple: (bboxes, scores) 37 | """ 38 | recovered_bboxes = [] 39 | for bboxes, img_info in zip(aug_bboxes, img_metas): 40 | img_shape = img_info[0]['img_shape'] 41 | scale_factor = img_info[0]['scale_factor'] 42 | flip = img_info[0]['flip'] 43 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 44 | recovered_bboxes.append(bboxes) 45 | bboxes = torch.cat(recovered_bboxes, dim=0) 46 | if aug_scores is None: 47 | return bboxes 48 | else: 49 | scores = torch.cat(aug_scores, dim=0) 50 | return bboxes, scores 51 | 52 | def aug_test(self, imgs, img_metas, rescale=False): 53 | # recompute feats to save memory 54 | feats = self.extract_feats(imgs) 55 | 56 | aug_bboxes = [] 57 | aug_scores = [] 58 | for x, img_meta in zip(feats, img_metas): 59 | # only one image in the batch 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, False, False) 62 | det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0] 63 | aug_bboxes.append(det_bboxes) 64 | aug_scores.append(det_scores) 65 | 66 | # after merging, bboxes will be rescaled to the original image size 67 | merged_bboxes, merged_scores = self.merge_aug_results( 68 | aug_bboxes, aug_scores, img_metas) 69 | det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, 70 | self.test_cfg.score_thr, 71 | self.test_cfg.nms, 72 | self.test_cfg.max_per_img) 73 | 74 | if rescale: 75 | _det_bboxes = det_bboxes 76 | else: 77 | _det_bboxes = det_bboxes.clone() 78 | _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] 79 | bbox_results = bbox2result(_det_bboxes, det_labels, 80 | self.bbox_head.num_classes) 81 | return bbox_results 82 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from mmdet.core import bbox2result 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from .base import BaseDetector 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | """Base class for single-stage detectors. 12 | 13 | Single-stage detectors directly and densely predict bounding boxes on the 14 | output features of the backbone+neck. 
15 | """ 16 | 17 | def __init__(self, 18 | backbone, 19 | neck=None, 20 | bbox_head=None, 21 | train_cfg=None, 22 | test_cfg=None, 23 | pretrained=None): 24 | super(SingleStageDetector, self).__init__() 25 | self.backbone = builder.build_backbone(backbone) 26 | if neck is not None: 27 | self.neck = builder.build_neck(neck) 28 | self.bbox_head = builder.build_head(bbox_head) 29 | self.train_cfg = train_cfg 30 | self.test_cfg = test_cfg 31 | self.init_weights(pretrained=pretrained) 32 | 33 | def init_weights(self, pretrained=None): 34 | super(SingleStageDetector, self).init_weights(pretrained) 35 | self.backbone.init_weights(pretrained=pretrained) 36 | if self.with_neck: 37 | if isinstance(self.neck, nn.Sequential): 38 | for m in self.neck: 39 | m.init_weights() 40 | else: 41 | self.neck.init_weights() 42 | self.bbox_head.init_weights() 43 | 44 | def extract_feat(self, img): 45 | """Directly extract features from the backbone+neck 46 | """ 47 | x = self.backbone(img) 48 | if self.with_neck: 49 | x = self.neck(x) 50 | return x 51 | 52 | def forward_dummy(self, img): 53 | """Used for computing network flops. 54 | 55 | See `mmedetection/tools/get_flops.py` 56 | """ 57 | x = self.extract_feat(img) 58 | outs = self.bbox_head(x) 59 | return outs 60 | 61 | def forward_train(self, 62 | img, 63 | img_metas, 64 | gt_bboxes, 65 | gt_labels, 66 | gt_bboxes_ignore=None): 67 | x = self.extract_feat(img) 68 | outs = self.bbox_head(x) 69 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 70 | losses = self.bbox_head.loss( 71 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 72 | return losses 73 | 74 | def simple_test(self, img, img_meta, rescale=False): 75 | x = self.extract_feat(img) 76 | outs = self.bbox_head(x) 77 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 78 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 79 | bbox_results = [ 80 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 81 | for det_bboxes, det_labels in bbox_list 82 | ] 83 | return bbox_results[0] 84 | 85 | def aug_test(self, imgs, img_metas, rescale=False): 86 | raise NotImplementedError 87 | -------------------------------------------------------------------------------- /mmdet/models/detectors/solo.py: -------------------------------------------------------------------------------- 1 | from .single_stage_ins import SingleStageInsDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class SOLO(SingleStageInsDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head=None, 12 | track_head=None, 13 | panoptic_head=None, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | sta_config=None, 18 | ca_head=None, 19 | max_nottrack=20): 20 | super(SOLO, self).__init__(backbone, neck, bbox_head, track_head, panoptic_head, train_cfg, 21 | test_cfg, pretrained, sta_config=sta_config, ca_head=ca_head, 22 | max_nottrack=max_nottrack) 23 | -------------------------------------------------------------------------------- /mmdet/models/detectors/solov2.py: -------------------------------------------------------------------------------- 1 | from .single_stage_ins import SingleStageInsDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class SOLOv2(SingleStageInsDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | mask_feat_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(SOLOv2, self).__init__(backbone, neck, bbox_head, mask_feat_head, 
train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss, 8 | iou_loss) 9 | from .mse_loss import MSELoss, mse_loss 10 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 11 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 12 | 13 | __all__ = [ 14 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 15 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 16 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 17 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 18 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 19 | 'weight_reduce_loss', 'weighted_loss' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = 
beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | import torch 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None, 35 | cate_loss_weight=None): 36 | # Function.apply does not accept keyword arguments, so the decorator 37 | # "weighted_loss" is not applicable 38 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 39 | if cate_loss_weight is None: 40 | cate_loss_weight = [1.0] * loss.shape[1] 41 | cate_loss_weight = torch.tensor(cate_loss_weight).unsqueeze(0).cuda() 42 | loss = loss * cate_loss_weight 43 | 44 | # TODO: find a proper way to handle the shape of weight 45 | if weight is not None: 46 | weight = weight.view(-1, 1) 47 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 48 | return loss 49 | 50 | 51 | @LOSSES.register_module 52 | class FocalLoss(nn.Module): 53 | 54 | def __init__(self, 55 | use_sigmoid=True, 56 | gamma=2.0, 57 | alpha=0.25, 58 | reduction='mean', 59 | loss_weight=1.0, 60 | cate_loss_weight=None): 61 | super(FocalLoss, self).__init__() 62 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
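# Note: `cate_loss_weight`, when provided, is a list of per-class multipliers
# that `sigmoid_focal_loss` above broadcasts over the class dimension of the
# per-element loss before reduction.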
63 | self.use_sigmoid = use_sigmoid 64 | self.gamma = gamma 65 | self.alpha = alpha 66 | self.reduction = reduction 67 | self.loss_weight = loss_weight 68 | self.cate_loss_weight = cate_loss_weight 69 | 70 | def forward(self, 71 | pred, 72 | target, 73 | weight=None, 74 | avg_factor=None, 75 | reduction_override=None): 76 | assert reduction_override in (None, 'none', 'mean', 'sum') 77 | reduction = ( 78 | reduction_override if reduction_override else self.reduction) 79 | if self.use_sigmoid: 80 | loss_cls = self.loss_weight * sigmoid_focal_loss( 81 | pred, 82 | target, 83 | weight, 84 | gamma=self.gamma, 85 | alpha=self.alpha, 86 | reduction=reduction, 87 | avg_factor=avg_factor, 88 | cate_loss_weight=self.cate_loss_weight) 89 | else: 90 | raise NotImplementedError 91 | return loss_cls 92 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 
15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Avarage factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.) 
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | from .mask_feat_head import MaskFeatHead 7 | 8 | __all__ = [ 9 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 10 | 'MaskIoUHead', 'MaskFeatHead' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from ..registry import HEADS 2 | from ..utils import ConvModule 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .fpn import FPN 3 | from .hrfpn import HRFPN 4 | from .nas_fpn import NASFPN 5 | from .fpn_flo_warp import FPNFlowWarp 6 | 7 | __all__ = ['FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPNFlowWarp'] 8 | -------------------------------------------------------------------------------- /mmdet/models/necks/fpn_flo_warp.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | import torch 5 | import numpy as np 6 | import copy 7 | 8 | from mmdet.core import auto_fp16 9 | from ..registry import NECKS 10 | from ..utils import ConvModule 11 | from .fpn import FPN 12 | 13 | @NECKS.register_module 14 | class FPNFlowWarp(FPN): 15 | 16 | def __init__(self, **kwargs): 17 | super(FPNFlowWarp, self).__init__(**kwargs) 18 | 19 | @auto_fp16() 20 | def forward(self, inputs, flow): 21 | outs = super().forward(inputs) 22 | 23 
| # Get even and odd indices 24 | odd_indices = torch.arange(outs[0].shape[0]) % 2 != 0 25 | even_indices = torch.arange(outs[0].shape[0]) % 2 == 0 26 | 27 | feat_shape = outs[0][odd_indices].shape[-2:] 28 | 29 | identity_grid = np.meshgrid(np.linspace(-1, 1, feat_shape[1]), 30 | np.linspace(-1, 1, feat_shape[0])) 31 | identity_grid = torch.tensor(identity_grid).float().cuda() 32 | identity_grid = identity_grid.permute(1,2,0).unsqueeze(0) 33 | 34 | warped_outs = [] 35 | if flow is not None: 36 | for level in range(len(outs)): 37 | # Warp odd indices (previous-frame features) 38 | original_feat_shape = outs[level][odd_indices].shape[-2:] 39 | warping_flow = F.interpolate(flow[odd_indices], 40 | feat_shape, 41 | mode='bilinear', align_corners=True) 42 | warping_flow = warping_flow.permute(0, 2, 3, 1) 43 | warping_flow_normalize = warping_flow.clone() 44 | warping_flow_normalize[:, :, :, 0] = warping_flow[:, :, :, 0] / feat_shape[1] 45 | warping_flow_normalize[:, :, :, 1] = warping_flow[:, :, :, 1] / feat_shape[0] 46 | 47 | 48 | feats = F.interpolate(outs[level][odd_indices], feat_shape, 49 | mode='bilinear', align_corners=True) 50 | 51 | warped_feats = F.grid_sample( 52 | feats, identity_grid - warping_flow_normalize 53 | ) 54 | 55 | warped_feats = F.interpolate(warped_feats, original_feat_shape, 56 | mode='bilinear', align_corners=True) 57 | 58 | # Assemble final feats: warped previous-frame features at odd indices, current features at even indices as-is 59 | final_warped_feats = torch.zeros(outs[level].shape).cuda() 60 | final_warped_feats[odd_indices] = warped_feats 61 | final_warped_feats[even_indices] = outs[level][even_indices] 62 | warped_outs.append(final_warped_feats) 63 | else: 64 | warped_outs = outs 65 | 66 | return tuple(warped_outs) 67 | -------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_attention import GeneralizedAttention 2 | from .non_local import NonLocal2D 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | ROI_EXTRACTORS = Registry('roi_extractor') 6 | SHARED_HEADS = Registry('shared_head') 7 | HEADS = Registry('head') 8 | LOSSES = Registry('loss') 9 | DETECTORS = Registry('detector') 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import load_checkpoint 4 | 5 | from mmdet.core import auto_fp16 6 |
from mmdet.utils import get_root_logger 7 | from ..backbones import ResNet, make_res_layer 8 | from ..registry import SHARED_HEADS 9 | 10 | 11 | @SHARED_HEADS.register_module 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.norm_cfg = norm_cfg 27 | self.stage = stage 28 | self.fp16_enabled = False 29 | block, stage_blocks = ResNet.arch_settings[depth] 30 | stage_block = stage_blocks[stage] 31 | planes = 64 * 2**stage 32 | inplanes = 64 * 2**(stage - 1) * block.expansion 33 | 34 | res_layer = make_res_layer( 35 | block, 36 | inplanes, 37 | planes, 38 | stage_block, 39 | stride=stride, 40 | dilation=dilation, 41 | style=style, 42 | with_cp=with_cp, 43 | norm_cfg=self.norm_cfg, 44 | dcn=dcn) 45 | self.add_module('layer{}'.format(stage + 1), res_layer) 46 | 47 | def init_weights(self, pretrained=None): 48 | if isinstance(pretrained, str): 49 | logger = get_root_logger() 50 | load_checkpoint(self, pretrained, strict=False, logger=logger) 51 | elif pretrained is None: 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | kaiming_init(m) 55 | elif isinstance(m, nn.BatchNorm2d): 56 | constant_init(m, 1) 57 | else: 58 | raise TypeError('pretrained must be a str or None') 59 | 60 | @auto_fp16() 61 | def forward(self, x): 62 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 63 | out = res_layer(x) 64 | return out 65 | 66 | def train(self, mode=True): 67 | super(ResLayer, self).train(mode) 68 | if self.norm_eval: 69 | for m in self.modules(): 70 | if isinstance(m, nn.BatchNorm2d): 71 | m.eval() 72 | -------------------------------------------------------------------------------- /mmdet/models/track_heads/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .track_head import TrackHead 3 | 4 | __all__ = ['TrackHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule, build_conv_layer 2 | from .conv_ws import ConvWS2d, conv_ws_2d 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init, 6 | uniform_init, xavier_init) 7 | from .fpn_utils import merge_fpn 8 | 9 | __all__ = [ 10 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 11 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 12 | 'kaiming_init', 'bias_init_with_prob', 'Scale', 'merge_fpn' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding,
dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/fpn_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def merge_fpn(x, average=True): 5 | max_size = x[0].shape 6 | merged_fpn = [] 7 | for i, _ in enumerate(x): 8 | merged_fpn.append(F.interpolate(x[i], max_size[-2:])) 9 | if average: 10 | return torch.stack(merged_fpn).mean(dim=0) 11 | else: 12 | concat = torch.stack(merged_fpn) 13 | return concat.permute(1,0,2,3,4).reshape(concat.shape[1], -1, *concat.shape[-2:]) 14 | 15 | -------------------------------------------------------------------------------- /mmdet/models/utils/functional.py: -------------------------------------------------------------------------------- 1 | from . import functions 2 | 3 | 4 | def aggregation(input, weight, kernel_size=3, stride=1, padding=0, dilation=1, pad_mode=1): 5 | assert input.shape[0] == weight.shape[0] and (input.shape[1] % weight.shape[1] == 0) and pad_mode in [0, 1] 6 | if input.is_cuda: 7 | if pad_mode == 0: 8 | out = functions.aggregation_zeropad(input, weight, kernel_size, stride, padding, dilation) 9 | elif pad_mode == 1: 10 | out = functions.aggregation_refpad(input, weight, kernel_size, stride, padding, dilation) 11 | else: 12 | raise NotImplementedError 13 | return out 14 | -------------------------------------------------------------------------------- /mmdet/models/utils/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .aggregation_zeropad import * 2 | from .aggregation_refpad import * 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /mmdet/models/utils/functions/utils.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from string import Template 3 | import cupy 4 | import torch 5 | 6 | 7 | Stream = namedtuple('Stream', ['ptr']) 8 | 9 | 10 | def Dtype(t): 11 | if isinstance(t, torch.cuda.FloatTensor): 12 | return 'float' 13 | elif isinstance(t, torch.cuda.DoubleTensor): 14 | return 'double' 15 | 16 | 17 | @cupy.util.memoize(for_each_device=True) 18 | def load_kernel(kernel_name, code, **kwargs): 19 | code = Template(code).substitute(**kwargs) 20 | kernel_code = cupy.cuda.compile_with_cache(code) 21 | return kernel_code.get_function(kernel_name) 22 | -------------------------------------------------------------------------------- /mmdet/models/utils/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .aggregation import * 2 | -------------------------------------------------------------------------------- /mmdet/models/utils/modules/aggregation.py: -------------------------------------------------------------------------------- 1 | from 
torch import nn 2 | from torch.nn.modules.utils import _pair 3 | 4 | from .. import functional as F 5 | 6 | 7 | class Aggregation(nn.Module): 8 | 9 | def __init__(self, kernel_size, stride, padding, dilation, pad_mode): 10 | super(Aggregation, self).__init__() 11 | self.kernel_size = _pair(kernel_size) 12 | self.stride = _pair(stride) 13 | self.padding = _pair(padding) 14 | self.dilation = _pair(dilation) 15 | self.pad_mode = pad_mode 16 | 17 | def forward(self, input, weight): 18 | return F.aggregation(input, weight, self.kernel_size, self.stride, self.padding, self.dilation, self.pad_mode) 19 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether to stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended to the norm abbreviation to 22 | create the layer name. 23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """ 7 | A learnable scale parameter 8 | """ 9 | 10 | def __init__(self, scale=1.0): 11 | super(Scale, self).__init__() 12 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 13 | 14 | def forward(self, x): 15 | return x * self.scale 16 | -------------------------------------------------------------------------------- /mmdet/models/utils/sta_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .modules import Aggregation 6 | 7 | def conv1x1(in_planes, out_planes, stride=1): 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 9 | 10 | 11 | def position(H, W, is_cuda=True): 12 | if is_cuda: 13 | loc_w =
torch.linspace(-1.0, 1.0, W).cuda().unsqueeze(0).repeat(H, 1) 14 | loc_h = torch.linspace(-1.0, 1.0, H).cuda().unsqueeze(1).repeat(1, W) 15 | else: 16 | loc_w = torch.linspace(-1.0, 1.0, W).unsqueeze(0).repeat(H, 1) 17 | loc_h = torch.linspace(-1.0, 1.0, H).unsqueeze(1).repeat(1, W) 18 | loc = torch.cat([loc_w.unsqueeze(0), loc_h.unsqueeze(0)], 0).unsqueeze(0) 19 | return loc 20 | 21 | 22 | class SAM(nn.Module): 23 | def __init__(self, sa_type, in_planes, rel_planes, out_planes, share_planes, kernel_size=3, stride=1, dilation=1): 24 | super(SAM, self).__init__() 25 | self.sa_type, self.kernel_size, self.stride = sa_type, kernel_size, stride 26 | self.conv1 = nn.Conv2d(in_planes, rel_planes, kernel_size=1) 27 | self.conv2 = nn.Conv2d(in_planes, rel_planes, kernel_size=1) 28 | self.conv3 = nn.Conv2d(in_planes, out_planes, kernel_size=1) 29 | 30 | self.conv_w = nn.Sequential(nn.BatchNorm2d(rel_planes * (pow(kernel_size, 2) + 1)), nn.ReLU(inplace=True), 31 | nn.Conv2d(rel_planes * (pow(kernel_size, 2) + 1), out_planes // share_planes, kernel_size=1, bias=False), 32 | nn.BatchNorm2d(out_planes // share_planes), nn.ReLU(inplace=True), 33 | nn.Conv2d(out_planes // share_planes, pow(kernel_size, 2) * out_planes // share_planes, kernel_size=1)) 34 | self.unfold_j = nn.Unfold(kernel_size=kernel_size, dilation=dilation, padding=0, stride=stride) 35 | self.pad = nn.ReflectionPad2d(kernel_size // 2) 36 | 37 | assert self.stride == 1, 'stride > 1 not implemented' 38 | self.aggregation = Aggregation(kernel_size, stride, (dilation * (kernel_size - 1) + 1) // 2, dilation, pad_mode=1) 39 | 40 | def forward(self, x_ref, x_current): 41 | 42 | x1, x2, x3 = self.conv1(x_current), self.conv2(x_ref), self.conv3(x_current) 43 | x1 = x1.view(x_ref.shape[0], -1, 1, x_ref.shape[2]*x_ref.shape[3])  # queries flattened to (B, rel_planes, 1, H*W) 44 | x2 = self.unfold_j(self.pad(x2)).view(x_ref.shape[0], -1, 1, x1.shape[-1])  # keys unfolded over each kernel neighborhood 45 | # Refer to equation 5: R(i) is the 7x7 footprint, delta is concatenation, gamma is conv_w. 46 | w = self.conv_w(torch.cat([x1, x2], 1)).view(x_ref.shape[0], -1, pow(self.kernel_size, 2), x1.shape[-1]) 47 | x = self.aggregation(x3, w) 48 | return x 49 | 50 | 51 | class STABottleneck(nn.Module): 52 | def __init__(self, sa_type, in_planes, rel_planes, out_planes, share_planes=8, kernel_size=3, stride=1): 53 | super(STABottleneck, self).__init__() 54 | self.sam = SAM(sa_type=sa_type, in_planes=in_planes, rel_planes=rel_planes, 55 | out_planes=out_planes, share_planes=share_planes, kernel_size=kernel_size, 56 | stride=stride) 57 | self.stride = stride 58 | 59 | def forward_single(self, x_ref, x_current): 60 | out = self.sam(x_ref, x_current) 61 | return out 62 | 63 | def forward(self, x_ref, x_current): 64 | out = [] 65 | for xr, xc in zip(x_ref, x_current): 66 | out.append(self.forward_single(xr, xc)) 67 | return out 68 | 69 | 70 |
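A hedged, CUDA-only usage sketch for STABottleneck above (it runs the cupy aggregation kernels, so a GPU build is assumed; the plane sizes are illustrative):

import torch
from mmdet.models.utils.sta_module import STABottleneck

sta = STABottleneck(sa_type=0, in_planes=256, rel_planes=64, out_planes=256).cuda()
ref = [torch.randn(1, 256, 32, 32, device='cuda')]  # one pyramid level from the reference frame
cur = [torch.randn(1, 256, 32, 32, device='cuda')]  # matching level from the current frame
out = sta(ref, cur)  # list with one (1, 256, 32, 32) tensor; spatial size is preserved

-------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias') and module.bias is not None: 18 |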
nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias') and module.bias is not None: 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformConv, 4 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, 5 | deform_conv, deform_roi_pooling, modulated_deform_conv) 6 | from .masked_conv import MaskedConv2d 7 | from .nms import nms, soft_nms 8 | from .roi_align import RoIAlign, roi_align 9 | from .roi_pool import RoIPool, roi_pool 10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 11 | from .utils import get_compiler_version, get_compiling_cuda_version 12 | 13 | __all__ = [ 14 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 15 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 16 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 17 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 18 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 19 | 'MaskedConv2d', 'ContextBlock', 'get_compiler_version', 20 | 'get_compiling_cuda_version' 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 |
const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0); 62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 |
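Complementing the gradcheck script above, a hedged usage sketch for the RoIAlign module (defined in roi_align.py just below); rois follow the (batch_idx, x1, y1, x2, y2) layout the script constructs, and all shapes are illustrative:

import torch
from mmdet.ops import RoIAlign

feats = torch.randn(2, 256, 64, 64, device='cuda')  # assumed 8x-downsampled feature map
rois = torch.tensor([[0., 4., 4., 100., 120.],
                     [1., 40., 8., 200., 160.]], device='cuda')  # boxes in image coordinates
align = RoIAlign(out_size=7, spatial_scale=1.0 / 8, sample_num=2)
pooled = align(feats, rois)  # (2, 256, 7, 7): one aligned crop per roi

-------------------------------------------------------------------------------- /mmdet/ops/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from .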
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = _pair(out_size) 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, self.out_size, 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <THC/THC.h> 4 | 5 | #include <cmath> 6 | #include <vector> 7 | 8 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 9 | const float spatial_scale, const int sample_num, 10 | const int channels, const int height, 11 | const int width, const int num_rois, 12 | const int pooled_height, const int pooled_width, 13 | at::Tensor output); 14 | 15 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 16 | const float spatial_scale, const int sample_num, 17 | const int channels, const int height, 18 | const int width, const int num_rois, 19 | const int pooled_height, const int pooled_width, 20 | at::Tensor bottom_grad); 21 | 22 | #define CHECK_CUDA(x)
AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 23 | #define CHECK_CONTIGUOUS(x) \ 24 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 25 | #define CHECK_INPUT(x) \ 26 | CHECK_CUDA(x); \ 27 | CHECK_CONTIGUOUS(x) 28 | 29 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 30 | int pooled_height, int pooled_width, 31 | float spatial_scale, int sample_num, 32 | at::Tensor output) { 33 | CHECK_INPUT(features); 34 | CHECK_INPUT(rois); 35 | CHECK_INPUT(output); 36 | 37 | // Number of ROIs 38 | int num_rois = rois.size(0); 39 | int size_rois = rois.size(1); 40 | 41 | if (size_rois != 5) { 42 | printf("wrong roi size\n"); 43 | return 0; 44 | } 45 | 46 | int num_channels = features.size(1); 47 | int data_height = features.size(2); 48 | int data_width = features.size(3); 49 | 50 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 51 | num_channels, data_height, data_width, num_rois, 52 | pooled_height, pooled_width, output); 53 | 54 | return 1; 55 | } 56 | 57 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 58 | int pooled_height, int pooled_width, 59 | float spatial_scale, int sample_num, 60 | at::Tensor bottom_grad) { 61 | CHECK_INPUT(top_grad); 62 | CHECK_INPUT(rois); 63 | CHECK_INPUT(bottom_grad); 64 | 65 | // Number of ROIs 66 | int num_rois = rois.size(0); 67 | int size_rois = rois.size(1); 68 | if (size_rois != 5) { 69 | printf("wrong roi size\n"); 70 | return 0; 71 | } 72 | 73 | int num_channels = bottom_grad.size(1); 74 | int data_height = bottom_grad.size(2); 75 | int data_width = bottom_grad.size(3); 76 | 77 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 78 | num_channels, data_height, data_width, num_rois, 79 | pooled_height, pooled_width, bottom_grad); 80 | 81 | return 1; 82 | } 83 | 84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 85 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 86 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 87 | } 88 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 |
CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None, None 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format( 53 | self.gamma, self.alpha) 54 | return tmpstr 55 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 |
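A hedged, CUDA-only usage sketch for the SigmoidFocalLoss module defined in sigmoid_focal_loss.py above; the label convention (0 = background, 1..num_classes = foreground) is an assumption about the CUDA kernel's indexing, worth verifying against your build:

import torch
from mmdet.ops import SigmoidFocalLoss

loss_fn = SigmoidFocalLoss(gamma=2.0, alpha=0.25)
logits = torch.randn(8, 80, device='cuda', requires_grad=True)  # (N, num_classes)
targets = torch.randint(0, 81, (8,), device='cuda')  # long labels; 0 assumed background
loss = loss_fn(logits, targets)  # per-element focal terms summed to a scalar
loss.backward()

-------------------------------------------------------------------------------- /mmdet/ops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from .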
import compiling_info 2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version 3 | 4 | # get_compiler_version = compiling_info.get_compiler_version 5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version 6 | 7 | __all__ = ['get_compiler_version', 'get_compiling_cuda_version'] 8 | -------------------------------------------------------------------------------- /mmdet/ops/utils/src/compiling_info.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp 3 | #include <cuda_runtime_api.h> 4 | #include <torch/extension.h> 5 | 6 | #ifdef WITH_CUDA 7 | int get_cudart_version() { return CUDART_VERSION; } 8 | #endif 9 | 10 | std::string get_compiling_cuda_version() { 11 | #ifdef WITH_CUDA 12 | std::ostringstream oss; 13 | 14 | // copied from 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 16 | auto printCudaStyleVersion = [&](int v) { 17 | oss << (v / 1000) << "." << (v / 10 % 100); 18 | if (v % 10 != 0) { 19 | oss << "." << (v % 10); 20 | } 21 | }; 22 | printCudaStyleVersion(get_cudart_version()); 23 | return oss.str(); 24 | #else 25 | return std::string("not available"); 26 | #endif 27 | } 28 | 29 | // similar to 30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 31 | std::string get_compiler_version() { 32 | std::ostringstream ss; 33 | #if defined(__GNUC__) 34 | #ifndef __clang__ 35 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 36 | #endif 37 | #endif 38 | 39 | #if defined(__clang_major__) 40 | { 41 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 42 | << __clang_patchlevel__; 43 | } 44 | #endif 45 | 46 | #if defined(_MSC_VER) 47 | { ss << "MSVC " << _MSC_FULL_VER; } 48 | #endif 49 | return ss.str(); 50 | } 51 | 52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 53 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 54 | m.def("get_compiling_cuda_version", &get_compiling_cuda_version, 55 | "get_compiling_cuda_version"); 56 | } 57 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .flops_counter import get_model_complexity_info 2 | from .logger import get_root_logger, print_log 3 | from .registry import Registry, build_from_cfg 4 | 5 | __all__ = [ 6 | 'Registry', 'build_from_cfg', 'get_model_complexity_info', 7 | 'get_root_logger', 'print_log' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.runner import get_dist_info 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get the root logger. 8 | 9 | The logger will be initialized if it has not been initialized. By default a 10 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 11 | also be added. The name of the root logger is the top-level package name, 12 | e.g., "mmdet". 13 | 14 | Args: 15 | log_file (str | None): The log filename. If specified, a FileHandler 16 | will be added to the root logger. 17 | log_level (int): The root logger level.
Note that only the process of 18 | rank 0 is affected, while other processes will set the level to 19 | "Error" and be silent most of the time. 20 | 21 | Returns: 22 | logging.Logger: The root logger. 23 | """ 24 | logger = logging.getLogger(__name__.split('.')[0]) # i.e., mmdet 25 | # if the logger has been initialized, just return it 26 | if logger.hasHandlers(): 27 | return logger 28 | 29 | format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 30 | logging.basicConfig(format=format_str, level=log_level) 31 | rank, _ = get_dist_info() 32 | if rank != 0: 33 | logger.setLevel('ERROR') 34 | elif log_file is not None: 35 | file_handler = logging.FileHandler(log_file, 'w') 36 | file_handler.setFormatter(logging.Formatter(format_str)) 37 | file_handler.setLevel(log_level) 38 | logger.addHandler(file_handler) 39 | 40 | return logger 41 | 42 | 43 | def print_log(msg, logger=None, level=logging.INFO): 44 | """Print a log message. 45 | 46 | Args: 47 | msg (str): The message to be logged. 48 | logger (logging.Logger | str | None): The logger to be used. Some 49 | special loggers are: 50 | - "root": the root logger obtained with `get_root_logger()`. 51 | - "silent": no message will be printed. 52 | - None: The `print()` method will be used to print log messages. 53 | level (int): Logging level. Only available when `logger` is a Logger 54 | object or "root". 55 | """ 56 | if logger is None: 57 | print(msg) 58 | elif logger == 'root': 59 | _logger = get_root_logger() 60 | _logger.log(level, msg) 61 | elif isinstance(logger, logging.Logger): 62 | logger.log(level, msg) 63 | elif logger != 'silent': 64 | raise TypeError( 65 | 'logger should be either a logging.Logger object, "root", ' 66 | '"silent" or None, but got {}'.format(logger)) 67 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of 18 | code suitable for async implementation. 
19 | 20 | """ 21 | if (not enabled) or not torch.cuda.is_available(): 22 | yield 23 | return 24 | stream = stream if stream else torch.cuda.current_stream() 25 | end_stream = end_stream if end_stream else stream 26 | start = torch.cuda.Event(enable_timing=True) 27 | end = torch.cuda.Event(enable_timing=True) 28 | stream.record_event(start) 29 | try: 30 | cpu_start = time.monotonic() 31 | yield 32 | finally: 33 | cpu_end = time.monotonic() 34 | end_stream.record_event(end) 35 | end.synchronize() 36 | cpu_time = (cpu_end - cpu_start) * 1000 37 | gpu_time = start.elapsed_time(end) 38 | msg = "{} {} cpu_time {:.2f} ms ".format(trace_name, name, 39 | cpu_time) 40 | msg += "gpu_time {:.2f} ms stream {}".format(gpu_time, stream) 41 | print(msg, end_stream)  # also echoes the stream used for the end event 42 | -------------------------------------------------------------------------------- /mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from functools import partial 3 | 4 | import mmcv 5 | 6 | 7 | class Registry(object): 8 | 9 | def __init__(self, name): 10 | self._name = name 11 | self._module_dict = dict() 12 | 13 | def __repr__(self): 14 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 15 | self._name, list(self._module_dict.keys())) 16 | return format_str 17 | 18 | @property 19 | def name(self): 20 | return self._name 21 | 22 | @property 23 | def module_dict(self): 24 | return self._module_dict 25 | 26 | def get(self, key): 27 | return self._module_dict.get(key, None) 28 | 29 | def _register_module(self, module_class, force=False): 30 | """Register a module. 31 | 32 | Args: 33 | module_class (type): Class to be registered. 34 | """ 35 | if not inspect.isclass(module_class): 36 | raise TypeError('module must be a class, but got {}'.format( 37 | type(module_class))) 38 | module_name = module_class.__name__ 39 | if not force and module_name in self._module_dict: 40 | raise KeyError('{} is already registered in {}'.format( 41 | module_name, self.name)) 42 | self._module_dict[module_name] = module_class 43 | 44 | def register_module(self, cls=None, force=False): 45 | if cls is None: 46 | return partial(self.register_module, force=force) 47 | self._register_module(cls, force=force) 48 | return cls 49 | 50 | 51 | def build_from_cfg(cfg, registry, default_args=None): 52 | """Build a module from config dict. 53 | 54 | Args: 55 | cfg (dict): Config dict. It should at least contain the key "type". 56 | registry (:obj:`Registry`): The registry to search the type from. 57 | default_args (dict, optional): Default initialization arguments. 58 | 59 | Returns: 60 | obj: The constructed object.
61 | """ 62 | assert isinstance(cfg, dict) and 'type' in cfg 63 | assert isinstance(default_args, dict) or default_args is None 64 | args = cfg.copy() 65 | obj_type = args.pop('type') 66 | if mmcv.is_str(obj_type): 67 | obj_cls = registry.get(obj_type) 68 | if obj_cls is None: 69 | raise KeyError('{} is not in the {} registry'.format( 70 | obj_type, registry.name)) 71 | elif inspect.isclass(obj_type): 72 | obj_cls = obj_type 73 | else: 74 | raise TypeError('type must be a str or valid type, but got {}'.format( 75 | type(obj_type))) 76 | if default_args is not None: 77 | for name, value in default_args.items(): 78 | args.setdefault(name, value) 79 | return obj_cls(**args) 80 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Sat Mar 20 14:30:26 2021 3 | 4 | __version__ = '1.0.0+da6b82a' 5 | short_version = '1.0.0' 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/runtime.txt 2 | -r requirements/optional.txt 3 | -r requirements/tests.txt 4 | -r requirements/build.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | cython 3 | numpy 4 | torch>=1.1 5 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | imagecorruptions 3 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmcv==0.2.16 3 | numpy 4 | scipy 5 | # need older pillow until torchvision is fixed 6 | Pillow<=6.2.2 7 | six 8 | terminaltables 9 | torch>=1.1 10 | torchvision 11 | cupy 12 | tensorboard 13 | GitPython==3.1.12 14 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | isort 5 | pytest 6 | pytest-cov 7 | pytest-runner 8 | xdoctest >= 0.10.0 9 | yapf 10 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 11 | kwarray 12 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tools/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # from .cityscapes_vps import Cityscapes 2 | from .cityscapes_vps import CityscapesVps 3 | # from .viper import Viper 4 | --------------------------------------------------------------------------------
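A hedged usage sketch for Registry and build_from_cfg from mmdet/utils/registry.py above; the DETECTORS registry name and FancyDetector class are illustrative, not from this repo:

from mmdet.utils import Registry, build_from_cfg

DETECTORS = Registry('detector')

@DETECTORS.register_module
class FancyDetector(object):
    def __init__(self, depth=50):
        self.depth = depth

# A config names the class via 'type'; remaining keys become constructor
# kwargs, and default_args fills in anything the config omits.
model = build_from_cfg(dict(type='FancyDetector'), DETECTORS, default_args=dict(depth=101))
assert model.depth == 101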