├── .gitignore ├── LICENSE ├── Motion_Dataset_Download.md ├── Motion_Dataset_Preview.md ├── README.md ├── configs ├── data │ ├── cscapes.py │ ├── cscapesvps.py │ ├── cscapesvps_motion.py │ ├── cscapesvps_motion_supp.py │ ├── cscapesvps_motion_supp_2048.py │ ├── cscapesvps_repeat.py │ ├── davis.py │ ├── idd.py │ ├── idd_annots.py │ ├── idd_supp.py │ ├── kittimots.py │ ├── kittimots_motion.py │ └── kittimots_motion_supp.py ├── experiments │ └── general.py ├── infer_cscapesvps.py ├── infer_cscapesvps_pq.py ├── infer_kittimots.py ├── misc │ ├── debug_classagnostic_loaders.py │ ├── visualise_mod_cscapesvps.py │ └── visualise_mod_kittimots.py └── models │ ├── backbone_1stream.py │ ├── backbone_2stream.py │ ├── backbone_2stream_tfstyle.py │ ├── bbox_head.py │ ├── ca_appearance_mahalanobis_head.py │ ├── ca_appearance_map.py │ ├── ca_motion_head.py │ ├── neck.py │ └── panoptic_head.py ├── images └── VCA_Teaser.png ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ ├── guided_anchor_target.py │ │ ├── point_generator.py │ │ └── point_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── atss_assigner.py │ │ │ ├── base_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ └── point_assigner.py │ │ ├── bbox_target.py │ │ ├── demodata.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── coco_utils.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ ├── matrix_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ ├── colormap.py │ │ ├── dist_utils.py │ │ ├── map.py │ │ ├── misc.py │ │ └── post_proc_utils.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── cityscapes.py │ ├── cityscapes_ps.py │ ├── cityscapes_vps.py │ ├── cityscapes_vps_segonly.py │ ├── coco.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── kittimots.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── motion_dataset.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── formating.py │ │ ├── instaboost.py │ │ ├── loading.py │ │ ├── test_aug.py │ │ └── transforms.py │ ├── registry.py │ ├── utils.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── metrics.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── atss_head.py │ │ ├── decoupled_solo_head.py │ │ ├── decoupled_solo_light_head.py │ │ ├── fcos_head.py │ │ ├── fovea_head.py │ │ ├── free_anchor_retina_head.py │ │ ├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── panoptic_head.py │ │ ├── reppoints_head.py │ │ ├── retina_head.py │ │ ├── retina_sepbn_head.py │ │ ├── rpn_head.py │ │ ├── solo_head.py │ │ ├── solov2_head.py │ │ ├── solov2_light_head.py │ │ └── ssd_head.py │ ├── 
backbones │ │ ├── __init__.py │ │ ├── hrnet.py │ │ ├── resnet.py │ │ ├── resnet_tfstyle.py │ │ ├── resnext.py │ │ └── ssd_vgg.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ ├── convfc_bbox_head.py │ │ └── double_bbox_head.py │ ├── builder.py │ ├── ca_heads │ │ ├── __init__.py │ │ ├── appearance_ca_abstract.py │ │ ├── appearance_ca_mahalanobis.py │ │ ├── appearance_ca_map.py │ │ └── compose_ca.py │ ├── detectors │ │ ├── __init__.py │ │ ├── atss.py │ │ ├── base.py │ │ ├── cascade_rcnn.py │ │ ├── double_head_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── fovea.py │ │ ├── grid_rcnn.py │ │ ├── htc.py │ │ ├── mask_rcnn.py │ │ ├── mask_scoring_rcnn.py │ │ ├── reppoints_detector.py │ │ ├── retinanet.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ ├── single_stage_ins.py │ │ ├── solo.py │ │ ├── solov2.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── fcn_mask_head.py │ │ ├── fused_semantic_head.py │ │ ├── grid_head.py │ │ ├── htc_mask_head.py │ │ ├── mask_feat_head.py │ │ └── maskiou_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── fpn.py │ │ ├── fpn_flo_warp.py │ │ ├── hrfpn.py │ │ └── nas_fpn.py │ ├── plugins │ │ ├── __init__.py │ │ ├── generalized_attention.py │ │ └── non_local.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ ├── track_heads │ │ ├── __init__.py │ │ └── track_head.py │ ├── tracker_inference.py │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── conv_ws.py │ │ ├── fpn_utils.py │ │ ├── functional.py │ │ ├── functions │ │ ├── __init__.py │ │ ├── aggregation_refpad.py │ │ ├── aggregation_zeropad.py │ │ └── utils.py │ │ ├── modules │ │ ├── __init__.py │ │ └── aggregation.py │ │ ├── norm.py │ │ ├── scale.py │ │ ├── sta_module.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── context_block.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ ├── deform_pool.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── masked_conv │ │ ├── __init__.py │ │ ├── masked_conv.py │ │ └── src │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ └── masked_conv2d_kernel.cu │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ ├── nms_kernel.cu │ │ │ ├── soft_nms_cpu.cpp │ │ │ └── soft_nms_cpu.pyx │ ├── roi_align │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_pool.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ ├── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ │ ├── sigmoid_focal_loss.cpp │ │ │ └── sigmoid_focal_loss_cuda.cu │ └── utils │ │ ├── __init__.py │ │ └── src │ │ └── compiling_info.cpp ├── utils │ ├── __init__.py │ ├── contextmanagers.py │ ├── flops_counter.py │ ├── logger.py │ ├── profiling.py │ ├── registry.py │ └── util_mixins.py └── version.py ├── requirements.txt ├── requirements ├── build.txt ├── optional.txt ├── runtime.txt └── tests.txt ├── setup.py ├── tests └── 
test_loader.py └── tools ├── __init__.py ├── dataset ├── __init__.py ├── base_dataset.py └── cityscapes_vps.py ├── test_eval_caq.py ├── test_eval_ipq.py ├── test_vis.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /Motion_Dataset_Download.md: -------------------------------------------------------------------------------- 1 | # Dataset Preparation 2 | 3 | We build our motion annotations on KITTI-MOTS [1] and Cityscapes-VPS [2]. 4 | 5 | ## KITTI 6 | 7 | * Images: Download the original [KITTI-MOTS Dataset](http://www.cvlibs.net/datasets/kitti/eval_instance_seg.php?benchmark=instanceSeg2015). 8 | * Flow: Download the precomputed [Flow](https://drive.google.com/file/d/1tIyRKO5o9imAF3huUo0s-R-ys4znly5t/view?usp=sharing). 9 | * Annotations: Download the [motion annotations](https://drive.google.com/drive/folders/1whMm0NMzkz77jQRHkQeNNQsuAGEVekqk?usp=sharing). 10 | * Construct the dataset folder with the following structure: 11 | ``` 12 | . 
13 | +-- Images 14 | +-- Flow 15 | +-- Flow_Suppressed 16 | +-- Annotations 17 | ``` 18 | 19 | ## Cityscapes 20 | * Download motion annotations for Cityscapes 21 | * Images: Download the original [Cityscapes-VPS](https://www.dropbox.com/s/ecem4kq0fdkver4/cityscapes-vps-dataset-1.0.zip?dl=0). Follow full instructions [here](https://github.com/mcahny/vps/blob/master/docs/DATASET.md). 22 | * Flow: Download the precomputed [Flow](https://drive.google.com/file/d/1HE4WTIW7HvjpQPU2wZ-eD6CVxmlAwigb/view?usp=sharing). 23 | * Annotations: Download the [motion annotations](https://drive.google.com/drive/folders/1whMm0NMzkz77jQRHkQeNNQsuAGEVekqk?usp=sharing). 24 | * Construct the dataset folder with the following structure: 25 | ``` 26 | . 27 | +-- train 28 | | +-- images 29 | | +-- flow 30 | | +-- flow_suppressed 31 | +-- val 32 | | +-- images 33 | | +-- flow 34 | | +-- flow_suppressed 35 | +-- annotations 36 | ``` 37 | 38 | # References 39 | 40 | [1] Voigtlaender, Paul, et al. "MOTS: Multi-object tracking and segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019. 41 | [2] Kim, Dahun, et al. "Video panoptic segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020. 42 | -------------------------------------------------------------------------------- /Motion_Dataset_Preview.md: -------------------------------------------------------------------------------- 1 | # Motion Segmentation Datasets Preview 2 | 3 | We build our motion annotations on KITTI-MOTS [1] and Cityscapes-VPS [2] by annotating each instance at the trajectory level. 4 | 5 | ## KITTI-MOTS 6 | 7 | Preview of the 20 sequences annotated for moving and static cars and pedestrians: 8 | 9 | * [sequence 1](https://youtu.be/Oc7foi7XTEM) 10 | * [sequence 2](https://youtu.be/Z_mParrlYpU) 11 | * [sequence 3](https://youtu.be/PWk0jKMP0B8) 12 | * [sequence 4](https://youtu.be/2gfuRXWkWmY) 13 | * [sequence 5](https://youtu.be/RjTyhaiFe7c) 14 | * [sequence 6](https://youtu.be/TWyId0iuUSY) 15 | * [sequence 7](https://youtu.be/ig08tI8x3g4) 16 | * [sequence 8](https://youtu.be/TN8jebMbuds) 17 | * [sequence 9](https://youtu.be/T_vu9qvxCjI) 18 | * [sequence 10](https://youtu.be/sQNbSSdT2Wg) 19 | * [sequence 11](https://youtu.be/gP3mHmP1dmw) 20 | * [sequence 12](https://youtu.be/COvfmAYXpFc) 21 | * [sequence 13](https://youtu.be/X4tLTSclGKg) 22 | * [sequence 14](https://youtu.be/Stjq2sgiu8I) 23 | * [sequence 15](https://youtu.be/Gh5YYdWv_ic) 24 | * [sequence 16](https://youtu.be/w9kpmHX8vNo) 25 | * [sequence 17](https://youtu.be/osjucd3pbNg) 26 | * [sequence 18](https://youtu.be/IN9vH29qu4U) 27 | * [sequence 19](https://youtu.be/koApyZS0dus) 28 | * [sequence 20](https://youtu.be/PJvTPKu9eGs) 29 | 30 | ## Cityscapes-VPS 31 | 32 | Preview of the 500 sequences, with 6 frames per sequence, annotated for moving and static objects: 33 | 34 | [preview](https://youtu.be/rTQ1OIqF6Dw) 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video Class Agnostic Segmentation 2 | \[[Method Paper]()\] \[[Benchmark Paper](https://arxiv.org/pdf/2103.11015.pdf)\] \[[Project](https://msiam.github.io/vca/)\] \[[Demo](https://www.youtube.com/watch?v=c9hMFHdTs6M)\] 3 | 4 | Official datasets and implementation from our paper "Video Class Agnostic Segmentation Benchmark in Autonomous Driving", presented in the Workshop on Autonomous Driving, CVPR 2021. 5 | 6 |
7 | ![VCA Teaser](images/VCA_Teaser.png) 8 | 
9 | 10 | 11 | # Installation 12 | This repo is tested with Python 3.6 and PyTorch 1.4. 13 | 14 | * Install Required Packages 15 | ``` 16 | pip install -r requirements.txt 17 | pip install "git+https://github.com/cocodataset/panopticapi.git" 18 | ``` 19 | 20 | * Set up mmdet 21 | ``` 22 | python setup.py develop 23 | ``` 24 | 25 | # Motion Segmentation Track 26 | ## Dataset Preparation 27 | 28 | * Follow the Dataset Preparation [Instructions](https://github.com/MSiam/video_class_agnostic_segmentation/blob/main/Motion_Dataset_Download.md). 29 | * Low-resolution view of the [full dataset](https://www.youtube.com/playlist?list=PL4jKsHbreeuBhEmzcL94JxWzVear79r5z) 30 | 31 | ## Inference 32 | 33 | * Download [Trained Weights](https://drive.google.com/file/d/16qEH0WoFVt0n6Ooi6zl4ymWKZYv1YVJ8/view?usp=sharing), trained on Cityscapes and KITTI-MOTS with ego-flow-suppressed optical flow 34 | 35 | * Modify the configs below according to your dataset path and Image/Annotation/Flow prefixes 36 | ``` 37 | configs/data/kittimots_motion_supp.py 38 | configs/data/cscapesvps_motion_supp.py 39 | ``` 40 | 41 | * Evaluate CAQ: 42 | ``` 43 | python tools/test_eval_caq.py CONFIG_FILE WEIGHTS_FILE 44 | ``` 45 | CONFIG_FILE: configs/infer_kittimots.py or configs/infer_cscapesvps.py 46 | 47 | 48 | * Qualitative Results: 49 | ``` 50 | python tools/test_vis.py CONFIG_FILE WEIGHTS_FILE --vis_unknown --save_dir OUTS_DIR 51 | ``` 52 | 53 | * Evaluate Image Panoptic Quality. Note: evaluation is performed on 1024x2048 images 54 | ``` 55 | python tools/test_eval_ipq.py configs/infer_cscapesvps_pq.py WEIGHTS_FILE --out PKL_FILE 56 | ``` 57 | 58 | ## Training 59 | 60 | Coming Soon ... 61 | 62 | # Open-set Segmentation Track 63 | 64 | Coming soon ... 65 | 66 | # Acknowledgements 67 | 68 | The dataset and repository rely on these sources: 69 | 70 | * Voigtlaender, Paul, et al. "MOTS: Multi-object tracking and segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2019. 71 | * Kim, Dahun, et al. "Video panoptic segmentation." Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2020. 72 | * Wang, Xinlong, et al. "SOLO: Segmenting objects by locations." European Conference on Computer Vision. Springer, Cham, 2020. 73 | * This repository is built upon the [SOLO Code](https://github.com/WXinlong/SOLO) 74 | 75 | # Citation 76 | 77 | ``` 78 | @article{siam2021video, 79 | title={Video Class Agnostic Segmentation Benchmark for Autonomous Driving}, 80 | author={Mennatullah Siam and Alex Kendall and Martin Jagersand}, 81 | year={2021}, 82 | eprint={2103.11015}, 83 | archivePrefix={arXiv}, 84 | primaryClass={cs.CV} 85 | } 86 | ``` 87 | 88 | # Contact 89 | If you have any questions regarding the dataset or repository, please contact menna.seyam@gmail.com. 
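As a quick check that the installation and weights are wired up, the exported mmdet helpers can also be driven from Python. This is a hedged sketch, not a documented entry point of this repo: it assumes the `init_detector`/`inference_detector`/`show_result_ins` helpers listed in `mmdet/apis/__init__.py` keep the upstream SOLO signatures, and that the configured test pipeline handles the flow input; all paths below are placeholders.

```
# Hedged sketch: programmatic inference via the helpers exported in
# mmdet/apis/__init__.py. Paths are placeholders; signatures are assumed to
# match the upstream SOLO code this repository builds on.
from mmdet.apis import inference_detector, init_detector, show_result_ins

config_file = 'configs/infer_kittimots.py'
checkpoint_file = 'ca_motion_weights.pth'  # placeholder name for the downloaded weights

model = init_detector(config_file, checkpoint_file, device='cuda:0')
result = inference_detector(model, 'path/to/frame.png')  # placeholder image path
show_result_ins('path/to/frame.png', result, model.CLASSES,
                score_thr=0.25, out_file='frame_out.png')
```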
90 | -------------------------------------------------------------------------------- /configs/data/cscapes.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesDataset' 2 | data_root = 'data/cityscapes/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 8 | dict(type='Resize', img_scale=[(2048, 1024), (2048, 992), (2048, 960),(2048, 928), (2048, 896), (2048, 864)], 9 | multiscale_mode='value', 10 | keep_ratio=True), 11 | dict(type='RandomFlip', flip_ratio=0.5), 12 | dict(type='Normalize', **img_norm_cfg), 13 | dict(type='Pad', size_divisor=32), 14 | dict(type='DefaultFormatBundle'), 15 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 16 | ] 17 | test_pipeline = [ 18 | dict(type='LoadImageFromFile'), 19 | dict( 20 | type='MultiScaleFlipAug', 21 | img_scale=[(2048, 1024)], 22 | flip=False, 23 | transforms=[ 24 | dict(type='Resize', keep_ratio=True), 25 | dict(type='RandomFlip'), 26 | dict(type='Normalize', **img_norm_cfg), 27 | dict(type='Pad', size_divisor=32), 28 | dict(type='ImageToTensor', keys=['img']), 29 | dict(type='Collect', keys=['img']), 30 | ]) 31 | ] 32 | data = dict( 33 | imgs_per_gpu=2, 34 | workers_per_gpu=1, 35 | train=dict( 36 | type=dataset_type, 37 | ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_train.json', 38 | img_prefix=data_root + 'train/', 39 | pipeline=train_pipeline), 40 | val=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', 43 | img_prefix=data_root + 'val/', 44 | pipeline=test_pipeline), 45 | test=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 'annotations/instancesonly_filtered_gtFine_val.json', 48 | img_prefix=data_root + 'val/', 49 | pipeline=test_pipeline)) 50 | 51 | -------------------------------------------------------------------------------- /configs/data/cscapesvps.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesVPSDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadRefImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 8 | with_seg=True, with_pid=True, 9 | # Cityscapes specific class mapping 10 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 11 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 12 | 17:17, 18:18, -1:255, 255:255},), 13 | dict(type='Resize', img_scale=[(2048, 1024)], keep_ratio=True, 14 | multiscale_mode='value', ratio_range=(0.8, 1.5)), 15 | dict(type='RandomFlip', flip_ratio=0.5), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='RandomCrop', crop_size=(800, 1600)), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 22 | 'gt_obj_ids', 'gt_masks', 'gt_semantic_seg', 23 | 'gt_semantic_seg_Nx', 'ref_img', 'ref_bboxes', 24 | 'ref_labels', 'ref_obj_ids', 'ref_masks']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadRefImageFromFile'), 28 | 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=[(2048, 1024)], 32 | flip=False, 33 | transforms=[ 34 | 
dict(type='Resize', keep_ratio=True), 35 | dict(type='RandomFlip'), 36 | dict(type='Normalize', **img_norm_cfg), 37 | dict(type='Pad', size_divisor=32), 38 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 39 | dict(type='Collect', keys=['img', 'ref_img']), 40 | ]) 41 | ] 42 | data = dict( 43 | imgs_per_gpu=1, 44 | workers_per_gpu=0, 45 | train=dict( 46 | type=dataset_type, 47 | ann_file=data_root + 48 | 'instances_train_city_vps_rle.json', 49 | img_prefix=data_root + 'train/img/', 50 | ref_prefix=data_root + 'train/img/', 51 | seg_prefix=data_root + 'train/labelmap/', 52 | pipeline=train_pipeline, 53 | ref_ann_file=data_root + 54 | 'instances_train_city_vps_rle.json', 55 | offsets=[-1,+1]), 56 | val=dict( 57 | type=dataset_type, 58 | ann_file=data_root + 59 | 'instances_val_city_vps_rle.json', 60 | img_prefix=data_root + 'val/img/', 61 | pipeline=test_pipeline), 62 | test=dict( 63 | type=dataset_type, 64 | ann_file=data_root + 65 | #'im_all_info_val_city_vps.json', 66 | 'instances_val_city_vps_rle.json', 67 | #img_prefix=data_root + 'val/img_all/', 68 | img_prefix=data_root + 'val/img/', 69 | ref_prefix=data_root + 'val/img/', 70 | seg_prefix=data_root + 'val/labelmap/', 71 | #nframes_span_test=30, 72 | nframes_span_test=6, 73 | pipeline=test_pipeline)) 74 | -------------------------------------------------------------------------------- /configs/data/cscapesvps_motion.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | 7 | train_pipeline = [ 8 | dict(type='LoadImageFromFile'), 9 | dict(type='LoadFlowFromFile'), 10 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 11 | dict(type='Resize', img_scale=[(1024, 512), (1024, 496), (1024, 480),(1024, 464), (1024, 448), (1024, 432)], 12 | multiscale_mode='value', 13 | keep_ratio=True), 14 | dict(type='RandomFlip', flip_ratio=0.5), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='RandomCrop', crop_size=(300, 800)), 17 | dict(type='Pad', size_divisor=32), 18 | dict(type='DefaultFormatBundle'), 19 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadImageFromFile'), 23 | dict(type='LoadFlowFromFile'), 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=(1024, 512), 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='Pad', size_divisor=32), 33 | dict(type='ImageToTensor', keys=['img', 'flow']), 34 | dict(type='Collect', keys=['img', 'flow']), 35 | ]) 36 | ] 37 | data = dict( 38 | imgs_per_gpu=2, 39 | workers_per_gpu=0, 40 | train=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'CityscapesVPS_MOSeg_train_Annotations.json', 43 | img_prefix=data_root + 'train/img/', 44 | flow_prefix=data_root + 'train/flow/', 45 | pipeline=train_pipeline), 46 | val=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 49 | img_prefix=data_root + 'val/img/', 50 | flow_prefix=data_root + 'val/flow/', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 55 | img_prefix=data_root + 'val/img/', 56 | flow_prefix=data_root + 'val/flow/', 57 | pipeline=test_pipeline)) 58 | 59 | 
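The config above and its siblings in configs/data/ are plain mmdetection-1.x-style Python files; the cscapesvps_motion_supp.py variant that follows overrides only the `data` dict (switching `flow/` to `flow_suppressed/`) via a star-import of this file. A minimal sketch of loading and building one of these datasets follows, under assumptions: it is run from the repository root, the data folders from Motion_Dataset_Download.md exist on disk, and `build_dataset` keeps the mmdetection 1.x behavior of mmdet/datasets/builder.py shown later in this dump.

```
# Minimal sketch (assumptions above): load a dataset config and build the
# train split; type='MotionDataset' is resolved through the dataset registry.
from mmcv import Config
from mmdet.datasets import build_dataset

cfg = Config.fromfile('configs/data/cscapesvps_motion_supp.py')
print(cfg.data.train.ann_file)     # data/cityscapes_vps/CityscapesVPS_MOSeg_train_Annotations.json
print(cfg.data.train.flow_prefix)  # data/cityscapes_vps/train/flow_suppressed/

dataset = build_dataset(cfg.data.train)
print(len(dataset))
```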
-------------------------------------------------------------------------------- /configs/data/cscapesvps_motion_supp.py: -------------------------------------------------------------------------------- 1 | from configs.data.cscapesvps_motion import * 2 | 3 | data = dict( 4 | imgs_per_gpu=2, 5 | workers_per_gpu=0, 6 | train=dict( 7 | type=dataset_type, 8 | ann_file=data_root + 'CityscapesVPS_MOSeg_train_Annotations.json', 9 | img_prefix=data_root + 'train/img/', 10 | flow_prefix=data_root + 'train/flow_suppressed/', 11 | pipeline=train_pipeline), 12 | val=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 15 | img_prefix=data_root + 'val/img/', 16 | flow_prefix=data_root + 'val/flow_suppressed/', 17 | pipeline=test_pipeline), 18 | test=dict( 19 | type=dataset_type, 20 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 21 | img_prefix=data_root + 'val/img/', 22 | flow_prefix=data_root + 'val/flow_suppressed/', 23 | pipeline=test_pipeline)) 24 | 25 | -------------------------------------------------------------------------------- /configs/data/cscapesvps_motion_supp_2048.py: -------------------------------------------------------------------------------- 1 | from configs.data.cscapesvps_motion import * 2 | 3 | test_pipeline = [ 4 | dict(type='LoadImageFromFile'), 5 | dict(type='LoadFlowFromFile'), 6 | dict( 7 | type='MultiScaleFlipAug', 8 | img_scale=(2048, 1024), 9 | flip=False, 10 | transforms=[ 11 | dict(type='Resize', keep_ratio=True), 12 | dict(type='RandomFlip'), 13 | dict(type='Normalize', **img_norm_cfg), 14 | dict(type='Pad', size_divisor=32), 15 | dict(type='ImageToTensor', keys=['img', 'flow']), 16 | dict(type='Collect', keys=['img', 'flow']), 17 | ]) 18 | ] 19 | 20 | data = dict( 21 | imgs_per_gpu=2, 22 | workers_per_gpu=0, 23 | train=dict( 24 | type=dataset_type, 25 | ann_file=data_root + 'CityscapesVPS_MOSeg_train_Annotations.json', 26 | img_prefix=data_root + 'train/img/', 27 | flow_prefix=data_root + 'train/flow_suppressed/', 28 | pipeline=train_pipeline), 29 | val=dict( 30 | type=dataset_type, 31 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 32 | img_prefix=data_root + 'val/img/', 33 | flow_prefix=data_root + 'val/flow_suppressed/', 34 | pipeline=test_pipeline), 35 | test=dict( 36 | type=dataset_type, 37 | ann_file=data_root + 'CityscapesVPS_MOSeg_val_Annotations.json', 38 | img_prefix=data_root + 'val/img/', 39 | flow_prefix=data_root + 'val/flow_suppressed/', 40 | pipeline=test_pipeline)) 41 | 42 | -------------------------------------------------------------------------------- /configs/data/cscapesvps_repeat.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesVPSDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadRefImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 8 | with_seg=True, with_pid=True, 9 | # Cityscapes specific class mapping 10 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 11 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 12 | 17:17, 18:18, -1:255, 255:255},), 13 | dict(type='Resize', img_scale=[(2048, 1024)], keep_ratio=True, 14 | multiscale_mode='value', ratio_range=(0.8, 1.5)), 15 | dict(type='RandomFlip', flip_ratio=0.5), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='RandomCrop', crop_size=(800, 
1600)), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 20 | dict(type='DefaultFormatBundle'), 21 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 22 | 'gt_obj_ids', 'gt_masks', 'gt_semantic_seg', 23 | 'gt_semantic_seg_Nx', 'ref_img', 'ref_bboxes', 24 | 'ref_labels', 'ref_obj_ids', 'ref_masks']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadRefImageFromFile'), 28 | 29 | dict( 30 | type='MultiScaleFlipAug', 31 | img_scale=[(2048, 1024)], 32 | flip=False, 33 | transforms=[ 34 | dict(type='Resize', keep_ratio=True), 35 | dict(type='RandomFlip'), 36 | dict(type='Normalize', **img_norm_cfg), 37 | dict(type='Pad', size_divisor=32), 38 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 39 | dict(type='Collect', keys=['img', 'ref_img']), 40 | ]) 41 | ] 42 | data = dict( 43 | imgs_per_gpu=1, 44 | workers_per_gpu=0, 45 | train=dict( 46 | type='RepeatDataset', 47 | times=8, 48 | dataset=dict( 49 | type=dataset_type, 50 | ann_file=data_root + 51 | 'instances_train_city_vps_rle.json', 52 | img_prefix=data_root + 'train/img/', 53 | ref_prefix=data_root + 'train/img/', 54 | seg_prefix=data_root + 'train/labelmap/', 55 | pipeline=train_pipeline, 56 | ref_ann_file=data_root + 57 | 'instances_train_city_vps_rle.json', 58 | offsets=[-1,+1])), 59 | val=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 62 | 'instances_val_city_vps_rle.json', 63 | img_prefix=data_root + 'val/img/', 64 | pipeline=test_pipeline), 65 | test=dict( 66 | type=dataset_type, 67 | ann_file=data_root + 68 | #'im_all_info_val_city_vps.json', 69 | 'instances_val_city_vps_rle.json', 70 | #img_prefix=data_root + 'val/img_all/', 71 | img_prefix=data_root + 'val/img/', 72 | ref_prefix=data_root + 'val/img/', 73 | #nframes_span_test=30, 74 | nframes_span_test=6, 75 | pipeline=test_pipeline)) 76 | -------------------------------------------------------------------------------- /configs/data/davis.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/DAVIS/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadFlowFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=[(854, 480), (854, 448), (854, 416), 10 | (854, 384), (854, 352), (854, 320)], 11 | multiscale_mode='value', 12 | keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='LoadFlowFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(854, 480), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img', 'flow']), 32 | dict(type='Collect', keys=['img', 'flow']), 33 | ]) 34 | ] 35 | data = dict( 36 | imgs_per_gpu=2, 37 | workers_per_gpu=0, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'Annotations_json/DAVIS_Unsupervised_train_Annotations.json', 41 | img_prefix=data_root + 'JPEGImages_480/', 42 | flow_prefix=data_root + 
'OpticalFlow_480/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'Annotations_json/DAVIS_Unsupervised_val_Annotations.json', 47 | img_prefix=data_root + 'JPEGImages_480/', 48 | flow_prefix=data_root + 'OpticalFlow_480/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'Annotations_json/DAVIS_Unsupervised_val_Annotations.json', 53 | img_prefix=data_root + 'JPEGImages_480/', 54 | flow_prefix=data_root + 'OpticalFlow_480/', 55 | pipeline=test_pipeline)) 56 | 57 | -------------------------------------------------------------------------------- /configs/data/idd.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/debug_ca_idd_data/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | 6 | test_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadFlowFromFile'), 9 | 10 | dict( 11 | type='MultiScaleFlipAug', 12 | img_scale=[(1280, 720)], 13 | flip=False, 14 | transforms=[ 15 | dict(type='Resize', keep_ratio=True), 16 | dict(type='RandomFlip'), 17 | dict(type='Normalize', **img_norm_cfg), 18 | dict(type='Pad', size_divisor=32), 19 | dict(type='ImageToTensor', keys=['img', 'flow']), 20 | dict(type='Collect', keys=['img', 'flow']), 21 | ]) 22 | ] 23 | data = dict( 24 | test=dict( 25 | type=dataset_type, 26 | ann_file=data_root + 27 | 'IDD_Test.json', 28 | img_prefix=data_root+'/JPEGImages_480/', 29 | flow_prefix=data_root+'OpticalFlow_480_0/', 30 | pipeline=test_pipeline)) 31 | 32 | -------------------------------------------------------------------------------- /configs/data/idd_annots.py: -------------------------------------------------------------------------------- 1 | from configs.data.idd import * 2 | 3 | data = dict( 4 | test=dict( 5 | type=dataset_type, 6 | ann_file=data_root + 7 | 'IDD_Annotated_Annotations.json', 8 | img_prefix=data_root+'/images/', 9 | flow_prefix=data_root+'/flow/', 10 | pipeline=test_pipeline)) 11 | 12 | -------------------------------------------------------------------------------- /configs/data/idd_supp.py: -------------------------------------------------------------------------------- 1 | from configs.data.idd import * 2 | 3 | data = dict( 4 | test=dict( 5 | type=dataset_type, 6 | ann_file=data_root + 7 | 'IDD_Test.json', 8 | img_prefix=data_root+'/images/', 9 | flow_prefix=data_root+'flow_suppressed/', 10 | pipeline=test_pipeline)) 11 | 12 | -------------------------------------------------------------------------------- /configs/data/kittimots.py: -------------------------------------------------------------------------------- 1 | 2 | dataset_type = 'KITTIMOTSDataset' 3 | data_root = 'data/kitti_mots/' 4 | img_norm_cfg = dict( 5 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 6 | train_pipeline = [ 7 | dict(type='LoadImageFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=[(1242, 375), (1242, 343), (1242, 311), 10 | (1242, 279), (1242, 247), (1242, 215)], 11 | multiscale_mode='value', 12 | keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | 
dict( 22 | type='MultiScaleFlipAug', 23 | img_scale=(1242, 375), 24 | flip=False, 25 | transforms=[ 26 | dict(type='Resize', keep_ratio=True), 27 | dict(type='RandomFlip'), 28 | dict(type='Normalize', **img_norm_cfg), 29 | dict(type='Pad', size_divisor=32), 30 | dict(type='ImageToTensor', keys=['img']), 31 | dict(type='Collect', keys=['img']), 32 | ]) 33 | ] 34 | data = dict( 35 | imgs_per_gpu=1, 36 | workers_per_gpu=0, 37 | train=dict( 38 | type=dataset_type, 39 | ann_file=data_root + 'annotations/KITTIMOTS_train_Annotations.json', 40 | img_prefix=data_root + 'images/', 41 | pipeline=train_pipeline), 42 | val=dict( 43 | type=dataset_type, 44 | ann_file=data_root + 'annotations/KITTIMOTS_val_Annotations.json', 45 | img_prefix=data_root + 'images/', 46 | pipeline=test_pipeline), 47 | test=dict( 48 | type=dataset_type, 49 | ann_file=data_root + 'annotations/KITTIMOTS_val_Annotations.json', 50 | img_prefix=data_root + 'images/', 51 | pipeline=test_pipeline)) 52 | -------------------------------------------------------------------------------- /configs/data/kittimots_motion.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/kittimots_moseg/' 3 | img_norm_cfg = dict( 4 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadFlowFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True), 9 | dict(type='Resize', img_scale=[(1242, 375), (1242, 343), (1242, 311), 10 | (1242, 279), (1242, 247), (1242, 215)], 11 | multiscale_mode='value', 12 | keep_ratio=True), 13 | dict(type='RandomFlip', flip_ratio=0.5), 14 | dict(type='Normalize', **img_norm_cfg), 15 | dict(type='Pad', size_divisor=32), 16 | dict(type='DefaultFormatBundle'), 17 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 18 | ] 19 | test_pipeline = [ 20 | dict(type='LoadImageFromFile'), 21 | dict(type='LoadFlowFromFile'), 22 | dict( 23 | type='MultiScaleFlipAug', 24 | img_scale=(1242, 375), 25 | flip=False, 26 | transforms=[ 27 | dict(type='Resize', keep_ratio=True), 28 | dict(type='RandomFlip'), 29 | dict(type='Normalize', **img_norm_cfg), 30 | dict(type='Pad', size_divisor=32), 31 | dict(type='ImageToTensor', keys=['img', 'flow']), 32 | dict(type='Collect', keys=['img', 'flow']), 33 | ]) 34 | ] 35 | data = dict( 36 | imgs_per_gpu=2, 37 | workers_per_gpu=0, 38 | train=dict( 39 | type=dataset_type, 40 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_train.json', 41 | img_prefix=data_root + 'images/', 42 | flow_prefix=data_root + 'flow/', 43 | pipeline=train_pipeline), 44 | val=dict( 45 | type=dataset_type, 46 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 47 | img_prefix=data_root + 'images/', 48 | flow_prefix=data_root + 'flow/', 49 | pipeline=test_pipeline), 50 | test=dict( 51 | type=dataset_type, 52 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 53 | img_prefix=data_root + 'images/', 54 | flow_prefix=data_root + 'flow/', 55 | pipeline=test_pipeline)) 56 | -------------------------------------------------------------------------------- /configs/data/kittimots_motion_supp.py: -------------------------------------------------------------------------------- 1 | from configs.data.kittimots_motion import * 2 | 3 | data = dict( 4 | imgs_per_gpu=2, 5 | workers_per_gpu=0, 6 | train=dict( 7 | type=dataset_type, 8 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_train.json', 
9 | img_prefix=data_root + 'images/', 10 | flow_prefix=data_root + 'flow_suppressed/', 11 | pipeline=train_pipeline), 12 | val=dict( 13 | type=dataset_type, 14 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 15 | img_prefix=data_root + 'images/', 16 | flow_prefix=data_root + 'flow_suppressed/', 17 | pipeline=test_pipeline), 18 | test=dict( 19 | type=dataset_type, 20 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val.json', 21 | img_prefix=data_root + 'images/', 22 | flow_prefix=data_root + 'flow_suppressed/', 23 | pipeline=test_pipeline)) 24 | -------------------------------------------------------------------------------- /configs/experiments/general.py: -------------------------------------------------------------------------------- 1 | train_cfg = dict(endtoend=False, train_tracker=False, train_ca=False, 2 | train_inst_seg=False, train_panoptic=False) 3 | 4 | test_cfg = dict( 5 | nms_pre=500, 6 | score_thr=0.1, 7 | mask_thr=0.5, 8 | update_thr=0.05, 9 | kernel='gaussian', # gaussian/linear 10 | sigma=2.0, 11 | max_per_img=100) 12 | 13 | # yapf:disable 14 | log_config = dict( 15 | interval=50, 16 | hooks=[ 17 | dict(type='TextLoggerHook'), 18 | dict(type='TensorboardLoggerHook') 19 | ]) 20 | device_ids = range(4) 21 | dist_params = dict(backend='nccl') 22 | log_level = 'INFO' 23 | load_from = None 24 | resume_from = None 25 | workflow = [('train', 1)] 26 | -------------------------------------------------------------------------------- /configs/infer_cscapesvps.py: -------------------------------------------------------------------------------- 1 | from configs.models.backbone_2stream import backbone 2 | from configs.models.neck import neck 3 | from configs.models.bbox_head import set_num_classes 4 | from configs.models.ca_motion_head import set_params 5 | from configs.models.panoptic_head import panoptic_head 6 | from configs.experiments.general import * 7 | from configs.data.cscapesvps_motion_supp import data as cscapesvps_data 8 | from configs.data.kittimots_motion_supp import data as kittimots_data 9 | from configs.data.cscapesvps_motion_supp import * 10 | 11 | 12 | # model settings 13 | bbox_head = set_num_classes(num_classes=9) 14 | ca_head = set_params(num_classes=3) 15 | 16 | # model settings 17 | model = dict( 18 | type='SOLO', 19 | pretrained='torchvision://resnet50', 20 | backbone=backbone, 21 | neck=neck, 22 | panoptic_head=panoptic_head, 23 | bbox_head=bbox_head, 24 | ca_head=ca_head, 25 | ) 26 | 27 | data = dict( 28 | imgs_per_gpu=2, 29 | workers_per_gpu=0, 30 | train=[kittimots_data['train'], cscapesvps_data['train']], 31 | val=cscapesvps_data['val'], 32 | test=cscapesvps_data['test'],) 33 | 34 | # optimizer 35 | total_epochs = 15 36 | optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) 37 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 38 | 39 | # learning policy 40 | lr_config = dict( 41 | policy='step', 42 | warmup='linear', 43 | warmup_iters=500, 44 | warmup_ratio=1.0 / 3, 45 | step=[6, 8]) 46 | checkpoint_config = dict(interval=5) 47 | 48 | # yapf:enable 49 | work_dir = './work_dirs/ca_motion/' 50 | pretrain_weights = './trained_models/panopticseg_cscapesvps.pth' 51 | ignore_clf = False 52 | same_nclasses = True 53 | freeze_vars={'backbone.appearance_stream':True, 'neck':True, 'bbox_head':True, 'panoptic_head':True} 54 | -------------------------------------------------------------------------------- /configs/infer_cscapesvps_pq.py: 
-------------------------------------------------------------------------------- 1 | from configs.models.backbone_2stream import backbone 2 | from configs.models.neck import neck 3 | from configs.models.bbox_head import set_num_classes 4 | from configs.models.ca_motion_head import set_params 5 | from configs.models.panoptic_head import panoptic_head 6 | from configs.experiments.general import * 7 | from configs.data.cscapesvps_motion_supp_2048 import data as cscapesvps_data 8 | from configs.data.kittimots_motion_supp import data as kittimots_data 9 | from configs.data.cscapesvps_motion_supp_2048 import * 10 | 11 | 12 | # model settings 13 | bbox_head = set_num_classes(num_classes=9) 14 | ca_head = set_params(num_classes=3) 15 | 16 | # model settings 17 | model = dict( 18 | type='SOLO', 19 | pretrained='torchvision://resnet50', 20 | backbone=backbone, 21 | neck=neck, 22 | panoptic_head=panoptic_head, 23 | bbox_head=bbox_head, 24 | ca_head=ca_head, 25 | ) 26 | 27 | data = dict( 28 | imgs_per_gpu=2, 29 | workers_per_gpu=0, 30 | train=[kittimots_data['train'], cscapesvps_data['train']], 31 | val=cscapesvps_data['val'], 32 | test=cscapesvps_data['test'],) 33 | 34 | # optimizer 35 | total_epochs = 15 36 | optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) 37 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 38 | 39 | # learning policy 40 | lr_config = dict( 41 | policy='step', 42 | warmup='linear', 43 | warmup_iters=500, 44 | warmup_ratio=1.0 / 3, 45 | step=[6, 8]) 46 | checkpoint_config = dict(interval=5) 47 | 48 | # yapf:enable 49 | work_dir = './work_dirs/ca_motion/' 50 | pretrain_weights = './trained_models/panopticseg_cscapesvps.pth' 51 | ignore_clf = False 52 | same_nclasses = True 53 | freeze_vars={'backbone.appearance_stream':True, 'neck':True, 'bbox_head':True, 'panoptic_head':True} 54 | -------------------------------------------------------------------------------- /configs/infer_kittimots.py: -------------------------------------------------------------------------------- 1 | from configs.models.backbone_2stream import backbone 2 | from configs.models.neck import neck 3 | from configs.models.bbox_head import set_num_classes 4 | from configs.models.ca_motion_head import set_params 5 | from configs.models.panoptic_head import panoptic_head 6 | from configs.experiments.general import * 7 | from configs.data.cscapesvps_motion_supp import data as cscapesvps_data 8 | from configs.data.kittimots_motion_supp import data as kittimots_data 9 | from configs.data.kittimots_motion_supp import * 10 | 11 | # model settings 12 | bbox_head = set_num_classes(num_classes=9) 13 | ca_head = set_params(num_classes=3) 14 | 15 | # model settings 16 | model = dict( 17 | type='SOLO', 18 | pretrained='torchvision://resnet50', 19 | backbone=backbone, 20 | neck=neck, 21 | panoptic_head=panoptic_head, 22 | bbox_head=bbox_head, 23 | ca_head=ca_head, 24 | max_nottrack=5 25 | ) 26 | 27 | data = dict( 28 | imgs_per_gpu=2, 29 | workers_per_gpu=0, 30 | train=[kittimots_data['train'], cscapesvps_data['train']], 31 | val=kittimots_data['val'], 32 | test=kittimots_data['test'],) 33 | 34 | # optimizer 35 | total_epochs = 15 36 | optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001) 37 | optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) 38 | 39 | # learning policy 40 | lr_config = dict( 41 | policy='step', 42 | warmup='linear', 43 | warmup_iters=500, 44 | warmup_ratio=1.0 / 3, 45 | step=[6, 8]) 46 | checkpoint_config = dict(interval=5) 47 | 48 | # 
yapf:enable 49 | work_dir = './work_dirs/ca_motion/' 50 | pretrain_weights = './trained_models/panopticseg_cscapesvps.pth' 51 | ignore_clf = False 52 | same_nclasses = True 53 | freeze_vars={'backbone.appearance_stream':True, 'neck':True, 'bbox_head':True, 'panoptic_head':True} 54 | -------------------------------------------------------------------------------- /configs/misc/debug_classagnostic_loaders.py: -------------------------------------------------------------------------------- 1 | # dataset settings 2 | from configs.data.kittimots_motion_supp import data as data_kittimots_motion 3 | from configs.data.cscapesvps_motion_supp import data as data_cscapesvps_motion 4 | 5 | img_norm_cfg = dict( 6 | mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=True) 7 | 8 | for idx, pipeline in enumerate(data_kittimots_motion['train']['pipeline']): 9 | if pipeline['type'] == 'Normalize': 10 | data_kittimots_motion['train']['pipeline'][idx]['mean'] = img_norm_cfg['mean'] 11 | data_kittimots_motion['train']['pipeline'][idx]['std'] = img_norm_cfg['std'] 12 | data_kittimots_motion['train']['pipeline'][idx]['to_rgb'] = img_norm_cfg['to_rgb'] 13 | 14 | for idx, pipeline in enumerate(data_cscapesvps_motion['train']['pipeline']): 15 | if pipeline['type'] == 'Normalize': 16 | data_cscapesvps_motion['train']['pipeline'][idx]['mean'] = img_norm_cfg['mean'] 17 | data_cscapesvps_motion['train']['pipeline'][idx]['std'] = img_norm_cfg['std'] 18 | data_cscapesvps_motion['train']['pipeline'][idx]['to_rgb'] = img_norm_cfg['to_rgb'] 19 | 20 | data = dict( 21 | imgs_per_gpu=1, 22 | workers_per_gpu=0, 23 | train=[data_kittimots_motion['train'], data_cscapesvps_motion['train']], 24 | val=data_kittimots_motion['val'], 25 | test=data_kittimots_motion['test'],) 26 | -------------------------------------------------------------------------------- /configs/misc/visualise_mod_cscapesvps.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'CityscapesVPSDataset' 2 | data_root = 'data/cityscapes_vps/' 3 | img_norm_cfg = dict( 4 | mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadRefImageFromFile'), 7 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 8 | with_seg=True, with_pid=True, 9 | # Cityscapes specific class mapping 10 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 11 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 12 | 17:17, 18:18, -1:255, 255:255},), 13 | dict(type='Resize', img_scale=[(2048, 1024)], keep_ratio=True), 14 | dict(type='RandomFlip'), 15 | dict(type='Normalize', **img_norm_cfg), 16 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 17 | dict(type='DefaultFormatBundle'), 18 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 19 | 'gt_obj_ids', 'gt_masks', 'gt_semantic_seg', 20 | 'gt_semantic_seg_Nx', 'ref_img', 'ref_bboxes', 21 | 'ref_labels', 'ref_obj_ids', 'ref_masks']), 22 | ] 23 | test_pipeline = [ 24 | dict(type='LoadRefImageFromFile'), 25 | 26 | dict( 27 | type='MultiScaleFlipAug', 28 | img_scale=[(2048, 1024)], 29 | flip=False, 30 | transforms=[ 31 | dict(type='Resize', keep_ratio=True), 32 | dict(type='RandomFlip'), 33 | dict(type='Normalize', **img_norm_cfg), 34 | dict(type='Pad', size_divisor=32), 35 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 36 | dict(type='Collect', keys=['img', 'ref_img']), 37 | ]) 38 | ] 39 | data = dict( 40 | imgs_per_gpu=1, 41 | workers_per_gpu=0, 42 | train=dict( 43 | type=dataset_type, 44 | 
ann_file=data_root + 45 | 'instances_train_city_vps_rle.json', 46 | img_prefix=data_root + 'train/img/', 47 | ref_prefix=data_root + 'train/img/', 48 | seg_prefix=data_root + 'train/labelmap/', 49 | pipeline=train_pipeline, 50 | ref_ann_file=data_root + 51 | 'instances_train_city_vps_rle.json', 52 | offsets=[-1,+1]), 53 | val=dict( 54 | type=dataset_type, 55 | ann_file=data_root + 56 | 'instances_val_city_vps_rle.json', 57 | img_prefix=data_root + 'val/img/', 58 | pipeline=test_pipeline), 59 | test=dict( 60 | type=dataset_type, 61 | ann_file=data_root + 62 | #'im_all_info_val_city_vps.json', 63 | 'instances_val_city_vps_rle.json', 64 | #img_prefix=data_root + 'val/img_all/', 65 | img_prefix=data_root + 'val/img/', 66 | ref_prefix=data_root + 'val/img/', 67 | seg_prefix=data_root + 'val/labelmap/', 68 | #nframes_span_test=30, 69 | nframes_span_test=6, 70 | pipeline=test_pipeline)) 71 | -------------------------------------------------------------------------------- /configs/misc/visualise_mod_kittimots.py: -------------------------------------------------------------------------------- 1 | dataset_type = 'MotionDataset' 2 | data_root = 'data/kittimots_moseg/' 3 | img_norm_cfg = dict( 4 | mean=[0.0, 0.0, 0.0], std=[1.0, 1.0, 1.0], to_rgb=True) 5 | train_pipeline = [ 6 | dict(type='LoadImageFromFile'), 7 | dict(type='LoadFlowFromFile'), 8 | dict(type='LoadAnnotations', with_bbox=True, with_mask=True, 9 | with_seg=True, with_pid=True, 10 | # Cityscapes specific class mapping 11 | semantic2label={0:0, 1:1, 2:2, 3:3, 4:4, 5:5, 6:6, 7:7, 8:8, 9:9, 12 | 10:10, 11:11, 12:12, 13:13, 14:14, 15:15, 16:16, 13 | 17:17, 18:18, -1:255, 255:255},), 14 | dict(type='Resize', img_scale=[(1242, 375)], keep_ratio=True), 15 | dict(type='RandomFlip'), 16 | dict(type='Normalize', **img_norm_cfg), 17 | dict(type='SegResizeFlipCropPadRescale', scale_factor=[1, 0.25]), 18 | dict(type='DefaultFormatBundle'), 19 | dict(type='Collect', keys=['img', 'flow', 'gt_bboxes', 'gt_labels', 'gt_masks']), 20 | ] 21 | test_pipeline = [ 22 | dict(type='LoadRefImageFromFile'), 23 | 24 | dict( 25 | type='MultiScaleFlipAug', 26 | img_scale=[(1242, 375)], 27 | flip=False, 28 | transforms=[ 29 | dict(type='Resize', keep_ratio=True), 30 | dict(type='RandomFlip'), 31 | dict(type='Normalize', **img_norm_cfg), 32 | dict(type='Pad', size_divisor=32), 33 | dict(type='ImageToTensor', keys=['img', 'ref_img']), 34 | dict(type='Collect', keys=['img', 'ref_img']), 35 | ]) 36 | ] 37 | data = dict( 38 | imgs_per_gpu=2, 39 | workers_per_gpu=0, 40 | train=dict( 41 | type=dataset_type, 42 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_train_3classes_Annotations.json', 43 | img_prefix=data_root + 'JPEGImages_480/', 44 | flow_prefix=data_root + 'OpticalFlow_480/', 45 | pipeline=train_pipeline), 46 | val=dict( 47 | type=dataset_type, 48 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val_3classes_Annotations.json', 49 | img_prefix=data_root + 'JPEGImages_480/', 50 | flow_prefix=data_root + 'OpticalFlow_480/', 51 | pipeline=test_pipeline), 52 | test=dict( 53 | type=dataset_type, 54 | ann_file=data_root + 'annotations/KITTIMOTS_MOSeg_val_3classes_Annotations.json', 55 | img_prefix=data_root + 'JPEGImages_480/', 56 | flow_prefix=data_root + 'OpticalFlow_480/', 57 | pipeline=test_pipeline)) 58 | -------------------------------------------------------------------------------- /configs/models/backbone_1stream.py: -------------------------------------------------------------------------------- 1 | backbone=dict( 2 | type='ResNet', 3 | depth=50, 4 | 
num_stages=4, 5 | out_indices=(0, 1, 2, 3), # C2, C3, C4, C5 6 | frozen_stages=1, 7 | style='pytorch') 8 | 9 | -------------------------------------------------------------------------------- /configs/models/backbone_2stream.py: -------------------------------------------------------------------------------- 1 | backbone=dict( 2 | type='TwoStreamResNet', 3 | depth=50, 4 | num_stages=4, 5 | out_indices=(0, 1, 2, 3), # C2, C3, C4, C5 6 | frozen_stages=1, 7 | style='pytorch') 8 | -------------------------------------------------------------------------------- /configs/models/backbone_2stream_tfstyle.py: -------------------------------------------------------------------------------- 1 | backbone=dict( 2 | type='TwoStreamResNetTFStyle', 3 | layers=[3, 4, 6, 3], 4 | width_multiplier=1, 5 | sk_ratio=0, 6 | out_indices=(0, 1, 2, 3), # C2, C3, C4, C5 7 | frozen_stages=3 8 | ) 9 | 10 | def set_frozen_stages(frozen_stages=1): 11 | backbone['frozen_stages'] = frozen_stages 12 | return backbone 13 | -------------------------------------------------------------------------------- /configs/models/bbox_head.py: -------------------------------------------------------------------------------- 1 | bbox_head=dict( 2 | type='DecoupledSOLOHead', 3 | num_classes=9, 4 | in_channels=256, 5 | stacked_convs=7, 6 | seg_feat_channels=256, 7 | strides=[8, 8, 16, 32, 32], 8 | scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), 9 | sigma=0.2, 10 | num_grids=[80, 72, 64, 48, 32], 11 | cate_down_pos=0, 12 | with_deform=False, 13 | loss_ins=dict( 14 | type='DiceLoss', 15 | use_sigmoid=True, 16 | loss_weight=3.0), 17 | loss_cate=dict( 18 | type='FocalLoss', 19 | use_sigmoid=True, 20 | gamma=2.0, 21 | alpha=0.25, 22 | loss_weight=1.0), 23 | ) 24 | 25 | def set_num_classes(num_classes): 26 | bbox_head['num_classes'] = num_classes 27 | return bbox_head 28 | -------------------------------------------------------------------------------- /configs/models/ca_appearance_mahalanobis_head.py: -------------------------------------------------------------------------------- 1 | ca_head=dict(type='MahalanobisAppearanceBasedClassAgnosticHead', 2 | n_convs=4, 3 | clustering_type='dbscan', 4 | norm_cfg = dict(type='GN', num_groups=32, requires_grad=True), 5 | num_classes=19, 6 | interm_channels=256, 7 | merge_fpn=True 8 | ) 9 | 10 | def set_params(num_classes, ca_label, merge_fpn=True, merge_average=True): 11 | ca_head['num_classes'] = num_classes 12 | ca_head['ca_label'] = ca_label 13 | ca_head['merge_fpn'] = merge_fpn 14 | ca_head['merge_average'] = merge_average 15 | return ca_head 16 | -------------------------------------------------------------------------------- /configs/models/ca_appearance_map.py: -------------------------------------------------------------------------------- 1 | ca_head=dict(type='MAPClassAgnosticHead', 2 | num_classes=11) 3 | -------------------------------------------------------------------------------- /configs/models/ca_motion_head.py: -------------------------------------------------------------------------------- 1 | ca_head=dict( 2 | type='DecoupledSOLOHead', 3 | num_classes=3, 4 | in_channels=256, 5 | stacked_convs=7, 6 | seg_feat_channels=256, 7 | strides=[8, 8, 16, 32, 32], 8 | scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)), 9 | sigma=0.2, 10 | num_grids=[80, 72, 64, 48, 32], 11 | cate_down_pos=0, 12 | with_deform=False, 13 | loss_ins=dict( 14 | type='DiceLoss', 15 | use_sigmoid=True, 16 | loss_weight=3.0), 17 | loss_cate=dict( 18 | type='FocalLoss', 19 | 
use_sigmoid=True, 20 | gamma=2.0, 21 | alpha=0.25, 22 | loss_weight=1.0, 23 | cate_loss_weight=[1.0, 1.0] 24 | ) 25 | ) 26 | 27 | def set_params(num_classes, loss_weights=[1.0, 1.0]): 28 | ca_head['num_classes'] = num_classes 29 | ca_head['loss_cate']['cate_loss_weight'] = loss_weights 30 | return ca_head 31 | -------------------------------------------------------------------------------- /configs/models/neck.py: -------------------------------------------------------------------------------- 1 | neck=dict( 2 | type='FPN', 3 | in_channels=[256, 512, 1024, 2048], 4 | out_channels=256, 5 | start_level=0, 6 | num_outs=5) 7 | -------------------------------------------------------------------------------- /configs/models/panoptic_head.py: -------------------------------------------------------------------------------- 1 | panoptic_head=dict(type='SimpleSegHead', 2 | num_classes=19, 3 | in_channels=256, 4 | seg_feats_channel=256, 5 | stacked_convs=5, 6 | original_image_size=(1600, 800)) 7 | 8 | def set_params(num_classes=19): 9 | panoptic_head['num_classes'] = num_classes 10 | return panoptic_head 11 | -------------------------------------------------------------------------------- /images/VCA_Teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSiam/video_class_agnostic_segmentation/59fd84485e87c0f5895110240837b76325dde657/images/VCA_Teaser.png -------------------------------------------------------------------------------- /mmdet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MSiam/video_class_agnostic_segmentation/59fd84485e87c0f5895110240837b76325dde657/mmdet/__init__.py -------------------------------------------------------------------------------- /mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import (async_inference_detector, inference_detector, 2 | init_detector, show_result, show_result_pyplot, show_result_ins) 3 | from .train import get_root_logger, set_random_seed, train_detector 4 | 5 | __all__ = [ 6 | 'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector', 7 | 'async_inference_detector', 'inference_detector', 'show_result', 8 | 'show_result_pyplot', 'show_result_ins' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import (anchor_inside_flags, anchor_target, 3 | images_to_levels, unmap) 4 | from .guided_anchor_target import ga_loc_target, ga_shape_target 5 | from .point_generator import PointGenerator 6 | from .point_target import point_target 7 | 8 | __all__ = [ 9 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 10 | 'ga_shape_target', 'PointGenerator', 'point_target', 
'images_to_levels', 11 | 'unmap' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PointGenerator(object): 5 | 6 | def _meshgrid(self, x, y, row_major=True): 7 | xx = x.repeat(len(y)) 8 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 9 | if row_major: 10 | return xx, yy 11 | else: 12 | return yy, xx 13 | 14 | def grid_points(self, featmap_size, stride=16, device='cuda'): 15 | feat_h, feat_w = featmap_size 16 | shift_x = torch.arange(0., feat_w, device=device) * stride 17 | shift_y = torch.arange(0., feat_h, device=device) * stride 18 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 19 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 20 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 21 | all_points = shifts.to(device) 22 | return all_points 23 | 24 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 25 | feat_h, feat_w = featmap_size 26 | valid_h, valid_w = valid_size 27 | assert valid_h <= feat_h and valid_w <= feat_w 28 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 29 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 30 | valid_x[:valid_w] = 1 31 | valid_y[:valid_h] = 1 32 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 33 | valid = valid_xx & valid_yy 34 | return valid 35 | -------------------------------------------------------------------------------- /mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target 3 | from .geometry import bbox_overlaps 4 | from .samplers import (BaseSampler, CombinedSampler, 5 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 6 | PseudoSampler, RandomSampler, SamplingResult) 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, 8 | bbox_mapping, bbox_mapping_back, delta2bbox, 9 | distance2bbox, roi2bbox) 10 | 11 | from .assign_sampling import ( # isort:skip, avoid recursive imports 12 | assign_and_sample, build_assigner, build_sampler) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 16 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 19 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 20 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 21 | 'distance2bbox', 'bbox_target' 22 | ] 23 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . 
import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building an assigner'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .atss_assigner import ATSSAssigner 4 | from .base_assigner import BaseAssigner 5 | from .max_iou_assigner import MaxIoUAssigner 6 | from .point_assigner import PointAssigner 7 | 8 | __all__ = [ 9 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 10 | 'PointAssigner', 'ATSSAssigner' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..utils import multi_apply 4 | from .transforms import bbox2delta 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 |
labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | 61 | return labels, label_weights, bbox_targets, bbox_weights 62 | 63 | 64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 65 | bbox_targets_expand = bbox_targets.new_zeros( 66 | (bbox_targets.size(0), 4 * num_classes)) 67 | bbox_weights_expand = bbox_weights.new_zeros( 68 | (bbox_weights.size(0), 4 * num_classes)) 69 | for i in torch.nonzero(labels > 0).squeeze(-1): 70 | start, end = labels[i] * 4, (labels[i] + 1) * 4 71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 73 | return bbox_targets_expand, bbox_weights_expand 74 | -------------------------------------------------------------------------------- /mmdet/core/bbox/demodata.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def ensure_rng(rng=None): 6 | """ 7 | Simple version of the ``kwarray.ensure_rng`` 8 | 9 | Args: 10 | rng (int | numpy.random.RandomState | None): 11 | if None, then defaults to the global rng. Otherwise this can be an 12 | integer or a RandomState class 13 | Returns: 14 | (numpy.random.RandomState) : rng - 15 | a numpy random number generator 16 | 17 | References: 18 | https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270 19 | """ 20 | 21 | if rng is None: 22 | rng = np.random.mtrand._rand 23 | elif isinstance(rng, int): 24 | rng = np.random.RandomState(rng) 25 | else: 26 | rng = rng 27 | return rng 28 | 29 | 30 | def random_boxes(num=1, scale=1, rng=None): 31 | """ 32 | Simple version of ``kwimage.Boxes.random`` 33 | 34 | Returns: 35 | Tensor: shape (n, 4) in x1, y1, x2, y2 format. 
36 | 37 | References: 38 | https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390 39 | 40 | Example: 41 | >>> num = 3 42 | >>> scale = 512 43 | >>> rng = 0 44 | >>> boxes = random_boxes(num, scale, rng) 45 | >>> print(boxes) 46 | tensor([[280.9925, 278.9802, 308.6148, 366.1769], 47 | [216.9113, 330.6978, 224.0446, 456.5878], 48 | [405.3632, 196.3221, 493.3953, 270.7942]]) 49 | """ 50 | rng = ensure_rng(rng) 51 | 52 | tlbr = rng.rand(num, 4).astype(np.float32) 53 | 54 | tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2]) 55 | tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3]) 56 | br_x = np.maximum(tlbr[:, 0], tlbr[:, 2]) 57 | br_y = np.maximum(tlbr[:, 1], tlbr[:, 3]) 58 | 59 | tlbr[:, 0] = tl_x * scale 60 | tlbr[:, 1] = tl_y * scale 61 | tlbr[:, 2] = br_x * scale 62 | tlbr[:, 3] = br_y * scale 63 | 64 | boxes = torch.from_numpy(tlbr) 65 | return boxes 66 | -------------------------------------------------------------------------------- /mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False): 5 | """Calculate overlap between two sets of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) in <x1, y1, x2, y2> format. 13 | bboxes2 (Tensor): shape (n, 4) in <x1, y1, x2, y2> format. 14 | If is_aligned is ``True``, then m and n must be equal. 15 | mode (str): "iou" (intersection over union) or "iof" (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious(Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | 21 | Example: 22 | >>> bboxes1 = torch.FloatTensor([ 23 | >>> [0, 0, 10, 10], 24 | >>> [10, 10, 20, 20], 25 | >>> [32, 32, 38, 42], 26 | >>> ]) 27 | >>> bboxes2 = torch.FloatTensor([ 28 | >>> [0, 0, 10, 20], 29 | >>> [0, 10, 10, 19], 30 | >>> [10, 10, 20, 20], 31 | >>> ]) 32 | >>> bbox_overlaps(bboxes1, bboxes2) 33 | tensor([[0.5238, 0.0500, 0.0041], 34 | [0.0323, 0.0452, 1.0000], 35 | [0.0000, 0.0000, 0.0000]]) 36 | 37 | Example: 38 | >>> empty = torch.FloatTensor([]) 39 | >>> nonempty = torch.FloatTensor([ 40 | >>> [0, 0, 10, 9], 41 | >>> ]) 42 | >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1) 43 | >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0) 44 | >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0) 45 | """ 46 | 47 | assert mode in ['iou', 'iof'] 48 | 49 | rows = bboxes1.size(0) 50 | cols = bboxes2.size(0) 51 | if is_aligned: 52 | assert rows == cols 53 | 54 | if rows * cols == 0: 55 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 56 | 57 | if is_aligned: 58 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 59 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 60 | 61 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 62 | overlap = wh[:, 0] * wh[:, 1] 63 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 64 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 65 | 66 | if mode == 'iou': 67 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 68 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 69 | ious = overlap / (area1 + area2 - overlap) 70 | else: 71 | ious = overlap / area1 72 | else: 73 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 74 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 75 | 76 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols,
2] 77 | overlap = wh[:, :, 0] * wh[:, :, 1] 78 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 79 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 80 | 81 | if mode == 'iou': 82 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 83 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 84 | ious = overlap / (area1[:, None] + area2 - overlap) 85 | else: 86 | ious = overlap / (area1[:, None]) 87 | 88 | return ious 89 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > 
num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | """ 9 | Online Hard Example Mining Sampler described in [1]_. 10 | 11 | References: 12 | .. [1] https://arxiv.org/pdf/1604.03540.pdf 13 | """ 14 | 15 | def __init__(self, 16 | num, 17 | pos_fraction, 18 | context, 19 | neg_pos_ub=-1, 20 | add_gt_as_proposals=True, 21 | **kwargs): 22 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 23 | add_gt_as_proposals) 24 | if not hasattr(context, 'num_stages'): 25 | self.bbox_roi_extractor = context.bbox_roi_extractor 26 | self.bbox_head = context.bbox_head 27 | else: 28 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 29 | context.current_stage] 30 | self.bbox_head = context.bbox_head[context.current_stage] 31 | 32 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 33 | with torch.no_grad(): 34 | rois = bbox2roi([bboxes]) 35 | bbox_feats = self.bbox_roi_extractor( 36 | feats[:self.bbox_roi_extractor.num_inputs], rois) 37 | cls_score, _ = self.bbox_head(bbox_feats) 38 | loss = self.bbox_head.loss( 39 | cls_score=cls_score, 40 | bbox_pred=None, 41 | labels=labels, 42 | label_weights=cls_score.new_ones(cls_score.size(0)), 43 | bbox_targets=None, 44 | bbox_weights=None, 45 | reduction_override='none')['loss_cls'] 46 | _, topk_loss_inds = loss.topk(num_expected) 47 | return inds[topk_loss_inds] 48 | 49 | def _sample_pos(self, 50 | assign_result, 51 | num_expected, 52 | bboxes=None, 53 | feats=None, 54 | **kwargs): 55 | # Sample some hard positive samples 56 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 57 | if pos_inds.numel() != 0: 58 | pos_inds = pos_inds.squeeze(1) 59 | if pos_inds.numel() <= num_expected: 60 | return pos_inds 61 | else: 62 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 63 | assign_result.labels[pos_inds], feats) 64 | 65 | def _sample_neg(self, 66 | assign_result, 67 | num_expected, 68 | bboxes=None, 69 | feats=None, 70 | **kwargs): 71 | # Sample some hard negative samples 72 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 73 | if neg_inds.numel() != 0: 74 | neg_inds = neg_inds.squeeze(1) 75 | if len(neg_inds) <= num_expected: 76 | return neg_inds 77 | else: 78 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 79 | assign_result.labels[neg_inds], feats) 80 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = 
bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | from mmdet.core.bbox import demodata 16 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | self.rng = demodata.ensure_rng(kwargs.get('rng', None)) 19 | 20 | def random_choice(self, gallery, num): 21 | """Randomly select some elements from the gallery. 22 | 23 | It seems that PyTorch's implementation is slower than numpy, so we use 24 | numpy to randperm the indices. 25 | """ 26 | assert len(gallery) >= num 27 | if isinstance(gallery, list): 28 | gallery = np.array(gallery) 29 | cands = np.arange(len(gallery)) 30 | self.rng.shuffle(cands) 31 | rand_inds = cands[:num] 32 | if not isinstance(gallery, np.ndarray): 33 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 34 | return gallery[rand_inds] 35 | 36 | def _sample_pos(self, assign_result, num_expected, **kwargs): 37 | """Randomly sample some positive samples.""" 38 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 39 | if pos_inds.numel() != 0: 40 | pos_inds = pos_inds.squeeze(1) 41 | if pos_inds.numel() <= num_expected: 42 | return pos_inds 43 | else: 44 | return self.random_choice(pos_inds, num_expected) 45 | 46 | def _sample_neg(self, assign_result, num_expected, **kwargs): 47 | """Randomly sample some negative samples.""" 48 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 49 | if neg_inds.numel() != 0: 50 | neg_inds = neg_inds.squeeze(1) 51 | if len(neg_inds) <= num_expected: 52 | return neg_inds 53 | else: 54 | return self.random_choice(neg_inds, num_expected) 55 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json, results2json_segm 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 6 | DistEvalHook, DistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 9 | print_recall_summary) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall', 'results2json_segm' 18 | ] 19 | -------------------------------------------------------------------------------- /mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 |
import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, 
gt_masks_list, 8 | cfg): 9 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 10 | mask_targets = map(mask_target_single, pos_proposals_list, 11 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 12 | mask_targets = torch.cat(list(mask_targets)) 13 | return mask_targets 14 | 15 | 16 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 17 | mask_size = _pair(cfg.mask_size) 18 | num_pos = pos_proposals.size(0) 19 | mask_targets = [] 20 | if num_pos > 0: 21 | proposals_np = pos_proposals.cpu().numpy() 22 | _, maxh, maxw = gt_masks.shape 23 | proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw - 1) 24 | proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh - 1) 25 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 26 | for i in range(num_pos): 27 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 28 | bbox = proposals_np[i, :].astype(np.int32) 29 | x1, y1, x2, y2 = bbox 30 | w = np.maximum(x2 - x1 + 1, 1) 31 | h = np.maximum(y2 - y1 + 1, 1) 32 | # mask is uint8 both before and after resizing 33 | # mask_size (h, w) to (w, h) 34 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 35 | mask_size[::-1]) 36 | mask_targets.append(target) 37 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 38 | pos_proposals.device) 39 | else: 40 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 41 | return mask_targets 42 | -------------------------------------------------------------------------------- /mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 
10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .matrix_nms import matrix_nms 3 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 4 | merge_aug_proposals, merge_aug_scores) 5 | 6 | __all__ = [ 7 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 8 | 'merge_aug_scores', 'merge_aug_masks', 'matrix_nms' 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class), where the 0th column 17 | contains scores of the background class, but this will be ignored. 18 | score_thr (float): bbox threshold, bboxes with scores lower than it 19 | will not be considered. 20 | nms_cfg (dict): NMS config; the 'type' key selects the NMS op from 21 | nms_wrapper and the remaining keys (e.g. the IoU threshold) are 22 | passed to that op. 23 | max_num (int): if there are more than max_num bboxes after NMS, 24 | only top max_num will be kept. 25 | score_factors (Tensor): The factors multiplied to scores before 26 | applying NMS 27 | 28 | Returns: 29 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels 30 | are 0-based.
29 | """ 30 | num_classes = multi_scores.shape[1] 31 | bboxes, labels = [], [] 32 | nms_cfg_ = nms_cfg.copy() 33 | nms_type = nms_cfg_.pop('type', 'nms') 34 | nms_op = getattr(nms_wrapper, nms_type) 35 | for i in range(1, num_classes): 36 | cls_inds = multi_scores[:, i] > score_thr 37 | if not cls_inds.any(): 38 | continue 39 | # get bboxes and scores of this class 40 | if multi_bboxes.shape[1] == 4: 41 | _bboxes = multi_bboxes[cls_inds, :] 42 | else: 43 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 44 | _scores = multi_scores[cls_inds, i] 45 | if score_factors is not None: 46 | _scores *= score_factors[cls_inds] 47 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 48 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 49 | cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ), 50 | i - 1, 51 | dtype=torch.long) 52 | bboxes.append(cls_dets) 53 | labels.append(cls_labels) 54 | if bboxes: 55 | bboxes = torch.cat(bboxes) 56 | labels = torch.cat(labels) 57 | if bboxes.shape[0] > max_num: 58 | _, inds = bboxes[:, -1].sort(descending=True) 59 | inds = inds[:max_num] 60 | bboxes = bboxes[inds] 61 | labels = labels[inds] 62 | else: 63 | bboxes = multi_bboxes.new_zeros((0, 5)) 64 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 65 | 66 | return bboxes, labels 67 | -------------------------------------------------------------------------------- /mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap, \ 3 | partial_load, masked_avg_pool, \ 4 | freeze_model_partially, vis_seg, \ 5 | compute_mask_ious, compute_box_ious, \ 6 | convert_and_load_checkpoint, process_gt_masks, process_seg_masks, \ 7 | compute_gaussian, compute_ood_scores 8 | from .colormap import get_color_map 9 | 10 | __all__ = [ 11 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 12 | 'multi_apply', 'partial_load', 'masked_avg_pool', 'freeze_model_partially', 13 | 'vis_seg', 'compute_mask_ious', 'compute_box_ious', 'convert_and_load_checkpoint', 14 | 'get_color_map', 'process_gt_masks', 'process_seg_masks', 'compute_gaussian', 15 | 'compute_ood_scores' 16 | ] 17 | -------------------------------------------------------------------------------- /mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad 
is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def __init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model.parameters(), self.coalesce, 55 | self.bucket_size_mb) 56 | if self.grad_clip is not None: 57 | self.clip_grads(runner.model.parameters()) 58 | runner.optimizer.step() 59 | -------------------------------------------------------------------------------- /mmdet/core/utils/map.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | def compute_map(mask, x): 4 | h, w = x.shape[-2:] 5 | if mask.ndim < 4: 6 | mask = mask.unsqueeze(1) 7 | 8 | masked_embedding = mask * x.unsqueeze(0) 9 | area = F.avg_pool2d(mask, x.shape[-2:]) * h * w + 0.0005 10 | map_embedding = F.avg_pool2d(input=masked_embedding, kernel_size=x.shape[-2:]) * h * w / area 11 | map_embedding = map_embedding.squeeze() 12 | if map_embedding.ndim < 2: 13 | map_embedding = map_embedding.unsqueeze(0) 14 | return map_embedding 15 | 16 | -------------------------------------------------------------------------------- /mmdet/core/utils/post_proc_utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | 3 | def post_process_seg_result(seg_result, img, train=False): 4 | score_thr = 0.3 5 | empty = True 6 | 7 | for i in range(len(seg_result)): 8 | if seg_result[i] is None or 'scores' not in seg_result[i]: # Happens when only embeddings are there 9 | continue 10 | vis_inds = seg_result[i]['scores'] > score_thr 11 | seg_result[i]['masks'] = seg_result[i]['masks'][vis_inds] 12 | seg_result[i]['labels'] = seg_result[i]['labels'][vis_inds] 13 | seg_result[i]['scores'] = seg_result[i]['scores'][vis_inds] 14 | 15 | if seg_result[i]['masks'].shape[0] != 0: 16 | empty = False 17 | if train: 18 | seg_result[i]['masks'] = F.interpolate(seg_result[i]['masks'].unsqueeze(0).float(), \ 19 | img[i].shape[-2:], mode='nearest').squeeze() 20 | if len(seg_result[i]['masks'].shape) < 3: 21 | seg_result[i]['masks'] = seg_result[i]['masks'].unsqueeze(0) 22 | 23 | return seg_result, empty 24 | 25 | def process_bbox_outputs(outs, bbox_head, img_meta, rescale, pred_semantic_seg=None, cfg=None): 26 | if 'eval_tensors' in outs: # BBox or Ca Head is using decoupled SOLO 27 | seg_inputs = outs['eval_tensors'] 28 | seg_inputs.update({'img_metas': img_meta, 'cfg': cfg, 'rescale': rescale}) 29 | seg_result = bbox_head.get_seg(**seg_inputs) 30 | else: 31 | seg_inputs = {'class_agnostic_embeddings': outs['class_agnostic_embeddings'], 32 | 'merged_fpn_embeddings': outs['merged_fpn_embeddings'], 33 | 'pred_semantic_seg': pred_semantic_seg} 34 | seg_result = bbox_head.get_seg(**seg_inputs) 35 | 36 | extra_keys = ['class_agnostic_embeddings', 'merged_fpn_embeddings'] 37 | for key in extra_keys: 38 | if key in outs: 39 | seg_result[0][key] = outs[key] 40 | return seg_result 41 | -------------------------------------------------------------------------------- /mmdet/datasets/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .builder import build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset 6 | from .loader import DistributedGroupSampler, GroupSampler, build_dataloader 7 | from .registry import DATASETS 8 | from .voc import VOCDataset 9 | from .wider_face import WIDERFaceDataset 10 | from .xml_style import XMLDataset 11 | from .kittimots import KITTIMOTSDataset 12 | from .motion_dataset import MotionDataset 13 | from .cityscapes_vps import CityscapesVPSDataset 14 | from .cityscapes_ps import CityscapesPanopticDataset 15 | from .cityscapes_vps_segonly import CityscapesVPSSegDataset 16 | 17 | __all__ = [ 18 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 'CityscapesVPSDataset', 19 | 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler', 20 | 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'WIDERFaceDataset', 21 | 'DATASETS', 'build_dataset', 'KITTIMOTSDataset', 22 | 'MotionDataset', 'CityscapesPanopticDataset', 23 | 'CityscapesVPSSegDataset' 24 | ] 25 | -------------------------------------------------------------------------------- /mmdet/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset 5 | from .registry import DATASETS 6 | 7 | 8 | def _concat_dataset(cfg, default_args=None): 9 | ann_files = cfg['ann_file'] 10 | img_prefixes = cfg.get('img_prefix', None) 11 | seg_prefixes = cfg.get('seg_prefix', None) 12 | proposal_files = cfg.get('proposal_file', None) 13 | 14 | datasets = [] 15 | num_dset = len(ann_files) 16 | for i in range(num_dset): 17 | data_cfg = copy.deepcopy(cfg) 18 | data_cfg['ann_file'] = ann_files[i] 19 | if isinstance(img_prefixes, (list, tuple)): 20 | data_cfg['img_prefix'] = img_prefixes[i] 21 | if isinstance(seg_prefixes, (list, tuple)): 22 | data_cfg['seg_prefix'] = seg_prefixes[i] 23 | if isinstance(proposal_files, (list, tuple)): 24 | data_cfg['proposal_file'] = proposal_files[i] 25 | datasets.append(build_dataset(data_cfg, default_args)) 26 | 27 | return ConcatDataset(datasets) 28 | 29 | 30 | def build_dataset(cfg, default_args=None): 31 | if isinstance(cfg, (list, tuple)): 32 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif isinstance(cfg['ann_file'], (list, tuple)): 37 | dataset = _concat_dataset(cfg, default_args) 38 | else: 39 | dataset = build_from_cfg(cfg, DATASETS, default_args) 40 | 41 | return dataset 42 | -------------------------------------------------------------------------------- /mmdet/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class CityscapesDataset(CocoDataset): 7 | 8 | CLASSES = ("person", "rider", "car", "truck", "bicycle", "motorcycle", "bus", "train") 9 | -------------------------------------------------------------------------------- /mmdet/datasets/cityscapes_ps.py: -------------------------------------------------------------------------------- 1 | from .cityscapes import CityscapesDataset 2 | 
from .registry import DATASETS 3 | import os.path as osp 4 | 5 | @DATASETS.register_module 6 | class CityscapesPanopticDataset(CityscapesDataset): 7 | """ 8 | Cityscapes/Carla Dataset loading semantic segmentation 9 | without Instance support 10 | """ 11 | 12 | def prepare_train_img(self, idx): 13 | img_info = self.img_infos[idx] 14 | ann_info = self.get_ann_info(idx) 15 | results = dict(img_info=img_info, ann_info=ann_info) 16 | 17 | # Add reading of semantic segmentation labels 18 | seg_filename = osp.join( 19 | self.seg_prefix, 20 | results['ann_info']['seg_map'].replace( 21 | 'leftImg8bit', 'gtFine_labelTrainIds')) 22 | results['ann_info']['seg_filename'] = seg_filename 23 | 24 | if self.proposals is not None: 25 | results['proposals'] = self.proposals[idx] 26 | self.pre_pipeline(results) 27 | 28 | results = self.pipeline(results) 29 | 30 | if results is not None and 'gt_labels' not in results: 31 | results['gt_labels'] = [] 32 | results['gt_bboxes'] = [] 33 | return results 34 | -------------------------------------------------------------------------------- /mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | import bisect 6 | 7 | 8 | @DATASETS.register_module 9 | class ConcatDataset(_ConcatDataset): 10 | """A wrapper of concatenated dataset. 11 | 12 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 13 | concat the group flag for image aspect ratio. 14 | 15 | Args: 16 | datasets (list[:obj:`Dataset`]): A list of datasets. 17 | """ 18 | 19 | def __init__(self, datasets): 20 | super(ConcatDataset, self).__init__(datasets) 21 | self.CLASSES = datasets[0].CLASSES 22 | if hasattr(datasets[0], 'flag'): 23 | flags = [] 24 | for i in range(0, len(datasets)): 25 | flags.append(datasets[i].flag) 26 | self.flag = np.concatenate(flags) 27 | self.get_ann_info = None # Callable 28 | 29 | def __getitem__(self, idx): 30 | if idx < 0: 31 | if -idx > len(self): 32 | raise ValueError("absolute value of index should not exceed dataset length") 33 | idx = len(self) + idx 34 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 35 | if dataset_idx == 0: 36 | sample_idx = idx 37 | else: 38 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 39 | 40 | self.get_ann_info = self.datasets[dataset_idx].get_ann_info 41 | return self.datasets[dataset_idx][sample_idx] 42 | 43 | @DATASETS.register_module 44 | class RepeatDataset(object): 45 | """A wrapper of repeated dataset. 46 | 47 | The length of repeated dataset will be `times` larger than the original 48 | dataset. This is useful when the data loading time is long but the dataset 49 | is small. Using RepeatDataset can reduce the data loading time between 50 | epochs. 51 | 52 | Args: 53 | dataset (:obj:`Dataset`): The dataset to be repeated. 54 | times (int): Repeat times. 
55 | """ 56 | 57 | def __init__(self, dataset, times): 58 | self.dataset = dataset 59 | self.times = times 60 | self.CLASSES = dataset.CLASSES 61 | if hasattr(self.dataset, 'flag'): 62 | self.flag = np.tile(self.dataset.flag, times) 63 | 64 | self._ori_len = len(self.dataset) 65 | self.get_ann_info = self.dataset.get_ann_info 66 | 67 | def __getitem__(self, idx): 68 | return self.dataset[idx % self._ori_len] 69 | 70 | def __len__(self): 71 | return self.times * self._ori_len 72 | -------------------------------------------------------------------------------- /mmdet/datasets/kittimots.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class KITTIMOTSDataset(CocoDataset): 7 | """ 8 | KITTI Dataset for Instance Segmentation 9 | """ 10 | CLASSES = ("car", "person") 11 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from functools import partial 3 | 4 | from mmcv.parallel import collate 5 | from mmcv.runner import get_dist_info 6 | from torch.utils.data import DataLoader 7 | 8 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler 9 | 10 | if platform.system() != 'Windows': 11 | # https://github.com/pytorch/pytorch/issues/973 12 | import resource 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def full_collate(batch, samples_per_gpu=1): 18 | # Collate CL Augmentations with Batch Dim 19 | if type(batch[0]) == list: 20 | modified_batch = [] 21 | for batch_element in batch: 22 | modified_batch += batch_element 23 | return collate(modified_batch, samples_per_gpu*2) 24 | else: 25 | return collate(batch, samples_per_gpu) 26 | 27 | 28 | def build_dataloader(dataset, 29 | imgs_per_gpu, 30 | workers_per_gpu, 31 | num_gpus=1, 32 | dist=True, 33 | shuffle=True, 34 | **kwargs): 35 | """Build PyTorch DataLoader. 36 | 37 | In distributed training, each GPU/process has a dataloader. 38 | In non-distributed training, there is only one dataloader for all GPUs. 39 | 40 | Args: 41 | dataset (Dataset): A PyTorch dataset. 42 | imgs_per_gpu (int): Number of images on each GPU, i.e., batch size of 43 | each GPU. 44 | workers_per_gpu (int): How many subprocesses to use for data loading 45 | for each GPU. 46 | num_gpus (int): Number of GPUs. Only used in non-distributed training. 47 | dist (bool): Distributed training/test or not. Default: True. 48 | shuffle (bool): Whether to shuffle the data at every epoch. 49 | Default: True. 50 | kwargs: any keyword argument to be used to initialize DataLoader 51 | 52 | Returns: 53 | DataLoader: A PyTorch dataloader. 
54 | """ 55 | if dist: 56 | rank, world_size = get_dist_info() 57 | # DistributedGroupSampler will definitely shuffle the data to satisfy 58 | # that images on each GPU are in the same group 59 | if shuffle: 60 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 61 | world_size, rank) 62 | else: 63 | sampler = DistributedSampler( 64 | dataset, world_size, rank, shuffle=False) 65 | batch_size = imgs_per_gpu 66 | num_workers = workers_per_gpu 67 | else: 68 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 69 | batch_size = num_gpus * imgs_per_gpu 70 | num_workers = num_gpus * workers_per_gpu 71 | 72 | collate_fn = partial(full_collate, samples_per_gpu=imgs_per_gpu) 73 | 74 | data_loader = DataLoader( 75 | dataset, 76 | batch_size=batch_size, 77 | sampler=sampler, 78 | num_workers=num_workers, 79 | collate_fn=collate_fn, 80 | pin_memory=False, 81 | **kwargs) 82 | 83 | return data_loader 84 | -------------------------------------------------------------------------------- /mmdet/datasets/motion_dataset.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class MotionDataset(CocoDataset): 7 | """ 8 | Motion Dataset for Motion Instance Segmentation 9 | """ 10 | 11 | CLASSES = ("moving", "static") 12 | def prepare_test_img(self, idx): 13 | results = super().prepare_test_img(idx) 14 | 15 | # TODO: Use more generic way to work with cityscapes as well not only kitti 16 | if 'kitti' in results['img_meta'][0].data['filename']: 17 | fileno = int(results['img_meta'][0].data['filename'].split('/')[-1].split('.')[0]) 18 | is_first = fileno==0 19 | else: 20 | nframes_span_test = 6 21 | is_first = (idx % nframes_span_test == 0) 22 | 23 | results['img_meta'][0].data['is_first'] = is_first 24 | return results 25 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, 3 | ToTensor, Transpose, to_tensor) 4 | from .loading import LoadAnnotations, LoadImageFromFile, LoadRefImageFromFile, LoadProposals 5 | from .test_aug import MultiScaleFlipAug 6 | from .transforms import (Expand, MinIoURandomCrop, Normalize, Pad, 7 | PhotoMetricDistortion, RandomCrop, 8 | RandomFlip, Resize, 9 | SegResizeFlipCropPadRescale, 10 | ImgResizeFlipNormCropPad, ColorJitter, 11 | GaussianBlur, RandGrayscale) 12 | 13 | __all__ = [ 14 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 15 | 'ToDataContainer', 'Transpose', 'Collect', 'LoadAnnotations', 16 | 'LoadImageFromFile', 'LoadProposals', 'MultiScaleFlipAug', 17 | 'Resize', 'RandomFlip', 'Pad', 'RandomCrop', 'Normalize', 18 | 'SegResizeFlipCropPadRescale', 'ImgResizeFlipNormCropPad', 19 | 'MinIoURandomCrop', 'Expand', 'PhotoMetricDistortion', 20 | 'LoadRefImageFromFile', 'ColorJitter', 'GaussianBlur', 'RandGrayscale', 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | from mmdet.utils import build_from_cfg 3 | from ..registry import PIPELINES 4 | 5 | @PIPELINES.register_module 6 | class Compose(object): 7 | 8 | def __init__(self, transforms): 9 | assert isinstance(transforms, 
collections.abc.Sequence) 10 | self.transforms = [] 11 | for transform in transforms: 12 | if isinstance(transform, dict): 13 | transform = build_from_cfg(transform, PIPELINES) 14 | self.transforms.append(transform) 15 | elif callable(transform): 16 | self.transforms.append(transform) 17 | else: 18 | raise TypeError('transform must be callable or a dict') 19 | 20 | def __call__(self, data): 21 | for t in self.transforms: 22 | data = t(data) 23 | if data is None: 24 | return None 25 | return data 26 | 27 | def __repr__(self): 28 | format_string = self.__class__.__name__ + '(' 29 | for t in self.transforms: 30 | format_string += '\n' 31 | format_string += ' {0}'.format(t) 32 | format_string += '\n)' 33 | return format_string 34 | -------------------------------------------------------------------------------- /mmdet/datasets/pipelines/test_aug.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from ..registry import PIPELINES 4 | from .compose import Compose 5 | 6 | 7 | @PIPELINES.register_module 8 | class MultiScaleFlipAug(object): 9 | 10 | def __init__(self, transforms, img_scale, flip=False): 11 | self.transforms = Compose(transforms) 12 | self.img_scale = img_scale if isinstance(img_scale, 13 | list) else [img_scale] 14 | assert mmcv.is_list_of(self.img_scale, tuple) 15 | self.flip = flip 16 | 17 | def __call__(self, results): 18 | aug_data = [] 19 | flip_aug = [False, True] if self.flip else [False] 20 | for scale in self.img_scale: 21 | for flip in flip_aug: 22 | _results = results.copy() 23 | _results['scale'] = scale 24 | _results['flip'] = flip 25 | data = self.transforms(_results) 26 | aug_data.append(data) 27 | # list of dict to dict of list 28 | aug_data_dict = {key: [] for key in aug_data[0]} 29 | for data in aug_data: 30 | for key, val in data.items(): 31 | aug_data_dict[key].append(val) 32 | return aug_data_dict 33 | 34 | def __repr__(self): 35 | repr_str = self.__class__.__name__ 36 | repr_str += '(transforms={}, img_scale={}, flip={})'.format( 37 | self.transforms, self.img_scale, self.flip) 38 | return repr_str 39 | -------------------------------------------------------------------------------- /mmdet/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | PIPELINES = Registry('pipeline') 5 | -------------------------------------------------------------------------------- /mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .registry import DATASETS 2 | from .xml_style import XMLDataset 3 | 4 | 5 | @DATASETS.register_module 6 | class VOCDataset(XMLDataset): 7 | 8 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 9 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 10 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 11 | 'tvmonitor') 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | 
import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | from .registry import DATASETS 9 | 10 | 11 | @DATASETS.register_module 12 | class XMLDataset(CustomDataset): 13 | 14 | def __init__(self, min_size=None, **kwargs): 15 | super(XMLDataset, self).__init__(**kwargs) 16 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 17 | self.min_size = min_size 18 | 19 | def load_annotations(self, ann_file): 20 | img_infos = [] 21 | img_ids = mmcv.list_from_file(ann_file) 22 | for img_id in img_ids: 23 | filename = 'JPEGImages/{}.jpg'.format(img_id) 24 | xml_path = osp.join(self.img_prefix, 'Annotations', 25 | '{}.xml'.format(img_id)) 26 | tree = ET.parse(xml_path) 27 | root = tree.getroot() 28 | size = root.find('size') 29 | width = int(size.find('width').text) 30 | height = int(size.find('height').text) 31 | img_infos.append( 32 | dict(id=img_id, filename=filename, width=width, height=height)) 33 | return img_infos 34 | 35 | def get_ann_info(self, idx): 36 | img_id = self.img_infos[idx]['id'] 37 | xml_path = osp.join(self.img_prefix, 'Annotations', 38 | '{}.xml'.format(img_id)) 39 | tree = ET.parse(xml_path) 40 | root = tree.getroot() 41 | bboxes = [] 42 | labels = [] 43 | bboxes_ignore = [] 44 | labels_ignore = [] 45 | for obj in root.findall('object'): 46 | name = obj.find('name').text 47 | label = self.cat2label[name] 48 | difficult = int(obj.find('difficult').text) 49 | bnd_box = obj.find('bndbox') 50 | bbox = [ 51 | int(bnd_box.find('xmin').text), 52 | int(bnd_box.find('ymin').text), 53 | int(bnd_box.find('xmax').text), 54 | int(bnd_box.find('ymax').text) 55 | ] 56 | ignore = False 57 | if self.min_size: 58 | assert not self.test_mode 59 | w = bbox[2] - bbox[0] 60 | h = bbox[3] - bbox[1] 61 | if w < self.min_size or h < self.min_size: 62 | ignore = True 63 | if difficult or ignore: 64 | bboxes_ignore.append(bbox) 65 | labels_ignore.append(label) 66 | else: 67 | bboxes.append(bbox) 68 | labels.append(label) 69 | if not bboxes: 70 | bboxes = np.zeros((0, 4)) 71 | labels = np.zeros((0, )) 72 | else: 73 | bboxes = np.array(bboxes, ndmin=2) - 1 74 | labels = np.array(labels) 75 | if not 
bboxes_ignore: 76 | bboxes_ignore = np.zeros((0, 4)) 77 | labels_ignore = np.zeros((0, )) 78 | else: 79 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 80 | labels_ignore = np.array(labels_ignore) 81 | ann = dict( 82 | bboxes=bboxes.astype(np.float32), 83 | labels=labels.astype(np.int64), 84 | bboxes_ignore=bboxes_ignore.astype(np.float32), 85 | labels_ignore=labels_ignore.astype(np.int64)) 86 | return ann 87 | -------------------------------------------------------------------------------- /mmdet/metrics.py: -------------------------------------------------------------------------------- 1 | # Adapted from score written by wkentaro 2 | # https://github.com/wkentaro/pytorch-fcn/blob/master/torchfcn/utils.py 3 | 4 | import numpy as np 5 | 6 | 7 | class RunningScore(object): 8 | def __init__(self, n_classes): 9 | self.n_classes = n_classes 10 | self.confusion_matrix = np.zeros((n_classes, n_classes)) 11 | 12 | def _fast_hist(self, label_true, label_pred, n_class): 13 | mask = (label_true >= 0) & (label_true < n_class) 14 | hist = np.bincount( 15 | n_class * label_true[mask].astype(int) + label_pred[mask], minlength=n_class ** 2 16 | ).reshape(n_class, n_class) 17 | return hist 18 | 19 | def update(self, label_trues, label_preds): 20 | for lt, lp in zip(label_trues, label_preds): 21 | self.confusion_matrix += self._fast_hist(lt.flatten(), lp.flatten(), self.n_classes) 22 | 23 | def get_scores(self): 24 | """Returns accuracy score evaluation result. 25 | - overall accuracy 26 | - mean accuracy 27 | - mean IU 28 | - fwavacc 29 | """ 30 | hist = self.confusion_matrix 31 | acc = np.diag(hist).sum() / hist.sum() 32 | acc_cls = np.diag(hist) / hist.sum(axis=1) 33 | acc_cls = np.nanmean(acc_cls) 34 | iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) 35 | mean_iu = np.nanmean(iu) 36 | freq = hist.sum(axis=1) / hist.sum() 37 | fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() 38 | cls_iu = dict(zip(range(self.n_classes), iu)) 39 | 40 | return ( 41 | { 42 | "Overall Acc: \t": acc, 43 | "Mean Acc : \t": acc_cls, 44 | "FreqW Acc : \t": fwavacc, 45 | "Mean IoU : \t": mean_iu, 46 | }, 47 | cls_iu, 48 | self.confusion_matrix 49 | ) 50 | 51 | def reset(self): 52 | self.confusion_matrix = np.zeros((self.n_classes, self.n_classes)) 53 | -------------------------------------------------------------------------------- /mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import (build_backbone, build_detector, build_head, build_loss, 5 | build_neck, build_roi_extractor, build_shared_head) 6 | from .detectors import * # noqa: F401,F403 7 | from .losses import * # noqa: F401,F403 8 | from .mask_heads import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from .roi_extractors import * # noqa: F401,F403 13 | from .shared_heads import * # noqa: F401,F403 14 | from .track_heads import * 15 | from .ca_heads import * 16 | 17 | __all__ = [ 18 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 19 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 20 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 21 | ] 22 | -------------------------------------------------------------------------------- 
/mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .atss_head import ATSSHead 3 | from .fcos_head import FCOSHead 4 | from .fovea_head import FoveaHead 5 | from .free_anchor_retina_head import FreeAnchorRetinaHead 6 | from .ga_retina_head import GARetinaHead 7 | from .ga_rpn_head import GARPNHead 8 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 9 | from .reppoints_head import RepPointsHead 10 | from .retina_head import RetinaHead 11 | from .retina_sepbn_head import RetinaSepBNHead 12 | from .rpn_head import RPNHead 13 | from .ssd_head import SSDHead 14 | from .solo_head import SOLOHead 15 | from .solov2_head import SOLOv2Head 16 | from .solov2_light_head import SOLOv2LightHead 17 | from .decoupled_solo_head import DecoupledSOLOHead 18 | from .decoupled_solo_light_head import DecoupledSOLOLightHead 19 | from .panoptic_head import SimpleSegHead 20 | 21 | __all__ = [ 22 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 23 | 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead', 'SSDHead', 24 | 'FCOSHead', 'RepPointsHead', 'FoveaHead', 'FreeAnchorRetinaHead', 25 | 'ATSSHead', 'SOLOHead', 'SOLOv2Head', 'SOLOv2LightHead', 'DecoupledSOLOHead', 'DecoupledSOLOLightHead', 26 | 'SimpleSegHead' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/models/anchor_heads/panoptic_head.py: -------------------------------------------------------------------------------- 1 | import torch.nn.functional as F 2 | import torch.nn as nn 3 | from ..utils import bias_init_with_prob, ConvModule, merge_fpn 4 | from ..registry import HEADS 5 | import torch 6 | 7 | @HEADS.register_module 8 | class SimpleSegHead(nn.Module): 9 | def __init__(self, num_classes, in_channels, seg_feats_channel, stacked_convs, original_image_size, 10 | merge_fpn=True): 11 | 12 | super().__init__() 13 | self.num_classes = num_classes 14 | self.original_image_size = original_image_size 15 | self.fcn = nn.ModuleList() 16 | self.stacked_convs = stacked_convs 17 | self.merge_fpn = merge_fpn 18 | 19 | chn = in_channels 20 | for i in range(stacked_convs): 21 | self.fcn.append( 22 | ConvModule( 23 | chn, 24 | seg_feats_channel, 25 | 3, 26 | stride=1, 27 | padding=1, 28 | norm_cfg=None, 29 | bias=True)) 30 | chn = seg_feats_channel 31 | 32 | self.upsample_conv = nn.Conv2d(chn, chn, 1) 33 | self.classifier = nn.Conv2d(chn, num_classes, 1) 34 | 35 | def forward(self, x): 36 | x = merge_fpn(x, average=self.merge_fpn) 37 | for i in range(self.stacked_convs): 38 | x = self.fcn[i](x) 39 | intermediate_feats = x 40 | x = F.interpolate(x, (x.shape[2]*2, x.shape[3]*2)) 41 | x = F.relu(self.upsample_conv(x)) 42 | x = self.classifier(x) 43 | x = F.interpolate(x, self.original_image_size[::-1]) 44 | return x, intermediate_feats 45 | 46 | def loss(self, seg_map, gt_semantic_seg): 47 | #TODO: Add loss using two logits from instance and semantic seg 48 | gt_semantic_seg_up = F.interpolate(gt_semantic_seg.float(), seg_map.shape[-2:], mode='nearest') 49 | gt_semantic_seg_up = gt_semantic_seg_up.long().squeeze(1) 50 | loss_seg = F.cross_entropy(seg_map, gt_semantic_seg_up, ignore_index=255) 51 | return loss_seg 52 | -------------------------------------------------------------------------------- /mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import HRNet 2 | from .resnet import 
ResNet, make_res_layer, TwoStreamResNet 3 | from .resnext import ResNeXt 4 | from .ssd_vgg import SSDVGG 5 | from .resnet_tfstyle import ResNetTFStyle, TwoStreamResNetTFStyle 6 | 7 | __all__ = ['ResNet', 'make_res_layer', 'ResNeXt', 'SSDVGG', 'HRNet', 'TwoStreamResNet', 8 | 'ResNetTFStyle', 'TwoStreamResNetTFStyle'] 9 | -------------------------------------------------------------------------------- /mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | 5 | __all__ = [ 6 | 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' 7 | ] 8 | -------------------------------------------------------------------------------- /mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 5 | ROI_EXTRACTORS, SHARED_HEADS) 6 | 7 | 8 | def build(cfg, registry, default_args=None): 9 | if isinstance(cfg, list): 10 | modules = [ 11 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 12 | ] 13 | return nn.Sequential(*modules) 14 | else: 15 | return build_from_cfg(cfg, registry, default_args) 16 | 17 | 18 | def build_backbone(cfg): 19 | return build(cfg, BACKBONES) 20 | 21 | 22 | def build_neck(cfg): 23 | return build(cfg, NECKS) 24 | 25 | 26 | def build_roi_extractor(cfg): 27 | return build(cfg, ROI_EXTRACTORS) 28 | 29 | 30 | def build_shared_head(cfg): 31 | return build(cfg, SHARED_HEADS) 32 | 33 | 34 | def build_head(cfg): 35 | return build(cfg, HEADS) 36 | 37 | 38 | def build_loss(cfg): 39 | return build(cfg, LOSSES) 40 | 41 | 42 | def build_detector(cfg, train_cfg=None, test_cfg=None): 43 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 44 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .compose_ca import ComposedClassAgnosticHead 3 | from .appearance_ca_mahalanobis import MahalanobisAppearanceBasedClassAgnosticHead 4 | from .appearance_ca_map import MAPClassAgnosticHead 5 | 6 | __all__ = ['ComposedClassAgnosticHead', 7 | 'MahalanobisAppearanceBasedClassAgnosticHead', 8 | 'MAPClassAgnosticHead', 9 | ] 10 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/appearance_ca_abstract.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class Agnostic Head Abstract Class 3 | """ 4 | 5 | import torch.nn as nn 6 | import torch 7 | from mmcv.cnn import normal_init 8 | import torch.nn.functional as F 9 | 10 | from ..utils import ConvModule 11 | from ..utils import merge_fpn 12 | import random 13 | import numpy as np 14 | 15 | class AppearanceBasedClassAgnosticAbstract(nn.Module): 16 | def __init__(self, clustering_type='dbscan', in_channels=256, interm_channels=256, 17 | n_convs=7, norm_cfg=None, num_classes=19, merge_fpn=False): 18 | 19 | super().__init__() 20 | self.clustering_type = clustering_type 21 | self.stuff_idx = 11 22 | self.interm_channels = interm_channels 23 | self.conv_modules = nn.ModuleList() 24 | self.merge_fpn = merge_fpn 25 | self.num_classes = num_classes 26 | self.norm_cfg 
= norm_cfg 27 | 28 | self.conv_modules = self.init_layers( 29 | n_convs, in_channels, interm_channels, norm_cfg, self.conv_modules 30 | ) 31 | self.conv_modules = self.init_weights_module(self.conv_modules) 32 | 33 | def init_layers(self, n_convs, in_channels, interm_channels, norm_cfg, conv_modules): 34 | for idx in range(n_convs): 35 | if idx == 0: 36 | chn = in_channels 37 | else: 38 | chn = interm_channels 39 | 40 | conv_modules.append( 41 | ConvModule( 42 | chn, 43 | interm_channels, 44 | 3, 45 | stride=1, 46 | padding=1, 47 | norm_cfg=norm_cfg, 48 | bias=norm_cfg is None)) 49 | return conv_modules 50 | 51 | def init_weights(self): 52 | self.conv_modules = self.init_weights_module(self.conv_modules) 53 | 54 | def init_weights_module(self, conv_modules): 55 | for m in conv_modules: 56 | normal_init(m.conv, std=0.01) 57 | return conv_modules 58 | 59 | def forward(self, feats, eval=0): 60 | if self.merge_fpn: 61 | feats = merge_fpn(feats) 62 | else: 63 | feats = feats[0] 64 | 65 | merged_fpn_feats = feats 66 | 67 | for conv_layer in self.conv_modules: 68 | feats = conv_layer(feats) 69 | 70 | out = {'class_agnostic_embeddings': feats} 71 | if not self.training: 72 | out['merged_fpn_embeddings'] = merged_fpn_feats 73 | return out 74 | 75 | def loss(self, **kwargs): 76 | pass 77 | 78 | def get_seg(self, **kwargs): 79 | pass 80 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/appearance_ca_map.py: -------------------------------------------------------------------------------- 1 | """ 2 | Masked Average Pooling Head for Feature Analysis 3 | """ 4 | 5 | import torch.nn as nn 6 | import torch 7 | from mmcv.cnn import normal_init 8 | import torch.nn.functional as F 9 | 10 | from ..registry import HEADS 11 | from ..utils import ConvModule 12 | from ..utils import merge_fpn 13 | from mmdet.core import compute_ood_scores 14 | import random 15 | import numpy as np 16 | import time 17 | 18 | 19 | @HEADS.register_module 20 | class MAPClassAgnosticHead(nn.Module): 21 | def __init__(self, num_classes=9): 22 | super().__init__() 23 | 24 | def init_weights(self): 25 | pass 26 | 27 | def forward(self, feats, eval=0): 28 | out = {'class_agnostic_embeddings': feats, 29 | 'merged_fpn_embeddings': feats} 30 | return out 31 | 32 | def get_seg(self, **kwargs): 33 | return [{}] 34 | -------------------------------------------------------------------------------- /mmdet/models/ca_heads/compose_ca.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from ..registry import HEADS 4 | from .. 
import builder 5 | 6 | @HEADS.register_module 7 | class ComposedClassAgnosticHead(nn.Module): 8 | def __init__(self, ca_heads): 9 | super().__init__() 10 | self.heads_list = nn.ModuleDict() 11 | for ca_name, ca_head in ca_heads.items(): 12 | self.heads_list[ca_name] = builder.build_head(ca_head) 13 | 14 | def init_weights(self): 15 | for _, ca_head in self.heads_list.items(): 16 | ca_head.init_weights() 17 | 18 | def forward(self, **kwargs): 19 | outs = {} 20 | for _, ca_head in self.heads_list.items(): 21 | outs.update(ca_head(**kwargs)) 22 | return outs 23 | 24 | def loss(self, **kwargs): 25 | losses = {} 26 | for _, ca_head in self.heads_list.items(): 27 | losses.update(ca_head.loss(**kwargs)) 28 | return losses 29 | 30 | def get_seg(self, **kwargs): 31 | seg_out = [{}] 32 | for _, ca_head in self.heads_list.items(): 33 | ca_out = ca_head.get_seg(**kwargs) 34 | if ca_out[0] is not None: 35 | seg_out[0].update(ca_out[0]) 36 | return seg_out 37 | -------------------------------------------------------------------------------- /mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .atss import ATSS 2 | from .base import BaseDetector 3 | from .cascade_rcnn import CascadeRCNN 4 | from .double_head_rcnn import DoubleHeadRCNN 5 | from .fast_rcnn import FastRCNN 6 | from .faster_rcnn import FasterRCNN 7 | from .fcos import FCOS 8 | from .fovea import FOVEA 9 | from .grid_rcnn import GridRCNN 10 | from .htc import HybridTaskCascade 11 | from .mask_rcnn import MaskRCNN 12 | from .mask_scoring_rcnn import MaskScoringRCNN 13 | from .reppoints_detector import RepPointsDetector 14 | from .retinanet import RetinaNet 15 | from .rpn import RPN 16 | from .single_stage import SingleStageDetector 17 | from .single_stage_ins import SingleStageInsDetector 18 | from .two_stage import TwoStageDetector 19 | from .solo import SOLO 20 | from .solov2 import SOLOv2 21 | 22 | __all__ = [ 23 | 'ATSS', 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 24 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 25 | 'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN', 26 | 'RepPointsDetector', 'FOVEA', 'SingleStageInsDetector', 'SOLO', 'SOLOv2' 27 | ] 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/atss.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class ATSS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | 
shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | """ 33 | Args: 34 | imgs (List[Tensor]): the outer list indicates test-time 35 | augmentations and inner Tensor should have a shape NxCxHxW, 36 | which contains all images in the batch. 37 | img_metas (List[List[dict]]): the outer list indicates test-time 38 | augs (multiscale, flip, etc.) and the inner list indicates 39 | images in a batch 40 | proposals (List[List[Tensor | None]]): predefined proposals for 41 | each test-time augmentation and each item. 42 | """ 43 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 44 | if not isinstance(var, list): 45 | raise TypeError('{} must be a list, but got {}'.format( 46 | name, type(var))) 47 | 48 | num_augs = len(imgs) 49 | if num_augs != len(img_metas): 50 | raise ValueError( 51 | 'num of augmentations ({}) != num of image meta ({})'.format( 52 | len(imgs), len(img_metas))) 53 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 54 | imgs_per_gpu = imgs[0].size(0) 55 | assert imgs_per_gpu == 1 56 | 57 | if num_augs == 1: 58 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 59 | **kwargs) 60 | else: 61 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 62 | -------------------------------------------------------------------------------- /mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/fovea.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FOVEA(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 |
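A note on the pattern above: every detector in this directory is a thin wrapper whose only job is to register a constructor signature with the `DETECTORS` registry; instances are created from config dicts by `build_detector` (see `mmdet/models/builder.py` earlier in this dump). A minimal sketch of that flow, using a hypothetical `ToyDetector` stand-in rather than a full, validated detector config:

import torch.nn as nn
from mmdet.models import DETECTORS, build_detector

@DETECTORS.register_module
class ToyDetector(nn.Module):
    # Hypothetical stand-in: real configs name a registered class such as
    # 'FasterRCNN' and pass its full set of constructor arguments.
    def __init__(self, width, train_cfg=None, test_cfg=None):
        super().__init__()
        self.body = nn.Conv2d(3, width, 3, padding=1)

model = build_detector(dict(type='ToyDetector', width=16),
                       train_cfg=None, test_cfg=None)
assert isinstance(model, ToyDetector)

`build_detector` injects `train_cfg`/`test_cfg` as default kwargs through `build_from_cfg`, which is why every registered detector accepts those two arguments.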
-------------------------------------------------------------------------------- /mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /mmdet/models/detectors/reppoints_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms 4 | from ..registry import DETECTORS 5 | from .single_stage import SingleStageDetector 6 | 7 | 8 | @DETECTORS.register_module 9 | class RepPointsDetector(SingleStageDetector): 10 | """RepPoints: Point Set Representation for Object Detection. 11 | 12 | This detector is the implementation of: 13 | - RepPoints detector (https://arxiv.org/pdf/1904.11490) 14 | """ 15 | 16 | def __init__(self, 17 | backbone, 18 | neck, 19 | bbox_head, 20 | train_cfg=None, 21 | test_cfg=None, 22 | pretrained=None): 23 | super(RepPointsDetector, 24 | self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg, 25 | pretrained) 26 | 27 | def merge_aug_results(self, aug_bboxes, aug_scores, img_metas): 28 | """Merge augmented detection bboxes and scores. 29 | 30 | Args: 31 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 32 | aug_scores (list[Tensor] or None): shape (n, #class) 33 | img_metas (list[list[dict]]): meta information of each image, one list per test-time augmentation.
34 | 35 | Returns: 36 | tuple: (bboxes, scores) 37 | """ 38 | recovered_bboxes = [] 39 | for bboxes, img_info in zip(aug_bboxes, img_metas): 40 | img_shape = img_info[0]['img_shape'] 41 | scale_factor = img_info[0]['scale_factor'] 42 | flip = img_info[0]['flip'] 43 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 44 | recovered_bboxes.append(bboxes) 45 | bboxes = torch.cat(recovered_bboxes, dim=0) 46 | if aug_scores is None: 47 | return bboxes 48 | else: 49 | scores = torch.cat(aug_scores, dim=0) 50 | return bboxes, scores 51 | 52 | def aug_test(self, imgs, img_metas, rescale=False): 53 | # recompute feats to save memory 54 | feats = self.extract_feats(imgs) 55 | 56 | aug_bboxes = [] 57 | aug_scores = [] 58 | for x, img_meta in zip(feats, img_metas): 59 | # only one image in the batch 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, False, False) 62 | det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0] 63 | aug_bboxes.append(det_bboxes) 64 | aug_scores.append(det_scores) 65 | 66 | # after merging, bboxes will be rescaled to the original image size 67 | merged_bboxes, merged_scores = self.merge_aug_results( 68 | aug_bboxes, aug_scores, img_metas) 69 | det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, 70 | self.test_cfg.score_thr, 71 | self.test_cfg.nms, 72 | self.test_cfg.max_per_img) 73 | 74 | if rescale: 75 | _det_bboxes = det_bboxes 76 | else: 77 | _det_bboxes = det_bboxes.clone() 78 | _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] 79 | bbox_results = bbox2result(_det_bboxes, det_labels, 80 | self.bbox_head.num_classes) 81 | return bbox_results 82 | -------------------------------------------------------------------------------- /mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /mmdet/models/detectors/single_stage.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from mmdet.core import bbox2result 4 | from .. import builder 5 | from ..registry import DETECTORS 6 | from .base import BaseDetector 7 | 8 | 9 | @DETECTORS.register_module 10 | class SingleStageDetector(BaseDetector): 11 | """Base class for single-stage detectors. 12 | 13 | Single-stage detectors directly and densely predict bounding boxes on the 14 | output features of the backbone+neck. 
15 | """ 16 | 17 | def __init__(self, 18 | backbone, 19 | neck=None, 20 | bbox_head=None, 21 | train_cfg=None, 22 | test_cfg=None, 23 | pretrained=None): 24 | super(SingleStageDetector, self).__init__() 25 | self.backbone = builder.build_backbone(backbone) 26 | if neck is not None: 27 | self.neck = builder.build_neck(neck) 28 | self.bbox_head = builder.build_head(bbox_head) 29 | self.train_cfg = train_cfg 30 | self.test_cfg = test_cfg 31 | self.init_weights(pretrained=pretrained) 32 | 33 | def init_weights(self, pretrained=None): 34 | super(SingleStageDetector, self).init_weights(pretrained) 35 | self.backbone.init_weights(pretrained=pretrained) 36 | if self.with_neck: 37 | if isinstance(self.neck, nn.Sequential): 38 | for m in self.neck: 39 | m.init_weights() 40 | else: 41 | self.neck.init_weights() 42 | self.bbox_head.init_weights() 43 | 44 | def extract_feat(self, img): 45 | """Directly extract features from the backbone+neck 46 | """ 47 | x = self.backbone(img) 48 | if self.with_neck: 49 | x = self.neck(x) 50 | return x 51 | 52 | def forward_dummy(self, img): 53 | """Used for computing network flops. 54 | 55 | See `mmedetection/tools/get_flops.py` 56 | """ 57 | x = self.extract_feat(img) 58 | outs = self.bbox_head(x) 59 | return outs 60 | 61 | def forward_train(self, 62 | img, 63 | img_metas, 64 | gt_bboxes, 65 | gt_labels, 66 | gt_bboxes_ignore=None): 67 | x = self.extract_feat(img) 68 | outs = self.bbox_head(x) 69 | loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg) 70 | losses = self.bbox_head.loss( 71 | *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore) 72 | return losses 73 | 74 | def simple_test(self, img, img_meta, rescale=False): 75 | x = self.extract_feat(img) 76 | outs = self.bbox_head(x) 77 | bbox_inputs = outs + (img_meta, self.test_cfg, rescale) 78 | bbox_list = self.bbox_head.get_bboxes(*bbox_inputs) 79 | bbox_results = [ 80 | bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes) 81 | for det_bboxes, det_labels in bbox_list 82 | ] 83 | return bbox_results[0] 84 | 85 | def aug_test(self, imgs, img_metas, rescale=False): 86 | raise NotImplementedError 87 | -------------------------------------------------------------------------------- /mmdet/models/detectors/solo.py: -------------------------------------------------------------------------------- 1 | from .single_stage_ins import SingleStageInsDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class SOLO(SingleStageInsDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head=None, 12 | track_head=None, 13 | panoptic_head=None, 14 | train_cfg=None, 15 | test_cfg=None, 16 | pretrained=None, 17 | sta_config=None, 18 | ca_head=None, 19 | max_nottrack=20): 20 | super(SOLO, self).__init__(backbone, neck, bbox_head, track_head, panoptic_head, train_cfg, 21 | test_cfg, pretrained, sta_config=sta_config, ca_head=ca_head, 22 | max_nottrack=max_nottrack) 23 | -------------------------------------------------------------------------------- /mmdet/models/detectors/solov2.py: -------------------------------------------------------------------------------- 1 | from .single_stage_ins import SingleStageInsDetector 2 | from ..registry import DETECTORS 3 | 4 | 5 | @DETECTORS.register_module 6 | class SOLOv2(SingleStageInsDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | mask_feat_head, 13 | train_cfg=None, 14 | test_cfg=None, 15 | pretrained=None): 16 | super(SOLOv2, self).__init__(backbone, neck, bbox_head, mask_feat_head, 
train_cfg, 17 | test_cfg, pretrained) 18 | -------------------------------------------------------------------------------- /mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .iou_loss import (BoundedIoULoss, GIoULoss, IoULoss, bounded_iou_loss, 8 | iou_loss) 9 | from .mse_loss import MSELoss, mse_loss 10 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 11 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 12 | 13 | __all__ = [ 14 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 15 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 16 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 17 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss', 18 | 'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 19 | 'weight_reduce_loss', 'weighted_loss' 20 | ] 21 | -------------------------------------------------------------------------------- /mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = 
beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | import torch 8 | 9 | # This method is only for debugging 10 | def py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None, 35 | cate_loss_weight=None): 36 | # Function.apply does not accept keyword arguments, so the decorator 37 | # "weighted_loss" is not applicable 38 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 39 | if cate_loss_weight is None: 40 | cate_loss_weight = [1.0] * loss.shape[1] 41 | cate_loss_weight = torch.tensor(cate_loss_weight).unsqueeze(0).cuda() 42 | loss = loss * cate_loss_weight 43 | 44 | # TODO: find a proper way to handle the shape of weight 45 | if weight is not None: 46 | weight = weight.view(-1, 1) 47 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 48 | return loss 49 | 50 | 51 | @LOSSES.register_module 52 | class FocalLoss(nn.Module): 53 | 54 | def __init__(self, 55 | use_sigmoid=True, 56 | gamma=2.0, 57 | alpha=0.25, 58 | reduction='mean', 59 | loss_weight=1.0, 60 | cate_loss_weight=None): 61 | super(FocalLoss, self).__init__() 62 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 
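# Note: `cate_loss_weight`, when provided, is a list of per-class multipliers
# that `sigmoid_focal_loss` above broadcasts over the class dimension of the
# per-element loss before reduction.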
63 | self.use_sigmoid = use_sigmoid 64 | self.gamma = gamma 65 | self.alpha = alpha 66 | self.reduction = reduction 67 | self.loss_weight = loss_weight 68 | self.cate_loss_weight = cate_loss_weight 69 | 70 | def forward(self, 71 | pred, 72 | target, 73 | weight=None, 74 | avg_factor=None, 75 | reduction_override=None): 76 | assert reduction_override in (None, 'none', 'mean', 'sum') 77 | reduction = ( 78 | reduction_override if reduction_override else self.reduction) 79 | if self.use_sigmoid: 80 | loss_cls = self.loss_weight * sigmoid_focal_loss( 81 | pred, 82 | target, 83 | weight, 84 | gamma=self.gamma, 85 | alpha=self.alpha, 86 | reduction=reduction, 87 | avg_factor=avg_factor, 88 | cate_loss_weight=self.cate_loss_weight) 89 | else: 90 | raise NotImplementedError 91 | return loss_cls 92 | -------------------------------------------------------------------------------- /mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 | diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 
15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Avarage factor when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor can not be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> import torch 68 | >>> @weighted_loss 69 | >>> def l1_loss(pred, target): 70 | >>> return (pred - target).abs() 71 | 72 | >>> pred = torch.Tensor([0, 2, 3]) 73 | >>> target = torch.Tensor([1, 1, 1]) 74 | >>> weight = torch.Tensor([1, 0, 1]) 75 | 76 | >>> l1_loss(pred, target) 77 | tensor(1.3333) 78 | >>> l1_loss(pred, target, weight) 79 | tensor(1.) 
80 | >>> l1_loss(pred, target, reduction='none') 81 | tensor([1., 1., 2.]) 82 | >>> l1_loss(pred, target, weight, avg_factor=2) 83 | tensor(1.5000) 84 | """ 85 | 86 | @functools.wraps(loss_func) 87 | def wrapper(pred, 88 | target, 89 | weight=None, 90 | reduction='mean', 91 | avg_factor=None, 92 | **kwargs): 93 | # get element-wise loss 94 | loss = loss_func(pred, target, **kwargs) 95 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 96 | return loss 97 | 98 | return wrapper 99 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | from .mask_feat_head import MaskFeatHead 7 | 8 | __all__ = [ 9 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 10 | 'MaskIoUHead', 'MaskFeatHead' 11 | ] 12 | -------------------------------------------------------------------------------- /mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from ..registry import HEADS 2 | from ..utils import ConvModule 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .fpn import FPN 3 | from .hrfpn import HRFPN 4 | from .nas_fpn import NASFPN 5 | from .fpn_flo_warp import FPNFlowWarp 6 | 7 | __all__ = ['FPN', 'BFP', 'HRFPN', 'NASFPN', 'FPNFlowWarp'] 8 | -------------------------------------------------------------------------------- /mmdet/models/necks/fpn_flo_warp.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | from mmcv.cnn import xavier_init 4 | import torch 5 | import numpy as np 6 | import copy 7 | 8 | from mmdet.core import auto_fp16 9 | from ..registry import NECKS 10 | from ..utils import ConvModule 11 | from .fpn import FPN 12 | 13 | @NECKS.register_module 14 | class FPNFlowWarp(FPN): 15 | 16 | def __init__(self, **kwargs): 17 | super(FPNFlowWarp, self).__init__(**kwargs) 18 | 19 | @auto_fp16() 20 | def forward(self, inputs, flow): 21 | outs = super().forward(inputs) 22 | 23 
| # Get even and odd indices 24 | odd_indices = torch.arange(outs[0].shape[0]) % 2 != 0 25 | even_indices = torch.arange(outs[0].shape[0]) % 2 == 0 26 | 27 | feat_shape = outs[0][odd_indices].shape[-2:] 28 | 29 | identity_grid = np.meshgrid(np.linspace(-1, 1, feat_shape[1]), 30 | np.linspace(-1, 1, feat_shape[0])) 31 | identity_grid = torch.tensor(identity_grid).float().cuda() 32 | identity_grid = identity_grid.permute(1,2,0).unsqueeze(0) 33 | 34 | warped_outs = [] 35 | if flow is not None: 36 | for level in range(len(outs)): 37 | # Warp odd indices (previous-frame features) 38 | original_feat_shape = outs[level][odd_indices].shape[-2:] 39 | warping_flow = F.interpolate(flow[odd_indices], 40 | feat_shape, 41 | mode='bilinear', align_corners=True) 42 | warping_flow = warping_flow.permute(0, 2, 3, 1) 43 | warping_flow_normalize = warping_flow.clone() 44 | warping_flow_normalize[:, :, :, 0] = warping_flow[:, :, :, 0] / feat_shape[1] 45 | warping_flow_normalize[:, :, :, 1] = warping_flow[:, :, :, 1] / feat_shape[0] 46 | 47 | 48 | feats = F.interpolate(outs[level][odd_indices], feat_shape, 49 | mode='bilinear', align_corners=True) 50 | 51 | warped_feats = F.grid_sample( 52 | feats, identity_grid - warping_flow_normalize 53 | ) 54 | 55 | warped_feats = F.interpolate(warped_feats, original_feat_shape, 56 | mode='bilinear', align_corners=True) 57 | 58 | # Assemble final feats: warped previous-frame features at odd indices, current features at even indices as-is 59 | final_warped_feats = torch.zeros(outs[level].shape).cuda() 60 | final_warped_feats[odd_indices] = warped_feats 61 | final_warped_feats[even_indices] = outs[level][even_indices] 62 | warped_outs.append(final_warped_feats) 63 | else: 64 | warped_outs = outs 65 | 66 | return tuple(warped_outs) 67 | -------------------------------------------------------------------------------- /mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_attention import GeneralizedAttention 2 | from .non_local import NonLocal2D 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | ROI_EXTRACTORS = Registry('roi_extractor') 6 | SHARED_HEADS = Registry('shared_head') 7 | HEADS = Registry('head') 8 | LOSSES = Registry('loss') 9 | DETECTORS = Registry('detector') 10 | -------------------------------------------------------------------------------- /mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from mmcv.cnn import constant_init, kaiming_init 3 | from mmcv.runner import load_checkpoint 4 | 5 | from mmdet.core import auto_fp16 6 |
from mmdet.utils import get_root_logger 7 | from ..backbones import ResNet, make_res_layer 8 | from ..registry import SHARED_HEADS 9 | 10 | 11 | @SHARED_HEADS.register_module 12 | class ResLayer(nn.Module): 13 | 14 | def __init__(self, 15 | depth, 16 | stage=3, 17 | stride=2, 18 | dilation=1, 19 | style='pytorch', 20 | norm_cfg=dict(type='BN', requires_grad=True), 21 | norm_eval=True, 22 | with_cp=False, 23 | dcn=None): 24 | super(ResLayer, self).__init__() 25 | self.norm_eval = norm_eval 26 | self.norm_cfg = norm_cfg 27 | self.stage = stage 28 | self.fp16_enabled = False 29 | block, stage_blocks = ResNet.arch_settings[depth] 30 | stage_block = stage_blocks[stage] 31 | planes = 64 * 2**stage 32 | inplanes = 64 * 2**(stage - 1) * block.expansion 33 | 34 | res_layer = make_res_layer( 35 | block, 36 | inplanes, 37 | planes, 38 | stage_block, 39 | stride=stride, 40 | dilation=dilation, 41 | style=style, 42 | with_cp=with_cp, 43 | norm_cfg=self.norm_cfg, 44 | dcn=dcn) 45 | self.add_module('layer{}'.format(stage + 1), res_layer) 46 | 47 | def init_weights(self, pretrained=None): 48 | if isinstance(pretrained, str): 49 | logger = get_root_logger() 50 | load_checkpoint(self, pretrained, strict=False, logger=logger) 51 | elif pretrained is None: 52 | for m in self.modules(): 53 | if isinstance(m, nn.Conv2d): 54 | kaiming_init(m) 55 | elif isinstance(m, nn.BatchNorm2d): 56 | constant_init(m, 1) 57 | else: 58 | raise TypeError('pretrained must be a str or None') 59 | 60 | @auto_fp16() 61 | def forward(self, x): 62 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 63 | out = res_layer(x) 64 | return out 65 | 66 | def train(self, mode=True): 67 | super(ResLayer, self).train(mode) 68 | if self.norm_eval: 69 | for m in self.modules(): 70 | if isinstance(m, nn.BatchNorm2d): 71 | m.eval() 72 | -------------------------------------------------------------------------------- /mmdet/models/track_heads/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .track_head import TrackHead 3 | 4 | __all__ = ['TrackHead'] 5 | -------------------------------------------------------------------------------- /mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule, build_conv_layer 2 | from .conv_ws import ConvWS2d, conv_ws_2d 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init, 6 | uniform_init, xavier_init) 7 | from .fpn_utils import merge_fpn 8 | 9 | __all__ = [ 10 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 11 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 12 | 'kaiming_init', 'bias_init_with_prob', 'Scale', 'merge_fpn' 13 | ] 14 | -------------------------------------------------------------------------------- /mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 | dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding,
dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /mmdet/models/utils/fpn_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def merge_fpn(x, average=True): 5 | max_size = x[0].shape 6 | merged_fpn = [] 7 | for i, _ in enumerate(x): 8 | merged_fpn.append(F.interpolate(x[i], max_size[-2:])) 9 | if average: 10 | return torch.stack(merged_fpn).mean(dim=0) 11 | else: 12 | concat = torch.stack(merged_fpn) 13 | return concat.permute(1,0,2,3,4).reshape(concat.shape[1], -1, *concat.shape[-2:]) 14 | 15 | -------------------------------------------------------------------------------- /mmdet/models/utils/functional.py: -------------------------------------------------------------------------------- 1 | from . import functions 2 | 3 | 4 | def aggregation(input, weight, kernel_size=3, stride=1, padding=0, dilation=1, pad_mode=1): 5 | assert input.shape[0] == weight.shape[0] and (input.shape[1] % weight.shape[1] == 0) and pad_mode in [0, 1] 6 | if input.is_cuda: 7 | if pad_mode == 0: 8 | out = functions.aggregation_zeropad(input, weight, kernel_size, stride, padding, dilation) 9 | elif pad_mode == 1: 10 | out = functions.aggregation_refpad(input, weight, kernel_size, stride, padding, dilation) 11 | else: 12 | raise NotImplementedError 13 | return out 14 | -------------------------------------------------------------------------------- /mmdet/models/utils/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .aggregation_zeropad import * 2 | from .aggregation_refpad import * 3 | from .utils import * 4 | -------------------------------------------------------------------------------- /mmdet/models/utils/functions/utils.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | from string import Template 3 | import cupy 4 | import torch 5 | 6 | 7 | Stream = namedtuple('Stream', ['ptr']) 8 | 9 | 10 | def Dtype(t): 11 | if isinstance(t, torch.cuda.FloatTensor): 12 | return 'float' 13 | elif isinstance(t, torch.cuda.DoubleTensor): 14 | return 'double' 15 | 16 | 17 | @cupy.util.memoize(for_each_device=True) 18 | def load_kernel(kernel_name, code, **kwargs): 19 | code = Template(code).substitute(**kwargs) 20 | kernel_code = cupy.cuda.compile_with_cache(code) 21 | return kernel_code.get_function(kernel_name) 22 | -------------------------------------------------------------------------------- /mmdet/models/utils/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .aggregation import * 2 | -------------------------------------------------------------------------------- /mmdet/models/utils/modules/aggregation.py: -------------------------------------------------------------------------------- 1 | from 
torch import nn 2 | from torch.nn.modules.utils import _pair 3 | 4 | from .. import functional as F 5 | 6 | 7 | class Aggregation(nn.Module): 8 | 9 | def __init__(self, kernel_size, stride, padding, dilation, pad_mode): 10 | super(Aggregation, self).__init__() 11 | self.kernel_size = _pair(kernel_size) 12 | self.stride = _pair(stride) 13 | self.padding = _pair(padding) 14 | self.dilation = _pair(dilation) 15 | self.pad_mode = pad_mode 16 | 17 | def forward(self, input, weight): 18 | return F.aggregation(input, weight, self.kernel_size, self.stride, self.padding, self.dilation, self.pad_mode) 19 | -------------------------------------------------------------------------------- /mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether to stop gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended to the norm abbreviation to 22 | create the layer name. 23 | 24 | Returns: 25 | name (str): abbreviation + postfix 26 | layer (nn.Module): created norm layer 27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module): 6 | """ 7 | A learnable scale parameter 8 | """ 9 | 10 | def __init__(self, scale=1.0): 11 | super(Scale, self).__init__() 12 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 13 | 14 | def forward(self, x): 15 | return x * self.scale 16 | -------------------------------------------------------------------------------- /mmdet/models/utils/sta_module.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .modules import Aggregation 6 | 7 | def conv1x1(in_planes, out_planes, stride=1): 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 9 | 10 | 11 | def position(H, W, is_cuda=True): 12 | if is_cuda: 13 | loc_w =
torch.linspace(-1.0, 1.0, W).cuda().unsqueeze(0).repeat(H, 1) 14 | loc_h = torch.linspace(-1.0, 1.0, H).cuda().unsqueeze(1).repeat(1, W) 15 | else: 16 | loc_w = torch.linspace(-1.0, 1.0, W).unsqueeze(0).repeat(H, 1) 17 | loc_h = torch.linspace(-1.0, 1.0, H).unsqueeze(1).repeat(1, W) 18 | loc = torch.cat([loc_w.unsqueeze(0), loc_h.unsqueeze(0)], 0).unsqueeze(0) 19 | return loc 20 | 21 | 22 | class SAM(nn.Module): 23 | def __init__(self, sa_type, in_planes, rel_planes, out_planes, share_planes, kernel_size=3, stride=1, dilation=1): 24 | super(SAM, self).__init__() 25 | self.sa_type, self.kernel_size, self.stride = sa_type, kernel_size, stride 26 | self.conv1 = nn.Conv2d(in_planes, rel_planes, kernel_size=1) 27 | self.conv2 = nn.Conv2d(in_planes, rel_planes, kernel_size=1) 28 | self.conv3 = nn.Conv2d(in_planes, out_planes, kernel_size=1) 29 | 30 | self.conv_w = nn.Sequential(nn.BatchNorm2d(rel_planes * (pow(kernel_size, 2) + 1)), nn.ReLU(inplace=True), 31 | nn.Conv2d(rel_planes * (pow(kernel_size, 2) + 1), out_planes // share_planes, kernel_size=1, bias=False), 32 | nn.BatchNorm2d(out_planes // share_planes), nn.ReLU(inplace=True), 33 | nn.Conv2d(out_planes // share_planes, pow(kernel_size, 2) * out_planes // share_planes, kernel_size=1)) 34 | self.unfold_j = nn.Unfold(kernel_size=kernel_size, dilation=dilation, padding=0, stride=stride) 35 | self.pad = nn.ReflectionPad2d(kernel_size // 2) 36 | 37 | assert self.stride == 1, 'stride > 1 not implemented' 38 | self.aggregation = Aggregation(kernel_size, stride, (dilation * (kernel_size - 1) + 1) // 2, dilation, pad_mode=1) 39 | 40 | def forward(self, x_ref, x_current): 41 | 42 | x1, x2, x3 = self.conv1(x_current), self.conv2(x_ref), self.conv3(x_current) 43 | x1 = x1.view(x_ref.shape[0], -1, 1, x_ref.shape[2]*x_ref.shape[3])  # queries flattened to (B, rel_planes, 1, H*W) 44 | x2 = self.unfold_j(self.pad(x2)).view(x_ref.shape[0], -1, 1, x1.shape[-1])  # keys unfolded over each kernel neighborhood 45 | # Refer to equation 5: R(i) is the 7x7 footprint, delta is concatenation, gamma is conv_w. 46 | w = self.conv_w(torch.cat([x1, x2], 1)).view(x_ref.shape[0], -1, pow(self.kernel_size, 2), x1.shape[-1]) 47 | x = self.aggregation(x3, w) 48 | return x 49 | 50 | 51 | class STABottleneck(nn.Module): 52 | def __init__(self, sa_type, in_planes, rel_planes, out_planes, share_planes=8, kernel_size=3, stride=1): 53 | super(STABottleneck, self).__init__() 54 | self.sam = SAM(sa_type=sa_type, in_planes=in_planes, rel_planes=rel_planes, 55 | out_planes=out_planes, share_planes=share_planes, kernel_size=kernel_size, 56 | stride=stride) 57 | self.stride = stride 58 | 59 | def forward_single(self, x_ref, x_current): 60 | out = self.sam(x_ref, x_current) 61 | return out 62 | 63 | def forward(self, x_ref, x_current): 64 | out = [] 65 | for xr, xc in zip(x_ref, x_current): 66 | out.append(self.forward_single(xr, xc)) 67 | return out 68 | 69 | 70 |
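A hedged, CUDA-only usage sketch for STABottleneck above (it runs the cupy aggregation kernels, so a GPU build is assumed; the plane sizes are illustrative):

import torch
from mmdet.models.utils.sta_module import STABottleneck

sta = STABottleneck(sa_type=0, in_planes=256, rel_planes=64, out_planes=256).cuda()
ref = [torch.randn(1, 256, 32, 32, device='cuda')]  # one pyramid level from the reference frame
cur = [torch.randn(1, 256, 32, 32, device='cuda')]  # matching level from the current frame
out = sta(ref, cur)  # list with one (1, 256, 32, 32) tensor; spatial size is preserved

-------------------------------------------------------------------------------- /mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias') and module.bias is not None: 18 |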
nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias') and module.bias is not None: 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformConv, 4 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack, 5 | deform_conv, deform_roi_pooling, modulated_deform_conv) 6 | from .masked_conv import MaskedConv2d 7 | from .nms import nms, soft_nms 8 | from .roi_align import RoIAlign, roi_align 9 | from .roi_pool import RoIPool, roi_pool 10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 11 | from .utils import get_compiler_version, get_compiling_cuda_version 12 | 13 | __all__ = [ 14 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 15 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 16 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 17 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 18 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 19 | 'MaskedConv2d', 'ContextBlock', 'get_compiler_version', 20 | 'get_compiling_cuda_version' 21 | ] 22 | -------------------------------------------------------------------------------- /mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 |
const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0); 62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 |
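Complementing the gradcheck script above, a hedged usage sketch for the RoIAlign module (defined in roi_align.py just below); rois follow the (batch_idx, x1, y1, x2, y2) layout the script constructs, and all shapes are illustrative:

import torch
from mmdet.ops import RoIAlign

feats = torch.randn(2, 256, 64, 64, device='cuda')  # assumed 8x-downsampled feature map
rois = torch.tensor([[0., 4., 4., 100., 120.],
                     [1., 40., 8., 200., 160.]], device='cuda')  # boxes in image coordinates
align = RoIAlign(out_size=7, spatial_scale=1.0 / 8, sample_num=2)
pooled = align(feats, rois)  # (2, 256, 7, 7): one aligned crop per roi

-------------------------------------------------------------------------------- /mmdet/ops/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from .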
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = _pair(out_size) 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, self.out_size, 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <THC/THC.h> 4 | 5 | #include <cmath> 6 | #include <vector> 7 | 8 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 9 | const float spatial_scale, const int sample_num, 10 | const int channels, const int height, 11 | const int width, const int num_rois, 12 | const int pooled_height, const int pooled_width, 13 | at::Tensor output); 14 | 15 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 16 | const float spatial_scale, const int sample_num, 17 | const int channels, const int height, 18 | const int width, const int num_rois, 19 | const int pooled_height, const int pooled_width, 20 | at::Tensor bottom_grad); 21 | 22 | #define CHECK_CUDA(x)
AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 23 | #define CHECK_CONTIGUOUS(x) \ 24 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 25 | #define CHECK_INPUT(x) \ 26 | CHECK_CUDA(x); \ 27 | CHECK_CONTIGUOUS(x) 28 | 29 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 30 | int pooled_height, int pooled_width, 31 | float spatial_scale, int sample_num, 32 | at::Tensor output) { 33 | CHECK_INPUT(features); 34 | CHECK_INPUT(rois); 35 | CHECK_INPUT(output); 36 | 37 | // Number of ROIs 38 | int num_rois = rois.size(0); 39 | int size_rois = rois.size(1); 40 | 41 | if (size_rois != 5) { 42 | printf("wrong roi size\n"); 43 | return 0; 44 | } 45 | 46 | int num_channels = features.size(1); 47 | int data_height = features.size(2); 48 | int data_width = features.size(3); 49 | 50 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 51 | num_channels, data_height, data_width, num_rois, 52 | pooled_height, pooled_width, output); 53 | 54 | return 1; 55 | } 56 | 57 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 58 | int pooled_height, int pooled_width, 59 | float spatial_scale, int sample_num, 60 | at::Tensor bottom_grad) { 61 | CHECK_INPUT(top_grad); 62 | CHECK_INPUT(rois); 63 | CHECK_INPUT(bottom_grad); 64 | 65 | // Number of ROIs 66 | int num_rois = rois.size(0); 67 | int size_rois = rois.size(1); 68 | if (size_rois != 5) { 69 | printf("wrong roi size\n"); 70 | return 0; 71 | } 72 | 73 | int num_channels = bottom_grad.size(1); 74 | int data_height = bottom_grad.size(2); 75 | int data_width = bottom_grad.size(3); 76 | 77 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 78 | num_channels, data_height, data_width, num_rois, 79 | pooled_height, pooled_width, bottom_grad); 80 | 81 | return 1; 82 | } 83 | 84 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 85 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 86 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 87 | } 88 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_size = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_size) 22 | argmax = features.new_zeros(out_size, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 30 |
CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None, None 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format( 53 | self.gamma, self.alpha) 54 | return tmpstr 55 | -------------------------------------------------------------------------------- /mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modify from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 |
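A hedged, CUDA-only usage sketch for the SigmoidFocalLoss module defined in sigmoid_focal_loss.py above; the label convention (0 = background, 1..num_classes = foreground) is an assumption about the CUDA kernel's indexing, worth verifying against your build:

import torch
from mmdet.ops import SigmoidFocalLoss

loss_fn = SigmoidFocalLoss(gamma=2.0, alpha=0.25)
logits = torch.randn(8, 80, device='cuda', requires_grad=True)  # (N, num_classes)
targets = torch.randint(0, 81, (8,), device='cuda')  # long labels; 0 assumed background
loss = loss_fn(logits, targets)  # per-element focal terms summed to a scalar
loss.backward()

-------------------------------------------------------------------------------- /mmdet/ops/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from .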
import compiling_info 2 | from .compiling_info import get_compiler_version, get_compiling_cuda_version 3 | 4 | # get_compiler_version = compiling_info.get_compiler_version 5 | # get_compiling_cuda_version = compiling_info.get_compiling_cuda_version 6 | 7 | __all__ = ['get_compiler_version', 'get_compiling_cuda_version'] 8 | -------------------------------------------------------------------------------- /mmdet/ops/utils/src/compiling_info.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp 3 | #include <cuda_runtime_api.h> 4 | #include <torch/extension.h> 5 | 6 | #ifdef WITH_CUDA 7 | int get_cudart_version() { return CUDART_VERSION; } 8 | #endif 9 | 10 | std::string get_compiling_cuda_version() { 11 | #ifdef WITH_CUDA 12 | std::ostringstream oss; 13 | 14 | // copied from 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 16 | auto printCudaStyleVersion = [&](int v) { 17 | oss << (v / 1000) << "." << (v / 10 % 100); 18 | if (v % 10 != 0) { 19 | oss << "." << (v % 10); 20 | } 21 | }; 22 | printCudaStyleVersion(get_cudart_version()); 23 | return oss.str(); 24 | #else 25 | return std::string("not available"); 26 | #endif 27 | } 28 | 29 | // similar to 30 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 31 | std::string get_compiler_version() { 32 | std::ostringstream ss; 33 | #if defined(__GNUC__) 34 | #ifndef __clang__ 35 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 36 | #endif 37 | #endif 38 | 39 | #if defined(__clang_major__) 40 | { 41 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 42 | << __clang_patchlevel__; 43 | } 44 | #endif 45 | 46 | #if defined(_MSC_VER) 47 | { ss << "MSVC " << _MSC_FULL_VER; } 48 | #endif 49 | return ss.str(); 50 | } 51 | 52 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 53 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 54 | m.def("get_compiling_cuda_version", &get_compiling_cuda_version, 55 | "get_compiling_cuda_version"); 56 | } 57 | -------------------------------------------------------------------------------- /mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .flops_counter import get_model_complexity_info 2 | from .logger import get_root_logger, print_log 3 | from .registry import Registry, build_from_cfg 4 | 5 | __all__ = [ 6 | 'Registry', 'build_from_cfg', 'get_model_complexity_info', 7 | 'get_root_logger', 'print_log' 8 | ] 9 | -------------------------------------------------------------------------------- /mmdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from mmcv.runner import get_dist_info 4 | 5 | 6 | def get_root_logger(log_file=None, log_level=logging.INFO): 7 | """Get the root logger. 8 | 9 | The logger will be initialized if it has not been initialized. By default a 10 | StreamHandler will be added. If `log_file` is specified, a FileHandler will 11 | also be added. The name of the root logger is the top-level package name, 12 | e.g., "mmdet". 13 | 14 | Args: 15 | log_file (str | None): The log filename. If specified, a FileHandler 16 | will be added to the root logger. 17 | log_level (int): The root logger level.
Note that only the process of 18 | rank 0 is affected, while other processes will set the level to 19 | "Error" and be silent most of the time. 20 | 21 | Returns: 22 | logging.Logger: The root logger. 23 | """ 24 | logger = logging.getLogger(__name__.split('.')[0]) # i.e., mmdet 25 | # if the logger has been initialized, just return it 26 | if logger.hasHandlers(): 27 | return logger 28 | 29 | format_str = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' 30 | logging.basicConfig(format=format_str, level=log_level) 31 | rank, _ = get_dist_info() 32 | if rank != 0: 33 | logger.setLevel('ERROR') 34 | elif log_file is not None: 35 | file_handler = logging.FileHandler(log_file, 'w') 36 | file_handler.setFormatter(logging.Formatter(format_str)) 37 | file_handler.setLevel(log_level) 38 | logger.addHandler(file_handler) 39 | 40 | return logger 41 | 42 | 43 | def print_log(msg, logger=None, level=logging.INFO): 44 | """Print a log message. 45 | 46 | Args: 47 | msg (str): The message to be logged. 48 | logger (logging.Logger | str | None): The logger to be used. Some 49 | special loggers are: 50 | - "root": the root logger obtained with `get_root_logger()`. 51 | - "silent": no message will be printed. 52 | - None: The `print()` method will be used to print log messages. 53 | level (int): Logging level. Only available when `logger` is a Logger 54 | object or "root". 55 | """ 56 | if logger is None: 57 | print(msg) 58 | elif logger == 'root': 59 | _logger = get_root_logger() 60 | _logger.log(level, msg) 61 | elif isinstance(logger, logging.Logger): 62 | logger.log(level, msg) 63 | elif logger != 'silent': 64 | raise TypeError( 65 | 'logger should be either a logging.Logger object, "root", ' 66 | '"silent" or None, but got {}'.format(logger)) 67 | -------------------------------------------------------------------------------- /mmdet/utils/profiling.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import sys 3 | import time 4 | 5 | import torch 6 | 7 | if sys.version_info >= (3, 7): 8 | 9 | @contextlib.contextmanager 10 | def profile_time(trace_name, 11 | name, 12 | enabled=True, 13 | stream=None, 14 | end_stream=None): 15 | """Print time spent by CPU and GPU. 16 | 17 | Useful as a temporary context manager to find sweet spots of 18 | code suitable for async implementation. 
19 | 20 | """ 21 | if (not enabled) or not torch.cuda.is_available(): 22 | yield 23 | return 24 | stream = stream if stream else torch.cuda.current_stream() 25 | end_stream = end_stream if end_stream else stream 26 | start = torch.cuda.Event(enable_timing=True) 27 | end = torch.cuda.Event(enable_timing=True) 28 | stream.record_event(start) 29 | try: 30 | cpu_start = time.monotonic() 31 | yield 32 | finally: 33 | cpu_end = time.monotonic() 34 | end_stream.record_event(end) 35 | end.synchronize() 36 | cpu_time = (cpu_end - cpu_start) * 1000 37 | gpu_time = start.elapsed_time(end) 38 | msg = "{} {} cpu_time {:.2f} ms ".format(trace_name, name, 39 | cpu_time) 40 | msg += "gpu_time {:.2f} ms stream {}".format(gpu_time, stream) 41 | print(msg, end_stream)  # also echoes the stream used for the end event 42 | -------------------------------------------------------------------------------- /mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | from functools import partial 3 | 4 | import mmcv 5 | 6 | 7 | class Registry(object): 8 | 9 | def __init__(self, name): 10 | self._name = name 11 | self._module_dict = dict() 12 | 13 | def __repr__(self): 14 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 15 | self._name, list(self._module_dict.keys())) 16 | return format_str 17 | 18 | @property 19 | def name(self): 20 | return self._name 21 | 22 | @property 23 | def module_dict(self): 24 | return self._module_dict 25 | 26 | def get(self, key): 27 | return self._module_dict.get(key, None) 28 | 29 | def _register_module(self, module_class, force=False): 30 | """Register a module. 31 | 32 | Args: 33 | module_class (type): Class to be registered. 34 | """ 35 | if not inspect.isclass(module_class): 36 | raise TypeError('module must be a class, but got {}'.format( 37 | type(module_class))) 38 | module_name = module_class.__name__ 39 | if not force and module_name in self._module_dict: 40 | raise KeyError('{} is already registered in {}'.format( 41 | module_name, self.name)) 42 | self._module_dict[module_name] = module_class 43 | 44 | def register_module(self, cls=None, force=False): 45 | if cls is None: 46 | return partial(self.register_module, force=force) 47 | self._register_module(cls, force=force) 48 | return cls 49 | 50 | 51 | def build_from_cfg(cfg, registry, default_args=None): 52 | """Build a module from config dict. 53 | 54 | Args: 55 | cfg (dict): Config dict. It should at least contain the key "type". 56 | registry (:obj:`Registry`): The registry to search the type from. 57 | default_args (dict, optional): Default initialization arguments. 58 | 59 | Returns: 60 | obj: The constructed object.
61 | """ 62 | assert isinstance(cfg, dict) and 'type' in cfg 63 | assert isinstance(default_args, dict) or default_args is None 64 | args = cfg.copy() 65 | obj_type = args.pop('type') 66 | if mmcv.is_str(obj_type): 67 | obj_cls = registry.get(obj_type) 68 | if obj_cls is None: 69 | raise KeyError('{} is not in the {} registry'.format( 70 | obj_type, registry.name)) 71 | elif inspect.isclass(obj_type): 72 | obj_cls = obj_type 73 | else: 74 | raise TypeError('type must be a str or valid type, but got {}'.format( 75 | type(obj_type))) 76 | if default_args is not None: 77 | for name, value in default_args.items(): 78 | args.setdefault(name, value) 79 | return obj_cls(**args) 80 | -------------------------------------------------------------------------------- /mmdet/version.py: -------------------------------------------------------------------------------- 1 | # GENERATED VERSION FILE 2 | # TIME: Sat Mar 20 14:30:26 2021 3 | 4 | __version__ = '1.0.0+da6b82a' 5 | short_version = '1.0.0' 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/runtime.txt 2 | -r requirements/optional.txt 3 | -r requirements/tests.txt 4 | -r requirements/build.txt 5 | -------------------------------------------------------------------------------- /requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmdetection 2 | cython 3 | numpy 4 | torch>=1.1 5 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | imagecorruptions 3 | -------------------------------------------------------------------------------- /requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | mmcv==0.2.16 3 | numpy 4 | scipy 5 | # need older pillow until torchvision is fixed 6 | Pillow<=6.2.2 7 | six 8 | terminaltables 9 | torch>=1.1 10 | torchvision 11 | cupy 12 | tensorboard 13 | GitPython==3.1.12 14 | -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | asynctest 2 | codecov 3 | flake8 4 | isort 5 | pytest 6 | pytest-cov 7 | pytest-runner 8 | xdoctest >= 0.10.0 9 | yapf 10 | # Note: used for kwarray.group_items, this may be ported to mmcv in the future. 11 | kwarray 12 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tools/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # from .cityscapes_vps import Cityscapes 2 | from .cityscapes_vps import CityscapesVps 3 | # from .viper import Viper 4 | --------------------------------------------------------------------------------
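A hedged usage sketch for Registry and build_from_cfg from mmdet/utils/registry.py above; the DETECTORS registry name and FancyDetector class are illustrative, not from this repo:

from mmdet.utils import Registry, build_from_cfg

DETECTORS = Registry('detector')

@DETECTORS.register_module
class FancyDetector(object):
    def __init__(self, depth=50):
        self.depth = depth

# A config names the class via 'type'; remaining keys become constructor
# kwargs, and default_args fills in anything the config omits.
model = build_from_cfg(dict(type='FancyDetector'), DETECTORS, default_args=dict(depth=101))
assert model.depth == 101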