├── .flake8 ├── .gitignore ├── LICENSE ├── README.md ├── datasets ├── README.md └── cityscapes ├── dl_lib ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── catalog.py │ └── detection_checkpoint.py ├── configs │ ├── base_config.py │ └── segm_config.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── cityscapes.py │ │ ├── coco.py │ │ ├── pascal_voc.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── extend_transform.py │ │ ├── transform.py │ │ ├── transform_gen.py │ │ └── transform_util.py ├── engine │ ├── __init__.py │ ├── custom.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── evaluator.py │ ├── pascal_voc_evaluation.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── layers │ ├── ROIAlign │ │ ├── ROIAlign.h │ │ ├── ROIAlign_cpu.cpp │ │ ├── ROIAlign_cuda.cu │ │ └── roi_align.py │ ├── __init__.py │ ├── batch_norm.py │ ├── deformable │ │ ├── deform_conv.h │ │ ├── deform_conv.py │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_cuda_kernel.cu │ │ └── deform_conv_with_off.py │ ├── shape_spec.py │ ├── vision.cpp │ └── wrappers.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── basenet │ │ ├── __init__.py │ │ ├── basenet.py │ │ └── show.py │ ├── dynamic_arch │ │ ├── __init__.py │ │ ├── cal_op_flops.py │ │ ├── dynamic_backbone.py │ │ ├── dynamic_cell.py │ │ └── op_with_flops.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── dynamic4seg.py │ │ └── semantic_seg.py │ ├── nn_utils │ │ ├── __init__.py │ │ ├── flop_count.py │ │ ├── jit_handles.py │ │ ├── precise_bn.py │ │ ├── scale_grad.py │ │ └── weight_init.py │ ├── postprocessing.py │ └── test_time_augmentation.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ └── masks.py └── utils │ ├── README.md │ ├── __init__.py │ ├── benchmark.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── config_helper.py │ ├── download.py │ ├── env.py │ ├── events.py │ ├── file_io.py │ ├── history_buffer.py │ ├── imports.py │ ├── logger.py │ ├── memory.py │ ├── serialize.py │ ├── timer.py │ ├── video_visualizer.py │ └── visualizer.py ├── intro └── introduce.png ├── playground ├── .gitignore └── Dynamic │ ├── Seg.Layer16.ImageNet │ ├── config.py │ └── net.py │ ├── Seg.Layer16.Iter560k.SDP0_3 │ ├── config.py │ └── net.py │ ├── Seg.Layer16.SmallGate.Dynamic_A │ ├── config.py │ └── net.py │ ├── Seg.Layer16.SmallGate.Dynamic_B │ ├── config.py │ └── net.py │ ├── Seg.Layer16.SmallGate.Dynamic_C │ ├── config.py │ └── net.py │ ├── Seg.Layer16 │ ├── config.py │ └── net.py │ └── Seg.Layer33.ImageNet │ ├── config.py │ └── net.py ├── setup.py └── tools ├── benchmark.py ├── rm_files.py ├── test_net.py └── train_net.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = W503, E203, E221, C901, E999, W504 6 | max-line-length = 100 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | exclude = build,__init__.py 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.jpg 8 | *.txt 9 | 10 | # compilation and distribution 11 | __pycache__ 12 | _ext 13 | *.pyc 14 | *.so 15 | *.egg-info/ 16 | build/ 17 | dist/ 18 | 19 | # pytorch/python/numpy formats 20 | *.pth 21 | *.pkl 22 | *.npy 23 | 24 | # ipython/jupyter notebooks 25 | *.ipynb 26 | **/.ipynb_checkpoints/ 27 | 28 | # Editor temporaries 29 | *.swn 30 | *.swo 31 | *.swp 32 | *~ 33 | 34 | # Pycharm/VSCode editor settings 35 | .idea 36 | *.vscode 37 | 38 | # project dirs 39 | **/log 40 | tools/dl_test 41 | tools/dl_train 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DynamicRouting 2 | 3 | This project provides an implementation for "[Learning Dynamic Routing for Semantic Segmentation](https://arxiv.org/abs/2003.10401)" (*CVPR2020 Oral*) on PyTorch. 4 | Because the experiments in the paper were conducted with an internal framework, this project reimplements them on *dl_lib* and reports detailed comparisons below. Some parts of the code in *dl_lib* are based on [detectron2](https://github.com/facebookresearch/detectron2). 5 | 6 | ![introduce image](intro/introduce.png) 7 | 8 | ## Requirement 9 | - Python >= 3.6 10 | - `python3 --version` 11 | - PyTorch >= 1.3 12 | - `pip3 install torch torchvision` 13 | - OpenCV 14 | - `pip3 install opencv-python` 15 | - GCC >= 4.9 16 | - `gcc --version` 17 | 18 | ## Installation 19 | 20 | Make sure that at least one GPU is available when compiling. Run: 21 | - `git clone https://github.com/yanwei-li/DynamicRouting.git` 22 | - `cd DynamicRouting` 23 | - `sudo python3 setup.py build develop` 24 | 25 | ## Usage 26 | 27 | ### Dataset 28 | We use the Cityscapes dataset for training and validation. Please refer to [`datasets/README.md`](datasets/README.md) or the dataset structure in [detectron2](https://github.com/facebookresearch/detectron2) for more details. 29 | - Cityscapes [Download](https://www.cityscapes-dataset.com/) 30 | 31 | ### Pretrained Model 32 | We provide ImageNet pretrained models: 33 | - Layer16-Fix [GoogleDrive](https://drive.google.com/file/d/1WqVOZo8oelXTKlf0BDE3q2W-lyYm2G2U/view?usp=sharing) 34 | - Layer33-Fix [GoogleDrive](https://drive.google.com/file/d/1xktVvVsYSaDlb8yQcn0zAzx2ZMUhyD_K/view?usp=sharing) 35 | 36 | ### Training 37 | For example, to train the Dynamic Network with the Layer16 backbone: 38 | - Train from scratch 39 | ```shell 40 | cd playground/Dynamic/Seg.Layer16 41 | dl_train --num-gpus 4 42 | ``` 43 | - Train with ImageNet pretraining 44 | ```shell 45 | cd playground/Dynamic/Seg.Layer16.ImageNet 46 | dl_train --num-gpus 4 MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth 47 | ``` 48 | 49 | NOTE: Please set `FIX_SIZE_FOR_FLOPS` to `[768,768]` and `[1024,2048]` for training and evaluation, respectively.
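Below is a minimal sketch of how such an override could look in a playground `config.py`, following the dict-based pattern used by `dl_lib/configs/segm_config.py`. The exact location of `FIX_SIZE_FOR_FLOPS` inside the config tree is an assumption here; check the `config.py` shipped with each playground experiment for the real key path.

```python
# Hypothetical sketch only: the config path of FIX_SIZE_FOR_FLOPS is assumed,
# not taken from the shipped playground configs.
from dl_lib.configs.segm_config import SemanticSegmentationConfig

_config_dict = dict(
    MODEL=dict(
        # use [768, 768] for training and [1024, 2048] for Cityscapes evaluation
        FIX_SIZE_FOR_FLOPS=[768, 768],
    ),
)


class CustomSegConfig(SemanticSegmentationConfig):
    def __init__(self):
        super(CustomSegConfig, self).__init__()
        self._register_configuration(_config_dict)


config = CustomSegConfig()
```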
50 | 51 | ### Evaluation 52 | You can evaluate the trained or downloaded model: 53 | - Evaluate the trained model 54 | ```shell 55 | dl_test --num-gpus 8 56 | ``` 57 | - Evaluate the downloaded model: 58 | ```shell 59 | dl_test --num-gpus 8 MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth 60 | ``` 61 | 62 | NOTE: If your machine does not support such a setting, please change the settings in `config.py` to suitable values. 63 | 64 | ## Performance 65 | ### Cityscapes *val* set 66 | ***Without* ImageNet Pretrain:** 67 | 68 | Methods | Backbone | Iter/K | mIoU (paper) | GFLOPs (paper) | mIoU (ours) | GFLOPs (ours) | Model 69 | :--:|:--:|:--:|:--:|:--:|:--:|:--:|:--: 70 | Dynamic-A | Layer16 | 186 | 72.8 | 44.9 | 73.9 | 52.5 | [GoogleDrive](https://drive.google.com/file/d/1Fa4hLS2GKL90HR0tVhmcZkFwemK6DlgT/view?usp=sharing) 71 | Dynamic-B | Layer16 | 186 | 73.8 | 58.7 | 74.3 | 58.9 | [GoogleDrive](https://drive.google.com/open?id=1o31jNkoSMugK0rEsi6grE9WEioWJgsZN) 72 | Dynamic-C | Layer16 | 186 | 74.6 | 66.6 | 74.8 | 59.8 | [GoogleDrive](https://drive.google.com/open?id=1nJ8oJD9yZj82hTlyDcMBGd9kmpIZVIto) 73 | Dynamic-Raw | Layer16 | 186 | 76.1 | 119.5 | 76.7 | 114.9 | [GoogleDrive](https://drive.google.com/open?id=1_fSMzzaUVzbkjYqSuvD2EmmZiZZVsDvh) 74 | Dynamic-Raw | Layer16 | 558 | 78.3 | 113.3 | 78.1 | 114.2 | [GoogleDrive](https://drive.google.com/open?id=1ToV_YAVxv5pCoRTreIXsQxNI5jolektt) 75 | 76 | ***With* ImageNet Pretrain:** 77 | 78 | Methods | Backbone | Iter/K | mIoU (paper) | GFLOPs (paper) | mIoU (ours) | GFLOPs (ours) | Model 79 | :--:|:--:|:--:|:--:|:--:|:--:|:--:|:--: 80 | Dynamic-Raw | Layer16 | 186 | 78.6 | 119.4 | 78.8 | 117.8 | [GoogleDrive](https://drive.google.com/open?id=1xRZkRw5qIKc_A6repZkRmLUpSAnnU63e) 81 | Dynamic-Raw | Layer33 | 186 | 79.2 | 242.3 | 79.4 | 243.1 | [GoogleDrive](https://drive.google.com/file/d/1DdLwt0jzBTqx_3EFGPLm_gWdaeoXXux7/view?usp=sharing) 82 | 83 | ## To do 84 | - [ ] Faster inference speed 85 | - [ ] Support more vision tasks 86 | - [ ] Object detection 87 | - [ ] Instance segmentation 88 | - [ ] Panoptic segmentation 89 | 90 | ## Acknowledgement 91 | - [Detectron2](https://github.com/facebookresearch/detectron2) 92 | - [DARTS](https://github.com/quark0/darts) 93 | 94 | ## Citation 95 | Please consider citing Dynamic Routing in your publications if it helps your research. 96 | 97 | ``` 98 | @inproceedings{li2020learning, 99 | title = {Learning Dynamic Routing for Semantic Segmentation}, 100 | author = {Yanwei Li and Lin Song and Yukang Chen and Zeming Li and Xiangyu Zhang and Xingang Wang and Jian Sun}, 101 | booktitle = {IEEE Conference on Computer Vision and Pattern Recognition}, 102 | year = {2020} 103 | } 104 | ``` 105 | 106 | Please consider citing this project in your publications if it helps your research. 107 | ``` 108 | @misc{DynamicRouting, 109 | author = {Yanwei Li}, 110 | title = {DynamicRouting}, 111 | howpublished = {\url{https://github.com/yanwei-li/DynamicRouting}}, 112 | year = {2020} 113 | } 114 | ``` -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | For a few datasets that detectron2 natively supports, 3 | the datasets are assumed to exist in a directory called 4 | "datasets/", under the directory where you launch the program.
5 | They need to have the following directory structure: 6 | 7 | ## Expected dataset structure for cityscapes: 8 | ``` 9 | cityscapes/ 10 | gtFine/ 11 | train/ 12 | aachen/ 13 | color.png, instanceIds.png, labelIds.png, polygons.json, 14 | labelTrainIds.png 15 | ... 16 | val/ 17 | test/ 18 | leftImg8bit/ 19 | train/ 20 | val/ 21 | test/ 22 | ``` 23 | Install cityscapes scripts by: 24 | ``` 25 | pip install git+https://github.com/mcordts/cityscapesScripts.git 26 | ``` 27 | 28 | Note: 29 | labelTrainIds.png are created by `cityscapesscripts/preparation/createTrainIdLabelImgs.py`. 30 | They are not needed for instance segmentation. 31 | 32 | ## Expected dataset structure for Pascal VOC: 33 | ``` 34 | VOC20{07,12}/ 35 | Annotations/ 36 | ImageSets/ 37 | JPEGImages/ 38 | ``` 39 | -------------------------------------------------------------------------------- /datasets/cityscapes: -------------------------------------------------------------------------------- 1 | /data/Datasets/cityscapes -------------------------------------------------------------------------------- /dl_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | __version__ = "0.1" 8 | -------------------------------------------------------------------------------- /dl_lib/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | from . import catalog as _UNUSED # register the handler 6 | from .detection_checkpoint import DetectionCheckpointer 7 | from dl_lib.utils.checkpoint import Checkpointer, PeriodicCheckpointer 8 | 9 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 10 | -------------------------------------------------------------------------------- /dl_lib/checkpoint/catalog.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | 4 | from dl_lib.utils.file_io import PathHandler, PathManager 5 | 6 | 7 | class ModelCatalog(object): 8 | """ 9 | Store mappings from names to third-party models. 10 | """ 11 | 12 | S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" 13 | 14 | # MSRA models have STRIDE_IN_1X1=True. False otherwise. 15 | # NOTE: all BN models here have fused BN into an affine layer. 16 | # As a result, you should only load them to a model with "FrozenBN". 17 | # Loading them to a model with regular BN or SyncBN is wrong. 18 | # Even when loaded to FrozenBN, it is still different from affine by an epsilon, 19 | # which should be negligible for training. 
20 | # NOTE: all models here uses PIXEL_STD=[1,1,1] 21 | C2_IMAGENET_MODELS = { 22 | "MSRA/R-50": 23 | "ImageNetPretrained/MSRA/R-50.pkl", 24 | "MSRA/R-101": 25 | "ImageNetPretrained/MSRA/R-101.pkl", 26 | "FAIR/R-50-GN": 27 | "ImageNetPretrained/47261647/R-50-GN.pkl", 28 | "FAIR/R-101-GN": 29 | "ImageNetPretrained/47592356/R-101-GN.pkl", 30 | "FAIR/X-101-32x8d": 31 | "ImageNetPretrained/20171220/X-101-32x8d.pkl", 32 | "FAIR/X-101-64x4d": 33 | "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", 34 | "FAIR/X-152-32x8d-IN5k": 35 | "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", 36 | } 37 | 38 | C2_DETECTRON_PATH_FORMAT = ( 39 | "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" 40 | ) # noqa B950 41 | 42 | C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" 43 | C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" 44 | 45 | # format: {model_name} -> part of the url 46 | C2_DETECTRON_MODELS = { 47 | "35857197/e2e_faster_rcnn_R-50-C4_1x": 48 | "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 49 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": 50 | "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 51 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": 52 | "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 53 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": 54 | "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 55 | "35858791/e2e_mask_rcnn_R-50-C4_1x": 56 | "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 57 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": 58 | "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 59 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": 60 | "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 61 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": 62 | "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 63 | "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": 64 | "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 65 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": 66 | "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 67 | "35998355/rpn_R-50-C4_1x": 68 | "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 69 | "35998814/rpn_R-50-FPN_1x": 70 | "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 71 | "36225147/fast_R-50-FPN_1x": 72 | "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 73 | } 74 | 75 | @staticmethod 76 | def get(name): 77 | if name.startswith("Caffe2Detectron/COCO"): 78 | return ModelCatalog._get_c2_detectron_baseline(name) 79 | if name.startswith("ImageNetPretrained/"): 80 | return ModelCatalog._get_c2_imagenet_pretrained(name) 81 | raise RuntimeError("model not present in the catalog: {}".format(name)) 82 | 83 | @staticmethod 84 | def _get_c2_imagenet_pretrained(name): 85 | prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX 86 | name = name[len("ImageNetPretrained/"):] 87 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 88 | url = "/".join([prefix, name]) 89 | return url 90 | 91 | @staticmethod 92 | def _get_c2_detectron_baseline(name): 93 | name = name[len("Caffe2Detectron/COCO/"):] 94 | url = 
ModelCatalog.C2_DETECTRON_MODELS[name] 95 | if "keypoint_rcnn" in name: 96 | dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS 97 | else: 98 | dataset = ModelCatalog.C2_DATASET_COCO 99 | 100 | if "35998355/rpn_R-50-C4_1x" in name: 101 | # this one model is somehow different from others .. 102 | type = "rpn" 103 | else: 104 | type = "generalized_rcnn" 105 | 106 | # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. 107 | url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( 108 | prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, 109 | url=url, 110 | type=type, 111 | dataset=dataset) 112 | return url 113 | 114 | 115 | class ModelCatalogHandler(PathHandler): 116 | """ 117 | Resolve URL like catalog://. 118 | """ 119 | 120 | PREFIX = "catalog://" 121 | 122 | def _get_supported_prefixes(self): 123 | return [self.PREFIX] 124 | 125 | def _get_local_path(self, path): 126 | logger = logging.getLogger(__name__) 127 | catalog_path = ModelCatalog.get(path[len(self.PREFIX):]) 128 | logger.info("Catalog entry {} points to {}".format(path, catalog_path)) 129 | return PathManager.get_local_path(catalog_path) 130 | 131 | def _open(self, path, mode="r", **kwargs): 132 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 133 | 134 | 135 | class Detectron2Handler(PathHandler): 136 | """ 137 | Resolve anything that's in Detectron2 model zoo. 138 | """ 139 | 140 | PREFIX = "detectron2://" 141 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 142 | 143 | def _get_supported_prefixes(self): 144 | return [self.PREFIX] 145 | 146 | def _get_local_path(self, path): 147 | name = path[len(self.PREFIX):] 148 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 149 | 150 | def _open(self, path, mode="r", **kwargs): 151 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 152 | 153 | 154 | PathManager.register_handler(ModelCatalogHandler()) 155 | PathManager.register_handler(Detectron2Handler()) 156 | -------------------------------------------------------------------------------- /dl_lib/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import pickle 3 | 4 | import dl_lib.utils.comm as comm 5 | from dl_lib.utils.checkpoint import Checkpointer 6 | from dl_lib.utils.file_io import PathManager 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & dl_lib 14 | model zoo, and apply conversions for legacy models. 
15 | """ 16 | def __init__(self, 17 | model, 18 | save_dir="", 19 | *, 20 | save_to_disk=None, 21 | **checkpointables): 22 | is_main_process = comm.is_main_process() 23 | super().__init__( 24 | model, 25 | save_dir, 26 | save_to_disk=is_main_process 27 | if save_to_disk is None else save_to_disk, 28 | **checkpointables, 29 | ) 30 | 31 | def _load_file(self, filename): 32 | if filename.endswith(".pkl"): 33 | with PathManager.open(filename, "rb") as f: 34 | data = pickle.load(f, encoding="latin1") 35 | if "model" in data and "__author__" in data: 36 | # file is in dl_lib model zoo format 37 | self.logger.info("Reading a file from '{}'".format( 38 | data["__author__"])) 39 | return data 40 | else: 41 | # assume file is from Caffe2 / Detectron1 model zoo 42 | if "blobs" in data: 43 | # Detection models have "blobs", but ImageNet models don't 44 | data = data["blobs"] 45 | data = { 46 | k: v 47 | for k, v in data.items() if not k.endswith("_momentum") 48 | } 49 | return { 50 | "model": data, 51 | "__author__": "Caffe2", 52 | "matching_heuristics": True 53 | } 54 | 55 | loaded = super()._load_file(filename) # load native pth checkpoint 56 | if "model" not in loaded: 57 | loaded = {"model": loaded} 58 | return loaded 59 | 60 | def _load_model(self, checkpoint): 61 | if checkpoint.get("matching_heuristics", False): 62 | self._convert_ndarray_to_tensor(checkpoint["model"]) 63 | # convert weights by name-matching heuristics 64 | model_state_dict = self.model.state_dict() 65 | align_and_update_state_dicts( 66 | model_state_dict, 67 | checkpoint["model"], 68 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 69 | ) 70 | checkpoint["model"] = model_state_dict 71 | # for non-caffe2 models, use standard ways to load it 72 | super()._load_model(checkpoint) 73 | -------------------------------------------------------------------------------- /dl_lib/configs/segm_config.py: -------------------------------------------------------------------------------- 1 | from .base_config import BaseConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | LOAD_PROPOSALS=False, 6 | MASK_ON=False, 7 | KEYPOINT_ON=False, 8 | BACKBONE=dict(FREEZE_AT=0, ), 9 | RESNETS=dict( 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | NORM="nnSyncBN", 12 | NUM_GROUPS=1, 13 | WIDTH_PER_GROUP=64, 14 | STRIDE_IN_1X1=True, 15 | RES5_DILATION=1, 16 | RES2_OUT_CHANNELS=256, 17 | STEM_OUT_CHANNELS=64, 18 | DEFORM_ON_PER_STAGE=[False, False, False, False], 19 | DEFORM_MODULATED=False, 20 | DEFORM_NUM_GROUPS=1, 21 | ), 22 | FPN=dict( 23 | IN_FEATURES=[], 24 | OUT_CHANNELS=256, 25 | NORM="", 26 | FUSE_TYPE="sum", 27 | ), 28 | SEM_SEG_HEAD=dict( 29 | # NAME="SemSegFPNHead", 30 | IN_FEATURES=[], 31 | IGNORE_VALUE=255, 32 | NUM_CLASSES=(), 33 | CONVS_DIM=256, 34 | COMMON_STRIDE=(), 35 | NORM="GN", 36 | LOSS_WEIGHT=1.0, 37 | ), 38 | SOLVER=dict( 39 | LR_SCHEDULER=dict( 40 | NAME="PolyLR", 41 | POLY_POWER=0.9, 42 | MAX_ITER=40000, 43 | WARMUP_ITERS=1000, 44 | WARMUP_FACTOR=0.001, 45 | WARMUP_METHOD="linear", 46 | ), 47 | OPTIMIZER=dict(BASE_LR=0.01, ), 48 | IMS_PER_BATCH=16, 49 | CHECKPOINT_PERIOD=5000, 50 | ), 51 | TEST=dict(PRECISE_BN=dict(ENABLED=True), ), 52 | ), 53 | INPUT=dict(CROP_PAD=dict( 54 | ENABLED=True, 55 | TYPE='absolute', 56 | SIZE=(), 57 | IMG_PAD_VALUE=0, 58 | SEG_PAD_VALUE=255, 59 | ), ), 60 | ) 61 | 62 | 63 | class SemanticSegmentationConfig(BaseConfig): 64 | def __init__(self): 65 | super(SemanticSegmentationConfig, self).__init__() 66 | self._register_configuration(_config_dict) 67 | 68 | 69 | config = 
SemanticSegmentationConfig() 70 | -------------------------------------------------------------------------------- /dl_lib/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_detection_test_loader, 6 | build_detection_train_loader, 7 | get_detection_dataset_dicts, 8 | load_proposals_into_dataset, 9 | print_instances_class_histogram, 10 | ) 11 | from .catalog import DatasetCatalog, MetadataCatalog 12 | from .common import DatasetFromList, MapDataset 13 | from .dataset_mapper import DatasetMapper 14 | 15 | # ensure the builtin datasets are registered 16 | from . import datasets, samplers # isort:skip 17 | 18 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 19 | -------------------------------------------------------------------------------- /dl_lib/data/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | import logging 4 | import random 5 | 6 | import torch.utils.data as data 7 | 8 | from dl_lib.utils.serialize import PicklableWrapper 9 | 10 | __all__ = ["MapDataset", "DatasetFromList"] 11 | 12 | 13 | class MapDataset(data.Dataset): 14 | """ 15 | Map a function over the elements in a dataset. 16 | 17 | Args: 18 | dataset: a dataset where map function is applied. 19 | map_func: a callable which maps the element in dataset. map_func is 20 | responsible for error handling, when error happens, it needs to 21 | return None so the MapDataset will randomly use other 22 | elements from the dataset. 23 | """ 24 | def __init__(self, dataset, map_func): 25 | self._dataset = dataset 26 | self._map_func = PicklableWrapper( 27 | map_func) # wrap so that a lambda will work 28 | 29 | self._rng = random.Random(42) 30 | self._fallback_candidates = set(range(len(dataset))) 31 | 32 | def __len__(self): 33 | return len(self._dataset) 34 | 35 | def __getitem__(self, idx): 36 | retry_count = 0 37 | cur_idx = int(idx) 38 | 39 | while True: 40 | data = self._map_func(self._dataset[cur_idx]) 41 | if data is not None: 42 | self._fallback_candidates.add(cur_idx) 43 | return data 44 | 45 | # _map_func fails for this idx, use a random new index from the pool 46 | retry_count += 1 47 | self._fallback_candidates.discard(cur_idx) 48 | cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] 49 | 50 | if retry_count >= 3: 51 | logger = logging.getLogger(__name__) 52 | logger.warning( 53 | "Failed to apply `_map_func` for idx: {}, retry count: {}". 54 | format(idx, retry_count)) 55 | 56 | 57 | class DatasetFromList(data.Dataset): 58 | """ 59 | Wrap a list to a torch Dataset. It produces elements of the list as data. 60 | """ 61 | def __init__(self, lst: list, copy: bool = True): 62 | """ 63 | Args: 64 | lst (list): a list which contains elements to produce. 65 | copy (bool): whether to deepcopy the element when producing it, 66 | so that the result can be modified in place without affecting the 67 | source in the list. 
68 | """ 69 | self._lst = lst 70 | self._copy = copy 71 | 72 | def __len__(self): 73 | return len(self._lst) 74 | 75 | def __getitem__(self, idx): 76 | if self._copy: 77 | return copy.deepcopy(self._lst[idx]) 78 | else: 79 | return self._lst[idx] 80 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Common Datasets 3 | 4 | The datasets implemented here do not need to load the data into the final format. 5 | They should provide the minimal data structure needed to use the dataset, so they can be very efficient. 6 | 7 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 8 | Let the downstream code decide how to read them. 9 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .cityscapes import load_cityscapes_instances 3 | from .coco import load_coco_json, load_sem_seg 4 | from .register_coco import register_coco_instances, register_coco_panoptic_separated 5 | from . import builtin # ensure the builtin datasets are registered 6 | 7 | __all__ = [ 8 | k for k in globals().keys() if "builtin" not in k and not k.startswith("_") 9 | ] 10 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/builtin.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | """ 4 | This file registers pre-defined datasets at hard-coded paths, and their metadata. 5 | We hard-code metadata for common datasets. This will enable: 6 | 1. Consistency checks when loading the datasets 7 | 2. Using models on these standard datasets directly and running demos, 8 | without having to download the dataset annotations 9 | We hard-code some paths to the dataset that's assumed to 10 | exist in "./datasets/". 11 | Users SHOULD NOT use this file to create new datasets / metadata for new datasets. 12 | To add a new dataset, refer to the tutorial "docs/DATASETS.md".
13 | """ 14 | 15 | import os 16 | import os.path as osp 17 | 18 | import dl_lib 19 | 20 | from dl_lib.data import MetadataCatalog, DatasetCatalog 21 | from .register_coco import register_coco_instances 22 | from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic 23 | from .pascal_voc import register_pascal_voc 24 | from .builtin_meta import _get_builtin_metadata 25 | 26 | # ==== Predefined datasets and splits for COCO ========== 27 | 28 | _PREDEFINED_SPLITS_COCO = {} 29 | _PREDEFINED_SPLITS_COCO["coco"] = { 30 | "coco_2014_train": 31 | ("coco/train2014", "coco/annotations/instances_train2014.json"), 32 | "coco_2014_val": 33 | ("coco/val2014", "coco/annotations/instances_val2014.json"), 34 | "coco_2014_minival": 35 | ("coco/val2014", "coco/annotations/instances_minival2014.json"), 36 | "coco_2014_minival_100": 37 | ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), 38 | "coco_2014_valminusminival": ( 39 | "coco/val2014", 40 | "coco/annotations/instances_valminusminival2014.json", 41 | ), 42 | "coco_2017_train": ("coco/train2017", 43 | "coco/annotations/instances_train2017.json"), 44 | "coco_2017_val": ("coco/val2017", 45 | "coco/annotations/instances_val2017.json"), 46 | "coco_2017_test": ("coco/test2017", 47 | "coco/annotations/image_info_test2017.json"), 48 | "coco_2017_test-dev": ("coco/test2017", 49 | "coco/annotations/image_info_test-dev2017.json"), 50 | "coco_2017_val_100": ("coco/val2017", 51 | "coco/annotations/instances_val2017_100.json"), 52 | } 53 | 54 | 55 | def register_all_coco(root=osp.join( 56 | osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")): 57 | for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): 58 | for key, (image_root, json_file) in splits_per_dataset.items(): 59 | # Assume pre-defined datasets live in `./datasets`. 
60 | register_coco_instances( 61 | key, 62 | _get_builtin_metadata(dataset_name), 63 | os.path.join(root, json_file) 64 | if "://" not in json_file else json_file, 65 | os.path.join(root, image_root), 66 | ) 67 | 68 | # ==== Predefined splits for raw cityscapes images =========== 69 | 70 | 71 | _RAW_CITYSCAPES_SPLITS = { 72 | "cityscapes_fine_{task}_train": 73 | ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"), 74 | "cityscapes_fine_{task}_val": 75 | ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"), 76 | "cityscapes_fine_{task}_test": 77 | ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"), 78 | } 79 | 80 | 81 | def register_all_cityscapes(root=osp.join( 82 | osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")): 83 | for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): 84 | meta = _get_builtin_metadata("cityscapes") 85 | image_dir = os.path.join(root, image_dir) 86 | gt_dir = os.path.join(root, gt_dir) 87 | 88 | inst_key = key.format(task="instance_seg") 89 | DatasetCatalog.register( 90 | inst_key, 91 | lambda x=image_dir, y=gt_dir: load_cityscapes_instances( 92 | x, y, from_json=True, to_polygons=True), 93 | ) 94 | MetadataCatalog.get(inst_key).set(image_dir=image_dir, 95 | gt_dir=gt_dir, 96 | evaluator_type="cityscapes", 97 | **meta) 98 | 99 | sem_key = key.format(task="sem_seg") 100 | DatasetCatalog.register( 101 | sem_key, 102 | lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)) 103 | MetadataCatalog.get(sem_key).set(image_dir=image_dir, 104 | gt_dir=gt_dir, 105 | evaluator_type="sem_seg", 106 | **meta) 107 | 108 | 109 | # ==== Predefined splits for PASCAL VOC =========== 110 | def register_all_pascal_voc(root=osp.join( 111 | osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")): 112 | SPLITS = [ 113 | ("voc_2007_trainval", "VOC2007", "trainval"), 114 | ("voc_2007_train", "VOC2007", "train"), 115 | ("voc_2007_val", "VOC2007", "val"), 116 | ("voc_2007_test", "VOC2007", "test"), 117 | ("voc_2012_trainval", "VOC2012", "trainval"), 118 | ("voc_2012_train", "VOC2012", "train"), 119 | ("voc_2012_val", "VOC2012", "val"), 120 | ] 121 | for name, dirname, split in SPLITS: 122 | year = 2007 if "2007" in name else 2012 123 | register_pascal_voc(name, os.path.join(root, dirname), split, year) 124 | MetadataCatalog.get(name).evaluator_type = "pascal_voc" 125 | 126 | 127 | # Register them all under "./datasets" 128 | register_all_coco() 129 | register_all_cityscapes() 130 | register_all_pascal_voc() 131 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import os 5 | import xml.etree.ElementTree as ET 6 | 7 | import numpy as np 8 | 9 | from dl_lib.data import DatasetCatalog, MetadataCatalog 10 | from dl_lib.structures import BoxMode 11 | from dl_lib.utils.file_io import PathManager 12 | 13 | __all__ = ["register_pascal_voc"] 14 | 15 | # fmt: off 16 | CLASS_NAMES = [ 17 | "aeroplane", 18 | "bicycle", 19 | "bird", 20 | "boat", 21 | "bottle", 22 | "bus", 23 | "car", 24 | "cat", 25 | "chair", 26 | "cow", 27 | "diningtable", 28 | "dog", 29 | "horse", 30 | "motorbike", 31 | "person", 32 | "pottedplant", 33 | "sheep", 34 | "sofa", 35 | "train", 36 | "tvmonitor", 37 | ] 38 | # fmt: on 39 | 40 | 41 | def load_voc_instances(dirname: str, split: str): 42 | """ 43 | Load Pascal VOC detection annotations to dl_lib format. 44 | 45 | Args: 46 | dirname: Contain "Annotations", "ImageSets", "JPEGImages" 47 | split (str): one of "train", "test", "val", "trainval" 48 | """ 49 | with PathManager.open( 50 | os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 51 | fileids = np.loadtxt(f, dtype=np.str) 52 | 53 | dicts = [] 54 | for fileid in fileids: 55 | anno_file = os.path.join(dirname, "Annotations", fileid + ".xml") 56 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 57 | 58 | tree = ET.parse(anno_file) 59 | 60 | r = { 61 | "file_name": jpeg_file, 62 | "image_id": fileid, 63 | "height": int(tree.findall("./size/height")[0].text), 64 | "width": int(tree.findall("./size/width")[0].text), 65 | } 66 | instances = [] 67 | 68 | for obj in tree.findall("object"): 69 | cls = obj.find("name").text 70 | # We include "difficult" samples in training. 71 | # Based on limited experiments, they don't hurt accuracy. 72 | # difficult = int(obj.find("difficult").text) 73 | # if difficult == 1: 74 | # continue 75 | bbox = obj.find("bndbox") 76 | bbox = [ 77 | float(bbox.find(x).text) 78 | for x in ["xmin", "ymin", "xmax", "ymax"] 79 | ] 80 | # Original annotations are integers in the range [1, W or H] 81 | # Assuming they mean 1-based pixel indices (inclusive), 82 | # a box with annotation (xmin=1, xmax=W) covers the whole image. 83 | # In coordinate space this is represented by (xmin=0, xmax=W) 84 | bbox[0] -= 1.0 85 | bbox[1] -= 1.0 86 | instances.append({ 87 | "category_id": CLASS_NAMES.index(cls), 88 | "bbox": bbox, 89 | "bbox_mode": BoxMode.XYXY_ABS 90 | }) 91 | r["annotations"] = instances 92 | dicts.append(r) 93 | return dicts 94 | 95 | 96 | def register_pascal_voc(name, dirname, split, year): 97 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split)) 98 | MetadataCatalog.get(name).set(thing_classes=CLASS_NAMES, 99 | dirname=dirname, 100 | year=year, 101 | split=split) 102 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/register_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | 4 | from dl_lib.data import DatasetCatalog, MetadataCatalog 5 | 6 | from .coco import load_coco_json, load_sem_seg 7 | """ 8 | This file contains functions to register a COCO-format dataset to the DatasetCatalog. 9 | """ 10 | 11 | __all__ = ["register_coco_instances", "register_coco_panoptic_separated"] 12 | 13 | 14 | def register_coco_instances(name, metadata, json_file, image_root): 15 | """ 16 | Register a dataset in COCO's json annotation format for 17 | instance detection, instance segmentation and keypoint detection. 
18 | (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. 19 | `instances*.json` and `person_keypoints*.json` in the dataset). 20 | 21 | This is an example of how to register a new dataset. 22 | You can do something similar to this function, to register new datasets. 23 | 24 | Args: 25 | name (str): the name that identifies a dataset, e.g. "coco_2014_train". 26 | metadata (dict): extra metadata associated with this dataset. You can 27 | leave it as an empty dict. 28 | json_file (str): path to the json instance annotation file. 29 | image_root (str): directory which contains all the images. 30 | """ 31 | # 1. register a function which returns dicts 32 | DatasetCatalog.register( 33 | name, lambda: load_coco_json(json_file, image_root, name)) 34 | 35 | # 2. Optionally, add metadata about this dataset, 36 | # since they might be useful in evaluation, visualization or logging 37 | MetadataCatalog.get(name).set(json_file=json_file, 38 | image_root=image_root, 39 | evaluator_type="coco", 40 | **metadata) 41 | 42 | 43 | def register_coco_panoptic_separated(name, metadata, image_root, panoptic_root, 44 | panoptic_json, sem_seg_root, 45 | instances_json): 46 | """ 47 | Register a COCO panoptic segmentation dataset named `name`. 48 | The annotations in this registered dataset will contain both instance annotations and 49 | semantic annotations, each with its own contiguous ids. Hence it's called "separated". 50 | 51 | It follows the setting used by the PanopticFPN paper: 52 | 53 | 1. The instance annotations directly come from polygons in the COCO 54 | instances annotation task, rather than from the masks in the COCO panoptic annotations. 55 | 56 | The two format have small differences: 57 | Polygons in the instance annotations may have overlaps. 58 | The mask annotations are produced by labeling the overlapped polygons 59 | with depth ordering. 60 | 61 | 2. The semantic annotations are converted from panoptic annotations, where 62 | all "things" are assigned a semantic id of 0. 63 | All semantic categories will therefore have ids in contiguous 64 | range [1, #stuff_categories]. 65 | 66 | This function will also register a pure semantic segmentation dataset 67 | named ``name + '_stuffonly'``. 68 | 69 | Args: 70 | name (str): the name that identifies a dataset, 71 | e.g. "coco_2017_train_panoptic" 72 | metadata (dict): extra metadata associated with this dataset. 73 | image_root (str): directory which contains all the images 74 | panoptic_root (str): directory which contains panoptic annotation images 75 | panoptic_json (str): path to the json panoptic annotation file 76 | sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
77 | instances_json (str): path to the json instance annotation file 78 | """ 79 | panoptic_name = name + "_separated" 80 | DatasetCatalog.register( 81 | panoptic_name, 82 | lambda: merge_to_panoptic( 83 | load_coco_json(instances_json, image_root, panoptic_name), 84 | load_sem_seg(sem_seg_root, image_root), 85 | ), 86 | ) 87 | MetadataCatalog.get(panoptic_name).set( 88 | panoptic_root=panoptic_root, 89 | image_root=image_root, 90 | panoptic_json=panoptic_json, 91 | sem_seg_root=sem_seg_root, 92 | json_file=instances_json, # TODO rename 93 | evaluator_type="coco_panoptic_seg", 94 | **metadata) 95 | 96 | semantic_name = name + "_stuffonly" 97 | DatasetCatalog.register(semantic_name, 98 | lambda: load_sem_seg(sem_seg_root, image_root)) 99 | MetadataCatalog.get(semantic_name).set(sem_seg_root=sem_seg_root, 100 | image_root=image_root, 101 | evaluator_type="sem_seg", 102 | **metadata) 103 | 104 | 105 | def merge_to_panoptic(detection_dicts, sem_seg_dicts): 106 | """ 107 | Create dataset dicts for panoptic segmentation, by 108 | merging two dicts using "file_name" field to match their entries. 109 | 110 | Args: 111 | detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. 112 | sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. 113 | 114 | Returns: 115 | list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in 116 | both detection_dicts and sem_seg_dicts that correspond to the same image. 117 | The function assumes that the same key in different dicts has the same value. 118 | """ 119 | results = [] 120 | sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} 121 | assert len(sem_seg_file_to_entry) > 0 122 | 123 | for det_dict in detection_dicts: 124 | dic = copy.copy(det_dict) 125 | dic.update(sem_seg_file_to_entry[dic["file_name"]]) 126 | results.append(dic) 127 | return results 128 | -------------------------------------------------------------------------------- /dl_lib/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | ] 11 | -------------------------------------------------------------------------------- /dl_lib/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that the batch only contain elements from the same group. 10 | It also tries to provide mini-batches which follows an ordering which is 11 | as close as possible to the ordering from the original sampler. 12 | """ 13 | def __init__(self, sampler, group_ids, batch_size): 14 | """ 15 | Args: 16 | sampler (Sampler): Base sampler. 17 | group_ids (list[int]): If the sampler produces indices in range [0, N), 18 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 
19 | The group ids must be a set of integers in the range [0, num_groups). 20 | batch_size (int): Size of mini-batch. 21 | """ 22 | if not isinstance(sampler, Sampler): 23 | raise ValueError( 24 | "sampler should be an instance of " 25 | "torch.utils.data.Sampler, but got sampler={}".format(sampler)) 26 | self.sampler = sampler 27 | self.group_ids = np.asarray(group_ids) 28 | assert self.group_ids.ndim == 1 29 | self.batch_size = batch_size 30 | groups = np.unique(self.group_ids).tolist() 31 | 32 | # buffer the indices of each group until batch size is reached 33 | self.buffer_per_group = {k: [] for k in groups} 34 | 35 | def __iter__(self): 36 | for idx in self.sampler: 37 | group_id = self.group_ids[idx] 38 | group_buffer = self.buffer_per_group[group_id] 39 | group_buffer.append(idx) 40 | if len(group_buffer) == self.batch_size: 41 | yield group_buffer[:] # yield a copy of the list 42 | del group_buffer[:] 43 | 44 | def __len__(self): 45 | raise NotImplementedError( 46 | "len() of GroupedBatchSampler is not well-defined.") 47 | -------------------------------------------------------------------------------- /dl_lib/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .transform import * 3 | from .extend_transform import * 4 | from .transform_gen import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /dl_lib/data/transforms/transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: transform.py 4 | 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from .extend_transform import HFlipTransform, NoOpTransform, Transform 9 | 10 | __all__ = ["ExtentTransform", "ResizeTransform"] 11 | 12 | 13 | class ExtentTransform(Transform): 14 | """ 15 | Extracts a subregion from the source image and scales it to the output size. 16 | 17 | The fill color is used to map pixels from the source rect that fall outside 18 | the source image. 19 | 20 | See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform 21 | """ 22 | def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): 23 | """ 24 | Args: 25 | src_rect (x0, y0, x1, y1): src coordinates 26 | output_size (h, w): dst image size 27 | interp: PIL interpolation methods 28 | fill: Fill color used when src_rect extends outside image 29 | """ 30 | super().__init__() 31 | self._set_attributes(locals()) 32 | 33 | def apply_image(self, img, interp=None): 34 | h, w = self.output_size 35 | ret = Image.fromarray(img).transform( 36 | size=(w, h), 37 | method=Image.EXTENT, 38 | data=self.src_rect, 39 | resample=interp if interp else self.interp, 40 | fill=self.fill, 41 | ) 42 | return np.asarray(ret) 43 | 44 | def apply_coords(self, coords): 45 | # Transform image center from source coordinates into output coordinates 46 | # and then map the new origin to the corner of the output image. 
47 | h, w = self.output_size 48 | x0, y0, x1, y1 = self.src_rect 49 | new_coords = coords.astype(np.float32) 50 | new_coords[:, 0] -= 0.5 * (x0 + x1) 51 | new_coords[:, 1] -= 0.5 * (y0 + y1) 52 | new_coords[:, 0] *= w / (x1 - x0) 53 | new_coords[:, 1] *= h / (y1 - y0) 54 | new_coords[:, 0] += 0.5 * w 55 | new_coords[:, 1] += 0.5 * h 56 | return new_coords 57 | 58 | def apply_segmentation(self, segmentation): 59 | segmentation = self.apply_image(segmentation, interp=Image.NEAREST) 60 | return segmentation 61 | 62 | 63 | class ResizeTransform(Transform): 64 | """ 65 | Resize the image to a target size. 66 | """ 67 | def __init__(self, h, w, new_h, new_w, interp): 68 | """ 69 | Args: 70 | h, w (int): original image size 71 | new_h, new_w (int): new image size 72 | interp: PIL interpolation methods 73 | """ 74 | # TODO decide on PIL vs opencv 75 | super().__init__() 76 | self._set_attributes(locals()) 77 | 78 | def apply_image(self, img, interp=None): 79 | assert img.shape[:2] == (self.h, self.w) 80 | pil_image = Image.fromarray(img) 81 | interp_method = interp if interp is not None else self.interp 82 | pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) 83 | ret = np.asarray(pil_image) 84 | return ret 85 | 86 | def apply_coords(self, coords): 87 | coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) 88 | coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) 89 | return coords 90 | 91 | def apply_segmentation(self, segmentation): 92 | segmentation = self.apply_image(segmentation, interp=Image.NEAREST) 93 | return segmentation 94 | 95 | 96 | def HFlip_rotated_box(transform, rotated_boxes): 97 | """ 98 | Apply the horizontal flip transform on rotated boxes. 99 | 100 | Args: 101 | rotated_boxes (ndarray): Nx5 floating point array of 102 | (x_center, y_center, width, height, angle_degrees) format 103 | in absolute coordinates. 104 | """ 105 | # Transform x_center 106 | rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] 107 | # Transform angle 108 | rotated_boxes[:, 4] = -rotated_boxes[:, 4] 109 | return rotated_boxes 110 | 111 | 112 | def Resize_rotated_box(transform, rotated_boxes): 113 | """ 114 | Apply the resizing transform on rotated boxes. For details of how these (approximation) 115 | formulas are derived, please refer to :meth:`RotatedBoxes.scale`. 116 | 117 | Args: 118 | rotated_boxes (ndarray): Nx5 floating point array of 119 | (x_center, y_center, width, height, angle_degrees) format 120 | in absolute coordinates. 
121 | """ 122 | scale_factor_x = transform.new_w * 1.0 / transform.w 123 | scale_factor_y = transform.new_h * 1.0 / transform.h 124 | rotated_boxes[:, 0] *= scale_factor_x 125 | rotated_boxes[:, 1] *= scale_factor_y 126 | theta = rotated_boxes[:, 4] * np.pi / 180.0 127 | c = np.cos(theta) 128 | s = np.sin(theta) 129 | rotated_boxes[:, 2] *= np.sqrt( 130 | np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) 131 | rotated_boxes[:, 3] *= np.sqrt( 132 | np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) 133 | rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, 134 | scale_factor_y * c) * 180 / np.pi 135 | 136 | return rotated_boxes 137 | 138 | 139 | HFlipTransform.register_type("rotated_box", HFlip_rotated_box) 140 | NoOpTransform.register_type("rotated_box", lambda t, x: x) 141 | ResizeTransform.register_type("rotated_box", Resize_rotated_box) 142 | -------------------------------------------------------------------------------- /dl_lib/data/transforms/transform_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import numpy as np 5 | import torch 6 | 7 | 8 | # pyre-ignore-all-errors 9 | def to_float_tensor(numpy_array: np.ndarray) -> torch.Tensor: 10 | """ 11 | Convert the numpy array to torch float tensor with dimension of NxCxHxW. 12 | Pytorch is not fully supporting uint8, so convert tensor to float if the 13 | numpy_array is uint8. 14 | Args: 15 | numpy_array (ndarray): of shape NxHxWxC, or HxWxC or HxW to 16 | represent an image. The array can be of type uint8 in range 17 | [0, 255], or floating point in range [0, 1] or [0, 255]. 18 | Returns: 19 | float_tensor (tensor): converted float tensor. 20 | """ 21 | assert isinstance(numpy_array, np.ndarray) 22 | assert len(numpy_array.shape) in (2, 3, 4) 23 | 24 | # Some of the input numpy array has negative strides. Pytorch currently 25 | # does not support negative strides, perform ascontiguousarray to 26 | # resolve the issue. 27 | float_tensor = torch.from_numpy(np.ascontiguousarray(numpy_array)) 28 | if numpy_array.dtype == np.uint8: 29 | float_tensor = float_tensor.float() 30 | 31 | if len(numpy_array.shape) == 2: 32 | # HxW -> 1x1xHxW. 33 | float_tensor = float_tensor[None, None, :, :] 34 | elif len(numpy_array.shape) == 3: 35 | # HxWxC -> 1xCxHxW. 36 | float_tensor = float_tensor.permute(2, 0, 1) 37 | float_tensor = float_tensor[None, :, :, :] 38 | elif len(numpy_array.shape) == 4: 39 | # NxHxWxC -> NxCxHxW 40 | float_tensor = float_tensor.permute(0, 3, 1, 2) 41 | else: 42 | raise NotImplementedError("Unknow numpy_array dimension of {}".format( 43 | float_tensor.shape)) 44 | return float_tensor 45 | 46 | 47 | def to_numpy(float_tensor: torch.Tensor, target_shape: list, 48 | target_dtype: np.dtype) -> np.ndarray: 49 | """ 50 | Convert float tensor with dimension of NxCxHxW back to numpy array. 51 | Args: 52 | float_tensor (tensor): a float pytorch tensor with shape of NxCxHxW. 53 | target_shape (list): the target shape of the numpy array to represent 54 | the image as output. options include NxHxWxC, or HxWxC or HxW. 55 | target_dtype (dtype): the target dtype of the numpy array to represent 56 | the image as output. The array can be of type uint8 in range 57 | [0, 255], or floating point in range [0, 1] or [0, 255]. 58 | Returns: 59 | (ndarray): converted numpy array. 
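    Example (an illustrative sketch using only the helpers defined in this file)::

        img = np.zeros((4, 5, 3), dtype=np.uint8)       # HxWxC uint8 image
        tensor = to_float_tensor(img)                   # float tensor of shape 1x3x4x5
        back = to_numpy(tensor, img.shape, img.dtype)   # HxWxC uint8 array again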
60 | """ 61 | assert len(target_shape) in (2, 3, 4) 62 | 63 | if len(target_shape) == 2: 64 | # 1x1xHxW -> HxW. 65 | assert float_tensor.shape[0] == 1 66 | assert float_tensor.shape[1] == 1 67 | float_tensor = float_tensor[0, 0, :, :] 68 | elif len(target_shape) == 3: 69 | assert float_tensor.shape[0] == 1 70 | # 1xCxHxW -> HxWxC. 71 | float_tensor = float_tensor[0].permute(1, 2, 0) 72 | elif len(target_shape) == 4: 73 | # NxCxHxW -> NxHxWxC 74 | float_tensor = float_tensor.permute(0, 2, 3, 1) 75 | else: 76 | raise NotImplementedError( 77 | "Unknow target shape dimension of {}".format(target_shape)) 78 | if target_dtype == np.uint8: 79 | # Need to specifically call round here, notice in pytroch the round 80 | # is half to even. 81 | # https://github.com/pytorch/pytorch/issues/16498 82 | float_tensor = float_tensor.round().byte() 83 | return float_tensor.numpy() 84 | -------------------------------------------------------------------------------- /dl_lib/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 9 | # but still make them available here 10 | from .hooks import * 11 | from .defaults import * 12 | from .custom import * 13 | -------------------------------------------------------------------------------- /dl_lib/engine/launch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | 4 | import torch 5 | import torch.distributed as dist 6 | import torch.multiprocessing as mp 7 | 8 | from dl_lib.utils import comm 9 | 10 | __all__ = ["launch"] 11 | 12 | 13 | def _find_free_port(): 14 | import socket 15 | 16 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 17 | # Binding to port 0 will cause the OS to find an available port for us 18 | sock.bind(("", 0)) 19 | port = sock.getsockname()[1] 20 | sock.close() 21 | # NOTE: there is still a chance the port could be taken by other processes. 22 | return port 23 | 24 | 25 | def launch(main_func, 26 | num_gpus_per_machine, 27 | num_machines=1, 28 | machine_rank=0, 29 | dist_url=None, 30 | args=()): 31 | """ 32 | Args: 33 | main_func: a function that will be called by `main_func(*args)` 34 | num_machines (int): the total number of machines 35 | machine_rank (int): the rank of this machine (one per machine) 36 | dist_url (str): url to connect to for distributed training, including protocol 37 | e.g. "tcp://127.0.0.1:8686". 38 | Can be set to auto to automatically select a free port on localhost 39 | args (tuple): arguments passed to main_func 40 | """ 41 | world_size = num_machines * num_gpus_per_machine 42 | if world_size > 1: 43 | # https://github.com/pytorch/pytorch/pull/14391 44 | # TODO prctl in spawned processes 45 | 46 | if dist_url == "auto": 47 | assert num_machines == 1, "dist_url=auto cannot work with distributed training." 
48 | port = _find_free_port() 49 | dist_url = f"tcp://127.0.0.1:{port}" 50 | 51 | mp.spawn( 52 | _distributed_worker, 53 | nprocs=num_gpus_per_machine, 54 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, 55 | dist_url, args), 56 | daemon=False, 57 | ) 58 | else: 59 | main_func(*args) 60 | 61 | 62 | def _distributed_worker(local_rank, main_func, world_size, 63 | num_gpus_per_machine, machine_rank, dist_url, args): 64 | assert torch.cuda.is_available( 65 | ), "cuda is not available. Please check your installation." 66 | global_rank = machine_rank * num_gpus_per_machine + local_rank 67 | try: 68 | dist.init_process_group(backend="NCCL", 69 | init_method=dist_url, 70 | world_size=world_size, 71 | rank=global_rank) 72 | except Exception as e: 73 | logger = logging.getLogger(__name__) 74 | logger.error("Process group URL: {}".format(dist_url)) 75 | raise e 76 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 77 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 78 | comm.synchronize() 79 | 80 | assert num_gpus_per_machine <= torch.cuda.device_count() 81 | torch.cuda.set_device(local_rank) 82 | 83 | # Setup the local process group (which contains ranks within the same machine) 84 | assert comm._LOCAL_PROCESS_GROUP is None 85 | num_machines = world_size // num_gpus_per_machine 86 | for i in range(num_machines): 87 | ranks_on_i = list( 88 | range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 89 | pg = dist.new_group(ranks_on_i) 90 | if i == machine_rank: 91 | comm._LOCAL_PROCESS_GROUP = pg 92 | 93 | main_func(*args) 94 | -------------------------------------------------------------------------------- /dl_lib/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .cityscapes_evaluation import CityscapesEvaluator 3 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 4 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 5 | from .sem_seg_evaluation import SemSegEvaluator 6 | from .testing import print_csv_format, verify_results 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /dl_lib/evaluation/cityscapes_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import glob 3 | import logging 4 | import os 5 | import tempfile 6 | from collections import OrderedDict 7 | 8 | import torch 9 | from PIL import Image 10 | 11 | from dl_lib.data import MetadataCatalog 12 | from dl_lib.utils import comm 13 | 14 | from .evaluator import DatasetEvaluator 15 | 16 | 17 | class CityscapesEvaluator(DatasetEvaluator): 18 | """ 19 | Evaluate instance segmentation results using cityscapes API. 20 | 21 | Note: 22 | * It does not work in multi-machine distributed training. 23 | * It contains a synchronization, therefore has to be used on all ranks. 24 | """ 25 | def __init__(self, dataset_name): 26 | """ 27 | Args: 28 | dataset_name (str): the name of the dataset. 29 | It must have the following metadata associated with it: 30 | "thing_classes", "gt_dir". 
31 | """ 32 | self._metadata = MetadataCatalog.get(dataset_name) 33 | self._cpu_device = torch.device("cpu") 34 | self._logger = logging.getLogger(__name__) 35 | 36 | def reset(self): 37 | self._working_dir = tempfile.TemporaryDirectory( 38 | prefix="cityscapes_eval_") 39 | self._temp_dir = self._working_dir.name 40 | # All workers will write to the same results directory 41 | # TODO this does not work in distributed training 42 | self._temp_dir = comm.all_gather(self._temp_dir)[0] 43 | if self._temp_dir != self._working_dir.name: 44 | self._working_dir.cleanup() 45 | self._logger.info( 46 | "Writing cityscapes results to temporary directory {} ...".format( 47 | self._temp_dir)) 48 | 49 | def process(self, inputs, outputs): 50 | from cityscapesscripts.helpers.labels import name2label 51 | 52 | for input, output in zip(inputs, outputs): 53 | file_name = input["file_name"] 54 | basename = os.path.splitext(os.path.basename(file_name))[0] 55 | pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") 56 | 57 | output = output["instances"].to(self._cpu_device) 58 | num_instances = len(output) 59 | with open(pred_txt, "w") as fout: 60 | for i in range(num_instances): 61 | pred_class = output.pred_classes[i] 62 | classes = self._metadata.thing_classes[pred_class] 63 | class_id = name2label[classes].id 64 | score = output.scores[i] 65 | mask = output.pred_masks[i].numpy().astype("uint8") 66 | png_filename = os.path.join( 67 | self._temp_dir, 68 | basename + "_{}_{}.png".format(i, classes)) 69 | 70 | Image.fromarray(mask * 255).save(png_filename) 71 | fout.write("{} {} {}\n".format( 72 | os.path.basename(png_filename), class_id, score)) 73 | 74 | def evaluate(self): 75 | """ 76 | Returns: 77 | dict: has a key "segm", whose value is a dict of "AP" and "AP50". 78 | """ 79 | comm.synchronize() 80 | if comm.get_rank() > 0: 81 | return 82 | os.environ["CITYSCAPES_DATASET"] = os.path.abspath( 83 | os.path.join(self._metadata.gt_dir, "..", "..")) 84 | # Load the Cityscapes eval script *after* setting the required env var, 85 | # since the script reads CITYSCAPES_DATASET into global variables at load time. 86 | import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval 87 | 88 | self._logger.info("Evaluating results under {} ...".format( 89 | self._temp_dir)) 90 | 91 | # set some global states in cityscapes evaluation API, before evaluating 92 | cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) 93 | cityscapes_eval.args.predictionWalk = None 94 | cityscapes_eval.args.JSONOutput = False 95 | cityscapes_eval.args.colorized = False 96 | cityscapes_eval.args.gtInstancesFile = os.path.join( 97 | self._temp_dir, "gtInstances.json") 98 | 99 | # These lines are adopted from 100 | # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa 101 | groundTruthImgList = glob.glob(cityscapes_eval.args.groundTruthSearch) 102 | assert len( 103 | groundTruthImgList 104 | ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( 105 | cityscapes_eval.args.groundTruthSearch) 106 | predictionImgList = [] 107 | for gt in groundTruthImgList: 108 | predictionImgList.append( 109 | cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) 110 | results = cityscapes_eval.evaluateImgLists( 111 | predictionImgList, groundTruthImgList, 112 | cityscapes_eval.args)["averages"] 113 | 114 | ret = OrderedDict() 115 | ret["segm"] = { 116 | "AP": results["allAp"] * 100, 117 | "AP50": results["allAp50%"] * 100 118 | } 119 | self._working_dir.cleanup() 120 | return ret 121 | -------------------------------------------------------------------------------- /dl_lib/evaluation/evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import datetime 3 | import logging 4 | import time 5 | from collections import OrderedDict 6 | from contextlib import contextmanager 7 | 8 | import torch 9 | 10 | from dl_lib.utils.comm import is_main_process 11 | 12 | 13 | class DatasetEvaluator: 14 | """ 15 | Base class for a dataset evaluator. 16 | 17 | The function :func:`inference_on_dataset` runs the model over 18 | all samples in the dataset, and uses a DatasetEvaluator to process the inputs/outputs. 19 | 20 | This class will accumulate information of the inputs/outputs (by :meth:`process`), 21 | and produce evaluation results in the end (by :meth:`evaluate`). 22 | """ 23 | def reset(self): 24 | """ 25 | Preparation for a new round of evaluation. 26 | Should be called before starting a round of evaluation. 27 | """ 28 | pass 29 | 30 | def process(self, input, output): 31 | """ 32 | Process an input/output pair. 33 | 34 | Args: 35 | input: the input that's used to call the model. 36 | output: the return value of `model(input)` 37 | """ 38 | pass 39 | 40 | def evaluate(self): 41 | """ 42 | Evaluate/summarize the performance, after processing all input/output pairs. 43 | 44 | Returns: 45 | dict: 46 | A new evaluator class can return a dict of arbitrary format 47 | as long as the user can process the results. 48 | In our train_net.py, we expect the following format: 49 | 50 | * key: the name of the task (e.g., bbox) 51 | * value: a dict of {metric name: score}, e.g.: {"AP50": 80} 52 | """ 53 | pass 54 | 55 | 56 | class DatasetEvaluators(DatasetEvaluator): 57 | def __init__(self, evaluators): 58 | assert len(evaluators) 59 | super().__init__() 60 | self._evaluators = evaluators 61 | 62 | def reset(self): 63 | for evaluator in self._evaluators: 64 | evaluator.reset() 65 | 66 | def process(self, input, output): 67 | for evaluator in self._evaluators: 68 | evaluator.process(input, output) 69 | 70 | def evaluate(self): 71 | results = OrderedDict() 72 | for evaluator in self._evaluators: 73 | result = evaluator.evaluate() 74 | if is_main_process(): 75 | for k, v in result.items(): 76 | assert ( 77 | k not in results 78 | ), "Different evaluators produce results with the same key {}".format( 79 | k) 80 | results[k] = v 81 | return results 82 | 83 | 84 | def inference_on_dataset(model, data_loader, evaluator):  85 | """ 86 | Run the model on the data_loader and evaluate the metrics with the evaluator. 87 | The model will be used in eval mode. 88 | 89 | Args: 90 | model (nn.Module): a module which accepts an object from 91 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
92 | 93 | If you wish to evaluate a model in `training` mode instead, you can 94 | wrap the given model and override its behavior of `.eval()` and `.train()`. 95 | data_loader: an iterable object with a length. 96 | The elements it generates will be the inputs to the model. 97 | evaluator (DatasetEvaluator): the evaluator to run. Use 98 | :class:`DatasetEvaluators([])` if you only want to benchmark, but 99 | don't want to do any evaluation. 100 | 101 | Returns: 102 | The return value of `evaluator.evaluate()` 103 | """ 104 | num_devices = torch.distributed.get_world_size( 105 | ) if torch.distributed.is_initialized() else 1 106 | logger = logging.getLogger(__name__) 107 | logger.info("Start inference on {} images".format(len(data_loader))) 108 | 109 | total = len(data_loader) # inference data loader must have a fixed length 110 | evaluator.reset() 111 | 112 | logging_interval = 50 113 | num_warmup = min(5, logging_interval - 1, total - 1) 114 | start_time = time.time() 115 | total_compute_time = 0 116 | with inference_context(model), torch.no_grad(): 117 | for idx, inputs in enumerate(data_loader): 118 | if idx == num_warmup: 119 | start_time = time.time() 120 | total_compute_time = 0 121 | 122 | start_compute_time = time.time() 123 | outputs = model(inputs) 124 | if torch.cuda.is_available(): 125 | torch.cuda.synchronize() 126 | total_compute_time += time.time() - start_compute_time 127 | evaluator.process(inputs, outputs) 128 | 129 | if (idx + 1) % logging_interval == 0: 130 | duration = time.time() - start_time 131 | seconds_per_img = duration / (idx + 1 - num_warmup) 132 | eta = datetime.timedelta(seconds=int(seconds_per_img * 133 | (total - num_warmup) - 134 | duration)) 135 | logger.info( 136 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 137 | idx + 1, total, seconds_per_img, str(eta))) 138 | 139 | # Measure the time only for this worker (before the synchronization barrier) 140 | total_time = int(time.time() - start_time) 141 | total_time_str = str(datetime.timedelta(seconds=total_time)) 142 | # NOTE this format is parsed by grep 143 | logger.info( 144 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)". 145 | format(total_time_str, total_time / (total - num_warmup), num_devices)) 146 | total_compute_time_str = str( 147 | datetime.timedelta(seconds=int(total_compute_time))) 148 | logger.info( 149 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)" 150 | .format(total_compute_time_str, 151 | total_compute_time / (total - num_warmup), num_devices)) 152 | 153 | results = evaluator.evaluate() 154 | # An evaluator may return None when not in main process. 155 | # Replace it by an empty dict instead to make it easier for downstream code to handle 156 | if results is None: 157 | results = {} 158 | return results 159 | 160 | 161 | @contextmanager 162 | def inference_context(model): 163 | """ 164 | A context where the model is temporarily changed to eval mode, 165 | and restored to previous mode afterwards. 166 | 167 | Args: 168 | model: a torch Module 169 | """ 170 | training_mode = model.training 171 | model.eval() 172 | yield 173 | model.train(training_mode) 174 | -------------------------------------------------------------------------------- /dl_lib/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import pprint 4 | import sys 5 | from collections import Mapping, OrderedDict 6 | 7 | import numpy as np 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance( 19 | results, 20 | OrderedDict), results # unordered results cannot be properly printed 21 | logger = logging.getLogger(__name__) 22 | for task, res in results.items(): 23 | # Don't print "AP-category" metrics since they are usually not tracked. 24 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 25 | logger.info("copypaste: Task: {}".format(task)) 26 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 27 | logger.info("copypaste: " + 28 | ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 29 | 30 | 31 | def verify_results(cfg, results): 32 | """ 33 | Args: 34 | results (OrderedDict[dict]): task_name -> {metric -> score} 35 | 36 | Returns: 37 | bool: whether the verification succeeds or not 38 | """ 39 | expected_results = cfg.TEST.EXPECTED_RESULTS 40 | if not len(expected_results): 41 | return True 42 | 43 | ok = True 44 | for task, metric, expected, tolerance in expected_results: 45 | actual = results[task][metric] 46 | if not np.isfinite(actual): 47 | ok = False 48 | diff = abs(actual - expected) 49 | if diff > tolerance: 50 | ok = False 51 | 52 | logger = logging.getLogger(__name__) 53 | if not ok: 54 | logger.error("Result verification failed!") 55 | logger.error("Expected Results: " + str(expected_results)) 56 | logger.error("Actual Results: " + pprint.pformat(results)) 57 | 58 | sys.exit(1) 59 | else: 60 | logger.info("Results verification passed.") 61 | return ok 62 | 63 | 64 | def flatten_results_dict(results): 65 | """ 66 | Expand a hierarchical dict of scalars into a flat dict of scalars. 67 | If results[k1][k2][k3] = v, the returned dict will have the entry 68 | {"k1/k2/k3": v}. 69 | 70 | Args: 71 | results (dict): 72 | """ 73 | r = {} 74 | for k, v in results.items(): 75 | if isinstance(v, Mapping): 76 | v = flatten_results_dict(v) 77 | for kk, vv in v.items(): 78 | r[k + "/" + kk] = vv 79 | else: 80 | r[k] = v 81 | return r 82 | -------------------------------------------------------------------------------- /dl_lib/layers/ROIAlign/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace dl_lib { 6 | 7 | at::Tensor ROIAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor ROIAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor ROIAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor ROIAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor ROIAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return ROIAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return ROIAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor ROIAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return ROIAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return ROIAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace dl_lib 131 | -------------------------------------------------------------------------------- /dl_lib/layers/ROIAlign/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from dl_lib import _C 8 | 9 | 10 | class _ROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, 13 | aligned): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | ctx.aligned = aligned 20 | output = _C.roi_align_forward(input, roi, spatial_scale, 21 | output_size[0], output_size[1], 22 | sampling_ratio, aligned) 23 | return output 24 | 25 | @staticmethod 26 | @once_differentiable 27 | def backward(ctx, grad_output): 28 | rois, = ctx.saved_tensors 29 | output_size = ctx.output_size 30 | spatial_scale = ctx.spatial_scale 31 | sampling_ratio = ctx.sampling_ratio 32 | bs, ch, h, w = ctx.input_shape 33 | grad_input = _C.roi_align_backward( 34 | grad_output, 35 | rois, 36 | spatial_scale, 37 | output_size[0], 38 | output_size[1], 39 | bs, 40 | ch, 41 | h, 42 | w, 43 | sampling_ratio, 44 | ctx.aligned, 45 | ) 46 | return grad_input, None, None, None, None, None 47 | 48 | 49 | roi_align = _ROIAlign.apply 50 | 51 | 52 | class ROIAlign(nn.Module): 53 | def __init__(self, 54 | output_size, 55 | spatial_scale, 56 | sampling_ratio, 57 | aligned=True): 58 | """ 59 | Args: 60 | output_size (tuple): h, w 61 | spatial_scale (float): scale the input boxes by this number 62 | sampling_ratio (int): number of inputs samples to take for each output 63 | sample. 0 to take samples densely. 64 | aligned (bool): if False, use the legacy implementation in 65 | Detectron. If True, align the results more perfectly. 66 | 67 | Note: 68 | The meaning of aligned=True: 69 | 70 | Given a continuous coordinate c, its two neighboring pixel indices (in our 71 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 72 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 73 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original 74 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 75 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 76 | (relative to our pixel model) when performing bilinear interpolation. 77 | 78 | With `aligned=True`, 79 | we first appropriately scale the ROI and then shift it by -0.5 80 | prior to calling roi_align. This produces the correct neighbors; 81 | 82 | The difference does not make a difference to the model's performance if 83 | ROIAlign is used together with conv layers. 84 | """ 85 | super(ROIAlign, self).__init__() 86 | self.output_size = output_size 87 | self.spatial_scale = spatial_scale 88 | self.sampling_ratio = sampling_ratio 89 | self.aligned = aligned 90 | 91 | def forward(self, input, rois): 92 | """ 93 | Args: 94 | input: NCHW images 95 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 
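                For example, three boxes that all come from the first image of the
                batch form a 3x5 tensor whose first column is all zeros.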
96 | """ 97 | assert rois.dim() == 2 and rois.size(1) == 5 98 | return roi_align(input, rois, self.output_size, self.spatial_scale, 99 | self.sampling_ratio, self.aligned) 100 | 101 | def __repr__(self): 102 | tmpstr = self.__class__.__name__ + "(" 103 | tmpstr += "output_size=" + str(self.output_size) 104 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 105 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 106 | tmpstr += ", aligned=" + str(self.aligned) 107 | tmpstr += ")" 108 | return tmpstr 109 | -------------------------------------------------------------------------------- /dl_lib/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deformable.deform_conv import DeformConv, ModulatedDeformConv 4 | from .deformable.deform_conv_with_off import (DeformConvWithOff, 5 | ModulatedDeformConvWithOff) 6 | from .ROIAlign.roi_align import ROIAlign, roi_align 7 | from .shape_spec import ShapeSpec 8 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /dl_lib/layers/deformable/deform_conv_with_off.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .deform_conv import DeformConv, ModulatedDeformConv 7 | 8 | 9 | class DeformConvWithOff(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | kernel_size=3, 14 | stride=1, 15 | padding=1, 16 | dilation=1, 17 | deformable_groups=1): 18 | super(DeformConvWithOff, self).__init__() 19 | self.offset_conv = nn.Conv2d( 20 | in_channels, 21 | deformable_groups * 2 * kernel_size * kernel_size, 22 | kernel_size=kernel_size, 23 | stride=stride, 24 | padding=padding, 25 | ) 26 | self.dcn = DeformConv( 27 | in_channels, 28 | out_channels, 29 | kernel_size=kernel_size, 30 | stride=stride, 31 | padding=padding, 32 | dilation=dilation, 33 | deformable_groups=deformable_groups, 34 | ) 35 | 36 | def forward(self, input): 37 | offset = self.offset_conv(input) 38 | output = self.dcn(input, offset) 39 | return output 40 | 41 | 42 | class ModulatedDeformConvWithOff(nn.Module): 43 | def __init__(self, 44 | in_channels, 45 | out_channels, 46 | kernel_size=3, 47 | stride=1, 48 | padding=1, 49 | dilation=1, 50 | deformable_groups=1): 51 | super(ModulatedDeformConvWithOff, self).__init__() 52 | self.offset_mask_conv = nn.Conv2d( 53 | in_channels, 54 | deformable_groups * 3 * kernel_size * kernel_size, 55 | kernel_size=kernel_size, 56 | stride=stride, 57 | padding=padding, 58 | ) 59 | self.dcnv2 = ModulatedDeformConv( 60 | in_channels, 61 | out_channels, 62 | kernel_size=kernel_size, 63 | stride=stride, 64 | padding=padding, 65 | dilation=dilation, 66 | deformable_groups=deformable_groups, 67 | ) 68 | 69 | def forward(self, input): 70 | x = self.offset_mask_conv(input) 71 | o1, o2, mask = torch.chunk(x, 3, dim=1) 72 | offset = torch.cat((o1, o2), dim=1) 73 | mask = torch.sigmoid(mask) 74 | output = self.dcnv2(input, offset, mask) 75 | return output 76 | -------------------------------------------------------------------------------- /dl_lib/layers/shape_spec.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec( 7 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 8 | """ 9 | A simple structure that contains basic shape specification about a tensor. 10 | It is often used as the auxiliary inputs/outputs of models, 11 | to obtain the shape inference ability among pytorch modules. 12 | 13 | Attributes: 14 | channels: 15 | height: 16 | width: 17 | stride: 18 | """ 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /dl_lib/layers/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | // Modified by Yanwei Li 3 | 4 | #include 5 | #include 6 | #include 7 | #include "ROIAlign/ROIAlign.h" 8 | #include "deformable/deform_conv.h" 9 | 10 | namespace dl_lib { 11 | 12 | #ifdef WITH_CUDA 13 | int get_cudart_version() { 14 | return CUDART_VERSION; 15 | } 16 | #endif 17 | 18 | std::string get_cuda_version() { 19 | #ifdef WITH_CUDA 20 | std::ostringstream oss; 21 | 22 | // copied from 23 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 24 | auto printCudaStyleVersion = [&](int v) { 25 | oss << (v / 1000) << "." << (v / 10 % 100); 26 | if (v % 10 != 0) { 27 | oss << "." << (v % 10); 28 | } 29 | }; 30 | printCudaStyleVersion(get_cudart_version()); 31 | return oss.str(); 32 | #else 33 | return std::string("not available"); 34 | #endif 35 | } 36 | 37 | // similar to 38 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 39 | std::string get_compiler_version() { 40 | std::ostringstream ss; 41 | #if defined(__GNUC__) 42 | #ifndef __clang__ 43 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 44 | #endif 45 | #endif 46 | 47 | #if defined(__clang_major__) 48 | { 49 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
50 | << __clang_patchlevel__; 51 | } 52 | #endif 53 | 54 | #if defined(_MSC_VER) 55 | { ss << "MSVC " << _MSC_FULL_VER; } 56 | #endif 57 | return ss.str(); 58 | } 59 | 60 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 61 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 62 | m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); 63 | 64 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 65 | m.def( 66 | "deform_conv_backward_input", 67 | &deform_conv_backward_input, 68 | "deform_conv_backward_input"); 69 | m.def( 70 | "deform_conv_backward_filter", 71 | &deform_conv_backward_filter, 72 | "deform_conv_backward_filter"); 73 | m.def( 74 | "modulated_deform_conv_forward", 75 | &modulated_deform_conv_forward, 76 | "modulated_deform_conv_forward"); 77 | m.def( 78 | "modulated_deform_conv_backward", 79 | &modulated_deform_conv_backward, 80 | "modulated_deform_conv_backward"); 81 | 82 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 83 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 84 | } 85 | 86 | } // namespace dl_lib 87 | -------------------------------------------------------------------------------- /dl_lib/layers/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Wrappers around some nn functions, mainly to support empty tensors. 4 | 5 | Ideally, support for empty tensors would be added directly to PyTorch in those functions. 6 | 7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013 8 | is implemented. 9 | """ 10 | 11 | import math 12 | 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | def cat(tensors, dim=0): 18 | """ 19 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 20 | """ 21 | assert isinstance(tensors, (list, tuple)) 22 | if len(tensors) == 1: 23 | return tensors[0] 24 | return torch.cat(tensors, dim) 25 | 26 | 27 | class _NewEmptyTensorOp(torch.autograd.Function): 28 | @staticmethod 29 | def forward(ctx, x, new_shape): 30 | ctx.shape = x.shape 31 | return x.new_empty(new_shape) 32 | 33 | @staticmethod 34 | def backward(ctx, grad): 35 | shape = ctx.shape 36 | return _NewEmptyTensorOp.apply(grad, shape), None 37 | 38 | 39 | class Conv2d(torch.nn.Conv2d): 40 | """ 41 | A wrapper around :class:`torch.nn.Conv2d` to support zero-size tensors and more features. 42 | """ 43 | def __init__(self, *args, **kwargs): 44 | """ 45 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 46 | 47 | Args: 48 | norm (nn.Module, optional): a normalization layer 49 | activation (callable(Tensor) -> Tensor): a callable activation function 50 | 51 | It assumes that the norm layer is used before the activation. 52 | """ 53 | norm = kwargs.pop("norm", None) 54 | activation = kwargs.pop("activation", None) 55 | super().__init__(*args, **kwargs) 56 | 57 | self.norm = norm 58 | self.activation = activation 59 | 60 | def forward(self, x): 61 | if x.numel() == 0: 62 | # When input is empty, we want to return an empty tensor with "correct" shape, 63 | # so that the following operations will not panic 64 | # if they check for the shape of the tensor.
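# Quick sanity check of the formula below: with kernel_size=3, padding=1,
# dilation=1 and stride=2, a 65-pixel input dimension gives
# (65 + 2 * 1 - (1 * (3 - 1) + 1)) // 2 + 1 = 33.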
65 | # This computes the height and width of the output tensor 66 | output_shape = [(i + 2 * p - (di * (k - 1) + 1)) // s + 1 67 | for i, p, di, k, s in 68 | zip(x.shape[-2:], self.padding, self.dilation, 69 | self.kernel_size, self.stride)] 70 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 71 | empty = _NewEmptyTensorOp.apply(x, output_shape) 72 | if self.training: 73 | # https://github.com/pytorch/pytorch/issues/12013 74 | assert not isinstance( 75 | self.norm, torch.nn.SyncBatchNorm 76 | ), "SyncBatchNorm does not support empty inputs!" 77 | 78 | # This is to make DDP happy. 79 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 80 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 81 | return empty + _dummy 82 | else: 83 | return empty 84 | 85 | x = super().forward(x) 86 | if self.norm is not None: 87 | x = self.norm(x) 88 | if self.activation is not None: 89 | x = self.activation(x) 90 | return x 91 | 92 | 93 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 94 | """ 95 | A wrapper around :class:`torch.nn.ConvTranspose2d` to support zero-size tensor. 96 | """ 97 | def forward(self, x): 98 | if x.numel() > 0: 99 | return super(ConvTranspose2d, self).forward(x) 100 | # get output shape 101 | 102 | output_shape = [(i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 103 | for i, p, di, k, d, op in zip( 104 | x.shape[-2:], 105 | self.padding, 106 | self.dilation, 107 | self.kernel_size, 108 | self.stride, 109 | self.output_padding, 110 | )] 111 | output_shape = [x.shape[0], self.out_channels] + output_shape 112 | # This is to make DDP happy. 113 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 114 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 115 | return _NewEmptyTensorOp.apply(x, output_shape) + _dummy 116 | 117 | 118 | class BatchNorm2d(torch.nn.BatchNorm2d): 119 | """ 120 | A wrapper around :class:`torch.nn.BatchNorm2d` to support zero-size tensor. 121 | """ 122 | def forward(self, x): 123 | if x.numel() > 0: 124 | return super(BatchNorm2d, self).forward(x) 125 | # get output shape 126 | output_shape = x.shape 127 | return _NewEmptyTensorOp.apply(x, output_shape) 128 | 129 | 130 | def interpolate(input, 131 | size=None, 132 | scale_factor=None, 133 | mode="nearest", 134 | align_corners=None): 135 | """ 136 | A wrapper around :func:`torch.nn.functional.interpolate` to support zero-size tensor. 137 | """ 138 | if input.numel() > 0: 139 | return torch.nn.functional.interpolate(input, 140 | size, 141 | scale_factor, 142 | mode, 143 | align_corners=align_corners) 144 | 145 | def _check_size_scale_factor(dim): 146 | if size is None and scale_factor is None: 147 | raise ValueError("either size or scale_factor should be defined") 148 | if size is not None and scale_factor is not None: 149 | raise ValueError( 150 | "only one of size or scale_factor should be defined") 151 | if (scale_factor is not None and isinstance(scale_factor, tuple) 152 | and len(scale_factor) != dim): 153 | raise ValueError("scale_factor shape must match input shape. 
" 154 | "Input is {}D, scale_factor size is {}".format( 155 | dim, len(scale_factor))) 156 | 157 | def _output_size(dim): 158 | _check_size_scale_factor(dim) 159 | if size is not None: 160 | return size 161 | scale_factors = _ntuple(dim)(scale_factor) 162 | # math.floor might return float in py2.7 163 | return [ 164 | int(math.floor(input.size(i + 2) * scale_factors[i])) 165 | for i in range(dim) 166 | ] 167 | 168 | output_shape = tuple(_output_size(2)) 169 | output_shape = input.shape[:-2] + output_shape 170 | return _NewEmptyTensorOp.apply(input, output_shape) 171 | -------------------------------------------------------------------------------- /dl_lib/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | from dl_lib.layers import ShapeSpec 5 | 6 | from .backbone import ( 7 | FPN, 8 | Backbone, 9 | ResNet, 10 | ResNetBlockBase, 11 | build_resnet_backbone, 12 | make_stage, 13 | ) 14 | from .meta_arch import (SemanticSegmentor, DynamicNet4Seg) 15 | from .test_time_augmentation import DatasetMapperTTA, SemanticSegmentorWithTTA 16 | 17 | _EXCLUDE = {"torch", "ShapeSpec"} 18 | __all__ = [ 19 | k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_") 20 | ] 21 | 22 | assert ( 23 | torch.Tensor([1]) == torch.Tensor([2]) 24 | ).dtype == torch.bool, "Your Pytorch is too old. Please update to contain https://github.com/pytorch/pytorch/pull/21113" 25 | -------------------------------------------------------------------------------- /dl_lib/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from .backbone import Backbone 4 | from .fpn import FPN, build_retinanet_resnet_fpn_p5_backbone 5 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 6 | 7 | # TODO can expose more resnet blocks after careful consideration 8 | -------------------------------------------------------------------------------- /dl_lib/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import torch.nn as nn 5 | 6 | from dl_lib.layers import ShapeSpec 7 | 8 | __all__ = ["Backbone"] 9 | 10 | 11 | class Backbone(nn.Module, metaclass=ABCMeta): 12 | """ 13 | Abstract base class for network backbones. 14 | """ 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self): 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 
39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec(channels=self._out_feature_channels[name], 50 | stride=self._out_feature_strides[name]) 51 | for name in self._out_features 52 | } 53 | 54 | # the properties below are not used any more 55 | 56 | @property 57 | def out_features(self): 58 | """deprecated""" 59 | return self._out_features 60 | 61 | @property 62 | def out_feature_strides(self): 63 | """deprecated""" 64 | return {f: self._out_feature_strides[f] for f in self._out_features} 65 | 66 | @property 67 | def out_feature_channels(self): 68 | """deprecated""" 69 | return {f: self._out_feature_channels[f] for f in self._out_features} 70 | -------------------------------------------------------------------------------- /dl_lib/modeling/basenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .basenet import basenet 2 | -------------------------------------------------------------------------------- /dl_lib/modeling/basenet/basenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | from PIL import Image 7 | 8 | from dl_lib.configs.base_config import config as cfg 9 | from dl_lib.data import MetadataCatalog 10 | from dl_lib.utils.visualizer import Visualizer 11 | 12 | from .show import visualize_feature_maps 13 | 14 | 15 | def basenet(cls): 16 | def visualize_data(self, per_image, save_to_file=False): 17 | """ 18 | Visualize data from batch_inputs of dataloader. 19 | 20 | Args: 21 | per_image (dict): a dict that contains: 22 | * image: Tensor, image in (C, H, W) format. 23 | * instances: Instances 24 | Other information that's included in the original dicts, such as: 25 | * "height", "width" (int): the output resolution of the model, used in inference. 26 | See :meth:`postprocess` for details. 27 | save_to_file: whether save img to disk. 
28 | 29 | Example: 30 | >>> self.visualize_data(batch_inputs[0]) 31 | """ 32 | metadata = MetadataCatalog.get("coco_2017_train") 33 | 34 | def output(vis, fname): 35 | if not save_to_file: 36 | print(fname) 37 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 38 | cv2.waitKey() 39 | else: 40 | filepath = os.path.join("./", fname) 41 | print("Saving to {} ...".format(filepath)) 42 | vis.save(filepath) 43 | 44 | scale = 1.0 45 | # Pytorch tensor is in (C, H, W) format 46 | img = per_image["image"].permute(1, 2, 0) 47 | if cfg.INPUT.FORMAT == "BGR": 48 | img = img[:, :, [2, 1, 0]] 49 | else: 50 | img = np.asarray( 51 | Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 52 | 53 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 54 | target_fields = per_image["instances"].get_fields() 55 | labels = [ 56 | metadata.thing_classes[i] for i in target_fields["gt_classes"] 57 | ] 58 | vis = visualizer.overlay_instances( 59 | labels=labels, 60 | boxes=target_fields.get("gt_boxes", None), 61 | masks=target_fields.get("gt_masks", None), 62 | keypoints=target_fields.get("gt_keypoints", None), 63 | ) 64 | output(vis, str(per_image["image_id"]) + ".jpg") 65 | 66 | cls.visualize_data = visualize_data 67 | 68 | def visualize_feature_map(self, 69 | feature_map, 70 | per_image=None, 71 | stride=8, 72 | save_name=0, 73 | with_img=True, 74 | channelwise=False): 75 | """ 76 | Visualize a feature map with (optional) gt boxes 77 | 78 | Args: 79 | feature_map (torch.Tensor): C x H x W 80 | per_image (dict): batch_inputs[i] 81 | stride (int): downsample ratio of the current feature_map 82 | save_name (int or str): feature map figure name 83 | with_img (bool): whether to visualize the corresponding image data 84 | channelwise (bool): visualize all channels (True) or the channel-wise mean (False) 85 | 86 | Examples:: 87 | >>> level = 1 88 | >>> self.visualize_feature_map(features[level][0], 89 | >>> per_image=batched_inputs[level], 90 | >>> stride=self.fpn_strides[level], 91 | >>> save_name=1, 92 | >>> with_img=False, 93 | >>> channelwise=False) 94 | """ 95 | if with_img and save_name == 0: 96 | self.visualize_data(per_image) 97 | 98 | with torch.no_grad(): 99 | if "instances" in per_image: 100 | instance = per_image["instances"] 101 | gts = instance.gt_boxes.tensor.cpu().numpy() 102 | l = gts[:, 0:1] 103 | t = gts[:, 1:2] 104 | r = gts[:, 2:3] 105 | b = gts[:, 3:4] 106 | boxes = (np.concatenate([l, t, l, b, r, b, r, t], 107 | axis=1).reshape(-1, 4, 108 | 2).transpose(0, 2, 1)) 109 | else: 110 | boxes = [] 111 | if not channelwise: 112 | fm = feature_map.permute(1, 2, 0).mean(dim=-1, keepdim=True) 113 | else: 114 | fm = feature_map.permute(1, 2, 0) 115 | # visualize_feature_maps(fm.sigmoid().cpu().numpy(), 116 | visualize_feature_maps( 117 | fm.cpu().numpy(), 118 | boxes=boxes, 119 | stride=stride, 120 | save_filename=f"feature_map_{save_name}.png", 121 | ) 122 | 123 | cls.visualize_feature_map = visualize_feature_map 124 | 125 | return cls 126 | -------------------------------------------------------------------------------- /dl_lib/modeling/basenet/show.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | import pylab as plt 5 | 6 | 7 | def draw_box(ax, vertices, color='black'): 8 | """ 9 | Draw box with color.
10 | 11 | Args: 12 | ax (list): axes to draw box along 13 | vertices (ndarray): indices of shape (N x 2) 14 | color (str): plotted color 15 | """ 16 | connections = [ 17 | [0, 1], 18 | [1, 2], 19 | [2, 3], 20 | [3, 0], 21 | ] 22 | for connection in connections: 23 | ax.plot(*vertices[:, connection], c=color, lw=5) 24 | 25 | 26 | def visualize_feature_maps(fm, 27 | boxes=[], 28 | keypoints=[], 29 | stride=1, 30 | save_filename=None): 31 | """ 32 | Visualize feature map with boxes or key points. 33 | 34 | Args: 35 | fm (torch.Tensor): feature map of shape H x W x c, c is channel 36 | boxes (ndarray): boxes to be visualized. 37 | keypoints (ndarray): key points to be visualized 38 | stride (int): used to normalize boxes or keypoints 39 | save_filename (bool): whether save to disk 40 | """ 41 | nc = np.ceil(np.sqrt(fm.shape[2])) # column 42 | nr = np.ceil(fm.shape[2] / nc) # row 43 | nc = int(nc) 44 | nr = int(nr) 45 | plt.figure(figsize=(64, 64)) 46 | for i in range(fm.shape[2]): 47 | ax = plt.subplot(nr, nc, i + 1) 48 | ax.imshow(fm[:, :, i], cmap='jet') 49 | 50 | for obj in boxes: 51 | box = copy.deepcopy(obj) / stride 52 | draw_box(ax, box, color='g') 53 | 54 | for pts_score in keypoints: 55 | pts = pts_score[:8] 56 | pts = pts / stride 57 | for i in range(4): 58 | ax.plot(pts[2 * i + 1], pts[2 * i + 0], 'r*') 59 | ax.plot([pts[1], pts[3]], [pts[0], pts[2]], c='y', lw=5) 60 | ax.plot([pts[3], pts[5]], [pts[2], pts[4]], c='g', lw=5) 61 | ax.plot([pts[5], pts[7]], [pts[4], pts[6]], c='b', lw=5) 62 | ax.plot([pts[7], pts[1]], [pts[6], pts[0]], c='r', lw=5) 63 | 64 | # plt.colorbar() 65 | ax.axis('off') 66 | if save_filename: 67 | plt.savefig(save_filename) 68 | else: 69 | plt.show() 70 | plt.close() 71 | -------------------------------------------------------------------------------- /dl_lib/modeling/dynamic_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # build for dynamic networks 3 | # @Author: yanwei.li 4 | 5 | from .dynamic_backbone import DynamicNetwork, build_dynamic_backbone -------------------------------------------------------------------------------- /dl_lib/modeling/dynamic_arch/cal_op_flops.py: -------------------------------------------------------------------------------- 1 | # Count Operation MFLOPs when fix batch to 1 2 | # @author: yanwei.li 3 | 4 | 5 | def count_Conv_flop( 6 | in_h, in_w, in_channel, out_channel, 7 | kernel_size, is_bias=False, stride=1, groups=1 8 | ): 9 | out_h = in_h // stride 10 | out_w = in_w // stride 11 | bias_ops = 1 if is_bias else 0 12 | kernel_ops = kernel_size[0] * kernel_size[1] * (in_channel // groups) 13 | delta_ops = (kernel_ops + bias_ops) * out_channel * out_h * out_w 14 | return delta_ops / 1e6 15 | 16 | 17 | def count_Linear_flop(in_num, out_num, is_bias): 18 | weight_ops = in_num * out_num 19 | bias_ops = out_num if is_bias else 0 20 | delta_ops = weight_ops + bias_ops 21 | return delta_ops / 1e6 22 | 23 | 24 | def count_BN_flop(in_h, in_w, in_channel, is_affine): 25 | multi_affine = 2 if is_affine else 1 26 | delta_ops = multi_affine * in_h * in_w * in_channel 27 | return delta_ops / 1e6 28 | 29 | 30 | def count_ReLU_flop(in_h, in_w, in_channel): 31 | delta_ops = in_h * in_w * in_channel 32 | return delta_ops / 1e6 33 | 34 | 35 | def count_Pool2d_flop(in_h, in_w, out_channel, kernel_size, stride): 36 | out_h = in_h // stride 37 | out_w = in_w // stride 38 | kernel_ops = kernel_size[0] * kernel_size[1] 39 | delta_ops = kernel_ops * out_w * out_h * 
out_channel 40 | return delta_ops / 1e6 41 | 42 | 43 | def count_ConvBNReLU_flop( 44 | in_h, in_w, in_channel, out_channel, 45 | kernel_size, is_bias=False, stride=1, 46 | groups=1, is_affine=True 47 | ): 48 | flops = 0.0 49 | flops += count_Conv_flop( 50 | in_h, in_w, in_channel, out_channel, 51 | kernel_size, is_bias, stride, groups 52 | ) 53 | in_h = in_h // stride 54 | in_w = in_w // stride 55 | flops += count_BN_flop(in_h, in_w, out_channel, is_affine) 56 | flops += count_ReLU_flop(in_h, in_w, out_channel) 57 | return flops 58 | -------------------------------------------------------------------------------- /dl_lib/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # import all the meta_arch, so they will be registered 5 | from .semantic_seg import SemanticSegmentor 6 | from .dynamic4seg import DynamicNet4Seg -------------------------------------------------------------------------------- /dl_lib/modeling/meta_arch/semantic_seg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | from typing import Dict 4 | from dl_lib.modeling.nn_utils import weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from dl_lib.layers import Conv2d, ShapeSpec 10 | from dl_lib.structures import ImageList 11 | from ..postprocessing import sem_seg_postprocess 12 | 13 | __all__ = ["SemanticSegmentor", "SemSegFPNHead"] 14 | 15 | 16 | def build_backbone(cfg): 17 | pass 18 | 19 | 20 | def build_sem_seg_head(cfg, ShapeSpec): 21 | pass 22 | 23 | 24 | """ 25 | Registry for semantic segmentation heads, which make semantic segmentation predictions 26 | from feature maps. 27 | """ 28 | 29 | 30 | class SemanticSegmentor(nn.Module): 31 | """ 32 | Main class for semantic segmentation architectures. 33 | """ 34 | def __init__(self, cfg): 35 | super().__init__() 36 | 37 | self.device = torch.device(cfg.MODEL.DEVICE) 38 | 39 | self.backbone = build_backbone(cfg) 40 | self.sem_seg_head = build_sem_seg_head( 41 | cfg, self.backbone.output_shape() 42 | ) 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to( 45 | self.device).view(-1, 1, 1) 46 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to( 47 | self.device).view(-1, 1, 1) 48 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 49 | 50 | self.to(self.device) 51 | 52 | def forward(self, batched_inputs): 53 | """ 54 | Args: 55 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 56 | Each item in the list contains the inputs for one image. 57 | For now, each item in the list is a dict that contains: 58 | image: Tensor, image in (C, H, W) format. 59 | sem_seg: semantic segmentation ground truth 60 | Other information that's included in the original dicts, such as: 61 | "height", "width" (int): the output resolution of the model, used in inference. 62 | See :meth:`postprocess` for details. 63 | Returns: 64 | list[dict]: Each dict is the output for one input image. 65 | The dict contains one key "sem_seg" whose value is a 66 | Tensor of the output resolution that represents the 67 | per-pixel segmentation prediction. 
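                At inference time, each returned dict's "sem_seg" tensor can be
                reduced to a per-pixel label map with an argmax over the class
                dimension, e.g. out["sem_seg"].argmax(dim=0).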
68 | """ 69 | images = [x["image"].to(self.device) for x in batched_inputs] 70 | images = [self.normalizer(x) for x in images] 71 | images = ImageList.from_tensors( 72 | images, self.backbone.size_divisibility 73 | ) 74 | 75 | features = self.backbone(images.tensor) 76 | 77 | if "sem_seg" in batched_inputs[0]: 78 | targets = [x["sem_seg"].to(self.device) for x in batched_inputs] 79 | targets = ImageList.from_tensors( 80 | targets, self.backbone.size_divisibility, 81 | self.sem_seg_head.ignore_value).tensor 82 | else: 83 | targets = None 84 | results, losses = self.sem_seg_head(features, targets) 85 | 86 | if self.training: 87 | return losses 88 | 89 | processed_results = [] 90 | for result, input_per_image, image_size in zip( 91 | results, batched_inputs, images.image_sizes 92 | ): 93 | height = input_per_image.get("height") 94 | width = input_per_image.get("width") 95 | r = sem_seg_postprocess(result, image_size, height, width) 96 | processed_results.append({"sem_seg": r}) 97 | return processed_results 98 | 99 | 100 | class SemSegFPNHead(nn.Module): 101 | """ 102 | A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper 103 | (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from 104 | all levels of the FPN into single output. 105 | """ 106 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 107 | super().__init__() 108 | 109 | # fmt: off 110 | self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES 111 | feature_strides = {k: v.stride for k, v in input_shape.items()} 112 | feature_channels = {k: v.channels for k, v in input_shape.items()} 113 | self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE 114 | num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES 115 | conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM 116 | self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE 117 | norm = cfg.MODEL.SEM_SEG_HEAD.NORM 118 | self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT 119 | # fmt: on 120 | 121 | self.scale_heads = [] 122 | for in_feature in self.in_features: 123 | head_ops = [] 124 | head_length = max( 125 | 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) 126 | ) 127 | for k in range(head_length): 128 | norm_module = nn.GroupNorm(32, conv_dims) if norm == "GN" else None 129 | conv = Conv2d( 130 | feature_channels[in_feature] if k == 0 else conv_dims, 131 | conv_dims, kernel_size=3, stride=1, padding=1, 132 | bias=not norm, norm=norm_module, activation=F.relu, 133 | ) 134 | weight_init.c2_msra_fill(conv) 135 | head_ops.append(conv) 136 | if feature_strides[in_feature] != self.common_stride: 137 | head_ops.append( 138 | nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) 139 | ) 140 | self.scale_heads.append(nn.Sequential(*head_ops)) 141 | self.add_module(in_feature, self.scale_heads[-1]) 142 | self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) 143 | weight_init.c2_msra_fill(self.predictor) 144 | 145 | def forward(self, features, targets=None): 146 | for i, f in enumerate(self.in_features): 147 | if i == 0: 148 | x = self.scale_heads[i](features[f]) 149 | else: 150 | x = x + self.scale_heads[i](features[f]) 151 | x = self.predictor(x) 152 | x = F.interpolate( 153 | x, scale_factor=self.common_stride, mode="bilinear", align_corners=False 154 | ) 155 | 156 | if self.training: 157 | losses = {} 158 | losses["loss_sem_seg"] = ( 159 | F.cross_entropy( 160 | x, targets, reduction="mean", 161 | ignore_index=self.ignore_value) * 
self.loss_weight 162 | ) 163 | return [], losses 164 | else: 165 | return x, {} 166 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Megvii-BaseDetection/DynamicRouting/2ad0a95139b1bf21878dd222854f98974ac4930a/dl_lib/modeling/nn_utils/__init__.py -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/flop_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | 7 | import torch.nn as nn 8 | 9 | from .jit_handles import (addmm_flop_jit, conv_flop_jit, einsum_flop_jit, 10 | get_jit_model_analysis, matmul_flop_jit) 11 | 12 | # A dictionary that maps supported operations to their flop count jit handles. 13 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 14 | "aten::addmm": addmm_flop_jit, 15 | "aten::_convolution": conv_flop_jit, 16 | "aten::einsum": einsum_flop_jit, 17 | "aten::matmul": matmul_flop_jit, 18 | } 19 | 20 | 21 | def flop_count( 22 | model: nn.Module, 23 | inputs: typing.Tuple[object, ...], 24 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], 25 | None] = None, 26 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 27 | """ 28 | Given a model and an input to the model, compute the Gflops of the given 29 | model. Note the input should have a batch size of 1. 30 | Args: 31 | model (nn.Module): The model to compute flop counts. 32 | inputs (tuple): Inputs that are passed to `model` to count flops. 33 | Inputs need to be in a tuple. 34 | supported_ops (dict(str,Callable) or None) : By default, we count flops 35 | for convolution layers, fully connected layers, torch.matmul and 36 | torch.einsum operations. We define a FLOP as a single atomic 37 | Multiply-Add. Users can provide customized supported_ops for 38 | counting flops if desired. 39 | Returns: 40 | tuple[defaultdict, Counter]: A dictionary that records the number of 41 | gflops for each operation and a Counter that records the number of 42 | skipped operations. 43 | """ 44 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 45 | if not supported_ops: 46 | supported_ops = _SUPPORTED_OPS.copy() 47 | 48 | # Run flop count. 49 | total_flop_counter, skipped_ops = get_jit_model_analysis( 50 | model, inputs, supported_ops) 51 | 52 | # Log for skipped operations. 53 | if len(skipped_ops) > 0: 54 | for op, freq in skipped_ops.items(): 55 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 56 | 57 | # Convert flop count to gigaflops. 58 | final_count = defaultdict(float) 59 | for op in total_flop_counter: 60 | final_count[op] = total_flop_counter[op] / 1e9 61 | 62 | return final_count, skipped_ops 63 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/precise_bn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
3 | 4 | import itertools 5 | 6 | import torch 7 | 8 | BN_MODULE_TYPES = ( 9 | torch.nn.BatchNorm1d, 10 | torch.nn.BatchNorm2d, 11 | torch.nn.BatchNorm3d, 12 | torch.nn.SyncBatchNorm, 13 | ) 14 | 15 | 16 | @torch.no_grad() 17 | def update_bn_stats(model, data_loader, num_iters: int = 200): 18 | """ 19 | Recompute and update the batch norm stats to make them more precise. During 20 | training both BN stats and the weight are changing after every iteration, so 21 | the running average can not precisely reflect the actual stats of the 22 | current model. 23 | In this function, the BN stats are recomputed with fixed weights, to make 24 | the running average more precise. Specifically, it computes the true average 25 | of per-batch mean/variance instead of the running average. 26 | 27 | Args: 28 | model (nn.Module): the model whose bn stats will be recomputed. 29 | 30 | Note that: 31 | 32 | 1. This function will not alter the training mode of the given model. 33 | Users are responsible for setting the layers that needs 34 | precise-BN to training mode, prior to calling this function. 35 | 36 | 2. Be careful if your models contain other stateful layers in 37 | addition to BN, i.e. layers whose state can change in forward 38 | iterations. This function will alter their state. If you wish 39 | them unchanged, you need to either pass in a submodule without 40 | those layers, or backup the states. 41 | data_loader (iterator): an iterator. Produce data as inputs to the model. 42 | num_iters (int): number of iterations to compute the stats. 43 | """ 44 | bn_layers = get_bn_modules(model) 45 | 46 | if len(bn_layers) == 0: 47 | return 48 | 49 | # In order to make the running stats only reflect the current batch, the 50 | # momentum is disabled. 51 | # bn.running_mean = (1 - momentum) * bn.running_mean + momentum * batch_mean 52 | # Setting the momentum to 1.0 to compute the stats without momentum. 53 | momentum_actual = [bn.momentum for bn in bn_layers] 54 | for bn in bn_layers: 55 | bn.momentum = 1.0 56 | 57 | # Note that running_var actually means "running average of variance" 58 | running_mean = [torch.zeros_like(bn.running_mean) for bn in bn_layers] 59 | running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers] 60 | 61 | for ind, inputs in enumerate(itertools.islice(data_loader, num_iters)): 62 | model(inputs) 63 | 64 | for i, bn in enumerate(bn_layers): 65 | # Accumulates the bn stats. 66 | running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1) 67 | running_var[i] += (bn.running_var - running_var[i]) / (ind + 1) 68 | # We compute the "average of variance" across iterations. 69 | assert ind == num_iters - 1, ( 70 | "update_bn_stats is meant to run for {} iterations, " 71 | "but the dataloader stops at {} iterations.".format(num_iters, ind)) 72 | 73 | for i, bn in enumerate(bn_layers): 74 | # Sets the precise bn stats. 75 | bn.running_mean = running_mean[i] 76 | bn.running_var = running_var[i] 77 | bn.momentum = momentum_actual[i] 78 | 79 | 80 | def get_bn_modules(model): 81 | """ 82 | Find all BatchNorm (BN) modules that are in training mode. See 83 | fvcore.precise_bn.BN_MODULE_TYPES for a list of all modules that are 84 | included in this search. 85 | 86 | Args: 87 | model (nn.Module): a model possibly containing BN modules. 88 | 89 | Returns: 90 | list[nn.Module]: all BN modules in the model. 91 | """ 92 | # Finds all the bn layers. 
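    # Only modules that are currently in training mode are returned, so any BN
    # layer switched to eval() beforehand is left untouched by update_bn_stats().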
93 | bn_layers = [ 94 | m for m in model.modules() 95 | if m.training and isinstance(m, BN_MODULE_TYPES) 96 | ] 97 | return bn_layers 98 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/scale_grad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from torch.autograd.function import Function 4 | 5 | 6 | class _ScaleGradient(Function): 7 | @staticmethod 8 | def forward(ctx, input, scale): 9 | ctx.scale = scale 10 | return input 11 | 12 | @staticmethod 13 | def backward(ctx, grad_output): 14 | return grad_output * ctx.scale, None 15 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/weight_init.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import torch.nn as nn 5 | 6 | from dl_lib.layers.batch_norm import BatchNorm2d, NaiveSyncBatchNorm 7 | 8 | 9 | def constant_init(module, val, bias=0): 10 | nn.init.constant_(module.weight, val) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 16 | assert distribution in ['uniform', 'normal'] 17 | if distribution == 'uniform': 18 | nn.init.xavier_uniform_(module.weight, gain=gain) 19 | else: 20 | nn.init.xavier_normal_(module.weight, gain=gain) 21 | if hasattr(module, 'bias') and module.bias is not None: 22 | nn.init.constant_(module.bias, bias) 23 | 24 | 25 | def normal_init(module, mean=0, std=1, bias=0): 26 | nn.init.normal_(module.weight, mean, std) 27 | if hasattr(module, 'bias') and module.bias is not None: 28 | nn.init.constant_(module.bias, bias) 29 | 30 | 31 | def uniform_init(module, a=0, b=1, bias=0): 32 | nn.init.uniform_(module.weight, a, b) 33 | if hasattr(module, 'bias') and module.bias is not None: 34 | nn.init.constant_(module.bias, bias) 35 | 36 | 37 | def kaiming_init(module, 38 | a=0, 39 | mode='fan_out', 40 | nonlinearity='relu', 41 | bias=0, 42 | distribution='normal'): 43 | assert distribution in ['uniform', 'normal'] 44 | if distribution == 'uniform': 45 | nn.init.kaiming_uniform_(module.weight, 46 | a=a, 47 | mode=mode, 48 | nonlinearity=nonlinearity) 49 | else: 50 | nn.init.kaiming_normal_(module.weight, 51 | a=a, 52 | mode=mode, 53 | nonlinearity=nonlinearity) 54 | if hasattr(module, 'bias') and module.bias is not None: 55 | nn.init.constant_(module.bias, bias) 56 | 57 | 58 | def caffe2_xavier_init(module, bias=0): 59 | # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch 60 | # Acknowledgment to FAIR's internal code 61 | kaiming_init(module, 62 | a=1, 63 | mode='fan_in', 64 | nonlinearity='leaky_relu', 65 | distribution='uniform') 66 | 67 | 68 | def c2_xavier_fill(module: nn.Module): 69 | """ 70 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2. 71 | Also initializes `module.bias` to 0. 72 | 73 | Args: 74 | module (torch.nn.Module): module to initialize. 
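    Examples (a usage sketch; the ``nn.Conv2d`` layer is an arbitrary illustration)::

        >>> conv = nn.Conv2d(3, 64, kernel_size=3)
        >>> c2_xavier_fill(conv)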
75 | """ 76 | # Caffe2 implementation of XavierFill in fact 77 | # corresponds to kaiming_uniform_ in PyTorch 78 | nn.init.kaiming_uniform_(module.weight, a=1) 79 | if module.bias is not None: 80 | nn.init.constant_(module.bias, 0) 81 | 82 | 83 | def c2_msra_fill(module: nn.Module): 84 | """ 85 | Initialize `module.weight` using the "MSRAFill" implemented in Caffe2. 86 | Also initializes `module.bias` to 0. 87 | 88 | Args: 89 | module (torch.nn.Module): module to initialize. 90 | """ 91 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 92 | if module.bias is not None: 93 | nn.init.constant_(module.bias, 0) 94 | 95 | 96 | def kaiming_init_module(module, 97 | a=0, 98 | mode='fan_out', 99 | nonlinearity='relu', 100 | bias=0, 101 | distribution='normal'): 102 | assert distribution in ['uniform', 'normal'] 103 | 104 | for name, m in module.named_modules(): 105 | if isinstance(m, nn.Conv2d): 106 | if distribution == 'uniform': 107 | nn.init.kaiming_uniform_(m.weight, 108 | a=a, 109 | mode=mode, 110 | nonlinearity=nonlinearity) 111 | else: 112 | nn.init.kaiming_normal_(m.weight, 113 | a=a, 114 | mode=mode, 115 | nonlinearity=nonlinearity) 116 | if hasattr(m, 'bias') and m.bias is not None: 117 | nn.init.constant_(m.bias, bias) 118 | elif isinstance(m, 119 | (BatchNorm2d, nn.SyncBatchNorm, NaiveSyncBatchNorm)): 120 | if m.weight is not None: 121 | nn.init.constant_(m.weight, 1) 122 | if m.bias is not None: 123 | nn.init.constant_(m.bias, 0) 124 | -------------------------------------------------------------------------------- /dl_lib/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | 5 | def sem_seg_postprocess(result, img_size, output_height, output_width): 6 | """ 7 | Return semantic segmentation predictions in the original resolution. 8 | 9 | The input images are often resized when entering semantic segmentor. Moreover, in same 10 | cases, they also padded inside segmentor to be divisible by maximum network stride. 11 | As a result, we often need the predictions of the segmentor in a different 12 | resolution from its inputs. 13 | 14 | Args: 15 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 16 | where C is the number of classes, and H, W are the height and width of the prediction. 17 | img_size (tuple): image size that segmentor is taking as input. 18 | output_height, output_width: the desired output resolution. 19 | 20 | Returns: 21 | semantic segmentation prediction (Tensor): A tensor of the shape 22 | (C, output_height, output_width) that contains per-pixel soft predictions. 23 | """ 24 | result = result[:, :img_size[0], :img_size[1]].expand(1, -1, -1, -1) 25 | result = F.interpolate(result, 26 | size=(output_height, output_width), 27 | mode="bilinear", 28 | align_corners=False)[0] 29 | return result 30 | -------------------------------------------------------------------------------- /dl_lib/modeling/test_time_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import copy 3 | from itertools import count 4 | 5 | import numpy as np 6 | import torch 7 | from torch import nn 8 | 9 | from dl_lib.data.detection_utils import read_image 10 | from dl_lib.data.transforms import ResizeShortestEdge 11 | 12 | __all__ = ["DatasetMapperTTA", "SemanticSegmentorWithTTA"] 13 | 14 | 15 | class DatasetMapperTTA: 16 | """ 17 | Implement test-time augmentation for detection data. 18 | It is a callable which takes a dataset dict from a detection dataset, 19 | and returns a list of dataset dicts where the images 20 | are augmented from the input image by the transformations defined in the config. 21 | This is used for test-time augmentation. 22 | """ 23 | def __init__(self, cfg): 24 | self.min_sizes = cfg.TEST.AUG.MIN_SIZES 25 | self.max_size = cfg.TEST.AUG.MAX_SIZE 26 | self.flip = cfg.TEST.AUG.FLIP 27 | self.image_format = cfg.INPUT.FORMAT 28 | 29 | def __call__(self, dataset_dict): 30 | """ 31 | Args: 32 | dict: a detection dataset dict 33 | 34 | Returns: 35 | list[dict]: 36 | a list of dataset dicts, which contain augmented versions of the input image. 37 | The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. 38 | """ 39 | ret = [] 40 | if "image" not in dataset_dict: 41 | numpy_image = read_image(dataset_dict["file_name"], 42 | self.image_format) 43 | else: 44 | numpy_image = dataset_dict["image"].permute( 45 | 1, 2, 0).numpy().astype("uint8") 46 | for min_size in self.min_sizes: 47 | image = np.copy(numpy_image) 48 | tfm = ResizeShortestEdge(min_size, 49 | self.max_size).get_transform(image) 50 | resized = tfm.apply_image(image) 51 | resized = torch.as_tensor( 52 | resized.transpose(2, 0, 1).astype("float32")) 53 | 54 | dic = copy.deepcopy(dataset_dict) 55 | dic["horiz_flip"] = False 56 | dic["image"] = resized 57 | ret.append(dic) 58 | 59 | if self.flip: 60 | dic = copy.deepcopy(dataset_dict) 61 | dic["horiz_flip"] = True 62 | dic["image"] = torch.flip(resized, dims=[2]) 63 | ret.append(dic) 64 | return ret 65 | 66 | 67 | class SemanticSegmentorWithTTA(nn.Module): 68 | """ 69 | A SemanticSegmentor with test-time augmentation enabled. 70 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. 71 | """ 72 | def __init__(self, cfg, model, tta_mapper=None, batch_size=1): 73 | """ 74 | Args: 75 | cfg (CfgNode): 76 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. 77 | tta_mapper (callable): takes a dataset dict and returns a list of 78 | augmented versions of the dataset dict. Defaults to 79 | `DatasetMapperTTA(cfg)`. 80 | batch_size (int): batch the augmented images into this batch size for inference. 81 | """ 82 | super().__init__() 83 | self.cfg = copy.deepcopy(cfg) 84 | self.model = model 85 | 86 | if tta_mapper is None: 87 | tta_mapper = DatasetMapperTTA(cfg) 88 | self.tta_mapper = tta_mapper 89 | self.batch_size = batch_size 90 | 91 | def _batch_inference(self, batched_inputs): 92 | """ 93 | Execute inference on a list of inputs, 94 | using batch size = self.batch_size, instead of the length of the list.
95 | 96 | Inputs & outputs have the same format as :meth:`SemanticSegmentor.inference` 97 | """ 98 | 99 | outputs = [] 100 | inputs = [] 101 | for idx, input in zip(count(), batched_inputs): 102 | inputs.append(input) 103 | if len(inputs 104 | ) == self.batch_size or idx == len(batched_inputs) - 1: 105 | outputs.extend(self.model.forward(inputs, )) 106 | inputs = [] 107 | return outputs 108 | 109 | def __call__(self, batched_inputs): 110 | """ 111 | Same input/output format as :meth:`SemanticSegmentor.forward` 112 | """ 113 | return [self._inference_one_image(x) for x in batched_inputs] 114 | 115 | def _hflip_sem_seg(self, x): 116 | y = x.flip(dims=[2]) 117 | return y 118 | 119 | def _inference_one_image(self, input): 120 | """ 121 | Args: 122 | input (dict): one dataset dict 123 | 124 | Returns: 125 | dict: one output dict 126 | """ 127 | augmented_inputs = self.tta_mapper(input) 128 | 129 | do_hflip = [k.pop("horiz_flip", False) for k in augmented_inputs] 130 | heights = [k["height"] for k in augmented_inputs] 131 | widths = [k["width"] for k in augmented_inputs] 132 | assert ( 133 | len(set(heights)) == 1 and len(set(widths)) == 1 134 | ), "Augmented version of the inputs should have the same original resolution!" 135 | 136 | # 1. Segment from all augmented versions 137 | # 1.1: forward with all augmented images 138 | outputs = self._batch_inference(augmented_inputs) 139 | # 1.2: union the results 140 | for idx, output in enumerate(outputs): 141 | if do_hflip[idx]: 142 | output["sem_seg"] = self._hflip_sem_seg(output["sem_seg"]) 143 | all_pred_masks = torch.stack([o["sem_seg"] for o in outputs], dim=0) 144 | avg_pred_masks = torch.mean(all_pred_masks, dim=0) 145 | output = outputs[0] 146 | output["sem_seg"] = avg_pred_masks 147 | return output 148 | -------------------------------------------------------------------------------- /dl_lib/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /dl_lib/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from typing import Any, Dict, List 3 | 4 | import torch 5 | from torch.optim.lr_scheduler import LambdaLR, OneCycleLR 6 | 7 | from .lr_scheduler import PolyLR, WarmupCosineLR, WarmupMultiStepLR 8 | 9 | 10 | def build_optimizer(cfg, model: torch.nn.Module) -> torch.optim.Optimizer: 11 | """ 12 | Build an optimizer from config.SOLVER.OPTIMIZER 13 | """ 14 | if cfg.NAME == "SGD": 15 | params: List[Dict[str, Any]] = [] 16 | for key, value in model.named_parameters(): 17 | if not cfg.get("WEIGHT_DECAY_CONV_ONLY", False): 18 | if not value.requires_grad: 19 | continue 20 | lr = cfg.BASE_LR 21 | weight_decay = cfg.WEIGHT_DECAY 22 | if key.endswith("norm.weight") or key.endswith("norm.bias"): 23 | weight_decay = cfg.WEIGHT_DECAY_NORM 24 | elif key.endswith(".bias"): 25 | # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 26 | # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer 27 | # hyperparameters are by default exactly the same as for regular 28 | # weights. 
29 | lr = cfg.BASE_LR * cfg.BIAS_LR_FACTOR 30 | weight_decay = cfg.WEIGHT_DECAY_BIAS 31 | else: 32 | lr = cfg.BASE_LR 33 | if "conv.weight" not in key: 34 | weight_decay = 0 35 | else: 36 | weight_decay = cfg.WEIGHT_DECAY 37 | # multiply lr for gating function 38 | if "GATE_LR_MULTI" in cfg: 39 | if cfg.GATE_LR_MULTI > 0.0 and "gate_conv" in key: 40 | lr *= cfg.GATE_LR_MULTI 41 | 42 | params += [{ 43 | "params": [value], 44 | "lr": lr, 45 | "weight_decay": weight_decay 46 | }] 47 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.MOMENTUM) 48 | elif cfg.NAME == "AdamW": 49 | lr = cfg.BASE_LR 50 | optimizer = torch.optim.Adam(model.parameters(), 51 | lr=lr, 52 | betas=cfg.BETAS, 53 | weight_decay=cfg.WEIGHT_DECAY, 54 | amsgrad=cfg.AMSGRAD) 55 | return optimizer 56 | 57 | 58 | def build_lr_scheduler( 59 | cfg, optimizer: torch.optim.Optimizer 60 | ) -> torch.optim.lr_scheduler._LRScheduler: 61 | """ 62 | Build a LR scheduler from config. 63 | """ 64 | name = cfg.NAME 65 | if name == "WarmupMultiStepLR": 66 | return WarmupMultiStepLR( 67 | optimizer, 68 | cfg.STEPS, 69 | cfg.GAMMA, 70 | warmup_factor=cfg.WARMUP_FACTOR, 71 | warmup_iters=cfg.WARMUP_ITERS, 72 | warmup_method=cfg.WARMUP_METHOD, 73 | ) 74 | elif name == "WarmupCosineLR": 75 | return WarmupCosineLR( 76 | optimizer, 77 | cfg.MAX_ITER, 78 | warmup_factor=cfg.WARMUP_FACTOR, 79 | warmup_iters=cfg.WARMUP_ITERS, 80 | warmup_method=cfg.WARMUP_METHOD, 81 | ) 82 | elif name == "LambdaLR": 83 | return LambdaLR(optimizer, cfg.LAMBDA_SCHEDULE) 84 | elif name == "OneCycleLR": 85 | return OneCycleLR(optimizer, 86 | cfg.MAX_LR, 87 | total_steps=cfg.MAX_ITER, 88 | pct_start=cfg.PCT_START, 89 | base_momentum=cfg.BASE_MOM, 90 | max_momentum=cfg.MAX_MOM, 91 | div_factor=cfg.DIV_FACTOR) 92 | elif name == "PolyLR": 93 | return PolyLR( 94 | optimizer, 95 | cfg.MAX_ITER, 96 | cfg.POLY_POWER, 97 | warmup_factor=cfg.WARMUP_FACTOR, 98 | warmup_iters=cfg.WARMUP_ITERS, 99 | warmup_method=cfg.WARMUP_METHOD, 100 | ) 101 | else: 102 | raise ValueError("Unknown LR scheduler: {}".format(name)) 103 | -------------------------------------------------------------------------------- /dl_lib/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /dl_lib/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | from typing import Any, List, Sequence, Tuple, Union 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | 10 | class ImageList(object): 11 | """ 12 | Structure that holds a list of images (of possibly 13 | varying sizes) as a single tensor. 
14 | This works by padding the images to the same size, 15 | and storing in a field the original sizes of each image 16 | 17 | Attributes: 18 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 19 | """ 20 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, 21 | int]]): 22 | """ 23 | Arguments: 24 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 25 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). 26 | """ 27 | self.tensor = tensor 28 | self.image_sizes = image_sizes 29 | 30 | def __len__(self) -> int: 31 | return len(self.image_sizes) 32 | 33 | def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor: 34 | """ 35 | Access the individual image in its original size. 36 | 37 | Returns: 38 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 39 | """ 40 | size = self.image_sizes[idx] 41 | return self.tensor[idx, ..., :size[0], :size[1]] # type: ignore 42 | 43 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 44 | cast_tensor = self.tensor.to(*args, **kwargs) 45 | return ImageList(cast_tensor, self.image_sizes) 46 | 47 | @property 48 | def device(self) -> torch.device: 49 | return self.tensor.device 50 | 51 | @staticmethod 52 | def from_tensors( 53 | tensors: Sequence[torch.Tensor], 54 | size_divisibility: int = 0, 55 | pad_ref_long: bool = False, 56 | pad_value: float = 0.0, 57 | ) -> "ImageList": 58 | """ 59 | Args: 60 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 61 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded with `pad_value` 62 | so that they will have the same shape. 63 | size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure 64 | the common height and width is divisible by `size_divisibility` 65 | pad_value (float): value to pad 66 | 67 | Returns: 68 | an `ImageList`. 69 | """ 70 | assert len(tensors) > 0 71 | assert isinstance(tensors, (tuple, list)) 72 | for t in tensors: 73 | assert isinstance(t, torch.Tensor), type(t) 74 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 75 | # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors 76 | max_size = list(max(s) for s in zip(*[img.shape for img in tensors])) 77 | if pad_ref_long: 78 | max_size_max = max(max_size[-2:]) 79 | max_size[-2:] = [max_size_max] * 2 80 | max_size = tuple(max_size) 81 | 82 | if size_divisibility > 0: 83 | import math 84 | 85 | stride = size_divisibility 86 | max_size = list(max_size) # type: ignore 87 | max_size[-2] = int(math.ceil(max_size[-2] / stride) * 88 | stride) # type: ignore 89 | max_size[-1] = int(math.ceil(max_size[-1] / stride) * 90 | stride) # type: ignore 91 | max_size = tuple(max_size) 92 | 93 | image_sizes = [im.shape[-2:] for im in tensors] 94 | 95 | if len(tensors) == 1: 96 | # This seems slightly (2%) faster. 
97 | # TODO: check whether it's faster for multiple images as well 98 | image_size = image_sizes[0] 99 | padded = F.pad( 100 | tensors[0], 101 | [ 102 | 0, max_size[-1] - image_size[1], 0, 103 | max_size[-2] - image_size[0] 104 | ], 105 | value=pad_value, 106 | ) 107 | batched_imgs = padded.unsqueeze_(0) 108 | else: 109 | batch_shape = (len(tensors), ) + max_size 110 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 111 | for img, pad_img in zip(tensors, batched_imgs): 112 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 113 | 114 | return ImageList(batched_imgs.contiguous(), image_sizes) 115 | -------------------------------------------------------------------------------- /dl_lib/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /dl_lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /dl_lib/utils/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import sys 5 | import time 6 | from typing import Dict, List 7 | 8 | import numpy as np 9 | 10 | 11 | def timeit(num_iters: int = -1, warmup_iters: int = 0): 12 | """ 13 | This is intended to be used as a decorator to time any function. 14 | 15 | Args: 16 | num_iters (int): number of iterations used to compute the average time 17 | (sec) required to run the function. If negative, the number of 18 | iterations is determined dynamically by running the function a few 19 | times to make sure the estimate is stable. 20 | warmup_iters (int): number of iterations used to warm up the function. 21 | This is useful for functions that exhibit poor performance during 22 | the first few times they run (due to caches, autotuning, etc). 23 | Returns: 24 | Dict[str, float]: dictionary of the aggregated timing estimates. 25 | "iterations": number of iterations used to compute the estimated 26 | time. 27 | "mean": average time (sec) used to run the function. 28 | "median": median time (sec) used to run the function. 29 | "min": minimal time (sec) used to run the function. 30 | "max": maximal time (sec) used to run the function. 31 | "stddev": standard deviation of the time (sec) used to run the 32 | function. 33 | """ 34 | def decorator(func): 35 | def decorated(*args, **kwargs) -> Dict[str, float]: 36 | # Warmup phase. 37 | for _ in range(warmup_iters): 38 | func(*args, **kwargs) 39 | 40 | # Estimate the run time of the function. 41 | total_time: float = 0 42 | count = 0 43 | run_times: List[float] = [] 44 | max_num_iters = num_iters if num_iters > 0 else sys.maxsize 45 | for _ in range(max_num_iters): 46 | start_time = time.time() 47 | func(*args, **kwargs) 48 | run_time = time.time() - start_time 49 | 50 | run_times.append(run_time) 51 | total_time += run_time 52 | count += 1 53 | if num_iters < 0 and total_time >= 0.5: 54 | # If num_iters is negative, run the function enough times so 55 | # that we can have a more robust estimate of the average time.
56 | break 57 | assert count == len(run_times) 58 | ret: Dict[str, float] = {} 59 | ret["iterations"] = count 60 | ret["mean"] = total_time / count 61 | ret["median"] = np.median(run_times) 62 | ret["min"] = np.min(run_times) 63 | ret["max"] = np.max(run_times) 64 | ret["stddev"] = np.std(run_times) 65 | return ret 66 | 67 | return decorated 68 | 69 | return decorator 70 | 71 | 72 | def benchmark(func, 73 | bm_name: str, 74 | kwargs_list: List[Dict], 75 | *, 76 | num_iters: int = -1, 77 | warmup_iters: int = 0) -> None: 78 | """ 79 | Benchmark the input function and print out the results. 80 | 81 | Args: 82 | func (callable): a closure that returns a function for benchmarking, 83 | where initialization can be done before the function to benchmark. 84 | bm_name (str): name of the benchmark to print out, e.g. "BM_UPDATE". 85 | kwargs_list (list): a list of argument dict to pass to the function. The 86 | intput function will be timed separately for each argument dict. 87 | num_iters (int): number of iterations to run. Defaults to run until 0.5s. 88 | warmup_iters (int): number of iterations used to warm up the function. 89 | 90 | Outputs: 91 | For each argument dict, print out the time (in microseconds) required 92 | to run the function along with the number of iterations used to get 93 | the timing estimate. Example output: 94 | 95 | Benchmark Avg Time(μs) Peak Time(μs) Iterations 96 | ------------------------------------------------------------------- 97 | BM_UPDATE_100 820 914 610 98 | BM_UPDATE_1000 7655 8709 66 99 | BM_UPDATE_10000 78062 81748 7 100 | ------------------------------------------------------------------- 101 | """ 102 | 103 | print("") 104 | outputs = [] 105 | for kwargs in kwargs_list: 106 | func_bm = func(**kwargs) 107 | 108 | time_func = timeit(num_iters=num_iters, 109 | warmup_iters=warmup_iters)(func_bm) 110 | 111 | ret = time_func() 112 | name = bm_name 113 | if kwargs: 114 | name += "_" + "_".join(str(v) for k, v in kwargs.items()) 115 | outputs.append([ 116 | name, 117 | str(ret["mean"] * 1000000), 118 | str(ret["max"] * 1000000), 119 | str(ret["iterations"]), 120 | ]) 121 | outputs = np.array(outputs) 122 | # Calculate column widths for metrics table. 123 | c1 = len(max(outputs[:, 0], key=len)) 124 | c2 = len(max(outputs[:, 1], key=len)) 125 | c3 = len(max(outputs[:, 2], key=len)) 126 | c4 = len(max(outputs[:, 3], key=len)) 127 | dash = "-" * 80 128 | print("{:{}s} {:>{}s} {:>{}s} {:>{}s}".format( 129 | "Benchmark", 130 | c1, 131 | "Avg Time(μs)", 132 | c2, 133 | "Peak Time(μs)", 134 | c3, 135 | "Iterations", 136 | c4, 137 | )) 138 | print(dash) 139 | for output in outputs: 140 | print("{:{}s} {:15.0f} {:15.0f} {:14d}".format( 141 | output[0], 142 | c1, 143 | float(output[1]), 144 | float(output[2]), 145 | int(output[3]), 146 | )) 147 | print(dash) 148 | -------------------------------------------------------------------------------- /dl_lib/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import os 3 | import subprocess 4 | import sys 5 | from collections import defaultdict 6 | 7 | import numpy as np 8 | import PIL 9 | import torch 10 | import torchvision 11 | from tabulate import tabulate 12 | 13 | __all__ = ["collect_env_info"] 14 | 15 | 16 | def collect_torch_env(): 17 | try: 18 | import torch.__config__ 19 | 20 | return torch.__config__.show() 21 | except ImportError: 22 | # compatible with older versions of pytorch 23 | from torch.utils.collect_env import get_pretty_env_info 24 | 25 | return get_pretty_env_info() 26 | 27 | 28 | def get_env_module(): 29 | var_name = "dl_lib_ENV_MODULE" 30 | return var_name, os.environ.get(var_name, "") 31 | 32 | 33 | def collect_env_info(): 34 | data = [] 35 | data.append(("sys.platform", sys.platform)) 36 | data.append(("Python", sys.version.replace("\n", ""))) 37 | data.append(("Numpy", np.__version__)) 38 | try: 39 | from dl_lib import _C 40 | except ImportError: 41 | data.append(("dl_lib._C", "failed to import")) 42 | else: 43 | data.append(("dl_lib Compiler", _C.get_compiler_version())) 44 | data.append(("dl_lib CUDA Compiler", _C.get_cuda_version())) 45 | 46 | data.append(get_env_module()) 47 | data.append(("PyTorch", torch.__version__)) 48 | data.append(("PyTorch Debug Build", torch.version.debug)) 49 | try: 50 | data.append(("torchvision", torchvision.__version__)) 51 | except AttributeError: 52 | data.append(("torchvision", "unknown")) 53 | 54 | has_cuda = torch.cuda.is_available() 55 | data.append(("CUDA available", has_cuda)) 56 | if has_cuda: 57 | devices = defaultdict(list) 58 | for k in range(torch.cuda.device_count()): 59 | devices[torch.cuda.get_device_name(k)].append(str(k)) 60 | for name, devids in devices.items(): 61 | data.append(("GPU " + ",".join(devids), name)) 62 | 63 | from torch.utils.cpp_extension import CUDA_HOME 64 | 65 | data.append(("CUDA_HOME", str(CUDA_HOME))) 66 | 67 | if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): 68 | try: 69 | nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") 70 | nvcc = subprocess.check_output( 71 | "'{}' -V | tail -n1".format(nvcc), shell=True) 72 | nvcc = nvcc.decode("utf-8").strip() 73 | except subprocess.SubprocessError: 74 | nvcc = "Not Available" 75 | data.append(("NVCC", nvcc)) 76 | 77 | cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) 78 | if cuda_arch_list: 79 | data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) 80 | data.append(("Pillow", PIL.__version__)) 81 | 82 | try: 83 | import cv2 84 | 85 | data.append(("cv2", cv2.__version__)) 86 | except ImportError: 87 | pass 88 | env_str = tabulate(data) + "\n" 89 | env_str += collect_torch_env() 90 | return env_str 91 | 92 | 93 | if __name__ == "__main__": 94 | print(collect_env_info()) 95 | -------------------------------------------------------------------------------- /dl_lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | An awesome colormap for really neat visualizations. 4 | Copied from Detectron, and removed gray colors. 
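A minimal usage sketch of the two helpers defined below:

    colors = colormap(rgb=True, maximum=1)      # (N, 3) float array in [0, 1]
    one = random_color(rgb=True, maximum=255)   # a single RGB color as 3 numbers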
5 | """ 6 | 7 | import numpy as np 8 | 9 | __all__ = ["colormap", "random_color"] 10 | 11 | # fmt: off 12 | # RGB: 13 | _COLORS = np.array([ 14 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, 15 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, 16 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, 17 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, 18 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, 19 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 20 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 21 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 22 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, 23 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, 24 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, 25 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, 26 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, 27 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, 28 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 29 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.333, 30 | 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 31 | 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 32 | 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 33 | 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 34 | 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 35 | 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.857, 0.857, 0.857, 1.000, 36 | 1.000, 1.000 37 | ]).astype(np.float32).reshape(-1, 3) 38 | # fmt: on 39 | 40 | 41 | def colormap(rgb=False, maximum=255): 42 | """ 43 | Args: 44 | rgb (bool): whether to return RGB colors or BGR colors. 45 | maximum (int): either 255 or 1 46 | 47 | Returns: 48 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 49 | """ 50 | assert maximum in [255, 1], maximum 51 | c = _COLORS * maximum 52 | if not rgb: 53 | c = c[:, ::-1] 54 | return c 55 | 56 | 57 | def random_color(rgb=False, maximum=255): 58 | """ 59 | Args: 60 | rgb (bool): whether to return RGB colors or BGR colors. 
61 | maximum (int): either 255 or 1 62 | 63 | Returns: 64 | ndarray: a vector of 3 numbers 65 | """ 66 | idx = np.random.randint(0, len(_COLORS)) 67 | ret = _COLORS[idx] * maximum 68 | if not rgb: 69 | ret = ret[::-1] 70 | return ret 71 | 72 | 73 | if __name__ == "__main__": 74 | import cv2 75 | 76 | size = 100 77 | H, W = 10, 10 78 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 79 | for h in range(H): 80 | for w in range(W): 81 | idx = h * W + w 82 | if idx >= len(_COLORS): 83 | break 84 | canvas[h * size:(h + 1) * size, 85 | w * size:(w + 1) * size] = _COLORS[idx] 86 | cv2.imshow("a", canvas) 87 | cv2.waitKey(0) 88 | -------------------------------------------------------------------------------- /dl_lib/utils/config_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import collections 4 | import logging 5 | import re 6 | 7 | import six 8 | from colorama import Back, Fore, Style 9 | 10 | # python 3.8+ compatibility 11 | try: 12 | collectionsAbc = collections.abc 13 | except ImportError: 14 | collectionsAbc = collections 15 | 16 | 17 | def highlight(keyword, target, color=Fore.BLACK + Back.YELLOW): 18 | """ 19 | use given color to highlight keyword in target string 20 | 21 | Args: 22 | keyword(str): highlight string 23 | target(str): target string 24 | color(str): string represent the color, use black foreground 25 | and yellow background as default 26 | 27 | Returns: 28 | (str) target string with keyword highlighted 29 | 30 | """ 31 | return re.sub(keyword, color + r"\g<0>" + Style.RESET_ALL, target) 32 | 33 | 34 | def find_key(param_dict: dict, key: str) -> dict: 35 | """ 36 | find key in dict 37 | 38 | Args: 39 | param_dict(dict): 40 | key(str): 41 | 42 | Returns: 43 | (dict) 44 | 45 | Examples:: 46 | >>> d = dict(abc=2, ab=4, c=4) 47 | >>> find_key(d, "ab") 48 | {'abc': 2, 'ab':4} 49 | 50 | """ 51 | find_result = {} 52 | for k, v in param_dict.items(): 53 | if re.search(key, k): 54 | find_result[k] = v 55 | if isinstance(v, dict): 56 | res = find_key(v, key) 57 | if res: 58 | find_result[k] = res 59 | return find_result 60 | 61 | 62 | def diff_dict(src, dst): 63 | """ 64 | find difference between src dict and dst dict 65 | 66 | Args: 67 | src(dict): src dict 68 | dst(dict): dst dict 69 | 70 | Returns: 71 | (dict) dict contains all the difference key 72 | 73 | """ 74 | diff_result = {} 75 | for k, v in src.items(): 76 | if k not in dst: 77 | diff_result[k] = v 78 | elif dst[k] != v: 79 | if isinstance(v, dict): 80 | diff_result[k] = diff_dict(v, dst[k]) 81 | else: 82 | diff_result[k] = v 83 | return diff_result 84 | 85 | 86 | def _assert_with_logging(cond, msg): 87 | logger = logging.getLogger(__name__) 88 | if not cond: 89 | logger.debug(msg) 90 | assert cond, msg 91 | 92 | 93 | def update(d, u): 94 | for k, v in six.iteritems(u): 95 | dv = d.get(k, {}) 96 | if not isinstance(dv, collectionsAbc.Mapping): 97 | d[k] = v 98 | elif isinstance(v, collectionsAbc.Mapping): 99 | d[k] = update(dv, v) 100 | else: 101 | d[k] = v 102 | return d 103 | 104 | 105 | def _check_and_coerce_cfg_value_type(replacement, original, key, full_key): 106 | """ 107 | Checks that `replacement`, which is intended to replace `original` is of 108 | the right type. The type is correct if it matches exactly or is one of a few 109 | cases in which the type can be easily coerced. 
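    Examples (a sketch of the list/tuple coercion implemented below)::

        >>> _check_and_coerce_cfg_value_type((1, 2), [1, 2], "KEY", "A.KEY")
        [1, 2]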
110 | """ 111 | original_type = type(original) 112 | replacement_type = type(replacement) 113 | 114 | # The types must match (with some exceptions) 115 | if replacement_type == original_type: 116 | return replacement 117 | 118 | # Cast replacement from from_type to to_type if the replacement and original 119 | # types match from_type and to_type 120 | def conditional_cast(from_type, to_type): 121 | if replacement_type == from_type and original_type == to_type: 122 | return True, to_type(replacement) 123 | else: 124 | return False, None 125 | 126 | # Conditionally casts 127 | # list <-> tuple 128 | casts = [(tuple, list), (list, tuple)] 129 | # For py2: allow converting from str (bytes) to a unicode string 130 | try: 131 | casts.append((str, unicode)) # noqa: F821 132 | except Exception: 133 | pass 134 | 135 | for (from_type, to_type) in casts: 136 | converted, converted_value = conditional_cast(from_type, to_type) 137 | if converted: 138 | return converted_value 139 | 140 | raise ValueError( 141 | "Type mismatch ({} vs. {}) with values ({} vs. {}) for config " 142 | "key: {}".format(original_type, replacement_type, original, 143 | replacement, full_key)) 144 | -------------------------------------------------------------------------------- /dl_lib/utils/download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import logging 5 | import os 6 | import shutil 7 | from typing import Callable, Optional 8 | from urllib import request 9 | 10 | 11 | def download(url: str, 12 | dir: str, 13 | *, 14 | filename: Optional[str] = None, 15 | progress: bool = True) -> str: 16 | """ 17 | Download a file from a given URL to a directory. If file exists, will not 18 | overwrite the existing file. 19 | 20 | Args: 21 | url (str): 22 | dir (str): the directory to download the file 23 | filename (str or None): the basename to save the file. 24 | Will use the name in the URL if not given. 25 | progress (bool): whether to use tqdm to draw a progress bar. 26 | 27 | Returns: 28 | str: the path to the downloaded file or the existing one. 29 | """ 30 | os.makedirs(dir, exist_ok=True) 31 | if filename is None: 32 | filename = url.split("/")[-1] 33 | assert len(filename), "Cannot obtain filename from url {}".format(url) 34 | fpath = os.path.join(dir, filename) 35 | logger = logging.getLogger(__name__) 36 | 37 | if os.path.isfile(fpath): 38 | logger.info("File {} exists! Skipping download.".format(filename)) 39 | return fpath 40 | 41 | tmp = fpath + ".tmp" # download to a tmp file first, to be more atomic. 
42 | try: 43 | logger.info("Downloading from {} ...".format(url)) 44 | if progress: 45 | import tqdm 46 | 47 | def hook( 48 | t: tqdm.tqdm) -> Callable[[int, int, Optional[int]], None]: 49 | last_b = [0] 50 | 51 | def inner(b: int, 52 | bsize: int, 53 | tsize: Optional[int] = None) -> None: 54 | if tsize is not None: 55 | t.total = tsize 56 | t.update((b - last_b[0]) * bsize) # type: ignore 57 | last_b[0] = b 58 | 59 | return inner 60 | 61 | with tqdm.tqdm( # type: ignore 62 | unit="B", 63 | unit_scale=True, 64 | miniters=1, 65 | desc=filename, 66 | leave=True) as t: 67 | tmp, _ = request.urlretrieve(url, 68 | filename=tmp, 69 | reporthook=hook(t)) 70 | 71 | else: 72 | tmp, _ = request.urlretrieve(url, filename=tmp) 73 | statinfo = os.stat(tmp) 74 | size = statinfo.st_size 75 | if size == 0: 76 | raise IOError("Downloaded an empty file from {}!".format(url)) 77 | # download to tmp first and move to fpath, to make this function more 78 | # atomic. 79 | shutil.move(tmp, fpath) 80 | except IOError: 81 | logger.error("Failed to download {}".format(url)) 82 | raise 83 | finally: 84 | try: 85 | os.unlink(tmp) 86 | except IOError: 87 | pass 88 | 89 | logger.info("Successfully downloaded " + fpath + ". " + str(size) + 90 | " bytes.") 91 | return fpath 92 | -------------------------------------------------------------------------------- /dl_lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import importlib 3 | import importlib.util 4 | import logging 5 | import os 6 | import random 7 | import sys 8 | from datetime import datetime 9 | 10 | import numpy as np 11 | import torch 12 | 13 | __all__ = ["seed_all_rng"] 14 | 15 | 16 | def seed_all_rng(seed=None): 17 | """ 18 | Set the random seed for the RNG in torch, numpy and python. 19 | 20 | Args: 21 | seed (int): if None, will use a strong random seed. 22 | """ 23 | if seed is None: 24 | seed = (os.getpid() + int(datetime.now().strftime("%S%f")) + 25 | int.from_bytes(os.urandom(2), "big")) 26 | logger = logging.getLogger(__name__) 27 | logger.info("Using a generated random seed {}".format(seed)) 28 | np.random.seed(seed) 29 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 30 | random.seed(seed) 31 | 32 | 33 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path 34 | def _import_file(module_name, file_path, make_importable=False): 35 | spec = importlib.util.spec_from_file_location(module_name, file_path) 36 | module = importlib.util.module_from_spec(spec) 37 | spec.loader.exec_module(module) 38 | if make_importable: 39 | sys.modules[module_name] = module 40 | return module 41 | 42 | 43 | def _configure_libraries(): 44 | """ 45 | Configurations for some libraries. 46 | """ 47 | # An environment option to disable `import cv2` globally, 48 | # in case it leads to negative performance impact 49 | disable_cv2 = int(os.environ.get("dl_lib_DISABLE_CV2", False)) 50 | if disable_cv2: 51 | sys.modules["cv2"] = None 52 | else: 53 | # Disable opencl in opencv since its interaction with cuda often has negative effects 54 | # This envvar is supported after OpenCV 3.4.0 55 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 56 | try: 57 | import cv2 58 | 59 | if int(cv2.__version__.split(".")[0]) >= 3: 60 | cv2.ocl.setUseOpenCL(False) 61 | except ImportError: 62 | pass 63 | 64 | 65 | _ENV_SETUP_DONE = False 66 | 67 | 68 | def setup_environment(): 69 | """Perform environment setup work. 
The default setup is a no-op, but this 70 | function allows the user to specify a Python source file or a module in 71 | the $dl_lib_ENV_MODULE environment variable, that performs 72 | custom setup work that may be necessary to their computing environment. 73 | """ 74 | global _ENV_SETUP_DONE 75 | if _ENV_SETUP_DONE: 76 | return 77 | _ENV_SETUP_DONE = True 78 | 79 | _configure_libraries() 80 | 81 | custom_module_path = os.environ.get("dl_lib_ENV_MODULE") 82 | 83 | if custom_module_path: 84 | setup_custom_environment(custom_module_path) 85 | else: 86 | # The default setup is a no-op 87 | pass 88 | 89 | 90 | def setup_custom_environment(custom_module): 91 | """ 92 | Load custom environment setup by importing a Python source file or a 93 | module, and run the setup function. 94 | """ 95 | if custom_module.endswith(".py"): 96 | module = _import_file("dl_lib.utils.env.custom_module", custom_module) 97 | else: 98 | module = importlib.import_module(custom_module) 99 | assert hasattr(module, "setup_environment") and callable( 100 | module.setup_environment), ( 101 | "Custom environment module defined in {} does not have the " 102 | "required callable attribute 'setup_environment'." 103 | ).format(custom_module) 104 | module.setup_environment() 105 | -------------------------------------------------------------------------------- /dl_lib/utils/history_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from typing import List, Tuple 5 | 6 | import numpy as np 7 | 8 | 9 | class HistoryBuffer: 10 | """ 11 | Track a series of scalar values and provide access to smoothed values over a 12 | window or the global average of the series. 13 | """ 14 | def __init__(self, max_length: int = 1000000): 15 | """ 16 | Args: 17 | max_length: maximal number of values that can be stored in the 18 | buffer. When the capacity of the buffer is exhausted, old 19 | values will be removed. 20 | """ 21 | self._max_length: int = max_length 22 | self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs 23 | self._count: int = 0 24 | self._global_avg: float = 0 25 | 26 | def update(self, value: float, iteration: float = None): 27 | """ 28 | Add a new scalar value produced at certain iteration. If the length 29 | of the buffer exceeds self._max_length, the oldest element will be 30 | removed from the buffer. 31 | """ 32 | if iteration is None: 33 | iteration = self._count 34 | if len(self._data) == self._max_length: 35 | self._data.pop(0) 36 | self._data.append((value, iteration)) 37 | 38 | self._count += 1 39 | self._global_avg += (value - self._global_avg) / self._count 40 | 41 | def latest(self): 42 | """ 43 | Return the latest scalar value added to the buffer. 44 | """ 45 | return self._data[-1][0] 46 | 47 | def median(self, window_size: int): 48 | """ 49 | Return the median of the latest `window_size` values in the buffer. 50 | """ 51 | return np.median([x[0] for x in self._data[-window_size:]]) 52 | 53 | def avg(self, window_size: int): 54 | """ 55 | Return the mean of the latest `window_size` values in the buffer. 56 | """ 57 | return np.mean([x[0] for x in self._data[-window_size:]]) 58 | 59 | def global_avg(self): 60 | """ 61 | Return the mean of all the elements in the buffer. Note that this 62 | includes those getting removed due to limited buffer storage. 
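        Examples (a sketch of the incremental average; the values are arbitrary)::

            >>> buf = HistoryBuffer()
            >>> for v in [1.0, 2.0, 3.0]:
            ...     buf.update(v)
            >>> buf.global_avg()
            2.0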
63 | """ 64 | return self._global_avg 65 | 66 | def values(self): 67 | """ 68 | Returns: 69 | list[(number, iteration)]: content of the current buffer. 70 | """ 71 | return self._data 72 | -------------------------------------------------------------------------------- /dl_lib/utils/imports.py: -------------------------------------------------------------------------------- 1 | import imp 2 | 3 | 4 | def dynamic_import(config_name, config_path): 5 | """ 6 | Dynamic import a project. 7 | 8 | Args: 9 | config_name (str): module name 10 | config_path (str): the dir that contains the .py with this module. 11 | 12 | Examples:: 13 | >>> root = "/data/repos/dl_lib_playground/zhubenjin/retinanet/" 14 | >>> project = root + "retinanet.res50.fpn.coco.800size.1x.mrcnn_sigmoid" 15 | >>> cfg = dynamic_import("config", project).config 16 | >>> net = dynamic_import("net", project) 17 | """ 18 | fp, pth, desc = imp.find_module(config_name, [config_path]) 19 | 20 | return imp.load_module(config_name, fp, pth, desc) 21 | -------------------------------------------------------------------------------- /dl_lib/utils/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | 7 | import torch 8 | 9 | __all__ = ["retry_if_cuda_oom"] 10 | 11 | 12 | @contextmanager 13 | def _ignore_torch_cuda_oom(): 14 | """ 15 | A context which ignores CUDA OOM exception from pytorch. 16 | """ 17 | try: 18 | yield 19 | except RuntimeError as e: 20 | # NOTE: the string may change? 21 | if "CUDA out of memory. " in str(e): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def retry_if_cuda_oom(func): 28 | r""" 29 | Makes a function retry itself after encountering 30 | pytorch's CUDA OOM error. 31 | It will first retry after calling `torch.cuda.empty_cache()`. 32 | 33 | If that still fails, it will then retry by trying to convert inputs to CPUs. 34 | In this case, it expects the function to dispatch to CPU implementation. 35 | The return values may become CPU tensors as well and it's user's 36 | responsibility to convert it back to CUDA tensor if needed. 37 | 38 | Args: 39 | func: a stateless callable that takes tensor-like objects as arguments 40 | 41 | Returns: 42 | a callable which retries `func` if OOM is encountered. 43 | 44 | Examples: 45 | 46 | .. code-block:: python 47 | 48 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 49 | # output may be on CPU even if inputs are on GPU 50 | 51 | Note: 52 | 1. When converting inputs to CPU, it will only look at each argument and check 53 | if it has `.device` and `.to` for conversion. Nested structures of tensors 54 | are not supported. 55 | 56 | 2. Since the function might be called more than once, it has to be 57 | stateless. 58 | """ 59 | def maybe_to_cpu(x): 60 | try: 61 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 62 | except AttributeError: 63 | like_gpu_tensor = False 64 | if like_gpu_tensor: 65 | return x.to(device="cpu") 66 | else: 67 | return x 68 | 69 | @wraps(func) 70 | def wrapped(*args, **kwargs): 71 | with _ignore_torch_cuda_oom(): 72 | return func(*args, **kwargs) 73 | 74 | # Clear cache and retry 75 | torch.cuda.empty_cache() 76 | with _ignore_torch_cuda_oom(): 77 | return func(*args, **kwargs) 78 | 79 | # Try on CPU. This slows down the code significantly, therefore print a notice. 
80 | logger = logging.getLogger(__name__) 81 | logger.info( 82 | "Attempting to copy inputs of {} to CPU due to CUDA OOM".format( 83 | str(func))) 84 | new_args = (maybe_to_cpu(x) for x in args) 85 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 86 | return func(*new_args, **new_kwargs) 87 | 88 | return wrapped 89 | -------------------------------------------------------------------------------- /dl_lib/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable, note that it uses 8 | heavy weight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | def __init__(self, obj): 15 | self._obj = obj 16 | 17 | def __reduce__(self): 18 | s = cloudpickle.dumps(self._obj) 19 | return cloudpickle.loads, (s, ) 20 | 21 | def __call__(self, *args, **kwargs): 22 | return self._obj(*args, **kwargs) 23 | 24 | def __getattr__(self, attr): 25 | # Ensure that the wrapped object can be used seamlessly as the previous object. 26 | if attr not in ["_obj"]: 27 | return getattr(self._obj, attr) 28 | return getattr(self, attr) 29 | -------------------------------------------------------------------------------- /dl_lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # -*- coding: utf-8 -*- 4 | 5 | from time import perf_counter 6 | from typing import Optional 7 | 8 | 9 | class Timer: 10 | """ 11 | A timer which computes the time elapsed since the start/reset of the timer. 12 | """ 13 | def __init__(self): 14 | self.reset() 15 | 16 | def reset(self): 17 | """ 18 | Reset the timer. 19 | """ 20 | self._start = perf_counter() 21 | self._paused: Optional[float] = None 22 | self._total_paused = 0 23 | 24 | def pause(self): 25 | """ 26 | Pause the timer. 27 | """ 28 | if self._paused is not None: 29 | raise ValueError("Trying to pause a Timer that is already paused!") 30 | self._paused = perf_counter() 31 | 32 | def is_paused(self) -> bool: 33 | """ 34 | Returns: 35 | bool: whether the timer is currently paused 36 | """ 37 | return self._paused is not None 38 | 39 | def resume(self): 40 | """ 41 | Resume the timer. 42 | """ 43 | if self._paused is None: 44 | raise ValueError("Trying to resume a Timer that is not paused!") 45 | self._total_paused += perf_counter() - self._paused 46 | self._paused = None 47 | 48 | def seconds(self) -> float: 49 | """ 50 | Returns: 51 | (float): the total number of seconds since the start/reset of the 52 | timer, excluding the time when the timer is paused. 
53 | """ 54 | if self._paused is not None: 55 | end_time: float = self._paused # type: ignore 56 | else: 57 | end_time = perf_counter() 58 | return end_time - self._start - self._total_paused 59 | -------------------------------------------------------------------------------- /intro/introduce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Megvii-BaseDetection/DynamicRouting/2ad0a95139b1bf21878dd222854f98974ac4930a/intro/introduce.png -------------------------------------------------------------------------------- /playground/.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | # useless file type 8 | *.jpg 9 | *.png 10 | *.txt 11 | *.json 12 | *.csv 13 | *.brainmodel 14 | 15 | # compilation and distribution 16 | __pycache__ 17 | _ext 18 | *.pyc 19 | *.so 20 | *.egg-info/ 21 | *.egg 22 | build/ 23 | dist/ 24 | 25 | # pytorch/python/numpy formats 26 | *.pth 27 | *.pkl 28 | *.npy 29 | 30 | # ipython/jupyter notebooks 31 | *.ipynb 32 | **/.ipynb_checkpoints/ 33 | 34 | # Editor temporaries 35 | *.swn 36 | *.swo 37 | *.bak 38 | *.swp 39 | *~ 40 | 41 | # Pycharm editor settings 42 | .idea 43 | 44 | # project dirs 45 | 46 | # soft link 47 | **/log 48 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.ImageNet/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/data/dl_lib_modelzoo/dynamicmodel/Dynamic-L16B4-A73-convert-seg.pth", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict( 51 | BASE_LR=0.02, 52 | GATE_LR_MULTI=2.5, 53 | ), 54 | IMS_PER_BATCH=8, 55 | CHECKPOINT_PERIOD=5000, 56 | GRAD_CLIP=5.0, 57 | ), 58 | INPUT=dict( 59 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 60 | MIN_SIZE_TRAIN_SAMPLING="choice", 61 | MAX_SIZE_TRAIN=4096, 62 | MIN_SIZE_TEST=1024, 63 | MAX_SIZE_TEST=2048, 64 | # FIX_SIZE_FOR_FLOPS=[768, 768], 65 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 66 | CROP_PAD=dict(SIZE=[768, 768], ), 67 | ), 68 | TEST=dict( 69 | AUG=dict( 70 | ENABLED=False, 71 | MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 72 | MAX_SIZE=4096, 73 | FLIP=True, 74 | ), 75 | PRECISE_BN=dict(ENABLED=True), 76 | ), 77 | OUTPUT_DIR=osp.join( 78 | '/data/Outputs/model_logs/dl_lib_playground', 79 | 
osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 80 | ) 81 | 82 | 83 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 84 | def __init__(self): 85 | super(DynamicSemanticSegmentationConfig, self).__init__() 86 | self._register_configuration(_config_dict) 87 | 88 | 89 | config = DynamicSemanticSegmentationConfig() 90 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.ImageNet/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.Iter560k.SDP0_3/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | # WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.3, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=560000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=10000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | TEST=dict( 66 | AUG=dict( 67 | ENABLED=False, 68 | 
MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 69 | MAX_SIZE=4096, 70 | FLIP=True, 71 | ), 72 | PRECISE_BN=dict(ENABLED=True), 73 | ), 74 | OUTPUT_DIR=osp.join( 75 | '/data/Outputs/model_logs/dl_lib_playground', 76 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 77 | ) 78 | 79 | 80 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 81 | def __init__(self): 82 | super(DynamicSemanticSegmentationConfig, self).__init__() 83 | self._register_configuration(_config_dict) 84 | 85 | 86 | config = DynamicSemanticSegmentationConfig() 87 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.Iter560k.SDP0_3/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_A/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=True, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=True, 33 | LOSS_WEIGHT=0.8, 34 | LOSS_MU=0.1, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | 
MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/dl_lib_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 72 | def __init__(self): 73 | super(DynamicSemanticSegmentationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = DynamicSemanticSegmentationConfig() 78 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_A/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_B/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=True, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=True, 33 | LOSS_WEIGHT=0.5, 34 | LOSS_MU=0.1, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 
58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768,768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/dl_lib_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 72 | def __init__(self): 73 | super(DynamicSemanticSegmentationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = DynamicSemanticSegmentationConfig() 78 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_B/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_C/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | # WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=True, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=True, 33 | LOSS_WEIGHT=0.5, 34 | LOSS_MU=0.2, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | 
MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/dl_lib_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 72 | def __init__(self): 73 | super(DynamicSemanticSegmentationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = DynamicSemanticSegmentationConfig() 78 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_C/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 
2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | TEST=dict( 66 | AUG=dict( 67 | ENABLED=False, 68 | MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 69 | MAX_SIZE=4096, 70 | FLIP=True, 71 | ), 72 | PRECISE_BN=dict(ENABLED=True), 73 | ), 74 | OUTPUT_DIR=osp.join( 75 | '/data/Outputs/model_logs/dl_lib_playground', 76 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 77 | ) 78 | 79 | 80 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 81 | def __init__(self): 82 | super(DynamicSemanticSegmentationConfig, self).__init__() 83 | self._register_configuration(_config_dict) 84 | 85 | 86 | config = DynamicSemanticSegmentationConfig() 87 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer33.ImageNet/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/data/dl_lib_modelzoo/dynamicmodel/Dynamic-L33B4-A74-convert-seg.pth", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=33, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(30)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | 
LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict( 51 | BASE_LR=0.02, 52 | GATE_LR_MULTI=2.5, 53 | ), 54 | IMS_PER_BATCH=8, 55 | CHECKPOINT_PERIOD=5000, 56 | GRAD_CLIP=5.0, 57 | ), 58 | INPUT=dict( 59 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 60 | MIN_SIZE_TRAIN_SAMPLING="choice", 61 | MAX_SIZE_TRAIN=4096, 62 | MIN_SIZE_TEST=1024, 63 | MAX_SIZE_TEST=2048, 64 | # FIX_SIZE_FOR_FLOPS=[768,768], 65 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 66 | CROP_PAD=dict(SIZE=[768, 768], ), 67 | ), 68 | TEST=dict( 69 | AUG=dict( 70 | ENABLED=False, 71 | MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 72 | MAX_SIZE=4096, 73 | FLIP=True, 74 | ), 75 | PRECISE_BN=dict(ENABLED=True), 76 | ), 77 | OUTPUT_DIR=osp.join( 78 | '/data/Outputs/model_logs/dl_lib_playground', 79 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 80 | ) 81 | 82 | 83 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 84 | def __init__(self): 85 | super(DynamicSemanticSegmentationConfig, self).__init__() 86 | self._register_configuration(_config_dict) 87 | 88 | 89 | config = DynamicSemanticSegmentationConfig() 90 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer33.ImageNet/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages, setup 9 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 10 | 11 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 12 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 13 | 14 | 15 | def get_extensions(): 16 | this_dir = os.path.dirname(os.path.abspath(__file__)) 17 | extensions_dir = os.path.join(this_dir, "dl_lib", "layers") 18 | 19 | main_source = os.path.join(extensions_dir, "vision.cpp") 20 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join( 22 | extensions_dir, "**", "*.cu")) + glob.glob( 23 | os.path.join(extensions_dir, "*.cu")) 24 | 25 | sources = [main_source] + sources 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv( 32 | "FORCE_CUDA", "0") == "1": 33 | extension = CUDAExtension 34 | sources += source_cuda 35 | define_macros += [("WITH_CUDA", None)] 36 | extra_compile_args["nvcc"] = [ 37 | "-DCUDA_HAS_FP16=1", 38 | "-D__CUDA_NO_HALF_OPERATORS__", 39 | "-D__CUDA_NO_HALF_CONVERSIONS__", 40 | "-D__CUDA_NO_HALF2_OPERATORS__", 41 | ] 42 | 43 | # It's better if pytorch can do this by default .. 44 | CC = os.environ.get("CC", None) 45 | if CC is not None: 46 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 47 | 48 | include_dirs = [extensions_dir] 49 | 50 | ext_modules = [ 51 | extension( 52 | "dl_lib._C", 53 | sources, 54 | include_dirs=include_dirs, 55 | define_macros=define_macros, 56 | extra_compile_args=extra_compile_args, 57 | ) 58 | ] 59 | 60 | return ext_modules 61 | 62 | 63 | cur_dir = os.getcwd() 64 | with open("tools/dl_train", "w") as dl_lib_train: 65 | head = f"#!/bin/bash\n\nexport OMP_NUM_THREADS=1\n" 66 | dl_lib_train.write( 67 | head + f"python3 {os.path.join(cur_dir, 'tools', 'train_net.py')} $@") 68 | with open("tools/dl_test", "w") as dl_lib_test: 69 | dl_lib_test.write( 70 | head + f"python3 {os.path.join(cur_dir, 'tools', 'test_net.py')} $@") 71 | 72 | setup( 73 | name="dl_lib", 74 | version="0.1", 75 | author="Yanwei Li", 76 | url="https://github.com/yanwei-li/Dynamic-Routing", 77 | description="Deep Learning lib (dl_lib) is a " 78 | "platform for object detection based on Detectron2.", 79 | packages=find_packages(exclude=("configs", "tests")), 80 | python_requires=">=3.6", 81 | install_requires=[ 82 | "termcolor>=1.1", 83 | "Pillow>=6.0", 84 | "tabulate", 85 | "cloudpickle", 86 | "matplotlib", 87 | "tqdm>4.29.0", 88 | "Shapely", 89 | "tensorboard", 90 | "portalocker", 91 | "pycocotools", 92 | "easydict", 93 | "imagesize", 94 | ], 95 | extras_require={"all": ["shapely", "psutil"]}, 96 | ext_modules=get_extensions(), 97 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 98 | scripts=["tools/dl_train", "tools/dl_test"], 99 | ) 100 | -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | A script to benchmark builtin models. 4 | 5 | Note: this script has an extra dependency of psutil. 
6 | """ 7 | 8 | import itertools 9 | import logging 10 | 11 | import torch 12 | import tqdm 13 | from torch.nn.parallel import DistributedDataParallel 14 | 15 | import psutil 16 | from dl_lib.checkpoint import DetectionCheckpointer 17 | from dl_lib.config import get_cfg 18 | from dl_lib.data import (DatasetFromList, build_detection_test_loader, 19 | build_detection_train_loader) 20 | from dl_lib.engine import SimpleTrainer, default_argument_parser, hooks, launch 21 | from dl_lib.modeling import build_model 22 | from dl_lib.solver import build_optimizer 23 | from dl_lib.utils import comm 24 | from dl_lib.utils.events import CommonMetricPrinter 25 | from dl_lib.utils.logger import setup_logger 26 | from dl_lib.utils.timer import Timer 27 | 28 | logger = logging.getLogger("dl_lib") 29 | 30 | 31 | def setup(args): 32 | cfg = get_cfg() 33 | cfg.merge_from_file(args.config_file) 34 | cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. 35 | cfg.merge_from_list(args.opts) 36 | cfg.freeze() 37 | setup_logger(distributed_rank=comm.get_rank()) 38 | return cfg 39 | 40 | 41 | def benchmark_data(args): 42 | cfg = setup(args) 43 | 44 | dataloader = build_detection_train_loader(cfg) 45 | 46 | timer = Timer() 47 | itr = iter(dataloader) 48 | for i in range(10): # warmup 49 | next(itr) 50 | if i == 0: 51 | startup_time = timer.seconds() 52 | timer = Timer() 53 | max_iter = 1000 54 | for _ in tqdm.trange(max_iter): 55 | next(itr) 56 | logger.info("{} iters ({} images) in {} seconds.".format( 57 | max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds())) 58 | logger.info("Startup time: {} seconds".format(startup_time)) 59 | vram = psutil.virtual_memory() 60 | logger.info("RAM Usage: {:.2f}/{:.2f} GB".format( 61 | (vram.total - vram.available) / 1024**3, vram.total / 1024**3)) 62 | 63 | 64 | def benchmark_train(args): 65 | cfg = setup(args) 66 | model = build_model(cfg) 67 | logger.info("Model:\n{}".format(model)) 68 | if comm.get_world_size() > 1: 69 | model = DistributedDataParallel(model, 70 | device_ids=[comm.get_local_rank()], 71 | broadcast_buffers=False) 72 | optimizer = build_optimizer(cfg, model) 73 | checkpointer = DetectionCheckpointer(model, optimizer=optimizer) 74 | checkpointer.load(cfg.MODEL.WEIGHTS) 75 | 76 | cfg.defrost() 77 | cfg.DATALOADER.NUM_WORKERS = 0 78 | data_loader = build_detection_train_loader(cfg) 79 | dummy_data = list(itertools.islice(data_loader, 100)) 80 | 81 | def f(): 82 | while True: 83 | yield from DatasetFromList(dummy_data, copy=False) 84 | 85 | max_iter = 400 86 | trainer = SimpleTrainer(model, f(), optimizer) 87 | trainer.register_hooks([ 88 | hooks.IterationTimer(), 89 | hooks.PeriodicWriter([CommonMetricPrinter(max_iter)]) 90 | ]) 91 | trainer.train(1, max_iter) 92 | 93 | 94 | @torch.no_grad() 95 | def benchmark_eval(args): 96 | cfg = setup(args) 97 | model = build_model(cfg) 98 | model.eval() 99 | logger.info("Model:\n{}".format(model)) 100 | DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) 101 | 102 | cfg.defrost() 103 | cfg.DATALOADER.NUM_WORKERS = 0 104 | data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) 105 | dummy_data = list(itertools.islice(data_loader, 100)) 106 | 107 | def f(): 108 | while True: 109 | yield from DatasetFromList(dummy_data, copy=False) 110 | 111 | for _ in range(5): # warmup 112 | model(dummy_data[0]) 113 | 114 | max_iter = 400 115 | timer = Timer() 116 | with tqdm.tqdm(total=max_iter) as pbar: 117 | for idx, d in enumerate(f()): 118 | if idx == max_iter: 119 | break 120 | model(d) 121 | 
pbar.update() 122 | logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) 123 | 124 | 125 | if __name__ == "__main__": 126 | parser = default_argument_parser() 127 | parser.add_argument("--task", 128 | choices=["train", "eval", "data"], 129 | required=True) 130 | args = parser.parse_args() 131 | assert not args.eval_only 132 | 133 | if args.task == "data": 134 | f = benchmark_data 135 | elif args.task == "train": 136 | """ 137 | Note: training speed may not be representative. 138 | The training cost of a R-CNN model varies with the content of the data 139 | and the quality of the model. 140 | """ 141 | f = benchmark_train 142 | elif args.task == "eval": 143 | f = benchmark_eval 144 | # only benchmark single-GPU inference. 145 | assert args.num_gpus == 1 and args.num_machines == 1 146 | launch(f, 147 | args.num_gpus, 148 | args.num_machines, 149 | args.machine_rank, 150 | args.dist_url, 151 | args=(args, )) 152 | -------------------------------------------------------------------------------- /tools/rm_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | import re 7 | 8 | from colorama import Fore, Style 9 | 10 | 11 | def remove_parser(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--start-iter", 14 | "-s", 15 | type=int, 16 | default=0, 17 | help="start iter to remove") 18 | parser.add_argument("--end-iter", 19 | "-e", 20 | type=int, 21 | default=0, 22 | help="end iter to remove") 23 | parser.add_argument("--prefix", 24 | "-p", 25 | type=str, 26 | default="model_", 27 | help="prefix of model to remove") 28 | parser.add_argument("--dir", 29 | "-d", 30 | type=str, 31 | default="/data/Outputs", 32 | help="dir to remove pth model") 33 | parser.add_argument("--real", 34 | "-r", 35 | action="store_true", 36 | help="really delete or just show what you will delete") 37 | return parser 38 | 39 | 40 | def remove_files(args): 41 | start = args.start_iter 42 | end = args.end_iter 43 | prefix = args.prefix 44 | for folder, _, files in os.walk(args.dir): 45 | # l = [x for x in f if x.endswith(".pth")] 46 | models = [ 47 | f for f in files if re.search(prefix + r"[0123456789]*\.pth", f) 48 | ] 49 | delete = [ 50 | os.path.join(folder, model) for model in models 51 | if start <= int(model[len(prefix):-len(".pth")]) <= end 52 | ] 53 | if delete: 54 | for f in delete: 55 | if args.real: 56 | print(f"remove {f}") 57 | os.remove(f) 58 | else: 59 | print(f"you may remove {f}") 60 | if not args.real: 61 | print(Fore.RED + f"use --real parameter to really delete models" + 62 | Style.RESET_ALL) 63 | 64 | 65 | def main(): 66 | args = remove_parser().parse_args() 67 | remove_files(args) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Detection Training Script. 4 | 5 | This scripts reads a given config file and runs the training or evaluation. 6 | It is an entry point that is made to train standard models in dl_lib. 7 | 8 | In order to let one script support training of many models, 9 | this script contains logic that are specific to these built-in models and therefore 10 | may not be suitable for your own project. 
11 | For example, your research project perhaps only needs a single "evaluator". 12 | 13 | Therefore, we recommend you to use dl_lib as an library and take 14 | this file as an example of how to use the library. 15 | You may want to write your own script with your datasets and other customizations. 16 | """ 17 | import logging 18 | import os 19 | import sys 20 | sys.path.insert(0, '.') # noqa: E402 21 | 22 | from collections import OrderedDict 23 | import torch 24 | from colorama import Fore, Style 25 | 26 | import dl_lib.utils.comm as comm 27 | from config import config 28 | from dl_lib.checkpoint import DetectionCheckpointer 29 | from dl_lib.data import MetadataCatalog 30 | from dl_lib.engine import (CustomizedTrainer, default_argument_parser, 31 | default_setup, hooks, launch) 32 | from dl_lib.evaluation import (CityscapesEvaluator, DatasetEvaluators, 33 | PascalVOCDetectionEvaluator, SemSegEvaluator, 34 | verify_results) 35 | from dl_lib.modeling import SemanticSegmentorWithTTA 36 | from net import build_model 37 | 38 | 39 | class Trainer(CustomizedTrainer): 40 | """ 41 | We use the "CustomizedTrainer" which contains a number pre-defined logic for 42 | standard training workflow. They may not work for you, especially if you 43 | are working on a new research project. In that case you can use the cleaner 44 | "SimpleTrainer", or write your own training loop. 45 | """ 46 | @classmethod 47 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 48 | """ 49 | Create evaluator(s) for a given dataset. 50 | This uses the special metadata "evaluator_type" associated with each builtin dataset. 51 | For your own dataset, you can simply create an evaluator manually in your 52 | script and do not have to worry about the hacky if-else logic here. 53 | """ 54 | if output_folder is None: 55 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 56 | evaluator_list = [] 57 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 58 | if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: 59 | evaluator_list.append( 60 | SemSegEvaluator( 61 | dataset_name, 62 | distributed=True, 63 | num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 64 | ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 65 | output_dir=output_folder, 66 | )) 67 | elif evaluator_type == "cityscapes": 68 | assert ( 69 | torch.cuda.device_count() >= comm.get_rank() 70 | ), "CityscapesEvaluator currently do not work with multiple machines." 
71 | return CityscapesEvaluator(dataset_name) 72 | elif evaluator_type == "pascal_voc": 73 | return PascalVOCDetectionEvaluator(dataset_name) 74 | if hasattr(cfg, "EVALUATORS"): 75 | for evaluator in cfg.EVALUATORS: 76 | evaluator_list.append( 77 | evaluator(dataset_name, True, output_folder, dump=True)) 78 | if len(evaluator_list) == 0: 79 | raise NotImplementedError( 80 | "no Evaluator for the dataset {} with the type {}".format( 81 | dataset_name, evaluator_type)) 82 | elif len(evaluator_list) == 1: 83 | return evaluator_list[0] 84 | return DatasetEvaluators(evaluator_list) 85 | 86 | @classmethod 87 | def test_with_TTA(cls, cfg, model): 88 | logger = logging.getLogger("dl_lib.trainer") 89 | # In the end of training, run an evaluation with TTA 90 | logger.info("Running inference with test-time augmentation ...") 91 | model = SemanticSegmentorWithTTA(cfg, model) 92 | evaluators = [ 93 | cls.build_evaluator(cfg, 94 | name, 95 | output_folder=os.path.join( 96 | cfg.OUTPUT_DIR, "inference_TTA")) 97 | for name in cfg.DATASETS.TEST 98 | ] 99 | res = cls.test(cfg, model, evaluators) 100 | res = OrderedDict({k + "_TTA": v for k, v in res.items()}) 101 | return res 102 | 103 | 104 | def main(args): 105 | config.merge_from_list(args.opts) 106 | cfg, logger = default_setup(config, args) 107 | model = build_model(cfg) 108 | logger.info(f"Model structure: {model}") 109 | file_sys = os.statvfs(cfg.OUTPUT_DIR) 110 | free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30 111 | # We assume that a single dumped model is 700Mb 112 | eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // 113 | cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10 114 | if eval_space_Gb > free_space_Gb: 115 | logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) " 116 | f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}") 117 | if args.eval_only: 118 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 119 | cfg.MODEL.WEIGHTS, resume=args.resume) 120 | res = Trainer.test(cfg, model) 121 | if comm.is_main_process(): 122 | verify_results(cfg, res) 123 | if cfg.TEST.AUG.ENABLED: 124 | res.update(Trainer.test_with_TTA(cfg, model)) 125 | return res 126 | """ 127 | If you'd like to do anything fancier than the standard training logic, 128 | consider writing your own training loop or subclassing the trainer. 129 | """ 130 | trainer = Trainer(cfg, model) 131 | trainer.resume_or_load(resume=args.resume) 132 | if cfg.TEST.AUG.ENABLED: 133 | trainer.register_hooks([ 134 | hooks.EvalHook(0, 135 | lambda: trainer.test_with_TTA(cfg, trainer.model)) 136 | ]) 137 | 138 | return trainer.train() 139 | 140 | 141 | if __name__ == "__main__": 142 | args = default_argument_parser().parse_args() 143 | print("soft link to {}".format(config.OUTPUT_DIR)) 144 | config.link_log() 145 | print("Command Line Args:", args) 146 | launch( 147 | main, 148 | args.num_gpus, 149 | num_machines=args.num_machines, 150 | machine_rank=args.machine_rank, 151 | dist_url=args.dist_url, 152 | args=(args, ), 153 | ) 154 | --------------------------------------------------------------------------------
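
The module docstring of `tools/train_net.py` above recommends using dl_lib as a library and treating that script only as an example. The snippet below is a minimal sketch of that library-style pattern for one playground experiment; it is not part of the repository, and the playground directory, checkpoint path, and output folder are placeholder assumptions. It deliberately skips the distributed `launch()` and `default_setup()` steps that `train_net.py` performs.

```python
# Library-style usage sketch (not part of the repository). Assumptions:
# run from the repository root after `python3 setup.py build develop`,
# with a trained checkpoint available at the placeholder path below.
import sys

# train_net.py resolves `config` and `net` from the working directory, so
# point sys.path at the playground experiment to load instead.
sys.path.insert(0, "playground/Dynamic/Seg.Layer16")

from config import config                    # DynamicSemanticSegmentationConfig
from net import build_model                  # assembles DynamicNet4Seg from the config
from dl_lib.checkpoint import DetectionCheckpointer
from dl_lib.evaluation import SemSegEvaluator

# Override options the same way train_net.py does, through merge_from_list.
config.merge_from_list(["MODEL.WEIGHTS", "/path/to/your/save_dir/ckpt.pth"])

model = build_model(config)
DetectionCheckpointer(model, save_dir=config.OUTPUT_DIR).resume_or_load(
    config.MODEL.WEIGHTS, resume=False)
model.eval()

# As the build_evaluator docstring suggests, an evaluator can be constructed
# directly instead of going through the evaluator_type if/else logic.
evaluator = SemSegEvaluator(
    "cityscapes_fine_sem_seg_val",
    distributed=False,  # single process here, since launch() is skipped
    num_classes=config.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
    ignore_label=config.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
    output_dir="./inference",
)
# Trainer.test(config, model, [evaluator]) would then mirror the --eval-only
# path in train_net.py's main(), minus the distributed setup.
```

This mirrors how every playground experiment pairs a `config.py` with a `net.py`: the `net.py` shown above injects `build_backbone` and `build_sem_seg_head` into the config before constructing `DynamicNet4Seg`, so any script that imports the pair gets the same model the `dl_train`/`dl_test` wrappers would build.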