├── .flake8 ├── .gitignore ├── LICENSE ├── README.md ├── datasets ├── README.md └── cityscapes ├── dl_lib ├── __init__.py ├── checkpoint │ ├── __init__.py │ ├── c2_model_loading.py │ ├── catalog.py │ └── detection_checkpoint.py ├── configs │ ├── base_config.py │ └── segm_config.py ├── data │ ├── __init__.py │ ├── build.py │ ├── catalog.py │ ├── common.py │ ├── dataset_mapper.py │ ├── datasets │ │ ├── README.md │ │ ├── __init__.py │ │ ├── builtin.py │ │ ├── builtin_meta.py │ │ ├── cityscapes.py │ │ ├── coco.py │ │ ├── pascal_voc.py │ │ └── register_coco.py │ ├── detection_utils.py │ ├── samplers │ │ ├── __init__.py │ │ ├── distributed_sampler.py │ │ └── grouped_batch_sampler.py │ └── transforms │ │ ├── __init__.py │ │ ├── extend_transform.py │ │ ├── transform.py │ │ ├── transform_gen.py │ │ └── transform_util.py ├── engine │ ├── __init__.py │ ├── custom.py │ ├── defaults.py │ ├── hooks.py │ ├── launch.py │ └── train_loop.py ├── evaluation │ ├── __init__.py │ ├── cityscapes_evaluation.py │ ├── evaluator.py │ ├── pascal_voc_evaluation.py │ ├── sem_seg_evaluation.py │ └── testing.py ├── layers │ ├── ROIAlign │ │ ├── ROIAlign.h │ │ ├── ROIAlign_cpu.cpp │ │ ├── ROIAlign_cuda.cu │ │ └── roi_align.py │ ├── __init__.py │ ├── batch_norm.py │ ├── deformable │ │ ├── deform_conv.h │ │ ├── deform_conv.py │ │ ├── deform_conv_cuda.cu │ │ ├── deform_conv_cuda_kernel.cu │ │ └── deform_conv_with_off.py │ ├── shape_spec.py │ ├── vision.cpp │ └── wrappers.py ├── modeling │ ├── __init__.py │ ├── backbone │ │ ├── __init__.py │ │ ├── backbone.py │ │ ├── fpn.py │ │ └── resnet.py │ ├── basenet │ │ ├── __init__.py │ │ ├── basenet.py │ │ └── show.py │ ├── dynamic_arch │ │ ├── __init__.py │ │ ├── cal_op_flops.py │ │ ├── dynamic_backbone.py │ │ ├── dynamic_cell.py │ │ └── op_with_flops.py │ ├── meta_arch │ │ ├── __init__.py │ │ ├── dynamic4seg.py │ │ └── semantic_seg.py │ ├── nn_utils │ │ ├── __init__.py │ │ ├── flop_count.py │ │ ├── jit_handles.py │ │ ├── precise_bn.py │ │ ├── scale_grad.py │ │ └── weight_init.py │ ├── postprocessing.py │ └── test_time_augmentation.py ├── solver │ ├── __init__.py │ ├── build.py │ └── lr_scheduler.py ├── structures │ ├── __init__.py │ ├── boxes.py │ ├── image_list.py │ ├── instances.py │ ├── keypoints.py │ └── masks.py └── utils │ ├── README.md │ ├── __init__.py │ ├── benchmark.py │ ├── checkpoint.py │ ├── collect_env.py │ ├── colormap.py │ ├── comm.py │ ├── config_helper.py │ ├── download.py │ ├── env.py │ ├── events.py │ ├── file_io.py │ ├── history_buffer.py │ ├── imports.py │ ├── logger.py │ ├── memory.py │ ├── serialize.py │ ├── timer.py │ ├── video_visualizer.py │ └── visualizer.py ├── intro └── introduce.png ├── playground ├── .gitignore └── Dynamic │ ├── Seg.Layer16.ImageNet │ ├── config.py │ └── net.py │ ├── Seg.Layer16.Iter560k.SDP0_3 │ ├── config.py │ └── net.py │ ├── Seg.Layer16.SmallGate.Dynamic_A │ ├── config.py │ └── net.py │ ├── Seg.Layer16.SmallGate.Dynamic_B │ ├── config.py │ └── net.py │ ├── Seg.Layer16.SmallGate.Dynamic_C │ ├── config.py │ └── net.py │ ├── Seg.Layer16 │ ├── config.py │ └── net.py │ └── Seg.Layer33.ImageNet │ ├── config.py │ └── net.py ├── setup.py └── tools ├── benchmark.py ├── rm_files.py ├── test_net.py └── train_net.py /.flake8: -------------------------------------------------------------------------------- 1 | # This is an example .flake8 config, used when developing *Black* itself. 2 | # Keep in sync with setup.cfg which is used for source packages. 
3 | 4 | [flake8] 5 | ignore = W503, E203, E221, C901, E999, W504 6 | max-line-length = 100 7 | max-complexity = 18 8 | select = B,C,E,F,W,T4,B9 9 | exclude = build,__init__.py 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | *.jpg 8 | *.txt 9 | 10 | # compilation and distribution 11 | __pycache__ 12 | _ext 13 | *.pyc 14 | *.so 15 | *.egg-info/ 16 | build/ 17 | dist/ 18 | 19 | # pytorch/python/numpy formats 20 | *.pth 21 | *.pkl 22 | *.npy 23 | 24 | # ipython/jupyter notebooks 25 | *.ipynb 26 | **/.ipynb_checkpoints/ 27 | 28 | # Editor temporaries 29 | *.swn 30 | *.swo 31 | *.swp 32 | *~ 33 | 34 | # Pycharm/VSCode editor settings 35 | .idea 36 | *.vscode 37 | 38 | # project dirs 39 | **/log 40 | tools/dl_test 41 | tools/dl_train 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DynamicRouting 2 | 3 | This project provides an implementation for "[Learning Dynamic Routing for Semantic Segmentation](https://arxiv.org/abs/2003.10401)" (*CVPR2020 Oral*) on PyTorch. 4 | Because the experiments in the paper were conducted with an internal framework, this project reimplements them on *dl_lib* and reports detailed comparisons below. Some parts of the code in *dl_lib* are based on [detectron2](https://github.com/facebookresearch/detectron2). 5 | 6 | ![introduce image](intro/introduce.png) 7 | 8 | ## Requirement 9 | - Python >= 3.6 10 | - `python3 --version` 11 | - PyTorch >= 1.3 12 | - `pip3 install torch torchvision` 13 | - OpenCV 14 | - `pip3 install opencv-python` 15 | - GCC >= 4.9 16 | - `gcc --version` 17 | 18 | ## Installation 19 | 20 | Make sure that at least one GPU is available when compiling. Run: 21 | - `git clone https://github.com/yanwei-li/DynamicRouting.git` 22 | - `cd DynamicRouting` 23 | - `sudo python3 setup.py build develop` 24 | 25 | ## Usage 26 | 27 | ### Dataset 28 | We use the Cityscapes dataset for training and validation. Please refer to [`datasets/README.md`](datasets/README.md) or the dataset structure in [detectron2](https://github.com/facebookresearch/detectron2) for more details. 29 | - Cityscapes [Download](https://www.cityscapes-dataset.com/) 30 | 31 | ### Pretrained Model 32 | We provide ImageNet pretrained models: 33 | - Layer16-Fix [GoogleDrive](https://drive.google.com/file/d/1WqVOZo8oelXTKlf0BDE3q2W-lyYm2G2U/view?usp=sharing) 34 | - Layer33-Fix [GoogleDrive](https://drive.google.com/file/d/1xktVvVsYSaDlb8yQcn0zAzx2ZMUhyD_K/view?usp=sharing) 35 | 36 | ### Training 37 | For example, to train the Dynamic Network with the Layer16 backbone: 38 | - Train from scratch 39 | ```shell 40 | cd playground/Dynamic/Seg.Layer16 41 | dl_train --num-gpus 4 42 | ``` 43 | - Train with ImageNet pretraining 44 | ```shell 45 | cd playground/Dynamic/Seg.Layer16.ImageNet 46 | dl_train --num-gpus 4 MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth 47 | ``` 48 | 49 | NOTE: Please set `FIX_SIZE_FOR_FLOPS` to `[768,768]` and `[1024,2048]` for training and evaluation, respectively.
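Below is a minimal sketch of how such an override could look in a playground `config.py`, following the dict-based pattern used by `dl_lib/configs/segm_config.py`. The exact location of `FIX_SIZE_FOR_FLOPS` inside the config tree is an assumption here; check the `config.py` shipped with each playground experiment for the real key path.

```python
# Hypothetical sketch only: the config path of FIX_SIZE_FOR_FLOPS is assumed,
# not taken from the shipped playground configs.
from dl_lib.configs.segm_config import SemanticSegmentationConfig

_config_dict = dict(
    MODEL=dict(
        # use [768, 768] for training and [1024, 2048] for Cityscapes evaluation
        FIX_SIZE_FOR_FLOPS=[768, 768],
    ),
)


class CustomSegConfig(SemanticSegmentationConfig):
    def __init__(self):
        super(CustomSegConfig, self).__init__()
        self._register_configuration(_config_dict)


config = CustomSegConfig()
```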
50 | 51 | ### Evaluation 52 | You can evaluate the trained or downloaded model: 53 | - Evaluate the trained model 54 | ```shell 55 | dl_test --num-gpus 8 56 | ``` 57 | - Evaluate the downloaded model: 58 | ```shell 59 | dl_test --num-gpus 8 MODEL.WEIGHTS /path/to/your/save_dir/ckpt.pth 60 | ``` 61 | 62 | NOTE: If your machine does not support such a setting, please change the settings in `config.py` to suitable values. 63 | 64 | ## Performance 65 | ### Cityscapes *val* set 66 | ***Without* ImageNet Pretrain:** 67 | 68 | Methods | Backbone | Iter/K | mIoU (paper) | GFLOPs (paper) | mIoU (ours) | GFLOPs (ours) | Model 69 | :--:|:--:|:--:|:--:|:--:|:--:|:--:|:--: 70 | Dynamic-A | Layer16 | 186 | 72.8 | 44.9 | 73.9 | 52.5 | [GoogleDrive](https://drive.google.com/file/d/1Fa4hLS2GKL90HR0tVhmcZkFwemK6DlgT/view?usp=sharing) 71 | Dynamic-B | Layer16 | 186 | 73.8 | 58.7 | 74.3 | 58.9 | [GoogleDrive](https://drive.google.com/open?id=1o31jNkoSMugK0rEsi6grE9WEioWJgsZN) 72 | Dynamic-C | Layer16 | 186 | 74.6 | 66.6 | 74.8 | 59.8 | [GoogleDrive](https://drive.google.com/open?id=1nJ8oJD9yZj82hTlyDcMBGd9kmpIZVIto) 73 | Dynamic-Raw | Layer16 | 186 | 76.1 | 119.5 | 76.7 | 114.9 | [GoogleDrive](https://drive.google.com/open?id=1_fSMzzaUVzbkjYqSuvD2EmmZiZZVsDvh) 74 | Dynamic-Raw | Layer16 | 558 | 78.3 | 113.3 | 78.1 | 114.2 | [GoogleDrive](https://drive.google.com/open?id=1ToV_YAVxv5pCoRTreIXsQxNI5jolektt) 75 | 76 | ***With* ImageNet Pretrain:** 77 | 78 | Methods | Backbone | Iter/K | mIoU (paper) | GFLOPs (paper) | mIoU (ours) | GFLOPs (ours) | Model 79 | :--:|:--:|:--:|:--:|:--:|:--:|:--:|:--: 80 | Dynamic-Raw | Layer16 | 186 | 78.6 | 119.4 | 78.8 | 117.8 | [GoogleDrive](https://drive.google.com/open?id=1xRZkRw5qIKc_A6repZkRmLUpSAnnU63e) 81 | Dynamic-Raw | Layer33 | 186 | 79.2 | 242.3 | 79.4 | 243.1 | [GoogleDrive](https://drive.google.com/file/d/1DdLwt0jzBTqx_3EFGPLm_gWdaeoXXux7/view?usp=sharing) 82 | 83 | ## To do 84 | - [ ] Faster inference speed 85 | - [ ] Support more vision tasks 86 | - [ ] Object detection 87 | - [ ] Instance segmentation 88 | - [ ] Panoptic segmentation 89 | 90 | ## Acknowledgement 91 | - [Detectron2](https://github.com/facebookresearch/detectron2) 92 | - [DARTS](https://github.com/quark0/darts) 93 | 94 | ## Citation 95 | Please consider citing Dynamic Routing in your publications if it helps your research. 96 | 97 | ``` 98 | @inproceedings{li2020learning, 99 | title = {Learning Dynamic Routing for Semantic Segmentation}, 100 | author = {Yanwei Li and Lin Song and Yukang Chen and Zeming Li and Xiangyu Zhang and Xingang Wang and Jian Sun}, 101 | booktitle = {IEEE Conference on Computer Vision and Pattern Recognition}, 102 | year = {2020} 103 | } 104 | ``` 105 | 106 | Please consider citing this project in your publications if it helps your research. 107 | ``` 108 | @misc{DynamicRouting, 109 | author = {Yanwei Li}, 110 | title = {DynamicRouting}, 111 | howpublished = {\url{https://github.com/yanwei-li/DynamicRouting}}, 112 | year = {2020} 113 | } 114 | ``` -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | For a few datasets that detectron2 natively supports, 3 | the datasets are assumed to exist in a directory called 4 | "datasets/", under the directory where you launch the program.
5 | They need to have the following directory structure: 6 | 7 | ## Expected dataset structure for cityscapes: 8 | ``` 9 | cityscapes/ 10 | gtFine/ 11 | train/ 12 | aachen/ 13 | color.png, instanceIds.png, labelIds.png, polygons.json, 14 | labelTrainIds.png 15 | ... 16 | val/ 17 | test/ 18 | leftImg8bit/ 19 | train/ 20 | val/ 21 | test/ 22 | ``` 23 | Install cityscapes scripts by: 24 | ``` 25 | pip install git+https://github.com/mcordts/cityscapesScripts.git 26 | ``` 27 | 28 | Note: 29 | labelTrainIds.png are created by `cityscapesscripts/preparation/createTrainIdLabelImgs.py`. 30 | They are not needed for instance segmentation. 31 | 32 | ## Expected dataset structure for Pascal VOC: 33 | ``` 34 | VOC20{07,12}/ 35 | Annotations/ 36 | ImageSets/ 37 | JPEGImages/ 38 | ``` 39 | -------------------------------------------------------------------------------- /datasets/cityscapes: -------------------------------------------------------------------------------- 1 | /data/Datasets/cityscapes -------------------------------------------------------------------------------- /dl_lib/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils.env import setup_environment 4 | 5 | setup_environment() 6 | 7 | __version__ = "0.1" 8 | -------------------------------------------------------------------------------- /dl_lib/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | from . import catalog as _UNUSED # register the handler 6 | from .detection_checkpoint import DetectionCheckpointer 7 | from dl_lib.utils.checkpoint import Checkpointer, PeriodicCheckpointer 8 | 9 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 10 | -------------------------------------------------------------------------------- /dl_lib/checkpoint/catalog.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | 4 | from dl_lib.utils.file_io import PathHandler, PathManager 5 | 6 | 7 | class ModelCatalog(object): 8 | """ 9 | Store mappings from names to third-party models. 10 | """ 11 | 12 | S3_C2_DETECTRON_PREFIX = "https://dl.fbaipublicfiles.com/detectron" 13 | 14 | # MSRA models have STRIDE_IN_1X1=True. False otherwise. 15 | # NOTE: all BN models here have fused BN into an affine layer. 16 | # As a result, you should only load them to a model with "FrozenBN". 17 | # Loading them to a model with regular BN or SyncBN is wrong. 18 | # Even when loaded to FrozenBN, it is still different from affine by an epsilon, 19 | # which should be negligible for training. 
20 | # NOTE: all models here uses PIXEL_STD=[1,1,1] 21 | C2_IMAGENET_MODELS = { 22 | "MSRA/R-50": 23 | "ImageNetPretrained/MSRA/R-50.pkl", 24 | "MSRA/R-101": 25 | "ImageNetPretrained/MSRA/R-101.pkl", 26 | "FAIR/R-50-GN": 27 | "ImageNetPretrained/47261647/R-50-GN.pkl", 28 | "FAIR/R-101-GN": 29 | "ImageNetPretrained/47592356/R-101-GN.pkl", 30 | "FAIR/X-101-32x8d": 31 | "ImageNetPretrained/20171220/X-101-32x8d.pkl", 32 | "FAIR/X-101-64x4d": 33 | "ImageNetPretrained/FBResNeXt/X-101-64x4d.pkl", 34 | "FAIR/X-152-32x8d-IN5k": 35 | "ImageNetPretrained/25093814/X-152-32x8d-IN5k.pkl", 36 | } 37 | 38 | C2_DETECTRON_PATH_FORMAT = ( 39 | "{prefix}/{url}/output/train/{dataset}/{type}/model_final.pkl" 40 | ) # noqa B950 41 | 42 | C2_DATASET_COCO = "coco_2014_train%3Acoco_2014_valminusminival" 43 | C2_DATASET_COCO_KEYPOINTS = "keypoints_coco_2014_train%3Akeypoints_coco_2014_valminusminival" 44 | 45 | # format: {model_name} -> part of the url 46 | C2_DETECTRON_MODELS = { 47 | "35857197/e2e_faster_rcnn_R-50-C4_1x": 48 | "35857197/12_2017_baselines/e2e_faster_rcnn_R-50-C4_1x.yaml.01_33_49.iAX0mXvW", # noqa B950 49 | "35857345/e2e_faster_rcnn_R-50-FPN_1x": 50 | "35857345/12_2017_baselines/e2e_faster_rcnn_R-50-FPN_1x.yaml.01_36_30.cUF7QR7I", # noqa B950 51 | "35857890/e2e_faster_rcnn_R-101-FPN_1x": 52 | "35857890/12_2017_baselines/e2e_faster_rcnn_R-101-FPN_1x.yaml.01_38_50.sNxI7sX7", # noqa B950 53 | "36761737/e2e_faster_rcnn_X-101-32x8d-FPN_1x": 54 | "36761737/12_2017_baselines/e2e_faster_rcnn_X-101-32x8d-FPN_1x.yaml.06_31_39.5MIHi1fZ", # noqa B950 55 | "35858791/e2e_mask_rcnn_R-50-C4_1x": 56 | "35858791/12_2017_baselines/e2e_mask_rcnn_R-50-C4_1x.yaml.01_45_57.ZgkA7hPB", # noqa B950 57 | "35858933/e2e_mask_rcnn_R-50-FPN_1x": 58 | "35858933/12_2017_baselines/e2e_mask_rcnn_R-50-FPN_1x.yaml.01_48_14.DzEQe4wC", # noqa B950 59 | "35861795/e2e_mask_rcnn_R-101-FPN_1x": 60 | "35861795/12_2017_baselines/e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT", # noqa B950 61 | "36761843/e2e_mask_rcnn_X-101-32x8d-FPN_1x": 62 | "36761843/12_2017_baselines/e2e_mask_rcnn_X-101-32x8d-FPN_1x.yaml.06_35_59.RZotkLKI", # noqa B950 63 | "48616381/e2e_mask_rcnn_R-50-FPN_2x_gn": 64 | "GN/48616381/04_2018_gn_baselines/e2e_mask_rcnn_R-50-FPN_2x_gn_0416.13_23_38.bTlTI97Q", # noqa B950 65 | "37697547/e2e_keypoint_rcnn_R-50-FPN_1x": 66 | "37697547/12_2017_baselines/e2e_keypoint_rcnn_R-50-FPN_1x.yaml.08_42_54.kdzV35ao", # noqa B950 67 | "35998355/rpn_R-50-C4_1x": 68 | "35998355/12_2017_baselines/rpn_R-50-C4_1x.yaml.08_00_43.njH5oD9L", # noqa B950 69 | "35998814/rpn_R-50-FPN_1x": 70 | "35998814/12_2017_baselines/rpn_R-50-FPN_1x.yaml.08_06_03.Axg0r179", # noqa B950 71 | "36225147/fast_R-50-FPN_1x": 72 | "36225147/12_2017_baselines/fast_rcnn_R-50-FPN_1x.yaml.08_39_09.L3obSdQ2", # noqa B950 73 | } 74 | 75 | @staticmethod 76 | def get(name): 77 | if name.startswith("Caffe2Detectron/COCO"): 78 | return ModelCatalog._get_c2_detectron_baseline(name) 79 | if name.startswith("ImageNetPretrained/"): 80 | return ModelCatalog._get_c2_imagenet_pretrained(name) 81 | raise RuntimeError("model not present in the catalog: {}".format(name)) 82 | 83 | @staticmethod 84 | def _get_c2_imagenet_pretrained(name): 85 | prefix = ModelCatalog.S3_C2_DETECTRON_PREFIX 86 | name = name[len("ImageNetPretrained/"):] 87 | name = ModelCatalog.C2_IMAGENET_MODELS[name] 88 | url = "/".join([prefix, name]) 89 | return url 90 | 91 | @staticmethod 92 | def _get_c2_detectron_baseline(name): 93 | name = name[len("Caffe2Detectron/COCO/"):] 94 | url = 
ModelCatalog.C2_DETECTRON_MODELS[name] 95 | if "keypoint_rcnn" in name: 96 | dataset = ModelCatalog.C2_DATASET_COCO_KEYPOINTS 97 | else: 98 | dataset = ModelCatalog.C2_DATASET_COCO 99 | 100 | if "35998355/rpn_R-50-C4_1x" in name: 101 | # this one model is somehow different from others .. 102 | type = "rpn" 103 | else: 104 | type = "generalized_rcnn" 105 | 106 | # Detectron C2 models are stored in the structure defined in `C2_DETECTRON_PATH_FORMAT`. 107 | url = ModelCatalog.C2_DETECTRON_PATH_FORMAT.format( 108 | prefix=ModelCatalog.S3_C2_DETECTRON_PREFIX, 109 | url=url, 110 | type=type, 111 | dataset=dataset) 112 | return url 113 | 114 | 115 | class ModelCatalogHandler(PathHandler): 116 | """ 117 | Resolve URL like catalog://. 118 | """ 119 | 120 | PREFIX = "catalog://" 121 | 122 | def _get_supported_prefixes(self): 123 | return [self.PREFIX] 124 | 125 | def _get_local_path(self, path): 126 | logger = logging.getLogger(__name__) 127 | catalog_path = ModelCatalog.get(path[len(self.PREFIX):]) 128 | logger.info("Catalog entry {} points to {}".format(path, catalog_path)) 129 | return PathManager.get_local_path(catalog_path) 130 | 131 | def _open(self, path, mode="r", **kwargs): 132 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 133 | 134 | 135 | class Detectron2Handler(PathHandler): 136 | """ 137 | Resolve anything that's in Detectron2 model zoo. 138 | """ 139 | 140 | PREFIX = "detectron2://" 141 | S3_DETECTRON2_PREFIX = "https://dl.fbaipublicfiles.com/detectron2/" 142 | 143 | def _get_supported_prefixes(self): 144 | return [self.PREFIX] 145 | 146 | def _get_local_path(self, path): 147 | name = path[len(self.PREFIX):] 148 | return PathManager.get_local_path(self.S3_DETECTRON2_PREFIX + name) 149 | 150 | def _open(self, path, mode="r", **kwargs): 151 | return PathManager.open(self._get_local_path(path), mode, **kwargs) 152 | 153 | 154 | PathManager.register_handler(ModelCatalogHandler()) 155 | PathManager.register_handler(Detectron2Handler()) 156 | -------------------------------------------------------------------------------- /dl_lib/checkpoint/detection_checkpoint.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import pickle 3 | 4 | import dl_lib.utils.comm as comm 5 | from dl_lib.utils.checkpoint import Checkpointer 6 | from dl_lib.utils.file_io import PathManager 7 | 8 | from .c2_model_loading import align_and_update_state_dicts 9 | 10 | 11 | class DetectionCheckpointer(Checkpointer): 12 | """ 13 | Same as :class:`Checkpointer`, but is able to handle models in detectron & dl_lib 14 | model zoo, and apply conversions for legacy models. 
15 | """ 16 | def __init__(self, 17 | model, 18 | save_dir="", 19 | *, 20 | save_to_disk=None, 21 | **checkpointables): 22 | is_main_process = comm.is_main_process() 23 | super().__init__( 24 | model, 25 | save_dir, 26 | save_to_disk=is_main_process 27 | if save_to_disk is None else save_to_disk, 28 | **checkpointables, 29 | ) 30 | 31 | def _load_file(self, filename): 32 | if filename.endswith(".pkl"): 33 | with PathManager.open(filename, "rb") as f: 34 | data = pickle.load(f, encoding="latin1") 35 | if "model" in data and "__author__" in data: 36 | # file is in dl_lib model zoo format 37 | self.logger.info("Reading a file from '{}'".format( 38 | data["__author__"])) 39 | return data 40 | else: 41 | # assume file is from Caffe2 / Detectron1 model zoo 42 | if "blobs" in data: 43 | # Detection models have "blobs", but ImageNet models don't 44 | data = data["blobs"] 45 | data = { 46 | k: v 47 | for k, v in data.items() if not k.endswith("_momentum") 48 | } 49 | return { 50 | "model": data, 51 | "__author__": "Caffe2", 52 | "matching_heuristics": True 53 | } 54 | 55 | loaded = super()._load_file(filename) # load native pth checkpoint 56 | if "model" not in loaded: 57 | loaded = {"model": loaded} 58 | return loaded 59 | 60 | def _load_model(self, checkpoint): 61 | if checkpoint.get("matching_heuristics", False): 62 | self._convert_ndarray_to_tensor(checkpoint["model"]) 63 | # convert weights by name-matching heuristics 64 | model_state_dict = self.model.state_dict() 65 | align_and_update_state_dicts( 66 | model_state_dict, 67 | checkpoint["model"], 68 | c2_conversion=checkpoint.get("__author__", None) == "Caffe2", 69 | ) 70 | checkpoint["model"] = model_state_dict 71 | # for non-caffe2 models, use standard ways to load it 72 | super()._load_model(checkpoint) 73 | -------------------------------------------------------------------------------- /dl_lib/configs/segm_config.py: -------------------------------------------------------------------------------- 1 | from .base_config import BaseConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | LOAD_PROPOSALS=False, 6 | MASK_ON=False, 7 | KEYPOINT_ON=False, 8 | BACKBONE=dict(FREEZE_AT=0, ), 9 | RESNETS=dict( 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | NORM="nnSyncBN", 12 | NUM_GROUPS=1, 13 | WIDTH_PER_GROUP=64, 14 | STRIDE_IN_1X1=True, 15 | RES5_DILATION=1, 16 | RES2_OUT_CHANNELS=256, 17 | STEM_OUT_CHANNELS=64, 18 | DEFORM_ON_PER_STAGE=[False, False, False, False], 19 | DEFORM_MODULATED=False, 20 | DEFORM_NUM_GROUPS=1, 21 | ), 22 | FPN=dict( 23 | IN_FEATURES=[], 24 | OUT_CHANNELS=256, 25 | NORM="", 26 | FUSE_TYPE="sum", 27 | ), 28 | SEM_SEG_HEAD=dict( 29 | # NAME="SemSegFPNHead", 30 | IN_FEATURES=[], 31 | IGNORE_VALUE=255, 32 | NUM_CLASSES=(), 33 | CONVS_DIM=256, 34 | COMMON_STRIDE=(), 35 | NORM="GN", 36 | LOSS_WEIGHT=1.0, 37 | ), 38 | SOLVER=dict( 39 | LR_SCHEDULER=dict( 40 | NAME="PolyLR", 41 | POLY_POWER=0.9, 42 | MAX_ITER=40000, 43 | WARMUP_ITERS=1000, 44 | WARMUP_FACTOR=0.001, 45 | WARMUP_METHOD="linear", 46 | ), 47 | OPTIMIZER=dict(BASE_LR=0.01, ), 48 | IMS_PER_BATCH=16, 49 | CHECKPOINT_PERIOD=5000, 50 | ), 51 | TEST=dict(PRECISE_BN=dict(ENABLED=True), ), 52 | ), 53 | INPUT=dict(CROP_PAD=dict( 54 | ENABLED=True, 55 | TYPE='absolute', 56 | SIZE=(), 57 | IMG_PAD_VALUE=0, 58 | SEG_PAD_VALUE=255, 59 | ), ), 60 | ) 61 | 62 | 63 | class SemanticSegmentationConfig(BaseConfig): 64 | def __init__(self): 65 | super(SemanticSegmentationConfig, self).__init__() 66 | self._register_configuration(_config_dict) 67 | 68 | 69 | config = 
SemanticSegmentationConfig() 70 | -------------------------------------------------------------------------------- /dl_lib/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from . import transforms # isort:skip 3 | 4 | from .build import ( 5 | build_detection_test_loader, 6 | build_detection_train_loader, 7 | get_detection_dataset_dicts, 8 | load_proposals_into_dataset, 9 | print_instances_class_histogram, 10 | ) 11 | from .catalog import DatasetCatalog, MetadataCatalog 12 | from .common import DatasetFromList, MapDataset 13 | from .dataset_mapper import DatasetMapper 14 | 15 | # ensure the builtin datasets are registered 16 | from . import datasets, samplers # isort:skip 17 | 18 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 19 | -------------------------------------------------------------------------------- /dl_lib/data/common.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | import logging 4 | import random 5 | 6 | import torch.utils.data as data 7 | 8 | from dl_lib.utils.serialize import PicklableWrapper 9 | 10 | __all__ = ["MapDataset", "DatasetFromList"] 11 | 12 | 13 | class MapDataset(data.Dataset): 14 | """ 15 | Map a function over the elements in a dataset. 16 | 17 | Args: 18 | dataset: a dataset where map function is applied. 19 | map_func: a callable which maps the element in dataset. map_func is 20 | responsible for error handling, when error happens, it needs to 21 | return None so the MapDataset will randomly use other 22 | elements from the dataset. 23 | """ 24 | def __init__(self, dataset, map_func): 25 | self._dataset = dataset 26 | self._map_func = PicklableWrapper( 27 | map_func) # wrap so that a lambda will work 28 | 29 | self._rng = random.Random(42) 30 | self._fallback_candidates = set(range(len(dataset))) 31 | 32 | def __len__(self): 33 | return len(self._dataset) 34 | 35 | def __getitem__(self, idx): 36 | retry_count = 0 37 | cur_idx = int(idx) 38 | 39 | while True: 40 | data = self._map_func(self._dataset[cur_idx]) 41 | if data is not None: 42 | self._fallback_candidates.add(cur_idx) 43 | return data 44 | 45 | # _map_func fails for this idx, use a random new index from the pool 46 | retry_count += 1 47 | self._fallback_candidates.discard(cur_idx) 48 | cur_idx = self._rng.sample(self._fallback_candidates, k=1)[0] 49 | 50 | if retry_count >= 3: 51 | logger = logging.getLogger(__name__) 52 | logger.warning( 53 | "Failed to apply `_map_func` for idx: {}, retry count: {}". 54 | format(idx, retry_count)) 55 | 56 | 57 | class DatasetFromList(data.Dataset): 58 | """ 59 | Wrap a list to a torch Dataset. It produces elements of the list as data. 60 | """ 61 | def __init__(self, lst: list, copy: bool = True): 62 | """ 63 | Args: 64 | lst (list): a list which contains elements to produce. 65 | copy (bool): whether to deepcopy the element when producing it, 66 | so that the result can be modified in place without affecting the 67 | source in the list. 
68 | """ 69 | self._lst = lst 70 | self._copy = copy 71 | 72 | def __len__(self): 73 | return len(self._lst) 74 | 75 | def __getitem__(self, idx): 76 | if self._copy: 77 | return copy.deepcopy(self._lst[idx]) 78 | else: 79 | return self._lst[idx] 80 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | ### Common Datasets 3 | 4 | The datasets implemented here do not need to load the data into the final format. 5 | They should provide the minimal data structure needed to use the dataset, so they can be very efficient. 6 | 7 | For example, for an image dataset, just provide the file names and labels, but don't read the images. 8 | Let the downstream code decide how to read them. 9 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .cityscapes import load_cityscapes_instances 3 | from .coco import load_coco_json, load_sem_seg 4 | from .register_coco import register_coco_instances, register_coco_panoptic_separated 5 | from . import builtin # ensure the builtin datasets are registered 6 | 7 | __all__ = [ 8 | k for k in globals().keys() if "builtin" not in k and not k.startswith("_") 9 | ] 10 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/builtin.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | """ 4 | This file registers pre-defined datasets at hard-coded paths, and their metadata. 5 | We hard-code metadata for common datasets. This will enable: 6 | 1. Consistency checks when loading the datasets 7 | 2. Using models on these standard datasets directly and running demos, 8 | without having to download the dataset annotations 9 | We hard-code some paths to the dataset that's assumed to 10 | exist in "./datasets/". 11 | Users SHOULD NOT use this file to create new datasets / metadata for new datasets. 12 | To add a new dataset, refer to the tutorial "docs/DATASETS.md".
13 | """ 14 | 15 | import os 16 | import os.path as osp 17 | 18 | import dl_lib 19 | 20 | from dl_lib.data import MetadataCatalog, DatasetCatalog 21 | from .register_coco import register_coco_instances 22 | from .cityscapes import load_cityscapes_instances, load_cityscapes_semantic 23 | from .pascal_voc import register_pascal_voc 24 | from .builtin_meta import _get_builtin_metadata 25 | 26 | # ==== Predefined datasets and splits for COCO ========== 27 | 28 | _PREDEFINED_SPLITS_COCO = {} 29 | _PREDEFINED_SPLITS_COCO["coco"] = { 30 | "coco_2014_train": 31 | ("coco/train2014", "coco/annotations/instances_train2014.json"), 32 | "coco_2014_val": 33 | ("coco/val2014", "coco/annotations/instances_val2014.json"), 34 | "coco_2014_minival": 35 | ("coco/val2014", "coco/annotations/instances_minival2014.json"), 36 | "coco_2014_minival_100": 37 | ("coco/val2014", "coco/annotations/instances_minival2014_100.json"), 38 | "coco_2014_valminusminival": ( 39 | "coco/val2014", 40 | "coco/annotations/instances_valminusminival2014.json", 41 | ), 42 | "coco_2017_train": ("coco/train2017", 43 | "coco/annotations/instances_train2017.json"), 44 | "coco_2017_val": ("coco/val2017", 45 | "coco/annotations/instances_val2017.json"), 46 | "coco_2017_test": ("coco/test2017", 47 | "coco/annotations/image_info_test2017.json"), 48 | "coco_2017_test-dev": ("coco/test2017", 49 | "coco/annotations/image_info_test-dev2017.json"), 50 | "coco_2017_val_100": ("coco/val2017", 51 | "coco/annotations/instances_val2017_100.json"), 52 | } 53 | 54 | 55 | def register_all_coco(root=osp.join( 56 | osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")): 57 | for dataset_name, splits_per_dataset in _PREDEFINED_SPLITS_COCO.items(): 58 | for key, (image_root, json_file) in splits_per_dataset.items(): 59 | # Assume pre-defined datasets live in `./datasets`. 
60 | register_coco_instances( 61 | key, 62 | _get_builtin_metadata(dataset_name), 63 | os.path.join(root, json_file) 64 | if "://" not in json_file else json_file, 65 | os.path.join(root, image_root), 66 | ) 67 | 68 | # ==== Predefined splits for raw cityscapes images =========== 69 | 70 | 71 | _RAW_CITYSCAPES_SPLITS = { 72 | "cityscapes_fine_{task}_train": 73 | ("cityscapes/leftImg8bit/train", "cityscapes/gtFine/train"), 74 | "cityscapes_fine_{task}_val": 75 | ("cityscapes/leftImg8bit/val", "cityscapes/gtFine/val"), 76 | "cityscapes_fine_{task}_test": 77 | ("cityscapes/leftImg8bit/test", "cityscapes/gtFine/test"), 78 | } 79 | 80 | 81 | def register_all_cityscapes(root=osp.join( 82 | osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")): 83 | for key, (image_dir, gt_dir) in _RAW_CITYSCAPES_SPLITS.items(): 84 | meta = _get_builtin_metadata("cityscapes") 85 | image_dir = os.path.join(root, image_dir) 86 | gt_dir = os.path.join(root, gt_dir) 87 | 88 | inst_key = key.format(task="instance_seg") 89 | DatasetCatalog.register( 90 | inst_key, 91 | lambda x=image_dir, y=gt_dir: load_cityscapes_instances( 92 | x, y, from_json=True, to_polygons=True), 93 | ) 94 | MetadataCatalog.get(inst_key).set(image_dir=image_dir, 95 | gt_dir=gt_dir, 96 | evaluator_type="cityscapes", 97 | **meta) 98 | 99 | sem_key = key.format(task="sem_seg") 100 | DatasetCatalog.register( 101 | sem_key, 102 | lambda x=image_dir, y=gt_dir: load_cityscapes_semantic(x, y)) 103 | MetadataCatalog.get(sem_key).set(image_dir=image_dir, 104 | gt_dir=gt_dir, 105 | evaluator_type="sem_seg", 106 | **meta) 107 | 108 | 109 | # ==== Predefined splits for PASCAL VOC =========== 110 | def register_all_pascal_voc(root=osp.join( 111 | osp.split(osp.split(dl_lib.__file__)[0])[0], "datasets")): 112 | SPLITS = [ 113 | ("voc_2007_trainval", "VOC2007", "trainval"), 114 | ("voc_2007_train", "VOC2007", "train"), 115 | ("voc_2007_val", "VOC2007", "val"), 116 | ("voc_2007_test", "VOC2007", "test"), 117 | ("voc_2012_trainval", "VOC2012", "trainval"), 118 | ("voc_2012_train", "VOC2012", "train"), 119 | ("voc_2012_val", "VOC2012", "val"), 120 | ] 121 | for name, dirname, split in SPLITS: 122 | year = 2007 if "2007" in name else 2012 123 | register_pascal_voc(name, os.path.join(root, dirname), split, year) 124 | MetadataCatalog.get(name).evaluator_type = "pascal_voc" 125 | 126 | 127 | # Register them all under "./datasets" 128 | register_all_coco() 129 | register_all_cityscapes() 130 | register_all_pascal_voc() 131 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import os 5 | import xml.etree.ElementTree as ET 6 | 7 | import numpy as np 8 | 9 | from dl_lib.data import DatasetCatalog, MetadataCatalog 10 | from dl_lib.structures import BoxMode 11 | from dl_lib.utils.file_io import PathManager 12 | 13 | __all__ = ["register_pascal_voc"] 14 | 15 | # fmt: off 16 | CLASS_NAMES = [ 17 | "aeroplane", 18 | "bicycle", 19 | "bird", 20 | "boat", 21 | "bottle", 22 | "bus", 23 | "car", 24 | "cat", 25 | "chair", 26 | "cow", 27 | "diningtable", 28 | "dog", 29 | "horse", 30 | "motorbike", 31 | "person", 32 | "pottedplant", 33 | "sheep", 34 | "sofa", 35 | "train", 36 | "tvmonitor", 37 | ] 38 | # fmt: on 39 | 40 | 41 | def load_voc_instances(dirname: str, split: str): 42 | """ 43 | Load Pascal VOC detection annotations to dl_lib format. 44 | 45 | Args: 46 | dirname: Contain "Annotations", "ImageSets", "JPEGImages" 47 | split (str): one of "train", "test", "val", "trainval" 48 | """ 49 | with PathManager.open( 50 | os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f: 51 | fileids = np.loadtxt(f, dtype=np.str) 52 | 53 | dicts = [] 54 | for fileid in fileids: 55 | anno_file = os.path.join(dirname, "Annotations", fileid + ".xml") 56 | jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg") 57 | 58 | tree = ET.parse(anno_file) 59 | 60 | r = { 61 | "file_name": jpeg_file, 62 | "image_id": fileid, 63 | "height": int(tree.findall("./size/height")[0].text), 64 | "width": int(tree.findall("./size/width")[0].text), 65 | } 66 | instances = [] 67 | 68 | for obj in tree.findall("object"): 69 | cls = obj.find("name").text 70 | # We include "difficult" samples in training. 71 | # Based on limited experiments, they don't hurt accuracy. 72 | # difficult = int(obj.find("difficult").text) 73 | # if difficult == 1: 74 | # continue 75 | bbox = obj.find("bndbox") 76 | bbox = [ 77 | float(bbox.find(x).text) 78 | for x in ["xmin", "ymin", "xmax", "ymax"] 79 | ] 80 | # Original annotations are integers in the range [1, W or H] 81 | # Assuming they mean 1-based pixel indices (inclusive), 82 | # a box with annotation (xmin=1, xmax=W) covers the whole image. 83 | # In coordinate space this is represented by (xmin=0, xmax=W) 84 | bbox[0] -= 1.0 85 | bbox[1] -= 1.0 86 | instances.append({ 87 | "category_id": CLASS_NAMES.index(cls), 88 | "bbox": bbox, 89 | "bbox_mode": BoxMode.XYXY_ABS 90 | }) 91 | r["annotations"] = instances 92 | dicts.append(r) 93 | return dicts 94 | 95 | 96 | def register_pascal_voc(name, dirname, split, year): 97 | DatasetCatalog.register(name, lambda: load_voc_instances(dirname, split)) 98 | MetadataCatalog.get(name).set(thing_classes=CLASS_NAMES, 99 | dirname=dirname, 100 | year=year, 101 | split=split) 102 | -------------------------------------------------------------------------------- /dl_lib/data/datasets/register_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import copy 3 | 4 | from dl_lib.data import DatasetCatalog, MetadataCatalog 5 | 6 | from .coco import load_coco_json, load_sem_seg 7 | """ 8 | This file contains functions to register a COCO-format dataset to the DatasetCatalog. 9 | """ 10 | 11 | __all__ = ["register_coco_instances", "register_coco_panoptic_separated"] 12 | 13 | 14 | def register_coco_instances(name, metadata, json_file, image_root): 15 | """ 16 | Register a dataset in COCO's json annotation format for 17 | instance detection, instance segmentation and keypoint detection. 
18 | (i.e., Type 1 and 2 in http://cocodataset.org/#format-data. 19 | `instances*.json` and `person_keypoints*.json` in the dataset). 20 | 21 | This is an example of how to register a new dataset. 22 | You can do something similar to this function, to register new datasets. 23 | 24 | Args: 25 | name (str): the name that identifies a dataset, e.g. "coco_2014_train". 26 | metadata (dict): extra metadata associated with this dataset. You can 27 | leave it as an empty dict. 28 | json_file (str): path to the json instance annotation file. 29 | image_root (str): directory which contains all the images. 30 | """ 31 | # 1. register a function which returns dicts 32 | DatasetCatalog.register( 33 | name, lambda: load_coco_json(json_file, image_root, name)) 34 | 35 | # 2. Optionally, add metadata about this dataset, 36 | # since they might be useful in evaluation, visualization or logging 37 | MetadataCatalog.get(name).set(json_file=json_file, 38 | image_root=image_root, 39 | evaluator_type="coco", 40 | **metadata) 41 | 42 | 43 | def register_coco_panoptic_separated(name, metadata, image_root, panoptic_root, 44 | panoptic_json, sem_seg_root, 45 | instances_json): 46 | """ 47 | Register a COCO panoptic segmentation dataset named `name`. 48 | The annotations in this registered dataset will contain both instance annotations and 49 | semantic annotations, each with its own contiguous ids. Hence it's called "separated". 50 | 51 | It follows the setting used by the PanopticFPN paper: 52 | 53 | 1. The instance annotations directly come from polygons in the COCO 54 | instances annotation task, rather than from the masks in the COCO panoptic annotations. 55 | 56 | The two format have small differences: 57 | Polygons in the instance annotations may have overlaps. 58 | The mask annotations are produced by labeling the overlapped polygons 59 | with depth ordering. 60 | 61 | 2. The semantic annotations are converted from panoptic annotations, where 62 | all "things" are assigned a semantic id of 0. 63 | All semantic categories will therefore have ids in contiguous 64 | range [1, #stuff_categories]. 65 | 66 | This function will also register a pure semantic segmentation dataset 67 | named ``name + '_stuffonly'``. 68 | 69 | Args: 70 | name (str): the name that identifies a dataset, 71 | e.g. "coco_2017_train_panoptic" 72 | metadata (dict): extra metadata associated with this dataset. 73 | image_root (str): directory which contains all the images 74 | panoptic_root (str): directory which contains panoptic annotation images 75 | panoptic_json (str): path to the json panoptic annotation file 76 | sem_seg_root (str): directory which contains all the ground truth segmentation annotations. 
77 | instances_json (str): path to the json instance annotation file 78 | """ 79 | panoptic_name = name + "_separated" 80 | DatasetCatalog.register( 81 | panoptic_name, 82 | lambda: merge_to_panoptic( 83 | load_coco_json(instances_json, image_root, panoptic_name), 84 | load_sem_seg(sem_seg_root, image_root), 85 | ), 86 | ) 87 | MetadataCatalog.get(panoptic_name).set( 88 | panoptic_root=panoptic_root, 89 | image_root=image_root, 90 | panoptic_json=panoptic_json, 91 | sem_seg_root=sem_seg_root, 92 | json_file=instances_json, # TODO rename 93 | evaluator_type="coco_panoptic_seg", 94 | **metadata) 95 | 96 | semantic_name = name + "_stuffonly" 97 | DatasetCatalog.register(semantic_name, 98 | lambda: load_sem_seg(sem_seg_root, image_root)) 99 | MetadataCatalog.get(semantic_name).set(sem_seg_root=sem_seg_root, 100 | image_root=image_root, 101 | evaluator_type="sem_seg", 102 | **metadata) 103 | 104 | 105 | def merge_to_panoptic(detection_dicts, sem_seg_dicts): 106 | """ 107 | Create dataset dicts for panoptic segmentation, by 108 | merging two dicts using "file_name" field to match their entries. 109 | 110 | Args: 111 | detection_dicts (list[dict]): lists of dicts for object detection or instance segmentation. 112 | sem_seg_dicts (list[dict]): lists of dicts for semantic segmentation. 113 | 114 | Returns: 115 | list[dict] (one per input image): Each dict contains all (key, value) pairs from dicts in 116 | both detection_dicts and sem_seg_dicts that correspond to the same image. 117 | The function assumes that the same key in different dicts has the same value. 118 | """ 119 | results = [] 120 | sem_seg_file_to_entry = {x["file_name"]: x for x in sem_seg_dicts} 121 | assert len(sem_seg_file_to_entry) > 0 122 | 123 | for det_dict in detection_dicts: 124 | dic = copy.copy(det_dict) 125 | dic.update(sem_seg_file_to_entry[dic["file_name"]]) 126 | results.append(dic) 127 | return results 128 | -------------------------------------------------------------------------------- /dl_lib/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | 5 | __all__ = [ 6 | "GroupedBatchSampler", 7 | "TrainingSampler", 8 | "InferenceSampler", 9 | "RepeatFactorTrainingSampler", 10 | ] 11 | -------------------------------------------------------------------------------- /dl_lib/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | from torch.utils.data.sampler import BatchSampler, Sampler 4 | 5 | 6 | class GroupedBatchSampler(BatchSampler): 7 | """ 8 | Wraps another sampler to yield a mini-batch of indices. 9 | It enforces that the batch only contain elements from the same group. 10 | It also tries to provide mini-batches which follows an ordering which is 11 | as close as possible to the ordering from the original sampler. 12 | """ 13 | def __init__(self, sampler, group_ids, batch_size): 14 | """ 15 | Args: 16 | sampler (Sampler): Base sampler. 17 | group_ids (list[int]): If the sampler produces indices in range [0, N), 18 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 
19 | The group ids must be a set of integers in the range [0, num_groups). 20 | batch_size (int): Size of mini-batch. 21 | """ 22 | if not isinstance(sampler, Sampler): 23 | raise ValueError( 24 | "sampler should be an instance of " 25 | "torch.utils.data.Sampler, but got sampler={}".format(sampler)) 26 | self.sampler = sampler 27 | self.group_ids = np.asarray(group_ids) 28 | assert self.group_ids.ndim == 1 29 | self.batch_size = batch_size 30 | groups = np.unique(self.group_ids).tolist() 31 | 32 | # buffer the indices of each group until batch size is reached 33 | self.buffer_per_group = {k: [] for k in groups} 34 | 35 | def __iter__(self): 36 | for idx in self.sampler: 37 | group_id = self.group_ids[idx] 38 | group_buffer = self.buffer_per_group[group_id] 39 | group_buffer.append(idx) 40 | if len(group_buffer) == self.batch_size: 41 | yield group_buffer[:] # yield a copy of the list 42 | del group_buffer[:] 43 | 44 | def __len__(self): 45 | raise NotImplementedError( 46 | "len() of GroupedBatchSampler is not well-defined.") 47 | -------------------------------------------------------------------------------- /dl_lib/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .transform import * 3 | from .extend_transform import * 4 | from .transform_gen import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | -------------------------------------------------------------------------------- /dl_lib/data/transforms/transform.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: transform.py 4 | 5 | import numpy as np 6 | from PIL import Image 7 | 8 | from .extend_transform import HFlipTransform, NoOpTransform, Transform 9 | 10 | __all__ = ["ExtentTransform", "ResizeTransform"] 11 | 12 | 13 | class ExtentTransform(Transform): 14 | """ 15 | Extracts a subregion from the source image and scales it to the output size. 16 | 17 | The fill color is used to map pixels from the source rect that fall outside 18 | the source image. 19 | 20 | See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform 21 | """ 22 | def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): 23 | """ 24 | Args: 25 | src_rect (x0, y0, x1, y1): src coordinates 26 | output_size (h, w): dst image size 27 | interp: PIL interpolation methods 28 | fill: Fill color used when src_rect extends outside image 29 | """ 30 | super().__init__() 31 | self._set_attributes(locals()) 32 | 33 | def apply_image(self, img, interp=None): 34 | h, w = self.output_size 35 | ret = Image.fromarray(img).transform( 36 | size=(w, h), 37 | method=Image.EXTENT, 38 | data=self.src_rect, 39 | resample=interp if interp else self.interp, 40 | fill=self.fill, 41 | ) 42 | return np.asarray(ret) 43 | 44 | def apply_coords(self, coords): 45 | # Transform image center from source coordinates into output coordinates 46 | # and then map the new origin to the corner of the output image. 
47 | h, w = self.output_size 48 | x0, y0, x1, y1 = self.src_rect 49 | new_coords = coords.astype(np.float32) 50 | new_coords[:, 0] -= 0.5 * (x0 + x1) 51 | new_coords[:, 1] -= 0.5 * (y0 + y1) 52 | new_coords[:, 0] *= w / (x1 - x0) 53 | new_coords[:, 1] *= h / (y1 - y0) 54 | new_coords[:, 0] += 0.5 * w 55 | new_coords[:, 1] += 0.5 * h 56 | return new_coords 57 | 58 | def apply_segmentation(self, segmentation): 59 | segmentation = self.apply_image(segmentation, interp=Image.NEAREST) 60 | return segmentation 61 | 62 | 63 | class ResizeTransform(Transform): 64 | """ 65 | Resize the image to a target size. 66 | """ 67 | def __init__(self, h, w, new_h, new_w, interp): 68 | """ 69 | Args: 70 | h, w (int): original image size 71 | new_h, new_w (int): new image size 72 | interp: PIL interpolation methods 73 | """ 74 | # TODO decide on PIL vs opencv 75 | super().__init__() 76 | self._set_attributes(locals()) 77 | 78 | def apply_image(self, img, interp=None): 79 | assert img.shape[:2] == (self.h, self.w) 80 | pil_image = Image.fromarray(img) 81 | interp_method = interp if interp is not None else self.interp 82 | pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) 83 | ret = np.asarray(pil_image) 84 | return ret 85 | 86 | def apply_coords(self, coords): 87 | coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) 88 | coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) 89 | return coords 90 | 91 | def apply_segmentation(self, segmentation): 92 | segmentation = self.apply_image(segmentation, interp=Image.NEAREST) 93 | return segmentation 94 | 95 | 96 | def HFlip_rotated_box(transform, rotated_boxes): 97 | """ 98 | Apply the horizontal flip transform on rotated boxes. 99 | 100 | Args: 101 | rotated_boxes (ndarray): Nx5 floating point array of 102 | (x_center, y_center, width, height, angle_degrees) format 103 | in absolute coordinates. 104 | """ 105 | # Transform x_center 106 | rotated_boxes[:, 0] = transform.width - rotated_boxes[:, 0] 107 | # Transform angle 108 | rotated_boxes[:, 4] = -rotated_boxes[:, 4] 109 | return rotated_boxes 110 | 111 | 112 | def Resize_rotated_box(transform, rotated_boxes): 113 | """ 114 | Apply the resizing transform on rotated boxes. For details of how these (approximation) 115 | formulas are derived, please refer to :meth:`RotatedBoxes.scale`. 116 | 117 | Args: 118 | rotated_boxes (ndarray): Nx5 floating point array of 119 | (x_center, y_center, width, height, angle_degrees) format 120 | in absolute coordinates. 
121 | """ 122 | scale_factor_x = transform.new_w * 1.0 / transform.w 123 | scale_factor_y = transform.new_h * 1.0 / transform.h 124 | rotated_boxes[:, 0] *= scale_factor_x 125 | rotated_boxes[:, 1] *= scale_factor_y 126 | theta = rotated_boxes[:, 4] * np.pi / 180.0 127 | c = np.cos(theta) 128 | s = np.sin(theta) 129 | rotated_boxes[:, 2] *= np.sqrt( 130 | np.square(scale_factor_x * c) + np.square(scale_factor_y * s)) 131 | rotated_boxes[:, 3] *= np.sqrt( 132 | np.square(scale_factor_x * s) + np.square(scale_factor_y * c)) 133 | rotated_boxes[:, 4] = np.arctan2(scale_factor_x * s, 134 | scale_factor_y * c) * 180 / np.pi 135 | 136 | return rotated_boxes 137 | 138 | 139 | HFlipTransform.register_type("rotated_box", HFlip_rotated_box) 140 | NoOpTransform.register_type("rotated_box", lambda t, x: x) 141 | ResizeTransform.register_type("rotated_box", Resize_rotated_box) 142 | -------------------------------------------------------------------------------- /dl_lib/data/transforms/transform_util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import numpy as np 5 | import torch 6 | 7 | 8 | # pyre-ignore-all-errors 9 | def to_float_tensor(numpy_array: np.ndarray) -> torch.Tensor: 10 | """ 11 | Convert the numpy array to torch float tensor with dimension of NxCxHxW. 12 | Pytorch is not fully supporting uint8, so convert tensor to float if the 13 | numpy_array is uint8. 14 | Args: 15 | numpy_array (ndarray): of shape NxHxWxC, or HxWxC or HxW to 16 | represent an image. The array can be of type uint8 in range 17 | [0, 255], or floating point in range [0, 1] or [0, 255]. 18 | Returns: 19 | float_tensor (tensor): converted float tensor. 20 | """ 21 | assert isinstance(numpy_array, np.ndarray) 22 | assert len(numpy_array.shape) in (2, 3, 4) 23 | 24 | # Some of the input numpy array has negative strides. Pytorch currently 25 | # does not support negative strides, perform ascontiguousarray to 26 | # resolve the issue. 27 | float_tensor = torch.from_numpy(np.ascontiguousarray(numpy_array)) 28 | if numpy_array.dtype == np.uint8: 29 | float_tensor = float_tensor.float() 30 | 31 | if len(numpy_array.shape) == 2: 32 | # HxW -> 1x1xHxW. 33 | float_tensor = float_tensor[None, None, :, :] 34 | elif len(numpy_array.shape) == 3: 35 | # HxWxC -> 1xCxHxW. 36 | float_tensor = float_tensor.permute(2, 0, 1) 37 | float_tensor = float_tensor[None, :, :, :] 38 | elif len(numpy_array.shape) == 4: 39 | # NxHxWxC -> NxCxHxW 40 | float_tensor = float_tensor.permute(0, 3, 1, 2) 41 | else: 42 | raise NotImplementedError("Unknow numpy_array dimension of {}".format( 43 | float_tensor.shape)) 44 | return float_tensor 45 | 46 | 47 | def to_numpy(float_tensor: torch.Tensor, target_shape: list, 48 | target_dtype: np.dtype) -> np.ndarray: 49 | """ 50 | Convert float tensor with dimension of NxCxHxW back to numpy array. 51 | Args: 52 | float_tensor (tensor): a float pytorch tensor with shape of NxCxHxW. 53 | target_shape (list): the target shape of the numpy array to represent 54 | the image as output. options include NxHxWxC, or HxWxC or HxW. 55 | target_dtype (dtype): the target dtype of the numpy array to represent 56 | the image as output. The array can be of type uint8 in range 57 | [0, 255], or floating point in range [0, 1] or [0, 255]. 58 | Returns: 59 | (ndarray): converted numpy array. 
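    Example (an illustrative sketch using only the helpers defined in this file)::

        img = np.zeros((4, 5, 3), dtype=np.uint8)       # HxWxC uint8 image
        tensor = to_float_tensor(img)                   # float tensor of shape 1x3x4x5
        back = to_numpy(tensor, img.shape, img.dtype)   # HxWxC uint8 array again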
60 | """ 61 | assert len(target_shape) in (2, 3, 4) 62 | 63 | if len(target_shape) == 2: 64 | # 1x1xHxW -> HxW. 65 | assert float_tensor.shape[0] == 1 66 | assert float_tensor.shape[1] == 1 67 | float_tensor = float_tensor[0, 0, :, :] 68 | elif len(target_shape) == 3: 69 | assert float_tensor.shape[0] == 1 70 | # 1xCxHxW -> HxWxC. 71 | float_tensor = float_tensor[0].permute(1, 2, 0) 72 | elif len(target_shape) == 4: 73 | # NxCxHxW -> NxHxWxC 74 | float_tensor = float_tensor.permute(0, 2, 3, 1) 75 | else: 76 | raise NotImplementedError( 77 | "Unknow target shape dimension of {}".format(target_shape)) 78 | if target_dtype == np.uint8: 79 | # Need to specifically call round here, notice in pytroch the round 80 | # is half to even. 81 | # https://github.com/pytorch/pytorch/issues/16498 82 | float_tensor = float_tensor.round().byte() 83 | return float_tensor.numpy() 84 | -------------------------------------------------------------------------------- /dl_lib/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .launch import * 4 | from .train_loop import * 5 | 6 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 7 | 8 | # prefer to let hooks and defaults live in separate namespaces (therefore not in __all__) 9 | # but still make them available here 10 | from .hooks import * 11 | from .defaults import * 12 | from .custom import * 13 | -------------------------------------------------------------------------------- /dl_lib/engine/launch.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | 4 | import torch 5 | import torch.distributed as dist 6 | import torch.multiprocessing as mp 7 | 8 | from dl_lib.utils import comm 9 | 10 | __all__ = ["launch"] 11 | 12 | 13 | def _find_free_port(): 14 | import socket 15 | 16 | sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 17 | # Binding to port 0 will cause the OS to find an available port for us 18 | sock.bind(("", 0)) 19 | port = sock.getsockname()[1] 20 | sock.close() 21 | # NOTE: there is still a chance the port could be taken by other processes. 22 | return port 23 | 24 | 25 | def launch(main_func, 26 | num_gpus_per_machine, 27 | num_machines=1, 28 | machine_rank=0, 29 | dist_url=None, 30 | args=()): 31 | """ 32 | Args: 33 | main_func: a function that will be called by `main_func(*args)` 34 | num_machines (int): the total number of machines 35 | machine_rank (int): the rank of this machine (one per machine) 36 | dist_url (str): url to connect to for distributed training, including protocol 37 | e.g. "tcp://127.0.0.1:8686". 38 | Can be set to auto to automatically select a free port on localhost 39 | args (tuple): arguments passed to main_func 40 | """ 41 | world_size = num_machines * num_gpus_per_machine 42 | if world_size > 1: 43 | # https://github.com/pytorch/pytorch/pull/14391 44 | # TODO prctl in spawned processes 45 | 46 | if dist_url == "auto": 47 | assert num_machines == 1, "dist_url=auto cannot work with distributed training." 
48 | port = _find_free_port() 49 | dist_url = f"tcp://127.0.0.1:{port}" 50 | 51 | mp.spawn( 52 | _distributed_worker, 53 | nprocs=num_gpus_per_machine, 54 | args=(main_func, world_size, num_gpus_per_machine, machine_rank, 55 | dist_url, args), 56 | daemon=False, 57 | ) 58 | else: 59 | main_func(*args) 60 | 61 | 62 | def _distributed_worker(local_rank, main_func, world_size, 63 | num_gpus_per_machine, machine_rank, dist_url, args): 64 | assert torch.cuda.is_available( 65 | ), "cuda is not available. Please check your installation." 66 | global_rank = machine_rank * num_gpus_per_machine + local_rank 67 | try: 68 | dist.init_process_group(backend="NCCL", 69 | init_method=dist_url, 70 | world_size=world_size, 71 | rank=global_rank) 72 | except Exception as e: 73 | logger = logging.getLogger(__name__) 74 | logger.error("Process group URL: {}".format(dist_url)) 75 | raise e 76 | # synchronize is needed here to prevent a possible timeout after calling init_process_group 77 | # See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172 78 | comm.synchronize() 79 | 80 | assert num_gpus_per_machine <= torch.cuda.device_count() 81 | torch.cuda.set_device(local_rank) 82 | 83 | # Setup the local process group (which contains ranks within the same machine) 84 | assert comm._LOCAL_PROCESS_GROUP is None 85 | num_machines = world_size // num_gpus_per_machine 86 | for i in range(num_machines): 87 | ranks_on_i = list( 88 | range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine)) 89 | pg = dist.new_group(ranks_on_i) 90 | if i == machine_rank: 91 | comm._LOCAL_PROCESS_GROUP = pg 92 | 93 | main_func(*args) 94 | -------------------------------------------------------------------------------- /dl_lib/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .cityscapes_evaluation import CityscapesEvaluator 3 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 4 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 5 | from .sem_seg_evaluation import SemSegEvaluator 6 | from .testing import print_csv_format, verify_results 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /dl_lib/evaluation/cityscapes_evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import glob 3 | import logging 4 | import os 5 | import tempfile 6 | from collections import OrderedDict 7 | 8 | import torch 9 | from PIL import Image 10 | 11 | from dl_lib.data import MetadataCatalog 12 | from dl_lib.utils import comm 13 | 14 | from .evaluator import DatasetEvaluator 15 | 16 | 17 | class CityscapesEvaluator(DatasetEvaluator): 18 | """ 19 | Evaluate instance segmentation results using cityscapes API. 20 | 21 | Note: 22 | * It does not work in multi-machine distributed training. 23 | * It contains a synchronization, therefore has to be used on all ranks. 24 | """ 25 | def __init__(self, dataset_name): 26 | """ 27 | Args: 28 | dataset_name (str): the name of the dataset. 29 | It must have the following metadata associated with it: 30 | "thing_classes", "gt_dir". 
31 | """ 32 | self._metadata = MetadataCatalog.get(dataset_name) 33 | self._cpu_device = torch.device("cpu") 34 | self._logger = logging.getLogger(__name__) 35 | 36 | def reset(self): 37 | self._working_dir = tempfile.TemporaryDirectory( 38 | prefix="cityscapes_eval_") 39 | self._temp_dir = self._working_dir.name 40 | # All workers will write to the same results directory 41 | # TODO this does not work in distributed training 42 | self._temp_dir = comm.all_gather(self._temp_dir)[0] 43 | if self._temp_dir != self._working_dir.name: 44 | self._working_dir.cleanup() 45 | self._logger.info( 46 | "Writing cityscapes results to temporary directory {} ...".format( 47 | self._temp_dir)) 48 | 49 | def process(self, inputs, outputs): 50 | from cityscapesscripts.helpers.labels import name2label 51 | 52 | for input, output in zip(inputs, outputs): 53 | file_name = input["file_name"] 54 | basename = os.path.splitext(os.path.basename(file_name))[0] 55 | pred_txt = os.path.join(self._temp_dir, basename + "_pred.txt") 56 | 57 | output = output["instances"].to(self._cpu_device) 58 | num_instances = len(output) 59 | with open(pred_txt, "w") as fout: 60 | for i in range(num_instances): 61 | pred_class = output.pred_classes[i] 62 | classes = self._metadata.thing_classes[pred_class] 63 | class_id = name2label[classes].id 64 | score = output.scores[i] 65 | mask = output.pred_masks[i].numpy().astype("uint8") 66 | png_filename = os.path.join( 67 | self._temp_dir, 68 | basename + "_{}_{}.png".format(i, classes)) 69 | 70 | Image.fromarray(mask * 255).save(png_filename) 71 | fout.write("{} {} {}\n".format( 72 | os.path.basename(png_filename), class_id, score)) 73 | 74 | def evaluate(self): 75 | """ 76 | Returns: 77 | dict: has a key "segm", whose value is a dict of "AP" and "AP50". 78 | """ 79 | comm.synchronize() 80 | if comm.get_rank() > 0: 81 | return 82 | os.environ["CITYSCAPES_DATASET"] = os.path.abspath( 83 | os.path.join(self._metadata.gt_dir, "..", "..")) 84 | # Load the Cityscapes eval script *after* setting the required env var, 85 | # since the script reads CITYSCAPES_DATASET into global variables at load time. 86 | import cityscapesscripts.evaluation.evalInstanceLevelSemanticLabeling as cityscapes_eval 87 | 88 | self._logger.info("Evaluating results under {} ...".format( 89 | self._temp_dir)) 90 | 91 | # set some global states in cityscapes evaluation API, before evaluating 92 | cityscapes_eval.args.predictionPath = os.path.abspath(self._temp_dir) 93 | cityscapes_eval.args.predictionWalk = None 94 | cityscapes_eval.args.JSONOutput = False 95 | cityscapes_eval.args.colorized = False 96 | cityscapes_eval.args.gtInstancesFile = os.path.join( 97 | self._temp_dir, "gtInstances.json") 98 | 99 | # These lines are adopted from 100 | # https://github.com/mcordts/cityscapesScripts/blob/master/cityscapesscripts/evaluation/evalInstanceLevelSemanticLabeling.py # noqa 101 | groundTruthImgList = glob.glob(cityscapes_eval.args.groundTruthSearch) 102 | assert len( 103 | groundTruthImgList 104 | ), "Cannot find any ground truth images to use for evaluation. 
Searched for: {}".format( 105 | cityscapes_eval.args.groundTruthSearch) 106 | predictionImgList = [] 107 | for gt in groundTruthImgList: 108 | predictionImgList.append( 109 | cityscapes_eval.getPrediction(gt, cityscapes_eval.args)) 110 | results = cityscapes_eval.evaluateImgLists( 111 | predictionImgList, groundTruthImgList, 112 | cityscapes_eval.args)["averages"] 113 | 114 | ret = OrderedDict() 115 | ret["segm"] = { 116 | "AP": results["allAp"] * 100, 117 | "AP50": results["allAp50%"] * 100 118 | } 119 | self._working_dir.cleanup() 120 | return ret 121 | -------------------------------------------------------------------------------- /dl_lib/evaluation/evaluator.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import datetime 3 | import logging 4 | import time 5 | from collections import OrderedDict 6 | from contextlib import contextmanager 7 | 8 | import torch 9 | 10 | from dl_lib.utils.comm import is_main_process 11 | 12 | 13 | class DatasetEvaluator: 14 | """ 15 | Base class for a dataset evaluator. 16 | 17 | The function :func:`inference_on_dataset` runs the model over 18 | all samples in the dataset, and uses a DatasetEvaluator to process the inputs/outputs. 19 | 20 | This class will accumulate information of the inputs/outputs (by :meth:`process`), 21 | and produce evaluation results in the end (by :meth:`evaluate`). 22 | """ 23 | def reset(self): 24 | """ 25 | Preparation for a new round of evaluation. 26 | Should be called before starting a round of evaluation. 27 | """ 28 | pass 29 | 30 | def process(self, input, output): 31 | """ 32 | Process an input/output pair. 33 | 34 | Args: 35 | input: the input that's used to call the model. 36 | output: the return value of `model(input)` 37 | """ 38 | pass 39 | 40 | def evaluate(self): 41 | """ 42 | Evaluate/summarize the performance, after processing all input/output pairs. 43 | 44 | Returns: 45 | dict: 46 | A new evaluator class can return a dict of arbitrary format 47 | as long as the user can process the results. 48 | In our train_net.py, we expect the following format: 49 | 50 | * key: the name of the task (e.g., bbox) 51 | * value: a dict of {metric name: score}, e.g.: {"AP50": 80} 52 | """ 53 | pass 54 | 55 | 56 | class DatasetEvaluators(DatasetEvaluator): 57 | def __init__(self, evaluators): 58 | assert len(evaluators) 59 | super().__init__() 60 | self._evaluators = evaluators 61 | 62 | def reset(self): 63 | for evaluator in self._evaluators: 64 | evaluator.reset() 65 | 66 | def process(self, input, output): 67 | for evaluator in self._evaluators: 68 | evaluator.process(input, output) 69 | 70 | def evaluate(self): 71 | results = OrderedDict() 72 | for evaluator in self._evaluators: 73 | result = evaluator.evaluate() 74 | if is_main_process(): 75 | for k, v in result.items(): 76 | assert ( 77 | k not in results 78 | ), "Different evaluators produce results with the same key {}".format( 79 | k) 80 | results[k] = v 81 | return results 82 | 83 | 84 | def inference_on_dataset(model, data_loader, evaluator):  85 | """ 86 | Run the model on the data_loader and evaluate the metrics with the evaluator. 87 | The model will be used in eval mode. 88 | 89 | Args: 90 | model (nn.Module): a module which accepts an object from 91 | `data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
92 | 93 | If you wish to evaluate a model in `training` mode instead, you can 94 | wrap the given model and override its behavior of `.eval()` and `.train()`. 95 | data_loader: an iterable object with a length. 96 | The elements it generates will be the inputs to the model. 97 | evaluator (DatasetEvaluator): the evaluator to run. Use 98 | :class:`DatasetEvaluators([])` if you only want to benchmark, but 99 | don't want to do any evaluation. 100 | 101 | Returns: 102 | The return value of `evaluator.evaluate()` 103 | """ 104 | num_devices = torch.distributed.get_world_size( 105 | ) if torch.distributed.is_initialized() else 1 106 | logger = logging.getLogger(__name__) 107 | logger.info("Start inference on {} images".format(len(data_loader))) 108 | 109 | total = len(data_loader) # inference data loader must have a fixed length 110 | evaluator.reset() 111 | 112 | logging_interval = 50 113 | num_warmup = min(5, logging_interval - 1, total - 1) 114 | start_time = time.time() 115 | total_compute_time = 0 116 | with inference_context(model), torch.no_grad(): 117 | for idx, inputs in enumerate(data_loader): 118 | if idx == num_warmup: 119 | start_time = time.time() 120 | total_compute_time = 0 121 | 122 | start_compute_time = time.time() 123 | outputs = model(inputs) 124 | if torch.cuda.is_available(): 125 | torch.cuda.synchronize() 126 | total_compute_time += time.time() - start_compute_time 127 | evaluator.process(inputs, outputs) 128 | 129 | if (idx + 1) % logging_interval == 0: 130 | duration = time.time() - start_time 131 | seconds_per_img = duration / (idx + 1 - num_warmup) 132 | eta = datetime.timedelta(seconds=int(seconds_per_img * 133 | (total - num_warmup) - 134 | duration)) 135 | logger.info( 136 | "Inference done {}/{}. {:.4f} s / img. ETA={}".format( 137 | idx + 1, total, seconds_per_img, str(eta))) 138 | 139 | # Measure the time only for this worker (before the synchronization barrier) 140 | total_time = int(time.time() - start_time) 141 | total_time_str = str(datetime.timedelta(seconds=total_time)) 142 | # NOTE this format is parsed by grep 143 | logger.info( 144 | "Total inference time: {} ({:.6f} s / img per device, on {} devices)". 145 | format(total_time_str, total_time / (total - num_warmup), num_devices)) 146 | total_compute_time_str = str( 147 | datetime.timedelta(seconds=int(total_compute_time))) 148 | logger.info( 149 | "Total inference pure compute time: {} ({:.6f} s / img per device, on {} devices)" 150 | .format(total_compute_time_str, 151 | total_compute_time / (total - num_warmup), num_devices)) 152 | 153 | results = evaluator.evaluate() 154 | # An evaluator may return None when not in main process. 155 | # Replace it by an empty dict instead to make it easier for downstream code to handle 156 | if results is None: 157 | results = {} 158 | return results 159 | 160 | 161 | @contextmanager 162 | def inference_context(model): 163 | """ 164 | A context where the model is temporarily changed to eval mode, 165 | and restored to previous mode afterwards. 166 | 167 | Args: 168 | model: a torch Module 169 | """ 170 | training_mode = model.training 171 | model.eval() 172 | yield 173 | model.train(training_mode) 174 | -------------------------------------------------------------------------------- /dl_lib/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import logging 3 | import pprint 4 | import sys 5 | from collections import Mapping, OrderedDict 6 | 7 | import numpy as np 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet. 14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance( 19 | results, 20 | OrderedDict), results # unordered results cannot be properly printed 21 | logger = logging.getLogger(__name__) 22 | for task, res in results.items(): 23 | # Don't print "AP-category" metrics since they are usually not tracked. 24 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 25 | logger.info("copypaste: Task: {}".format(task)) 26 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 27 | logger.info("copypaste: " + 28 | ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 29 | 30 | 31 | def verify_results(cfg, results): 32 | """ 33 | Args: 34 | results (OrderedDict[dict]): task_name -> {metric -> score} 35 | 36 | Returns: 37 | bool: whether the verification succeeds or not 38 | """ 39 | expected_results = cfg.TEST.EXPECTED_RESULTS 40 | if not len(expected_results): 41 | return True 42 | 43 | ok = True 44 | for task, metric, expected, tolerance in expected_results: 45 | actual = results[task][metric] 46 | if not np.isfinite(actual): 47 | ok = False 48 | diff = abs(actual - expected) 49 | if diff > tolerance: 50 | ok = False 51 | 52 | logger = logging.getLogger(__name__) 53 | if not ok: 54 | logger.error("Result verification failed!") 55 | logger.error("Expected Results: " + str(expected_results)) 56 | logger.error("Actual Results: " + pprint.pformat(results)) 57 | 58 | sys.exit(1) 59 | else: 60 | logger.info("Results verification passed.") 61 | return ok 62 | 63 | 64 | def flatten_results_dict(results): 65 | """ 66 | Expand a hierarchical dict of scalars into a flat dict of scalars. 67 | If results[k1][k2][k3] = v, the returned dict will have the entry 68 | {"k1/k2/k3": v}. 69 | 70 | Args: 71 | results (dict): 72 | """ 73 | r = {} 74 | for k, v in results.items(): 75 | if isinstance(v, Mapping): 76 | v = flatten_results_dict(v) 77 | for kk, vv in v.items(): 78 | r[k + "/" + kk] = vv 79 | else: 80 | r[k] = v 81 | return r 82 | -------------------------------------------------------------------------------- /dl_lib/layers/ROIAlign/ROIAlign.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace dl_lib { 6 | 7 | at::Tensor ROIAlign_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio, 14 | bool aligned); 15 | 16 | at::Tensor ROIAlign_backward_cpu( 17 | const at::Tensor& grad, 18 | const at::Tensor& rois, 19 | const float spatial_scale, 20 | const int pooled_height, 21 | const int pooled_width, 22 | const int batch_size, 23 | const int channels, 24 | const int height, 25 | const int width, 26 | const int sampling_ratio, 27 | bool aligned); 28 | 29 | #ifdef WITH_CUDA 30 | at::Tensor ROIAlign_forward_cuda( 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int sampling_ratio, 37 | bool aligned); 38 | 39 | at::Tensor ROIAlign_backward_cuda( 40 | const at::Tensor& grad, 41 | const at::Tensor& rois, 42 | const float spatial_scale, 43 | const int pooled_height, 44 | const int pooled_width, 45 | const int batch_size, 46 | const int channels, 47 | const int height, 48 | const int width, 49 | const int sampling_ratio, 50 | bool aligned); 51 | #endif 52 | 53 | // Interface for Python 54 | inline at::Tensor ROIAlign_forward( 55 | const at::Tensor& input, 56 | const at::Tensor& rois, 57 | const float spatial_scale, 58 | const int pooled_height, 59 | const int pooled_width, 60 | const int sampling_ratio, 61 | bool aligned) { 62 | if (input.type().is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return ROIAlign_forward_cuda( 65 | input, 66 | rois, 67 | spatial_scale, 68 | pooled_height, 69 | pooled_width, 70 | sampling_ratio, 71 | aligned); 72 | #else 73 | AT_ERROR("Not compiled with GPU support"); 74 | #endif 75 | } 76 | return ROIAlign_forward_cpu( 77 | input, 78 | rois, 79 | spatial_scale, 80 | pooled_height, 81 | pooled_width, 82 | sampling_ratio, 83 | aligned); 84 | } 85 | 86 | inline at::Tensor ROIAlign_backward( 87 | const at::Tensor& grad, 88 | const at::Tensor& rois, 89 | const float spatial_scale, 90 | const int pooled_height, 91 | const int pooled_width, 92 | const int batch_size, 93 | const int channels, 94 | const int height, 95 | const int width, 96 | const int sampling_ratio, 97 | bool aligned) { 98 | if (grad.type().is_cuda()) { 99 | #ifdef WITH_CUDA 100 | return ROIAlign_backward_cuda( 101 | grad, 102 | rois, 103 | spatial_scale, 104 | pooled_height, 105 | pooled_width, 106 | batch_size, 107 | channels, 108 | height, 109 | width, 110 | sampling_ratio, 111 | aligned); 112 | #else 113 | AT_ERROR("Not compiled with GPU support"); 114 | #endif 115 | } 116 | return ROIAlign_backward_cpu( 117 | grad, 118 | rois, 119 | spatial_scale, 120 | pooled_height, 121 | pooled_width, 122 | batch_size, 123 | channels, 124 | height, 125 | width, 126 | sampling_ratio, 127 | aligned); 128 | } 129 | 130 | } // namespace dl_lib 131 | -------------------------------------------------------------------------------- /dl_lib/layers/ROIAlign/roi_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from dl_lib import _C 8 | 9 | 10 | class _ROIAlign(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio, 13 | aligned): 14 | ctx.save_for_backward(roi) 15 | ctx.output_size = _pair(output_size) 16 | ctx.spatial_scale = spatial_scale 17 | ctx.sampling_ratio = sampling_ratio 18 | ctx.input_shape = input.size() 19 | ctx.aligned = aligned 20 | output = _C.roi_align_forward(input, roi, spatial_scale, 21 | output_size[0], output_size[1], 22 | sampling_ratio, aligned) 23 | return output 24 | 25 | @staticmethod 26 | @once_differentiable 27 | def backward(ctx, grad_output): 28 | rois, = ctx.saved_tensors 29 | output_size = ctx.output_size 30 | spatial_scale = ctx.spatial_scale 31 | sampling_ratio = ctx.sampling_ratio 32 | bs, ch, h, w = ctx.input_shape 33 | grad_input = _C.roi_align_backward( 34 | grad_output, 35 | rois, 36 | spatial_scale, 37 | output_size[0], 38 | output_size[1], 39 | bs, 40 | ch, 41 | h, 42 | w, 43 | sampling_ratio, 44 | ctx.aligned, 45 | ) 46 | return grad_input, None, None, None, None, None 47 | 48 | 49 | roi_align = _ROIAlign.apply 50 | 51 | 52 | class ROIAlign(nn.Module): 53 | def __init__(self, 54 | output_size, 55 | spatial_scale, 56 | sampling_ratio, 57 | aligned=True): 58 | """ 59 | Args: 60 | output_size (tuple): h, w 61 | spatial_scale (float): scale the input boxes by this number 62 | sampling_ratio (int): number of inputs samples to take for each output 63 | sample. 0 to take samples densely. 64 | aligned (bool): if False, use the legacy implementation in 65 | Detectron. If True, align the results more perfectly. 66 | 67 | Note: 68 | The meaning of aligned=True: 69 | 70 | Given a continuous coordinate c, its two neighboring pixel indices (in our 71 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 72 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 73 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original 74 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 75 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 76 | (relative to our pixel model) when performing bilinear interpolation. 77 | 78 | With `aligned=True`, 79 | we first appropriately scale the ROI and then shift it by -0.5 80 | prior to calling roi_align. This produces the correct neighbors; 81 | 82 | The difference does not make a difference to the model's performance if 83 | ROIAlign is used together with conv layers. 84 | """ 85 | super(ROIAlign, self).__init__() 86 | self.output_size = output_size 87 | self.spatial_scale = spatial_scale 88 | self.sampling_ratio = sampling_ratio 89 | self.aligned = aligned 90 | 91 | def forward(self, input, rois): 92 | """ 93 | Args: 94 | input: NCHW images 95 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 
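                For example, three boxes that all come from the first image of the
                batch form a 3x5 tensor whose first column is all zeros.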
96 | """ 97 | assert rois.dim() == 2 and rois.size(1) == 5 98 | return roi_align(input, rois, self.output_size, self.spatial_scale, 99 | self.sampling_ratio, self.aligned) 100 | 101 | def __repr__(self): 102 | tmpstr = self.__class__.__name__ + "(" 103 | tmpstr += "output_size=" + str(self.output_size) 104 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 105 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 106 | tmpstr += ", aligned=" + str(self.aligned) 107 | tmpstr += ")" 108 | return tmpstr 109 | -------------------------------------------------------------------------------- /dl_lib/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .batch_norm import FrozenBatchNorm2d, get_norm, NaiveSyncBatchNorm 3 | from .deformable.deform_conv import DeformConv, ModulatedDeformConv 4 | from .deformable.deform_conv_with_off import (DeformConvWithOff, 5 | ModulatedDeformConvWithOff) 6 | from .ROIAlign.roi_align import ROIAlign, roi_align 7 | from .shape_spec import ShapeSpec 8 | from .wrappers import BatchNorm2d, Conv2d, ConvTranspose2d, cat, interpolate 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /dl_lib/layers/deformable/deform_conv_with_off.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .deform_conv import DeformConv, ModulatedDeformConv 7 | 8 | 9 | class DeformConvWithOff(nn.Module): 10 | def __init__(self, 11 | in_channels, 12 | out_channels, 13 | kernel_size=3, 14 | stride=1, 15 | padding=1, 16 | dilation=1, 17 | deformable_groups=1): 18 | super(DeformConvWithOff, self).__init__() 19 | self.offset_conv = nn.Conv2d( 20 | in_channels, 21 | deformable_groups * 2 * kernel_size * kernel_size, 22 | kernel_size=kernel_size, 23 | stride=stride, 24 | padding=padding, 25 | ) 26 | self.dcn = DeformConv( 27 | in_channels, 28 | out_channels, 29 | kernel_size=kernel_size, 30 | stride=stride, 31 | padding=padding, 32 | dilation=dilation, 33 | deformable_groups=deformable_groups, 34 | ) 35 | 36 | def forward(self, input): 37 | offset = self.offset_conv(input) 38 | output = self.dcn(input, offset) 39 | return output 40 | 41 | 42 | class ModulatedDeformConvWithOff(nn.Module): 43 | def __init__(self, 44 | in_channels, 45 | out_channels, 46 | kernel_size=3, 47 | stride=1, 48 | padding=1, 49 | dilation=1, 50 | deformable_groups=1): 51 | super(ModulatedDeformConvWithOff, self).__init__() 52 | self.offset_mask_conv = nn.Conv2d( 53 | in_channels, 54 | deformable_groups * 3 * kernel_size * kernel_size, 55 | kernel_size=kernel_size, 56 | stride=stride, 57 | padding=padding, 58 | ) 59 | self.dcnv2 = ModulatedDeformConv( 60 | in_channels, 61 | out_channels, 62 | kernel_size=kernel_size, 63 | stride=stride, 64 | padding=padding, 65 | dilation=dilation, 66 | deformable_groups=deformable_groups, 67 | ) 68 | 69 | def forward(self, input): 70 | x = self.offset_mask_conv(input) 71 | o1, o2, mask = torch.chunk(x, 3, dim=1) 72 | offset = torch.cat((o1, o2), dim=1) 73 | mask = torch.sigmoid(mask) 74 | output = self.dcnv2(input, offset, mask) 75 | return output 76 | -------------------------------------------------------------------------------- /dl_lib/layers/shape_spec.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec( 7 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 8 | """ 9 | A simple structure that contains basic shape specification about a tensor. 10 | It is often used as the auxiliary inputs/outputs of models, 11 | to obtain the shape inference ability among pytorch modules. 12 | 13 | Attributes: 14 | channels: 15 | height: 16 | width: 17 | stride: 18 | """ 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /dl_lib/layers/vision.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | // Modified by Yanwei Li 3 | 4 | #include 5 | #include 6 | #include 7 | #include "ROIAlign/ROIAlign.h" 8 | #include "deformable/deform_conv.h" 9 | 10 | namespace dl_lib { 11 | 12 | #ifdef WITH_CUDA 13 | int get_cudart_version() { 14 | return CUDART_VERSION; 15 | } 16 | #endif 17 | 18 | std::string get_cuda_version() { 19 | #ifdef WITH_CUDA 20 | std::ostringstream oss; 21 | 22 | // copied from 23 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 24 | auto printCudaStyleVersion = [&](int v) { 25 | oss << (v / 1000) << "." << (v / 10 % 100); 26 | if (v % 10 != 0) { 27 | oss << "." << (v % 10); 28 | } 29 | }; 30 | printCudaStyleVersion(get_cudart_version()); 31 | return oss.str(); 32 | #else 33 | return std::string("not available"); 34 | #endif 35 | } 36 | 37 | // similar to 38 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 39 | std::string get_compiler_version() { 40 | std::ostringstream ss; 41 | #if defined(__GNUC__) 42 | #ifndef __clang__ 43 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 44 | #endif 45 | #endif 46 | 47 | #if defined(__clang_major__) 48 | { 49 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
50 | << __clang_patchlevel__; 51 | } 52 | #endif 53 | 54 | #if defined(_MSC_VER) 55 | { ss << "MSVC " << _MSC_FULL_VER; } 56 | #endif 57 | return ss.str(); 58 | } 59 | 60 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 61 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 62 | m.def("get_cuda_version", &get_cuda_version, "get_cuda_version"); 63 | 64 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 65 | m.def( 66 | "deform_conv_backward_input", 67 | &deform_conv_backward_input, 68 | "deform_conv_backward_input"); 69 | m.def( 70 | "deform_conv_backward_filter", 71 | &deform_conv_backward_filter, 72 | "deform_conv_backward_filter"); 73 | m.def( 74 | "modulated_deform_conv_forward", 75 | &modulated_deform_conv_forward, 76 | "modulated_deform_conv_forward"); 77 | m.def( 78 | "modulated_deform_conv_backward", 79 | &modulated_deform_conv_backward, 80 | "modulated_deform_conv_backward"); 81 | 82 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 83 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 84 | } 85 | 86 | } // namespace dl_lib 87 | -------------------------------------------------------------------------------- /dl_lib/layers/wrappers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Wrappers around some nn functions, mainly to support empty tensors. 4 | 5 | Ideally, support for empty tensors would be added directly to PyTorch in those functions. 6 | 7 | These can be removed once https://github.com/pytorch/pytorch/issues/12013 8 | is implemented. 9 | """ 10 | 11 | import math 12 | 13 | import torch 14 | from torch.nn.modules.utils import _ntuple 15 | 16 | 17 | def cat(tensors, dim=0): 18 | """ 19 | Efficient version of torch.cat that avoids a copy if there is only a single element in a list 20 | """ 21 | assert isinstance(tensors, (list, tuple)) 22 | if len(tensors) == 1: 23 | return tensors[0] 24 | return torch.cat(tensors, dim) 25 | 26 | 27 | class _NewEmptyTensorOp(torch.autograd.Function): 28 | @staticmethod 29 | def forward(ctx, x, new_shape): 30 | ctx.shape = x.shape 31 | return x.new_empty(new_shape) 32 | 33 | @staticmethod 34 | def backward(ctx, grad): 35 | shape = ctx.shape 36 | return _NewEmptyTensorOp.apply(grad, shape), None 37 | 38 | 39 | class Conv2d(torch.nn.Conv2d): 40 | """ 41 | A wrapper around :class:`torch.nn.Conv2d` to support zero-size tensors and more features. 42 | """ 43 | def __init__(self, *args, **kwargs): 44 | """ 45 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 46 | 47 | Args: 48 | norm (nn.Module, optional): a normalization layer 49 | activation (callable(Tensor) -> Tensor): a callable activation function 50 | 51 | It assumes that the norm layer is used before the activation. 52 | """ 53 | norm = kwargs.pop("norm", None) 54 | activation = kwargs.pop("activation", None) 55 | super().__init__(*args, **kwargs) 56 | 57 | self.norm = norm 58 | self.activation = activation 59 | 60 | def forward(self, x): 61 | if x.numel() == 0: 62 | # When input is empty, we want to return an empty tensor with "correct" shape, 63 | # so that the following operations will not panic 64 | # if they check for the shape of the tensor.
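# Quick sanity check of the formula below: with kernel_size=3, padding=1,
# dilation=1 and stride=2, a 65-pixel input dimension gives
# (65 + 2 * 1 - (1 * (3 - 1) + 1)) // 2 + 1 = 33.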
65 | # This computes the height and width of the output tensor 66 | output_shape = [(i + 2 * p - (di * (k - 1) + 1)) // s + 1 67 | for i, p, di, k, s in 68 | zip(x.shape[-2:], self.padding, self.dilation, 69 | self.kernel_size, self.stride)] 70 | output_shape = [x.shape[0], self.weight.shape[0]] + output_shape 71 | empty = _NewEmptyTensorOp.apply(x, output_shape) 72 | if self.training: 73 | # https://github.com/pytorch/pytorch/issues/12013 74 | assert not isinstance( 75 | self.norm, torch.nn.SyncBatchNorm 76 | ), "SyncBatchNorm does not support empty inputs!" 77 | 78 | # This is to make DDP happy. 79 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 80 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 81 | return empty + _dummy 82 | else: 83 | return empty 84 | 85 | x = super().forward(x) 86 | if self.norm is not None: 87 | x = self.norm(x) 88 | if self.activation is not None: 89 | x = self.activation(x) 90 | return x 91 | 92 | 93 | class ConvTranspose2d(torch.nn.ConvTranspose2d): 94 | """ 95 | A wrapper around :class:`torch.nn.ConvTranspose2d` to support zero-size tensor. 96 | """ 97 | def forward(self, x): 98 | if x.numel() > 0: 99 | return super(ConvTranspose2d, self).forward(x) 100 | # get output shape 101 | 102 | output_shape = [(i - 1) * d - 2 * p + (di * (k - 1) + 1) + op 103 | for i, p, di, k, d, op in zip( 104 | x.shape[-2:], 105 | self.padding, 106 | self.dilation, 107 | self.kernel_size, 108 | self.stride, 109 | self.output_padding, 110 | )] 111 | output_shape = [x.shape[0], self.out_channels] + output_shape 112 | # This is to make DDP happy. 113 | # DDP expects all workers to have gradient w.r.t the same set of parameters. 114 | _dummy = sum(x.view(-1)[0] for x in self.parameters()) * 0.0 115 | return _NewEmptyTensorOp.apply(x, output_shape) + _dummy 116 | 117 | 118 | class BatchNorm2d(torch.nn.BatchNorm2d): 119 | """ 120 | A wrapper around :class:`torch.nn.BatchNorm2d` to support zero-size tensor. 121 | """ 122 | def forward(self, x): 123 | if x.numel() > 0: 124 | return super(BatchNorm2d, self).forward(x) 125 | # get output shape 126 | output_shape = x.shape 127 | return _NewEmptyTensorOp.apply(x, output_shape) 128 | 129 | 130 | def interpolate(input, 131 | size=None, 132 | scale_factor=None, 133 | mode="nearest", 134 | align_corners=None): 135 | """ 136 | A wrapper around :func:`torch.nn.functional.interpolate` to support zero-size tensor. 137 | """ 138 | if input.numel() > 0: 139 | return torch.nn.functional.interpolate(input, 140 | size, 141 | scale_factor, 142 | mode, 143 | align_corners=align_corners) 144 | 145 | def _check_size_scale_factor(dim): 146 | if size is None and scale_factor is None: 147 | raise ValueError("either size or scale_factor should be defined") 148 | if size is not None and scale_factor is not None: 149 | raise ValueError( 150 | "only one of size or scale_factor should be defined") 151 | if (scale_factor is not None and isinstance(scale_factor, tuple) 152 | and len(scale_factor) != dim): 153 | raise ValueError("scale_factor shape must match input shape. 
" 154 | "Input is {}D, scale_factor size is {}".format( 155 | dim, len(scale_factor))) 156 | 157 | def _output_size(dim): 158 | _check_size_scale_factor(dim) 159 | if size is not None: 160 | return size 161 | scale_factors = _ntuple(dim)(scale_factor) 162 | # math.floor might return float in py2.7 163 | return [ 164 | int(math.floor(input.size(i + 2) * scale_factors[i])) 165 | for i in range(dim) 166 | ] 167 | 168 | output_shape = tuple(_output_size(2)) 169 | output_shape = input.shape[:-2] + output_shape 170 | return _NewEmptyTensorOp.apply(input, output_shape) 171 | -------------------------------------------------------------------------------- /dl_lib/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | from dl_lib.layers import ShapeSpec 5 | 6 | from .backbone import ( 7 | FPN, 8 | Backbone, 9 | ResNet, 10 | ResNetBlockBase, 11 | build_resnet_backbone, 12 | make_stage, 13 | ) 14 | from .meta_arch import (SemanticSegmentor, DynamicNet4Seg) 15 | from .test_time_augmentation import DatasetMapperTTA, SemanticSegmentorWithTTA 16 | 17 | _EXCLUDE = {"torch", "ShapeSpec"} 18 | __all__ = [ 19 | k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_") 20 | ] 21 | 22 | assert ( 23 | torch.Tensor([1]) == torch.Tensor([2]) 24 | ).dtype == torch.bool, "Your Pytorch is too old. Please update to contain https://github.com/pytorch/pytorch/pull/21113" 25 | -------------------------------------------------------------------------------- /dl_lib/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from .backbone import Backbone 4 | from .fpn import FPN, build_retinanet_resnet_fpn_p5_backbone 5 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 6 | 7 | # TODO can expose more resnet blocks after careful consideration 8 | -------------------------------------------------------------------------------- /dl_lib/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from abc import ABCMeta, abstractmethod 3 | 4 | import torch.nn as nn 5 | 6 | from dl_lib.layers import ShapeSpec 7 | 8 | __all__ = ["Backbone"] 9 | 10 | 11 | class Backbone(nn.Module, metaclass=ABCMeta): 12 | """ 13 | Abstract base class for network backbones. 14 | """ 15 | def __init__(self): 16 | """ 17 | The `__init__` method of any subclass can specify its own set of arguments. 18 | """ 19 | super().__init__() 20 | 21 | @abstractmethod 22 | def forward(self): 23 | """ 24 | Subclasses must override this method, but adhere to the same return type. 25 | 26 | Returns: 27 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 28 | """ 29 | pass 30 | 31 | @property 32 | def size_divisibility(self): 33 | """ 34 | Some backbones require the input height and width to be divisible by a 35 | specific integer. This is typically true for encoder / decoder type networks 36 | with lateral connection (e.g., FPN) for which feature maps need to match 37 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 38 | input size divisibility is required. 
39 | """ 40 | return 0 41 | 42 | def output_shape(self): 43 | """ 44 | Returns: 45 | dict[str->ShapeSpec] 46 | """ 47 | # this is a backward-compatible default 48 | return { 49 | name: ShapeSpec(channels=self._out_feature_channels[name], 50 | stride=self._out_feature_strides[name]) 51 | for name in self._out_features 52 | } 53 | 54 | # the properties below are not used any more 55 | 56 | @property 57 | def out_features(self): 58 | """deprecated""" 59 | return self._out_features 60 | 61 | @property 62 | def out_feature_strides(self): 63 | """deprecated""" 64 | return {f: self._out_feature_strides[f] for f in self._out_features} 65 | 66 | @property 67 | def out_feature_channels(self): 68 | """deprecated""" 69 | return {f: self._out_feature_channels[f] for f in self._out_features} 70 | -------------------------------------------------------------------------------- /dl_lib/modeling/basenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .basenet import basenet 2 | -------------------------------------------------------------------------------- /dl_lib/modeling/basenet/basenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import numpy as np 5 | import torch 6 | from PIL import Image 7 | 8 | from dl_lib.configs.base_config import config as cfg 9 | from dl_lib.data import MetadataCatalog 10 | from dl_lib.utils.visualizer import Visualizer 11 | 12 | from .show import visualize_feature_maps 13 | 14 | 15 | def basenet(cls): 16 | def visualize_data(self, per_image, save_to_file=False): 17 | """ 18 | Visualize data from batch_inputs of dataloader. 19 | 20 | Args: 21 | per_image (dict): a dict that contains: 22 | * image: Tensor, image in (C, H, W) format. 23 | * instances: Instances 24 | Other information that's included in the original dicts, such as: 25 | * "height", "width" (int): the output resolution of the model, used in inference. 26 | See :meth:`postprocess` for details. 27 | save_to_file: whether save img to disk. 
28 | 29 | Example: 30 | >>> self.visualize_data(batch_inputs[0]) 31 | """ 32 | metadata = MetadataCatalog.get("coco_2017_train") 33 | 34 | def output(vis, fname): 35 | if not save_to_file: 36 | print(fname) 37 | cv2.imshow("window", vis.get_image()[:, :, ::-1]) 38 | cv2.waitKey() 39 | else: 40 | filepath = os.path.join("./", fname) 41 | print("Saving to {} ...".format(filepath)) 42 | vis.save(filepath) 43 | 44 | scale = 1.0 45 | # Pytorch tensor is in (C, H, W) format 46 | img = per_image["image"].permute(1, 2, 0) 47 | if cfg.INPUT.FORMAT == "BGR": 48 | img = img[:, :, [2, 1, 0]] 49 | else: 50 | img = np.asarray( 51 | Image.fromarray(img, mode=cfg.INPUT.FORMAT).convert("RGB")) 52 | 53 | visualizer = Visualizer(img, metadata=metadata, scale=scale) 54 | target_fields = per_image["instances"].get_fields() 55 | labels = [ 56 | metadata.thing_classes[i] for i in target_fields["gt_classes"] 57 | ] 58 | vis = visualizer.overlay_instances( 59 | labels=labels, 60 | boxes=target_fields.get("gt_boxes", None), 61 | masks=target_fields.get("gt_masks", None), 62 | keypoints=target_fields.get("gt_keypoints", None), 63 | ) 64 | output(vis, str(per_image["image_id"]) + ".jpg") 65 | 66 | cls.visualize_data = visualize_data 67 | 68 | def visualize_feature_map(self, 69 | feature_map, 70 | per_image=None, 71 | stride=8, 72 | save_name=0, 73 | with_img=True, 74 | channelwise=False): 75 | """ 76 | Visualize a feature map with (optional) gt boxes 77 | 78 | Args: 79 | feature_map (torch.Tensor): C x H x W 80 | per_image (dict): batch_inputs[i] 81 | stride (int): downsample ratio of the current feature_map 82 | save_name (int or str): feature map figure name 83 | with_img (bool): whether to visualize the corresponding image data 84 | channelwise (bool): visualize all channels (True) or the channel-wise mean (False) 85 | 86 | Examples:: 87 | >>> level = 1 88 | >>> self.visualize_feature_map(features[level][0], 89 | >>> per_image=batched_inputs[level], 90 | >>> stride=self.fpn_strides[level], 91 | >>> save_name=1, 92 | >>> with_img=False, 93 | >>> channelwise=False) 94 | """ 95 | if with_img and save_name == 0: 96 | self.visualize_data(per_image) 97 | 98 | with torch.no_grad(): 99 | if "instances" in per_image: 100 | instance = per_image["instances"] 101 | gts = instance.gt_boxes.tensor.cpu().numpy() 102 | l = gts[:, 0:1] 103 | t = gts[:, 1:2] 104 | r = gts[:, 2:3] 105 | b = gts[:, 3:4] 106 | boxes = (np.concatenate([l, t, l, b, r, b, r, t], 107 | axis=1).reshape(-1, 4, 108 | 2).transpose(0, 2, 1)) 109 | else: 110 | boxes = [] 111 | if not channelwise: 112 | fm = feature_map.permute(1, 2, 0).mean(dim=-1, keepdim=True) 113 | else: 114 | fm = feature_map.permute(1, 2, 0) 115 | # visualize_feature_maps(fm.sigmoid().cpu().numpy(), 116 | visualize_feature_maps( 117 | fm.cpu().numpy(), 118 | boxes=boxes, 119 | stride=stride, 120 | save_filename=f"feature_map_{save_name}.png", 121 | ) 122 | 123 | cls.visualize_feature_map = visualize_feature_map 124 | 125 | return cls 126 | -------------------------------------------------------------------------------- /dl_lib/modeling/basenet/show.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import numpy as np 4 | import pylab as plt 5 | 6 | 7 | def draw_box(ax, vertices, color='black'): 8 | """ 9 | Draw box with color.
10 | 11 | Args: 12 | ax (list): axes to draw box along 13 | vertices (ndarray): indices of shape (N x 2) 14 | color (str): plotted color 15 | """ 16 | connections = [ 17 | [0, 1], 18 | [1, 2], 19 | [2, 3], 20 | [3, 0], 21 | ] 22 | for connection in connections: 23 | ax.plot(*vertices[:, connection], c=color, lw=5) 24 | 25 | 26 | def visualize_feature_maps(fm, 27 | boxes=[], 28 | keypoints=[], 29 | stride=1, 30 | save_filename=None): 31 | """ 32 | Visualize feature map with boxes or key points. 33 | 34 | Args: 35 | fm (torch.Tensor): feature map of shape H x W x c, c is channel 36 | boxes (ndarray): boxes to be visualized. 37 | keypoints (ndarray): key points to be visualized 38 | stride (int): used to normalize boxes or keypoints 39 | save_filename (bool): whether save to disk 40 | """ 41 | nc = np.ceil(np.sqrt(fm.shape[2])) # column 42 | nr = np.ceil(fm.shape[2] / nc) # row 43 | nc = int(nc) 44 | nr = int(nr) 45 | plt.figure(figsize=(64, 64)) 46 | for i in range(fm.shape[2]): 47 | ax = plt.subplot(nr, nc, i + 1) 48 | ax.imshow(fm[:, :, i], cmap='jet') 49 | 50 | for obj in boxes: 51 | box = copy.deepcopy(obj) / stride 52 | draw_box(ax, box, color='g') 53 | 54 | for pts_score in keypoints: 55 | pts = pts_score[:8] 56 | pts = pts / stride 57 | for i in range(4): 58 | ax.plot(pts[2 * i + 1], pts[2 * i + 0], 'r*') 59 | ax.plot([pts[1], pts[3]], [pts[0], pts[2]], c='y', lw=5) 60 | ax.plot([pts[3], pts[5]], [pts[2], pts[4]], c='g', lw=5) 61 | ax.plot([pts[5], pts[7]], [pts[4], pts[6]], c='b', lw=5) 62 | ax.plot([pts[7], pts[1]], [pts[6], pts[0]], c='r', lw=5) 63 | 64 | # plt.colorbar() 65 | ax.axis('off') 66 | if save_filename: 67 | plt.savefig(save_filename) 68 | else: 69 | plt.show() 70 | plt.close() 71 | -------------------------------------------------------------------------------- /dl_lib/modeling/dynamic_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # build for dynamic networks 3 | # @Author: yanwei.li 4 | 5 | from .dynamic_backbone import DynamicNetwork, build_dynamic_backbone -------------------------------------------------------------------------------- /dl_lib/modeling/dynamic_arch/cal_op_flops.py: -------------------------------------------------------------------------------- 1 | # Count Operation MFLOPs when fix batch to 1 2 | # @author: yanwei.li 3 | 4 | 5 | def count_Conv_flop( 6 | in_h, in_w, in_channel, out_channel, 7 | kernel_size, is_bias=False, stride=1, groups=1 8 | ): 9 | out_h = in_h // stride 10 | out_w = in_w // stride 11 | bias_ops = 1 if is_bias else 0 12 | kernel_ops = kernel_size[0] * kernel_size[1] * (in_channel // groups) 13 | delta_ops = (kernel_ops + bias_ops) * out_channel * out_h * out_w 14 | return delta_ops / 1e6 15 | 16 | 17 | def count_Linear_flop(in_num, out_num, is_bias): 18 | weight_ops = in_num * out_num 19 | bias_ops = out_num if is_bias else 0 20 | delta_ops = weight_ops + bias_ops 21 | return delta_ops / 1e6 22 | 23 | 24 | def count_BN_flop(in_h, in_w, in_channel, is_affine): 25 | multi_affine = 2 if is_affine else 1 26 | delta_ops = multi_affine * in_h * in_w * in_channel 27 | return delta_ops / 1e6 28 | 29 | 30 | def count_ReLU_flop(in_h, in_w, in_channel): 31 | delta_ops = in_h * in_w * in_channel 32 | return delta_ops / 1e6 33 | 34 | 35 | def count_Pool2d_flop(in_h, in_w, out_channel, kernel_size, stride): 36 | out_h = in_h // stride 37 | out_w = in_w // stride 38 | kernel_ops = kernel_size[0] * kernel_size[1] 39 | delta_ops = kernel_ops * out_w * out_h * 
out_channel 40 | return delta_ops / 1e6 41 | 42 | 43 | def count_ConvBNReLU_flop( 44 | in_h, in_w, in_channel, out_channel, 45 | kernel_size, is_bias=False, stride=1, 46 | groups=1, is_affine=True 47 | ): 48 | flops = 0.0 49 | flops += count_Conv_flop( 50 | in_h, in_w, in_channel, out_channel, 51 | kernel_size, is_bias, stride, groups 52 | ) 53 | in_h = in_h // stride 54 | in_w = in_w // stride 55 | flops += count_BN_flop(in_h, in_w, out_channel, is_affine) 56 | flops += count_ReLU_flop(in_h, in_w, out_channel) 57 | return flops 58 | -------------------------------------------------------------------------------- /dl_lib/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # import all the meta_arch, so they will be registered 5 | from .semantic_seg import SemanticSegmentor 6 | from .dynamic4seg import DynamicNet4Seg -------------------------------------------------------------------------------- /dl_lib/modeling/meta_arch/semantic_seg.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | from typing import Dict 4 | from dl_lib.modeling.nn_utils import weight_init 5 | import torch 6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from dl_lib.layers import Conv2d, ShapeSpec 10 | from dl_lib.structures import ImageList 11 | from ..postprocessing import sem_seg_postprocess 12 | 13 | __all__ = ["SemanticSegmentor", "SemSegFPNHead"] 14 | 15 | 16 | def build_backbone(cfg): 17 | pass 18 | 19 | 20 | def build_sem_seg_head(cfg, ShapeSpec): 21 | pass 22 | 23 | 24 | """ 25 | Registry for semantic segmentation heads, which make semantic segmentation predictions 26 | from feature maps. 27 | """ 28 | 29 | 30 | class SemanticSegmentor(nn.Module): 31 | """ 32 | Main class for semantic segmentation architectures. 33 | """ 34 | def __init__(self, cfg): 35 | super().__init__() 36 | 37 | self.device = torch.device(cfg.MODEL.DEVICE) 38 | 39 | self.backbone = build_backbone(cfg) 40 | self.sem_seg_head = build_sem_seg_head( 41 | cfg, self.backbone.output_shape() 42 | ) 43 | 44 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to( 45 | self.device).view(-1, 1, 1) 46 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to( 47 | self.device).view(-1, 1, 1) 48 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 49 | 50 | self.to(self.device) 51 | 52 | def forward(self, batched_inputs): 53 | """ 54 | Args: 55 | batched_inputs: a list, batched outputs of :class:`DatasetMapper` . 56 | Each item in the list contains the inputs for one image. 57 | For now, each item in the list is a dict that contains: 58 | image: Tensor, image in (C, H, W) format. 59 | sem_seg: semantic segmentation ground truth 60 | Other information that's included in the original dicts, such as: 61 | "height", "width" (int): the output resolution of the model, used in inference. 62 | See :meth:`postprocess` for details. 63 | Returns: 64 | list[dict]: Each dict is the output for one input image. 65 | The dict contains one key "sem_seg" whose value is a 66 | Tensor of the output resolution that represents the 67 | per-pixel segmentation prediction. 
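                At inference time, each returned dict's "sem_seg" tensor can be
                reduced to a per-pixel label map with an argmax over the class
                dimension, e.g. out["sem_seg"].argmax(dim=0).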
68 | """ 69 | images = [x["image"].to(self.device) for x in batched_inputs] 70 | images = [self.normalizer(x) for x in images] 71 | images = ImageList.from_tensors( 72 | images, self.backbone.size_divisibility 73 | ) 74 | 75 | features = self.backbone(images.tensor) 76 | 77 | if "sem_seg" in batched_inputs[0]: 78 | targets = [x["sem_seg"].to(self.device) for x in batched_inputs] 79 | targets = ImageList.from_tensors( 80 | targets, self.backbone.size_divisibility, 81 | self.sem_seg_head.ignore_value).tensor 82 | else: 83 | targets = None 84 | results, losses = self.sem_seg_head(features, targets) 85 | 86 | if self.training: 87 | return losses 88 | 89 | processed_results = [] 90 | for result, input_per_image, image_size in zip( 91 | results, batched_inputs, images.image_sizes 92 | ): 93 | height = input_per_image.get("height") 94 | width = input_per_image.get("width") 95 | r = sem_seg_postprocess(result, image_size, height, width) 96 | processed_results.append({"sem_seg": r}) 97 | return processed_results 98 | 99 | 100 | class SemSegFPNHead(nn.Module): 101 | """ 102 | A semantic segmentation head described in detail in the Panoptic Feature Pyramid Networks paper 103 | (https://arxiv.org/abs/1901.02446). It takes FPN features as input and merges information from 104 | all levels of the FPN into single output. 105 | """ 106 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 107 | super().__init__() 108 | 109 | # fmt: off 110 | self.in_features = cfg.MODEL.SEM_SEG_HEAD.IN_FEATURES 111 | feature_strides = {k: v.stride for k, v in input_shape.items()} 112 | feature_channels = {k: v.channels for k, v in input_shape.items()} 113 | self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE 114 | num_classes = cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES 115 | conv_dims = cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM 116 | self.common_stride = cfg.MODEL.SEM_SEG_HEAD.COMMON_STRIDE 117 | norm = cfg.MODEL.SEM_SEG_HEAD.NORM 118 | self.loss_weight = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT 119 | # fmt: on 120 | 121 | self.scale_heads = [] 122 | for in_feature in self.in_features: 123 | head_ops = [] 124 | head_length = max( 125 | 1, int(np.log2(feature_strides[in_feature]) - np.log2(self.common_stride)) 126 | ) 127 | for k in range(head_length): 128 | norm_module = nn.GroupNorm(32, conv_dims) if norm == "GN" else None 129 | conv = Conv2d( 130 | feature_channels[in_feature] if k == 0 else conv_dims, 131 | conv_dims, kernel_size=3, stride=1, padding=1, 132 | bias=not norm, norm=norm_module, activation=F.relu, 133 | ) 134 | weight_init.c2_msra_fill(conv) 135 | head_ops.append(conv) 136 | if feature_strides[in_feature] != self.common_stride: 137 | head_ops.append( 138 | nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False) 139 | ) 140 | self.scale_heads.append(nn.Sequential(*head_ops)) 141 | self.add_module(in_feature, self.scale_heads[-1]) 142 | self.predictor = Conv2d(conv_dims, num_classes, kernel_size=1, stride=1, padding=0) 143 | weight_init.c2_msra_fill(self.predictor) 144 | 145 | def forward(self, features, targets=None): 146 | for i, f in enumerate(self.in_features): 147 | if i == 0: 148 | x = self.scale_heads[i](features[f]) 149 | else: 150 | x = x + self.scale_heads[i](features[f]) 151 | x = self.predictor(x) 152 | x = F.interpolate( 153 | x, scale_factor=self.common_stride, mode="bilinear", align_corners=False 154 | ) 155 | 156 | if self.training: 157 | losses = {} 158 | losses["loss_sem_seg"] = ( 159 | F.cross_entropy( 160 | x, targets, reduction="mean", 161 | ignore_index=self.ignore_value) * 
self.loss_weight 162 | ) 163 | return [], losses 164 | else: 165 | return x, {} 166 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Megvii-BaseDetection/DynamicRouting/2ad0a95139b1bf21878dd222854f98974ac4930a/dl_lib/modeling/nn_utils/__init__.py -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/flop_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | 7 | import torch.nn as nn 8 | 9 | from .jit_handles import (addmm_flop_jit, conv_flop_jit, einsum_flop_jit, 10 | get_jit_model_analysis, matmul_flop_jit) 11 | 12 | # A dictionary that maps supported operations to their flop count jit handles. 13 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 14 | "aten::addmm": addmm_flop_jit, 15 | "aten::_convolution": conv_flop_jit, 16 | "aten::einsum": einsum_flop_jit, 17 | "aten::matmul": matmul_flop_jit, 18 | } 19 | 20 | 21 | def flop_count( 22 | model: nn.Module, 23 | inputs: typing.Tuple[object, ...], 24 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], 25 | None] = None, 26 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 27 | """ 28 | Given a model and an input to the model, compute the Gflops of the given 29 | model. Note the input should have a batch size of 1. 30 | Args: 31 | model (nn.Module): The model to compute flop counts. 32 | inputs (tuple): Inputs that are passed to `model` to count flops. 33 | Inputs need to be in a tuple. 34 | supported_ops (dict(str,Callable) or None) : By default, we count flops 35 | for convolution layers, fully connected layers, torch.matmul and 36 | torch.einsum operations. We define a FLOP as a single atomic 37 | Multiply-Add. Users can provide customized supported_ops for 38 | counting flops if desired. 39 | Returns: 40 | tuple[defaultdict, Counter]: A dictionary that records the number of 41 | gflops for each operation and a Counter that records the number of 42 | skipped operations. 43 | """ 44 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 45 | if not supported_ops: 46 | supported_ops = _SUPPORTED_OPS.copy() 47 | 48 | # Run flop count. 49 | total_flop_counter, skipped_ops = get_jit_model_analysis( 50 | model, inputs, supported_ops) 51 | 52 | # Log for skipped operations. 53 | if len(skipped_ops) > 0: 54 | for op, freq in skipped_ops.items(): 55 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 56 | 57 | # Convert flop count to gigaflops. 58 | final_count = defaultdict(float) 59 | for op in total_flop_counter: 60 | final_count[op] = total_flop_counter[op] / 1e9 61 | 62 | return final_count, skipped_ops 63 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/precise_bn.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
3 | 4 | import itertools 5 | 6 | import torch 7 | 8 | BN_MODULE_TYPES = ( 9 | torch.nn.BatchNorm1d, 10 | torch.nn.BatchNorm2d, 11 | torch.nn.BatchNorm3d, 12 | torch.nn.SyncBatchNorm, 13 | ) 14 | 15 | 16 | @torch.no_grad() 17 | def update_bn_stats(model, data_loader, num_iters: int = 200): 18 | """ 19 | Recompute and update the batch norm stats to make them more precise. During 20 | training both BN stats and the weight are changing after every iteration, so 21 | the running average can not precisely reflect the actual stats of the 22 | current model. 23 | In this function, the BN stats are recomputed with fixed weights, to make 24 | the running average more precise. Specifically, it computes the true average 25 | of per-batch mean/variance instead of the running average. 26 | 27 | Args: 28 | model (nn.Module): the model whose bn stats will be recomputed. 29 | 30 | Note that: 31 | 32 | 1. This function will not alter the training mode of the given model. 33 | Users are responsible for setting the layers that needs 34 | precise-BN to training mode, prior to calling this function. 35 | 36 | 2. Be careful if your models contain other stateful layers in 37 | addition to BN, i.e. layers whose state can change in forward 38 | iterations. This function will alter their state. If you wish 39 | them unchanged, you need to either pass in a submodule without 40 | those layers, or backup the states. 41 | data_loader (iterator): an iterator. Produce data as inputs to the model. 42 | num_iters (int): number of iterations to compute the stats. 43 | """ 44 | bn_layers = get_bn_modules(model) 45 | 46 | if len(bn_layers) == 0: 47 | return 48 | 49 | # In order to make the running stats only reflect the current batch, the 50 | # momentum is disabled. 51 | # bn.running_mean = (1 - momentum) * bn.running_mean + momentum * batch_mean 52 | # Setting the momentum to 1.0 to compute the stats without momentum. 53 | momentum_actual = [bn.momentum for bn in bn_layers] 54 | for bn in bn_layers: 55 | bn.momentum = 1.0 56 | 57 | # Note that running_var actually means "running average of variance" 58 | running_mean = [torch.zeros_like(bn.running_mean) for bn in bn_layers] 59 | running_var = [torch.zeros_like(bn.running_var) for bn in bn_layers] 60 | 61 | for ind, inputs in enumerate(itertools.islice(data_loader, num_iters)): 62 | model(inputs) 63 | 64 | for i, bn in enumerate(bn_layers): 65 | # Accumulates the bn stats. 66 | running_mean[i] += (bn.running_mean - running_mean[i]) / (ind + 1) 67 | running_var[i] += (bn.running_var - running_var[i]) / (ind + 1) 68 | # We compute the "average of variance" across iterations. 69 | assert ind == num_iters - 1, ( 70 | "update_bn_stats is meant to run for {} iterations, " 71 | "but the dataloader stops at {} iterations.".format(num_iters, ind)) 72 | 73 | for i, bn in enumerate(bn_layers): 74 | # Sets the precise bn stats. 75 | bn.running_mean = running_mean[i] 76 | bn.running_var = running_var[i] 77 | bn.momentum = momentum_actual[i] 78 | 79 | 80 | def get_bn_modules(model): 81 | """ 82 | Find all BatchNorm (BN) modules that are in training mode. See 83 | fvcore.precise_bn.BN_MODULE_TYPES for a list of all modules that are 84 | included in this search. 85 | 86 | Args: 87 | model (nn.Module): a model possibly containing BN modules. 88 | 89 | Returns: 90 | list[nn.Module]: all BN modules in the model. 91 | """ 92 | # Finds all the bn layers. 
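    # Only modules that are currently in training mode are returned, so any BN
    # layer switched to eval() beforehand is left untouched by update_bn_stats().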
93 | bn_layers = [ 94 | m for m in model.modules() 95 | if m.training and isinstance(m, BN_MODULE_TYPES) 96 | ] 97 | return bn_layers 98 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/scale_grad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from torch.autograd.function import Function 4 | 5 | 6 | class _ScaleGradient(Function): 7 | @staticmethod 8 | def forward(ctx, input, scale): 9 | ctx.scale = scale 10 | return input 11 | 12 | @staticmethod 13 | def backward(ctx, grad_output): 14 | return grad_output * ctx.scale, None 15 | -------------------------------------------------------------------------------- /dl_lib/modeling/nn_utils/weight_init.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import torch.nn as nn 5 | 6 | from dl_lib.layers.batch_norm import BatchNorm2d, NaiveSyncBatchNorm 7 | 8 | 9 | def constant_init(module, val, bias=0): 10 | nn.init.constant_(module.weight, val) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 16 | assert distribution in ['uniform', 'normal'] 17 | if distribution == 'uniform': 18 | nn.init.xavier_uniform_(module.weight, gain=gain) 19 | else: 20 | nn.init.xavier_normal_(module.weight, gain=gain) 21 | if hasattr(module, 'bias') and module.bias is not None: 22 | nn.init.constant_(module.bias, bias) 23 | 24 | 25 | def normal_init(module, mean=0, std=1, bias=0): 26 | nn.init.normal_(module.weight, mean, std) 27 | if hasattr(module, 'bias') and module.bias is not None: 28 | nn.init.constant_(module.bias, bias) 29 | 30 | 31 | def uniform_init(module, a=0, b=1, bias=0): 32 | nn.init.uniform_(module.weight, a, b) 33 | if hasattr(module, 'bias') and module.bias is not None: 34 | nn.init.constant_(module.bias, bias) 35 | 36 | 37 | def kaiming_init(module, 38 | a=0, 39 | mode='fan_out', 40 | nonlinearity='relu', 41 | bias=0, 42 | distribution='normal'): 43 | assert distribution in ['uniform', 'normal'] 44 | if distribution == 'uniform': 45 | nn.init.kaiming_uniform_(module.weight, 46 | a=a, 47 | mode=mode, 48 | nonlinearity=nonlinearity) 49 | else: 50 | nn.init.kaiming_normal_(module.weight, 51 | a=a, 52 | mode=mode, 53 | nonlinearity=nonlinearity) 54 | if hasattr(module, 'bias') and module.bias is not None: 55 | nn.init.constant_(module.bias, bias) 56 | 57 | 58 | def caffe2_xavier_init(module, bias=0): 59 | # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch 60 | # Acknowledgment to FAIR's internal code 61 | kaiming_init(module, 62 | a=1, 63 | mode='fan_in', 64 | nonlinearity='leaky_relu', 65 | distribution='uniform') 66 | 67 | 68 | def c2_xavier_fill(module: nn.Module): 69 | """ 70 | Initialize `module.weight` using the "XavierFill" implemented in Caffe2. 71 | Also initializes `module.bias` to 0. 72 | 73 | Args: 74 | module (torch.nn.Module): module to initialize. 
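    Examples (a usage sketch; the ``nn.Conv2d`` layer is an arbitrary illustration)::

        >>> conv = nn.Conv2d(3, 64, kernel_size=3)
        >>> c2_xavier_fill(conv)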
75 | """ 76 | # Caffe2 implementation of XavierFill in fact 77 | # corresponds to kaiming_uniform_ in PyTorch 78 | nn.init.kaiming_uniform_(module.weight, a=1) 79 | if module.bias is not None: 80 | nn.init.constant_(module.bias, 0) 81 | 82 | 83 | def c2_msra_fill(module: nn.Module): 84 | """ 85 | Initialize `module.weight` using the "MSRAFill" implemented in Caffe2. 86 | Also initializes `module.bias` to 0. 87 | 88 | Args: 89 | module (torch.nn.Module): module to initialize. 90 | """ 91 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 92 | if module.bias is not None: 93 | nn.init.constant_(module.bias, 0) 94 | 95 | 96 | def kaiming_init_module(module, 97 | a=0, 98 | mode='fan_out', 99 | nonlinearity='relu', 100 | bias=0, 101 | distribution='normal'): 102 | assert distribution in ['uniform', 'normal'] 103 | 104 | for name, m in module.named_modules(): 105 | if isinstance(m, nn.Conv2d): 106 | if distribution == 'uniform': 107 | nn.init.kaiming_uniform_(m.weight, 108 | a=a, 109 | mode=mode, 110 | nonlinearity=nonlinearity) 111 | else: 112 | nn.init.kaiming_normal_(m.weight, 113 | a=a, 114 | mode=mode, 115 | nonlinearity=nonlinearity) 116 | if hasattr(m, 'bias') and m.bias is not None: 117 | nn.init.constant_(m.bias, bias) 118 | elif isinstance(m, 119 | (BatchNorm2d, nn.SyncBatchNorm, NaiveSyncBatchNorm)): 120 | if m.weight is not None: 121 | nn.init.constant_(m.weight, 1) 122 | if m.bias is not None: 123 | nn.init.constant_(m.bias, 0) 124 | -------------------------------------------------------------------------------- /dl_lib/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | 5 | def sem_seg_postprocess(result, img_size, output_height, output_width): 6 | """ 7 | Return semantic segmentation predictions in the original resolution. 8 | 9 | The input images are often resized when entering semantic segmentor. Moreover, in same 10 | cases, they also padded inside segmentor to be divisible by maximum network stride. 11 | As a result, we often need the predictions of the segmentor in a different 12 | resolution from its inputs. 13 | 14 | Args: 15 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 16 | where C is the number of classes, and H, W are the height and width of the prediction. 17 | img_size (tuple): image size that segmentor is taking as input. 18 | output_height, output_width: the desired output resolution. 19 | 20 | Returns: 21 | semantic segmentation prediction (Tensor): A tensor of the shape 22 | (C, output_height, output_width) that contains per-pixel soft predictions. 23 | """ 24 | result = result[:, :img_size[0], :img_size[1]].expand(1, -1, -1, -1) 25 | result = F.interpolate(result, 26 | size=(output_height, output_width), 27 | mode="bilinear", 28 | align_corners=False)[0] 29 | return result 30 | -------------------------------------------------------------------------------- /dl_lib/modeling/test_time_augmentation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import copy 3 | from itertools import count 4 | 5 | import numpy as np 6 | import torch 7 | from torch import nn 8 | 9 | from dl_lib.data.detection_utils import read_image 10 | from dl_lib.data.transforms import ResizeShortestEdge 11 | 12 | __all__ = ["DatasetMapperTTA", "SemanticSegmentorWithTTA"] 13 | 14 | 15 | class DatasetMapperTTA: 16 | """ 17 | Implement test-time augmentation for detection data. 18 | It is a callable which takes a dataset dict from a detection dataset, 19 | and returns a list of dataset dicts where the images 20 | are augmented from the input image by the transformations defined in the config. 21 | This is used for test-time augmentation. 22 | """ 23 | def __init__(self, cfg): 24 | self.min_sizes = cfg.TEST.AUG.MIN_SIZES 25 | self.max_size = cfg.TEST.AUG.MAX_SIZE 26 | self.flip = cfg.TEST.AUG.FLIP 27 | self.image_format = cfg.INPUT.FORMAT 28 | 29 | def __call__(self, dataset_dict): 30 | """ 31 | Args: 32 | dict: a detection dataset dict 33 | 34 | Returns: 35 | list[dict]: 36 | a list of dataset dicts, which contain augmented versions of the input image. 37 | The total number of dicts is ``len(min_sizes) * (2 if flip else 1)``. 38 | """ 39 | ret = [] 40 | if "image" not in dataset_dict: 41 | numpy_image = read_image(dataset_dict["file_name"], 42 | self.image_format) 43 | else: 44 | numpy_image = dataset_dict["image"].permute( 45 | 1, 2, 0).numpy().astype("uint8") 46 | for min_size in self.min_sizes: 47 | image = np.copy(numpy_image) 48 | tfm = ResizeShortestEdge(min_size, 49 | self.max_size).get_transform(image) 50 | resized = tfm.apply_image(image) 51 | resized = torch.as_tensor( 52 | resized.transpose(2, 0, 1).astype("float32")) 53 | 54 | dic = copy.deepcopy(dataset_dict) 55 | dic["horiz_flip"] = False 56 | dic["image"] = resized 57 | ret.append(dic) 58 | 59 | if self.flip: 60 | dic = copy.deepcopy(dataset_dict) 61 | dic["horiz_flip"] = True 62 | dic["image"] = torch.flip(resized, dims=[2]) 63 | ret.append(dic) 64 | return ret 65 | 66 | 67 | class SemanticSegmentorWithTTA(nn.Module): 68 | """ 69 | A SemanticSegmentor with test-time augmentation enabled. 70 | Its :meth:`__call__` method has the same interface as :meth:`SemanticSegmentor.forward`. 71 | """ 72 | def __init__(self, cfg, model, tta_mapper=None, batch_size=1): 73 | """ 74 | Args: 75 | cfg (CfgNode): 76 | model (SemanticSegmentor): a SemanticSegmentor to apply TTA on. 77 | tta_mapper (callable): takes a dataset dict and returns a list of 78 | augmented versions of the dataset dict. Defaults to 79 | `DatasetMapperTTA(cfg)`. 80 | batch_size (int): batch the augmented images into this batch size for inference. 81 | """ 82 | super().__init__() 83 | self.cfg = copy.deepcopy(cfg) 84 | self.model = model 85 | 86 | if tta_mapper is None: 87 | tta_mapper = DatasetMapperTTA(cfg) 88 | self.tta_mapper = tta_mapper 89 | self.batch_size = batch_size 90 | 91 | def _batch_inference(self, batched_inputs): 92 | """ 93 | Execute inference on a list of inputs, 94 | using batch size = self.batch_size, instead of the length of the list.
95 | 96 | Inputs & outputs have the same format as :meth:`SemanticSegmentor.inference` 97 | """ 98 | 99 | outputs = [] 100 | inputs = [] 101 | for idx, input in zip(count(), batched_inputs): 102 | inputs.append(input) 103 | if len(inputs 104 | ) == self.batch_size or idx == len(batched_inputs) - 1: 105 | outputs.extend(self.model.forward(inputs, )) 106 | inputs = [] 107 | return outputs 108 | 109 | def __call__(self, batched_inputs): 110 | """ 111 | Same input/output format as :meth:`SemanticSegmentor.forward` 112 | """ 113 | return [self._inference_one_image(x) for x in batched_inputs] 114 | 115 | def _hflip_sem_seg(self, x): 116 | y = x.flip(dims=[2]) 117 | return y 118 | 119 | def _inference_one_image(self, input): 120 | """ 121 | Args: 122 | input (dict): one dataset dict 123 | 124 | Returns: 125 | dict: one output dict 126 | """ 127 | augmented_inputs = self.tta_mapper(input) 128 | 129 | do_hflip = [k.pop("horiz_flip", False) for k in augmented_inputs] 130 | heights = [k["height"] for k in augmented_inputs] 131 | widths = [k["width"] for k in augmented_inputs] 132 | assert ( 133 | len(set(heights)) == 1 and len(set(widths)) == 1 134 | ), "Augmented version of the inputs should have the same original resolution!" 135 | 136 | # 1. Segment from all augmented versions 137 | # 1.1: forward with all augmented images 138 | outputs = self._batch_inference(augmented_inputs) 139 | # 1.2: union the results 140 | for idx, output in enumerate(outputs): 141 | if do_hflip[idx]: 142 | output["sem_seg"] = self._hflip_sem_seg(output["sem_seg"]) 143 | all_pred_masks = torch.stack([o["sem_seg"] for o in outputs], dim=0) 144 | avg_pred_masks = torch.mean(all_pred_masks, dim=0) 145 | output = outputs[0] 146 | output["sem_seg"] = avg_pred_masks 147 | return output 148 | -------------------------------------------------------------------------------- /dl_lib/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lr_scheduler import WarmupCosineLR, WarmupMultiStepLR 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /dl_lib/solver/build.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from typing import Any, Dict, List 3 | 4 | import torch 5 | from torch.optim.lr_scheduler import LambdaLR, OneCycleLR 6 | 7 | from .lr_scheduler import PolyLR, WarmupCosineLR, WarmupMultiStepLR 8 | 9 | 10 | def build_optimizer(cfg, model: torch.nn.Module) -> torch.optim.Optimizer: 11 | """ 12 | Build an optimizer from config.SOLVER.OPTIMIZER 13 | """ 14 | if cfg.NAME == "SGD": 15 | params: List[Dict[str, Any]] = [] 16 | for key, value in model.named_parameters(): 17 | if not cfg.get("WEIGHT_DECAY_CONV_ONLY", False): 18 | if not value.requires_grad: 19 | continue 20 | lr = cfg.BASE_LR 21 | weight_decay = cfg.WEIGHT_DECAY 22 | if key.endswith("norm.weight") or key.endswith("norm.bias"): 23 | weight_decay = cfg.WEIGHT_DECAY_NORM 24 | elif key.endswith(".bias"): 25 | # NOTE: unlike Detectron v1, we now default BIAS_LR_FACTOR to 1.0 26 | # and WEIGHT_DECAY_BIAS to WEIGHT_DECAY so that bias optimizer 27 | # hyperparameters are by default exactly the same as for regular 28 | # weights. 
29 | lr = cfg.BASE_LR * cfg.BIAS_LR_FACTOR 30 | weight_decay = cfg.WEIGHT_DECAY_BIAS 31 | else: 32 | lr = cfg.BASE_LR 33 | if "conv.weight" not in key: 34 | weight_decay = 0 35 | else: 36 | weight_decay = cfg.WEIGHT_DECAY 37 | # multiply lr for gating function 38 | if "GATE_LR_MULTI" in cfg: 39 | if cfg.GATE_LR_MULTI > 0.0 and "gate_conv" in key: 40 | lr *= cfg.GATE_LR_MULTI 41 | 42 | params += [{ 43 | "params": [value], 44 | "lr": lr, 45 | "weight_decay": weight_decay 46 | }] 47 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.MOMENTUM) 48 | elif cfg.NAME == "AdamW": 49 | lr = cfg.BASE_LR 50 | optimizer = torch.optim.Adam(model.parameters(), 51 | lr=lr, 52 | betas=cfg.BETAS, 53 | weight_decay=cfg.WEIGHT_DECAY, 54 | amsgrad=cfg.AMSGRAD) 55 | return optimizer 56 | 57 | 58 | def build_lr_scheduler( 59 | cfg, optimizer: torch.optim.Optimizer 60 | ) -> torch.optim.lr_scheduler._LRScheduler: 61 | """ 62 | Build a LR scheduler from config. 63 | """ 64 | name = cfg.NAME 65 | if name == "WarmupMultiStepLR": 66 | return WarmupMultiStepLR( 67 | optimizer, 68 | cfg.STEPS, 69 | cfg.GAMMA, 70 | warmup_factor=cfg.WARMUP_FACTOR, 71 | warmup_iters=cfg.WARMUP_ITERS, 72 | warmup_method=cfg.WARMUP_METHOD, 73 | ) 74 | elif name == "WarmupCosineLR": 75 | return WarmupCosineLR( 76 | optimizer, 77 | cfg.MAX_ITER, 78 | warmup_factor=cfg.WARMUP_FACTOR, 79 | warmup_iters=cfg.WARMUP_ITERS, 80 | warmup_method=cfg.WARMUP_METHOD, 81 | ) 82 | elif name == "LambdaLR": 83 | return LambdaLR(optimizer, cfg.LAMBDA_SCHEDULE) 84 | elif name == "OneCycleLR": 85 | return OneCycleLR(optimizer, 86 | cfg.MAX_LR, 87 | total_steps=cfg.MAX_ITER, 88 | pct_start=cfg.PCT_START, 89 | base_momentum=cfg.BASE_MOM, 90 | max_momentum=cfg.MAX_MOM, 91 | div_factor=cfg.DIV_FACTOR) 92 | elif name == "PolyLR": 93 | return PolyLR( 94 | optimizer, 95 | cfg.MAX_ITER, 96 | cfg.POLY_POWER, 97 | warmup_factor=cfg.WARMUP_FACTOR, 98 | warmup_iters=cfg.WARMUP_ITERS, 99 | warmup_method=cfg.WARMUP_METHOD, 100 | ) 101 | else: 102 | raise ValueError("Unknown LR scheduler: {}".format(name)) 103 | -------------------------------------------------------------------------------- /dl_lib/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, rasterize_polygons_within_box 7 | 8 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 9 | -------------------------------------------------------------------------------- /dl_lib/structures/image_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from __future__ import division 3 | 4 | from typing import Any, List, Sequence, Tuple, Union 5 | 6 | import torch 7 | from torch.nn import functional as F 8 | 9 | 10 | class ImageList(object): 11 | """ 12 | Structure that holds a list of images (of possibly 13 | varying sizes) as a single tensor. 
14 | This works by padding the images to the same size, 15 | and storing in a field the original sizes of each image 16 | 17 | Attributes: 18 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 19 | """ 20 | def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, 21 | int]]): 22 | """ 23 | Arguments: 24 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 25 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). 26 | """ 27 | self.tensor = tensor 28 | self.image_sizes = image_sizes 29 | 30 | def __len__(self) -> int: 31 | return len(self.image_sizes) 32 | 33 | def __getitem__(self, idx: Union[int, slice]) -> torch.Tensor: 34 | """ 35 | Access the individual image in its original size. 36 | 37 | Returns: 38 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 39 | """ 40 | size = self.image_sizes[idx] 41 | return self.tensor[idx, ..., :size[0], :size[1]] # type: ignore 42 | 43 | def to(self, *args: Any, **kwargs: Any) -> "ImageList": 44 | cast_tensor = self.tensor.to(*args, **kwargs) 45 | return ImageList(cast_tensor, self.image_sizes) 46 | 47 | @property 48 | def device(self) -> torch.device: 49 | return self.tensor.device 50 | 51 | @staticmethod 52 | def from_tensors( 53 | tensors: Sequence[torch.Tensor], 54 | size_divisibility: int = 0, 55 | pad_ref_long: bool = False, 56 | pad_value: float = 0.0, 57 | ) -> "ImageList": 58 | """ 59 | Args: 60 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 61 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded with `pad_value` 62 | so that they will have the same shape. 63 | size_divisibility (int): If `size_divisibility > 0`, also adds padding to ensure 64 | the common height and width is divisible by `size_divisibility` 65 | pad_value (float): value to pad 66 | 67 | Returns: 68 | an `ImageList`. 69 | """ 70 | assert len(tensors) > 0 71 | assert isinstance(tensors, (tuple, list)) 72 | for t in tensors: 73 | assert isinstance(t, torch.Tensor), type(t) 74 | assert t.shape[1:-2] == tensors[0].shape[1:-2], t.shape 75 | # per dimension maximum (H, W) or (C_1, ..., C_K, H, W) where K >= 1 among all tensors 76 | max_size = list(max(s) for s in zip(*[img.shape for img in tensors])) 77 | if pad_ref_long: 78 | max_size_max = max(max_size[-2:]) 79 | max_size[-2:] = [max_size_max] * 2 80 | max_size = tuple(max_size) 81 | 82 | if size_divisibility > 0: 83 | import math 84 | 85 | stride = size_divisibility 86 | max_size = list(max_size) # type: ignore 87 | max_size[-2] = int(math.ceil(max_size[-2] / stride) * 88 | stride) # type: ignore 89 | max_size[-1] = int(math.ceil(max_size[-1] / stride) * 90 | stride) # type: ignore 91 | max_size = tuple(max_size) 92 | 93 | image_sizes = [im.shape[-2:] for im in tensors] 94 | 95 | if len(tensors) == 1: 96 | # This seems slightly (2%) faster. 
97 | # TODO: check whether it's faster for multiple images as well 98 | image_size = image_sizes[0] 99 | padded = F.pad( 100 | tensors[0], 101 | [ 102 | 0, max_size[-1] - image_size[1], 0, 103 | max_size[-2] - image_size[0] 104 | ], 105 | value=pad_value, 106 | ) 107 | batched_imgs = padded.unsqueeze_(0) 108 | else: 109 | batch_shape = (len(tensors), ) + max_size 110 | batched_imgs = tensors[0].new_full(batch_shape, pad_value) 111 | for img, pad_img in zip(tensors, batched_imgs): 112 | pad_img[..., :img.shape[-2], :img.shape[-1]].copy_(img) 113 | 114 | return ImageList(batched_imgs.contiguous(), image_sizes) 115 | -------------------------------------------------------------------------------- /dl_lib/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /dl_lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | -------------------------------------------------------------------------------- /dl_lib/utils/benchmark.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import sys 5 | import time 6 | from typing import Dict, List 7 | 8 | import numpy as np 9 | 10 | 11 | def timeit(num_iters: int = -1, warmup_iters: int = 0): 12 | """ 13 | This is intended to be used as a decorator to time any function. 14 | 15 | Args: 16 | num_iters (int): number of iterations used to compute the average time 17 | (sec) required to run the function. If negative, the number of 18 | iterations is determined dynamically by running the function a few 19 | times to make sure the estimate is stable. 20 | warmup_iters (int): number of iterations used to warm up the function. 21 | This is useful for functions that exhibit poor performance during 22 | the first few times they run (due to caches, autotuning, etc). 23 | Returns: 24 | Dict[str, float]: dictionary of the aggregated timing estimates. 25 | "iterations": number of iterations used to compute the estimated 26 | time. 27 | "mean": average time (sec) used to run the function. 28 | "median": median time (sec) used to run the function. 29 | "min": minimal time (sec) used to run the function. 30 | "max": maximal time (sec) used to run the function. 31 | "stddev": standard deviation of the time (sec) used to run the 32 | function. 33 | """ 34 | def decorator(func): 35 | def decorated(*args, **kwargs) -> Dict[str, float]: 36 | # Warmup phase. 37 | for _ in range(warmup_iters): 38 | func(*args, **kwargs) 39 | 40 | # Estimate the run time of the function. 41 | total_time: float = 0 42 | count = 0 43 | run_times: List[float] = [] 44 | max_num_iters = num_iters if num_iters > 0 else sys.maxsize 45 | for _ in range(max_num_iters): 46 | start_time = time.time() 47 | func(*args, **kwargs) 48 | run_time = time.time() - start_time 49 | 50 | run_times.append(run_time) 51 | total_time += run_time 52 | count += 1 53 | if num_iters < 0 and total_time >= 0.5: 54 | # If num_iters is negative, run the function enough times so 55 | # that we can have a more robust estimate of the average time.
56 | break 57 | assert count == len(run_times) 58 | ret: Dict[str, float] = {} 59 | ret["iterations"] = count 60 | ret["mean"] = total_time / count 61 | ret["median"] = np.median(run_times) 62 | ret["min"] = np.min(run_times) 63 | ret["max"] = np.max(run_times) 64 | ret["stddev"] = np.std(run_times) 65 | return ret 66 | 67 | return decorated 68 | 69 | return decorator 70 | 71 | 72 | def benchmark(func, 73 | bm_name: str, 74 | kwargs_list: List[Dict], 75 | *, 76 | num_iters: int = -1, 77 | warmup_iters: int = 0) -> None: 78 | """ 79 | Benchmark the input function and print out the results. 80 | 81 | Args: 82 | func (callable): a closure that returns a function for benchmarking, 83 | where initialization can be done before the function to benchmark. 84 | bm_name (str): name of the benchmark to print out, e.g. "BM_UPDATE". 85 | kwargs_list (list): a list of argument dict to pass to the function. The 86 | intput function will be timed separately for each argument dict. 87 | num_iters (int): number of iterations to run. Defaults to run until 0.5s. 88 | warmup_iters (int): number of iterations used to warm up the function. 89 | 90 | Outputs: 91 | For each argument dict, print out the time (in microseconds) required 92 | to run the function along with the number of iterations used to get 93 | the timing estimate. Example output: 94 | 95 | Benchmark Avg Time(μs) Peak Time(μs) Iterations 96 | ------------------------------------------------------------------- 97 | BM_UPDATE_100 820 914 610 98 | BM_UPDATE_1000 7655 8709 66 99 | BM_UPDATE_10000 78062 81748 7 100 | ------------------------------------------------------------------- 101 | """ 102 | 103 | print("") 104 | outputs = [] 105 | for kwargs in kwargs_list: 106 | func_bm = func(**kwargs) 107 | 108 | time_func = timeit(num_iters=num_iters, 109 | warmup_iters=warmup_iters)(func_bm) 110 | 111 | ret = time_func() 112 | name = bm_name 113 | if kwargs: 114 | name += "_" + "_".join(str(v) for k, v in kwargs.items()) 115 | outputs.append([ 116 | name, 117 | str(ret["mean"] * 1000000), 118 | str(ret["max"] * 1000000), 119 | str(ret["iterations"]), 120 | ]) 121 | outputs = np.array(outputs) 122 | # Calculate column widths for metrics table. 123 | c1 = len(max(outputs[:, 0], key=len)) 124 | c2 = len(max(outputs[:, 1], key=len)) 125 | c3 = len(max(outputs[:, 2], key=len)) 126 | c4 = len(max(outputs[:, 3], key=len)) 127 | dash = "-" * 80 128 | print("{:{}s} {:>{}s} {:>{}s} {:>{}s}".format( 129 | "Benchmark", 130 | c1, 131 | "Avg Time(μs)", 132 | c2, 133 | "Peak Time(μs)", 134 | c3, 135 | "Iterations", 136 | c4, 137 | )) 138 | print(dash) 139 | for output in outputs: 140 | print("{:{}s} {:15.0f} {:15.0f} {:14d}".format( 141 | output[0], 142 | c1, 143 | float(output[1]), 144 | float(output[2]), 145 | int(output[3]), 146 | )) 147 | print(dash) 148 | -------------------------------------------------------------------------------- /dl_lib/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | import os 3 | import subprocess 4 | import sys 5 | from collections import defaultdict 6 | 7 | import numpy as np 8 | import PIL 9 | import torch 10 | import torchvision 11 | from tabulate import tabulate 12 | 13 | __all__ = ["collect_env_info"] 14 | 15 | 16 | def collect_torch_env(): 17 | try: 18 | import torch.__config__ 19 | 20 | return torch.__config__.show() 21 | except ImportError: 22 | # compatible with older versions of pytorch 23 | from torch.utils.collect_env import get_pretty_env_info 24 | 25 | return get_pretty_env_info() 26 | 27 | 28 | def get_env_module(): 29 | var_name = "dl_lib_ENV_MODULE" 30 | return var_name, os.environ.get(var_name, "") 31 | 32 | 33 | def collect_env_info(): 34 | data = [] 35 | data.append(("sys.platform", sys.platform)) 36 | data.append(("Python", sys.version.replace("\n", ""))) 37 | data.append(("Numpy", np.__version__)) 38 | try: 39 | from dl_lib import _C 40 | except ImportError: 41 | data.append(("dl_lib._C", "failed to import")) 42 | else: 43 | data.append(("dl_lib Compiler", _C.get_compiler_version())) 44 | data.append(("dl_lib CUDA Compiler", _C.get_cuda_version())) 45 | 46 | data.append(get_env_module()) 47 | data.append(("PyTorch", torch.__version__)) 48 | data.append(("PyTorch Debug Build", torch.version.debug)) 49 | try: 50 | data.append(("torchvision", torchvision.__version__)) 51 | except AttributeError: 52 | data.append(("torchvision", "unknown")) 53 | 54 | has_cuda = torch.cuda.is_available() 55 | data.append(("CUDA available", has_cuda)) 56 | if has_cuda: 57 | devices = defaultdict(list) 58 | for k in range(torch.cuda.device_count()): 59 | devices[torch.cuda.get_device_name(k)].append(str(k)) 60 | for name, devids in devices.items(): 61 | data.append(("GPU " + ",".join(devids), name)) 62 | 63 | from torch.utils.cpp_extension import CUDA_HOME 64 | 65 | data.append(("CUDA_HOME", str(CUDA_HOME))) 66 | 67 | if CUDA_HOME is not None and os.path.isdir(CUDA_HOME): 68 | try: 69 | nvcc = os.path.join(CUDA_HOME, "bin", "nvcc") 70 | nvcc = subprocess.check_output( 71 | "'{}' -V | tail -n1".format(nvcc), shell=True) 72 | nvcc = nvcc.decode("utf-8").strip() 73 | except subprocess.SubprocessError: 74 | nvcc = "Not Available" 75 | data.append(("NVCC", nvcc)) 76 | 77 | cuda_arch_list = os.environ.get("TORCH_CUDA_ARCH_LIST", None) 78 | if cuda_arch_list: 79 | data.append(("TORCH_CUDA_ARCH_LIST", cuda_arch_list)) 80 | data.append(("Pillow", PIL.__version__)) 81 | 82 | try: 83 | import cv2 84 | 85 | data.append(("cv2", cv2.__version__)) 86 | except ImportError: 87 | pass 88 | env_str = tabulate(data) + "\n" 89 | env_str += collect_torch_env() 90 | return env_str 91 | 92 | 93 | if __name__ == "__main__": 94 | print(collect_env_info()) 95 | -------------------------------------------------------------------------------- /dl_lib/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | """ 3 | An awesome colormap for really neat visualizations. 4 | Copied from Detectron, and removed gray colors. 
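A minimal usage sketch of the two helpers defined below:

    colors = colormap(rgb=True, maximum=1)      # (N, 3) float array in [0, 1]
    one = random_color(rgb=True, maximum=255)   # a single RGB color as 3 numbers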
5 | """ 6 | 7 | import numpy as np 8 | 9 | __all__ = ["colormap", "random_color"] 10 | 11 | # fmt: off 12 | # RGB: 13 | _COLORS = np.array([ 14 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, 15 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, 16 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, 17 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, 18 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, 19 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 20 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 21 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 22 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, 23 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, 24 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, 25 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, 26 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, 27 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, 28 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 29 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.333, 30 | 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 31 | 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 32 | 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 33 | 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 0.333, 0.000, 0.000, 34 | 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 0.000, 0.000, 1.000, 35 | 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.857, 0.857, 0.857, 1.000, 36 | 1.000, 1.000 37 | ]).astype(np.float32).reshape(-1, 3) 38 | # fmt: on 39 | 40 | 41 | def colormap(rgb=False, maximum=255): 42 | """ 43 | Args: 44 | rgb (bool): whether to return RGB colors or BGR colors. 45 | maximum (int): either 255 or 1 46 | 47 | Returns: 48 | ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1] 49 | """ 50 | assert maximum in [255, 1], maximum 51 | c = _COLORS * maximum 52 | if not rgb: 53 | c = c[:, ::-1] 54 | return c 55 | 56 | 57 | def random_color(rgb=False, maximum=255): 58 | """ 59 | Args: 60 | rgb (bool): whether to return RGB colors or BGR colors. 
61 | maximum (int): either 255 or 1 62 | 63 | Returns: 64 | ndarray: a vector of 3 numbers 65 | """ 66 | idx = np.random.randint(0, len(_COLORS)) 67 | ret = _COLORS[idx] * maximum 68 | if not rgb: 69 | ret = ret[::-1] 70 | return ret 71 | 72 | 73 | if __name__ == "__main__": 74 | import cv2 75 | 76 | size = 100 77 | H, W = 10, 10 78 | canvas = np.random.rand(H * size, W * size, 3).astype("float32") 79 | for h in range(H): 80 | for w in range(W): 81 | idx = h * W + w 82 | if idx >= len(_COLORS): 83 | break 84 | canvas[h * size:(h + 1) * size, 85 | w * size:(w + 1) * size] = _COLORS[idx] 86 | cv2.imshow("a", canvas) 87 | cv2.waitKey(0) 88 | -------------------------------------------------------------------------------- /dl_lib/utils/config_helper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import collections 4 | import logging 5 | import re 6 | 7 | import six 8 | from colorama import Back, Fore, Style 9 | 10 | # python 3.8+ compatibility 11 | try: 12 | collectionsAbc = collections.abc 13 | except ImportError: 14 | collectionsAbc = collections 15 | 16 | 17 | def highlight(keyword, target, color=Fore.BLACK + Back.YELLOW): 18 | """ 19 | use given color to highlight keyword in target string 20 | 21 | Args: 22 | keyword(str): highlight string 23 | target(str): target string 24 | color(str): string represent the color, use black foreground 25 | and yellow background as default 26 | 27 | Returns: 28 | (str) target string with keyword highlighted 29 | 30 | """ 31 | return re.sub(keyword, color + r"\g<0>" + Style.RESET_ALL, target) 32 | 33 | 34 | def find_key(param_dict: dict, key: str) -> dict: 35 | """ 36 | find key in dict 37 | 38 | Args: 39 | param_dict(dict): 40 | key(str): 41 | 42 | Returns: 43 | (dict) 44 | 45 | Examples:: 46 | >>> d = dict(abc=2, ab=4, c=4) 47 | >>> find_key(d, "ab") 48 | {'abc': 2, 'ab':4} 49 | 50 | """ 51 | find_result = {} 52 | for k, v in param_dict.items(): 53 | if re.search(key, k): 54 | find_result[k] = v 55 | if isinstance(v, dict): 56 | res = find_key(v, key) 57 | if res: 58 | find_result[k] = res 59 | return find_result 60 | 61 | 62 | def diff_dict(src, dst): 63 | """ 64 | find difference between src dict and dst dict 65 | 66 | Args: 67 | src(dict): src dict 68 | dst(dict): dst dict 69 | 70 | Returns: 71 | (dict) dict contains all the difference key 72 | 73 | """ 74 | diff_result = {} 75 | for k, v in src.items(): 76 | if k not in dst: 77 | diff_result[k] = v 78 | elif dst[k] != v: 79 | if isinstance(v, dict): 80 | diff_result[k] = diff_dict(v, dst[k]) 81 | else: 82 | diff_result[k] = v 83 | return diff_result 84 | 85 | 86 | def _assert_with_logging(cond, msg): 87 | logger = logging.getLogger(__name__) 88 | if not cond: 89 | logger.debug(msg) 90 | assert cond, msg 91 | 92 | 93 | def update(d, u): 94 | for k, v in six.iteritems(u): 95 | dv = d.get(k, {}) 96 | if not isinstance(dv, collectionsAbc.Mapping): 97 | d[k] = v 98 | elif isinstance(v, collectionsAbc.Mapping): 99 | d[k] = update(dv, v) 100 | else: 101 | d[k] = v 102 | return d 103 | 104 | 105 | def _check_and_coerce_cfg_value_type(replacement, original, key, full_key): 106 | """ 107 | Checks that `replacement`, which is intended to replace `original` is of 108 | the right type. The type is correct if it matches exactly or is one of a few 109 | cases in which the type can be easily coerced. 
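    Examples (a sketch of the list/tuple coercion implemented below)::

        >>> _check_and_coerce_cfg_value_type((1, 2), [1, 2], "KEY", "A.KEY")
        [1, 2]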
110 | """ 111 | original_type = type(original) 112 | replacement_type = type(replacement) 113 | 114 | # The types must match (with some exceptions) 115 | if replacement_type == original_type: 116 | return replacement 117 | 118 | # Cast replacement from from_type to to_type if the replacement and original 119 | # types match from_type and to_type 120 | def conditional_cast(from_type, to_type): 121 | if replacement_type == from_type and original_type == to_type: 122 | return True, to_type(replacement) 123 | else: 124 | return False, None 125 | 126 | # Conditionally casts 127 | # list <-> tuple 128 | casts = [(tuple, list), (list, tuple)] 129 | # For py2: allow converting from str (bytes) to a unicode string 130 | try: 131 | casts.append((str, unicode)) # noqa: F821 132 | except Exception: 133 | pass 134 | 135 | for (from_type, to_type) in casts: 136 | converted, converted_value = conditional_cast(from_type, to_type) 137 | if converted: 138 | return converted_value 139 | 140 | raise ValueError( 141 | "Type mismatch ({} vs. {}) with values ({} vs. {}) for config " 142 | "key: {}".format(original_type, replacement_type, original, 143 | replacement, full_key)) 144 | -------------------------------------------------------------------------------- /dl_lib/utils/download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import logging 5 | import os 6 | import shutil 7 | from typing import Callable, Optional 8 | from urllib import request 9 | 10 | 11 | def download(url: str, 12 | dir: str, 13 | *, 14 | filename: Optional[str] = None, 15 | progress: bool = True) -> str: 16 | """ 17 | Download a file from a given URL to a directory. If file exists, will not 18 | overwrite the existing file. 19 | 20 | Args: 21 | url (str): 22 | dir (str): the directory to download the file 23 | filename (str or None): the basename to save the file. 24 | Will use the name in the URL if not given. 25 | progress (bool): whether to use tqdm to draw a progress bar. 26 | 27 | Returns: 28 | str: the path to the downloaded file or the existing one. 29 | """ 30 | os.makedirs(dir, exist_ok=True) 31 | if filename is None: 32 | filename = url.split("/")[-1] 33 | assert len(filename), "Cannot obtain filename from url {}".format(url) 34 | fpath = os.path.join(dir, filename) 35 | logger = logging.getLogger(__name__) 36 | 37 | if os.path.isfile(fpath): 38 | logger.info("File {} exists! Skipping download.".format(filename)) 39 | return fpath 40 | 41 | tmp = fpath + ".tmp" # download to a tmp file first, to be more atomic. 
42 | try: 43 | logger.info("Downloading from {} ...".format(url)) 44 | if progress: 45 | import tqdm 46 | 47 | def hook( 48 | t: tqdm.tqdm) -> Callable[[int, int, Optional[int]], None]: 49 | last_b = [0] 50 | 51 | def inner(b: int, 52 | bsize: int, 53 | tsize: Optional[int] = None) -> None: 54 | if tsize is not None: 55 | t.total = tsize 56 | t.update((b - last_b[0]) * bsize) # type: ignore 57 | last_b[0] = b 58 | 59 | return inner 60 | 61 | with tqdm.tqdm( # type: ignore 62 | unit="B", 63 | unit_scale=True, 64 | miniters=1, 65 | desc=filename, 66 | leave=True) as t: 67 | tmp, _ = request.urlretrieve(url, 68 | filename=tmp, 69 | reporthook=hook(t)) 70 | 71 | else: 72 | tmp, _ = request.urlretrieve(url, filename=tmp) 73 | statinfo = os.stat(tmp) 74 | size = statinfo.st_size 75 | if size == 0: 76 | raise IOError("Downloaded an empty file from {}!".format(url)) 77 | # download to tmp first and move to fpath, to make this function more 78 | # atomic. 79 | shutil.move(tmp, fpath) 80 | except IOError: 81 | logger.error("Failed to download {}".format(url)) 82 | raise 83 | finally: 84 | try: 85 | os.unlink(tmp) 86 | except IOError: 87 | pass 88 | 89 | logger.info("Successfully downloaded " + fpath + ". " + str(size) + 90 | " bytes.") 91 | return fpath 92 | -------------------------------------------------------------------------------- /dl_lib/utils/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import importlib 3 | import importlib.util 4 | import logging 5 | import os 6 | import random 7 | import sys 8 | from datetime import datetime 9 | 10 | import numpy as np 11 | import torch 12 | 13 | __all__ = ["seed_all_rng"] 14 | 15 | 16 | def seed_all_rng(seed=None): 17 | """ 18 | Set the random seed for the RNG in torch, numpy and python. 19 | 20 | Args: 21 | seed (int): if None, will use a strong random seed. 22 | """ 23 | if seed is None: 24 | seed = (os.getpid() + int(datetime.now().strftime("%S%f")) + 25 | int.from_bytes(os.urandom(2), "big")) 26 | logger = logging.getLogger(__name__) 27 | logger.info("Using a generated random seed {}".format(seed)) 28 | np.random.seed(seed) 29 | torch.set_rng_state(torch.manual_seed(seed).get_state()) 30 | random.seed(seed) 31 | 32 | 33 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path 34 | def _import_file(module_name, file_path, make_importable=False): 35 | spec = importlib.util.spec_from_file_location(module_name, file_path) 36 | module = importlib.util.module_from_spec(spec) 37 | spec.loader.exec_module(module) 38 | if make_importable: 39 | sys.modules[module_name] = module 40 | return module 41 | 42 | 43 | def _configure_libraries(): 44 | """ 45 | Configurations for some libraries. 46 | """ 47 | # An environment option to disable `import cv2` globally, 48 | # in case it leads to negative performance impact 49 | disable_cv2 = int(os.environ.get("dl_lib_DISABLE_CV2", False)) 50 | if disable_cv2: 51 | sys.modules["cv2"] = None 52 | else: 53 | # Disable opencl in opencv since its interaction with cuda often has negative effects 54 | # This envvar is supported after OpenCV 3.4.0 55 | os.environ["OPENCV_OPENCL_RUNTIME"] = "disabled" 56 | try: 57 | import cv2 58 | 59 | if int(cv2.__version__.split(".")[0]) >= 3: 60 | cv2.ocl.setUseOpenCL(False) 61 | except ImportError: 62 | pass 63 | 64 | 65 | _ENV_SETUP_DONE = False 66 | 67 | 68 | def setup_environment(): 69 | """Perform environment setup work. 
The default setup is a no-op, but this 70 | function allows the user to specify a Python source file or a module in 71 | the $dl_lib_ENV_MODULE environment variable, that performs 72 | custom setup work that may be necessary to their computing environment. 73 | """ 74 | global _ENV_SETUP_DONE 75 | if _ENV_SETUP_DONE: 76 | return 77 | _ENV_SETUP_DONE = True 78 | 79 | _configure_libraries() 80 | 81 | custom_module_path = os.environ.get("dl_lib_ENV_MODULE") 82 | 83 | if custom_module_path: 84 | setup_custom_environment(custom_module_path) 85 | else: 86 | # The default setup is a no-op 87 | pass 88 | 89 | 90 | def setup_custom_environment(custom_module): 91 | """ 92 | Load custom environment setup by importing a Python source file or a 93 | module, and run the setup function. 94 | """ 95 | if custom_module.endswith(".py"): 96 | module = _import_file("dl_lib.utils.env.custom_module", custom_module) 97 | else: 98 | module = importlib.import_module(custom_module) 99 | assert hasattr(module, "setup_environment") and callable( 100 | module.setup_environment), ( 101 | "Custom environment module defined in {} does not have the " 102 | "required callable attribute 'setup_environment'." 103 | ).format(custom_module) 104 | module.setup_environment() 105 | -------------------------------------------------------------------------------- /dl_lib/utils/history_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from typing import List, Tuple 5 | 6 | import numpy as np 7 | 8 | 9 | class HistoryBuffer: 10 | """ 11 | Track a series of scalar values and provide access to smoothed values over a 12 | window or the global average of the series. 13 | """ 14 | def __init__(self, max_length: int = 1000000): 15 | """ 16 | Args: 17 | max_length: maximal number of values that can be stored in the 18 | buffer. When the capacity of the buffer is exhausted, old 19 | values will be removed. 20 | """ 21 | self._max_length: int = max_length 22 | self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs 23 | self._count: int = 0 24 | self._global_avg: float = 0 25 | 26 | def update(self, value: float, iteration: float = None): 27 | """ 28 | Add a new scalar value produced at certain iteration. If the length 29 | of the buffer exceeds self._max_length, the oldest element will be 30 | removed from the buffer. 31 | """ 32 | if iteration is None: 33 | iteration = self._count 34 | if len(self._data) == self._max_length: 35 | self._data.pop(0) 36 | self._data.append((value, iteration)) 37 | 38 | self._count += 1 39 | self._global_avg += (value - self._global_avg) / self._count 40 | 41 | def latest(self): 42 | """ 43 | Return the latest scalar value added to the buffer. 44 | """ 45 | return self._data[-1][0] 46 | 47 | def median(self, window_size: int): 48 | """ 49 | Return the median of the latest `window_size` values in the buffer. 50 | """ 51 | return np.median([x[0] for x in self._data[-window_size:]]) 52 | 53 | def avg(self, window_size: int): 54 | """ 55 | Return the mean of the latest `window_size` values in the buffer. 56 | """ 57 | return np.mean([x[0] for x in self._data[-window_size:]]) 58 | 59 | def global_avg(self): 60 | """ 61 | Return the mean of all the elements in the buffer. Note that this 62 | includes those getting removed due to limited buffer storage. 
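        Examples (a sketch of the incremental average; the values are arbitrary)::

            >>> buf = HistoryBuffer()
            >>> for v in [1.0, 2.0, 3.0]:
            ...     buf.update(v)
            >>> buf.global_avg()
            2.0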
63 | """ 64 | return self._global_avg 65 | 66 | def values(self): 67 | """ 68 | Returns: 69 | list[(number, iteration)]: content of the current buffer. 70 | """ 71 | return self._data 72 | -------------------------------------------------------------------------------- /dl_lib/utils/imports.py: -------------------------------------------------------------------------------- 1 | import imp 2 | 3 | 4 | def dynamic_import(config_name, config_path): 5 | """ 6 | Dynamic import a project. 7 | 8 | Args: 9 | config_name (str): module name 10 | config_path (str): the dir that contains the .py with this module. 11 | 12 | Examples:: 13 | >>> root = "/data/repos/dl_lib_playground/zhubenjin/retinanet/" 14 | >>> project = root + "retinanet.res50.fpn.coco.800size.1x.mrcnn_sigmoid" 15 | >>> cfg = dynamic_import("config", project).config 16 | >>> net = dynamic_import("net", project) 17 | """ 18 | fp, pth, desc = imp.find_module(config_name, [config_path]) 19 | 20 | return imp.load_module(config_name, fp, pth, desc) 21 | -------------------------------------------------------------------------------- /dl_lib/utils/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import logging 4 | from contextlib import contextmanager 5 | from functools import wraps 6 | 7 | import torch 8 | 9 | __all__ = ["retry_if_cuda_oom"] 10 | 11 | 12 | @contextmanager 13 | def _ignore_torch_cuda_oom(): 14 | """ 15 | A context which ignores CUDA OOM exception from pytorch. 16 | """ 17 | try: 18 | yield 19 | except RuntimeError as e: 20 | # NOTE: the string may change? 21 | if "CUDA out of memory. " in str(e): 22 | pass 23 | else: 24 | raise 25 | 26 | 27 | def retry_if_cuda_oom(func): 28 | r""" 29 | Makes a function retry itself after encountering 30 | pytorch's CUDA OOM error. 31 | It will first retry after calling `torch.cuda.empty_cache()`. 32 | 33 | If that still fails, it will then retry by trying to convert inputs to CPUs. 34 | In this case, it expects the function to dispatch to CPU implementation. 35 | The return values may become CPU tensors as well and it's user's 36 | responsibility to convert it back to CUDA tensor if needed. 37 | 38 | Args: 39 | func: a stateless callable that takes tensor-like objects as arguments 40 | 41 | Returns: 42 | a callable which retries `func` if OOM is encountered. 43 | 44 | Examples: 45 | 46 | .. code-block:: python 47 | 48 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 49 | # output may be on CPU even if inputs are on GPU 50 | 51 | Note: 52 | 1. When converting inputs to CPU, it will only look at each argument and check 53 | if it has `.device` and `.to` for conversion. Nested structures of tensors 54 | are not supported. 55 | 56 | 2. Since the function might be called more than once, it has to be 57 | stateless. 58 | """ 59 | def maybe_to_cpu(x): 60 | try: 61 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 62 | except AttributeError: 63 | like_gpu_tensor = False 64 | if like_gpu_tensor: 65 | return x.to(device="cpu") 66 | else: 67 | return x 68 | 69 | @wraps(func) 70 | def wrapped(*args, **kwargs): 71 | with _ignore_torch_cuda_oom(): 72 | return func(*args, **kwargs) 73 | 74 | # Clear cache and retry 75 | torch.cuda.empty_cache() 76 | with _ignore_torch_cuda_oom(): 77 | return func(*args, **kwargs) 78 | 79 | # Try on CPU. This slows down the code significantly, therefore print a notice. 
80 | logger = logging.getLogger(__name__) 81 | logger.info( 82 | "Attempting to copy inputs of {} to CPU due to CUDA OOM".format( 83 | str(func))) 84 | new_args = (maybe_to_cpu(x) for x in args) 85 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 86 | return func(*new_args, **new_kwargs) 87 | 88 | return wrapped 89 | -------------------------------------------------------------------------------- /dl_lib/utils/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable, note that it uses 8 | heavy weight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | def __init__(self, obj): 15 | self._obj = obj 16 | 17 | def __reduce__(self): 18 | s = cloudpickle.dumps(self._obj) 19 | return cloudpickle.loads, (s, ) 20 | 21 | def __call__(self, *args, **kwargs): 22 | return self._obj(*args, **kwargs) 23 | 24 | def __getattr__(self, attr): 25 | # Ensure that the wrapped object can be used seamlessly as the previous object. 26 | if attr not in ["_obj"]: 27 | return getattr(self._obj, attr) 28 | return getattr(self, attr) 29 | -------------------------------------------------------------------------------- /dl_lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # -*- coding: utf-8 -*- 4 | 5 | from time import perf_counter 6 | from typing import Optional 7 | 8 | 9 | class Timer: 10 | """ 11 | A timer which computes the time elapsed since the start/reset of the timer. 12 | """ 13 | def __init__(self): 14 | self.reset() 15 | 16 | def reset(self): 17 | """ 18 | Reset the timer. 19 | """ 20 | self._start = perf_counter() 21 | self._paused: Optional[float] = None 22 | self._total_paused = 0 23 | 24 | def pause(self): 25 | """ 26 | Pause the timer. 27 | """ 28 | if self._paused is not None: 29 | raise ValueError("Trying to pause a Timer that is already paused!") 30 | self._paused = perf_counter() 31 | 32 | def is_paused(self) -> bool: 33 | """ 34 | Returns: 35 | bool: whether the timer is currently paused 36 | """ 37 | return self._paused is not None 38 | 39 | def resume(self): 40 | """ 41 | Resume the timer. 42 | """ 43 | if self._paused is None: 44 | raise ValueError("Trying to resume a Timer that is not paused!") 45 | self._total_paused += perf_counter() - self._paused 46 | self._paused = None 47 | 48 | def seconds(self) -> float: 49 | """ 50 | Returns: 51 | (float): the total number of seconds since the start/reset of the 52 | timer, excluding the time when the timer is paused. 
53 | """ 54 | if self._paused is not None: 55 | end_time: float = self._paused # type: ignore 56 | else: 57 | end_time = perf_counter() 58 | return end_time - self._start - self._total_paused 59 | -------------------------------------------------------------------------------- /intro/introduce.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Megvii-BaseDetection/DynamicRouting/2ad0a95139b1bf21878dd222854f98974ac4930a/intro/introduce.png -------------------------------------------------------------------------------- /playground/.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | 7 | # useless file type 8 | *.jpg 9 | *.png 10 | *.txt 11 | *.json 12 | *.csv 13 | *.brainmodel 14 | 15 | # compilation and distribution 16 | __pycache__ 17 | _ext 18 | *.pyc 19 | *.so 20 | *.egg-info/ 21 | *.egg 22 | build/ 23 | dist/ 24 | 25 | # pytorch/python/numpy formats 26 | *.pth 27 | *.pkl 28 | *.npy 29 | 30 | # ipython/jupyter notebooks 31 | *.ipynb 32 | **/.ipynb_checkpoints/ 33 | 34 | # Editor temporaries 35 | *.swn 36 | *.swo 37 | *.bak 38 | *.swp 39 | *~ 40 | 41 | # Pycharm editor settings 42 | .idea 43 | 44 | # project dirs 45 | 46 | # soft link 47 | **/log 48 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.ImageNet/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/data/dl_lib_modelzoo/dynamicmodel/Dynamic-L16B4-A73-convert-seg.pth", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict( 51 | BASE_LR=0.02, 52 | GATE_LR_MULTI=2.5, 53 | ), 54 | IMS_PER_BATCH=8, 55 | CHECKPOINT_PERIOD=5000, 56 | GRAD_CLIP=5.0, 57 | ), 58 | INPUT=dict( 59 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 60 | MIN_SIZE_TRAIN_SAMPLING="choice", 61 | MAX_SIZE_TRAIN=4096, 62 | MIN_SIZE_TEST=1024, 63 | MAX_SIZE_TEST=2048, 64 | # FIX_SIZE_FOR_FLOPS=[768, 768], 65 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 66 | CROP_PAD=dict(SIZE=[768, 768], ), 67 | ), 68 | TEST=dict( 69 | AUG=dict( 70 | ENABLED=False, 71 | MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 72 | MAX_SIZE=4096, 73 | FLIP=True, 74 | ), 75 | PRECISE_BN=dict(ENABLED=True), 76 | ), 77 | OUTPUT_DIR=osp.join( 78 | '/data/Outputs/model_logs/dl_lib_playground', 79 | 
osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 80 | ) 81 | 82 | 83 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 84 | def __init__(self): 85 | super(DynamicSemanticSegmentationConfig, self).__init__() 86 | self._register_configuration(_config_dict) 87 | 88 | 89 | config = DynamicSemanticSegmentationConfig() 90 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.ImageNet/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.Iter560k.SDP0_3/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | # WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.3, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=560000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=10000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | TEST=dict( 66 | AUG=dict( 67 | ENABLED=False, 68 | 
MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 69 | MAX_SIZE=4096, 70 | FLIP=True, 71 | ), 72 | PRECISE_BN=dict(ENABLED=True), 73 | ), 74 | OUTPUT_DIR=osp.join( 75 | '/data/Outputs/model_logs/dl_lib_playground', 76 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 77 | ) 78 | 79 | 80 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 81 | def __init__(self): 82 | super(DynamicSemanticSegmentationConfig, self).__init__() 83 | self._register_configuration(_config_dict) 84 | 85 | 86 | config = DynamicSemanticSegmentationConfig() 87 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.Iter560k.SDP0_3/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_A/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=True, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=True, 33 | LOSS_WEIGHT=0.8, 34 | LOSS_MU=0.1, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | 
MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/dl_lib_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 72 | def __init__(self): 73 | super(DynamicSemanticSegmentationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = DynamicSemanticSegmentationConfig() 78 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_A/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_B/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=True, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=True, 33 | LOSS_WEIGHT=0.5, 34 | LOSS_MU=0.1, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 
58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768,768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/dl_lib_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 72 | def __init__(self): 73 | super(DynamicSemanticSegmentationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = DynamicSemanticSegmentationConfig() 78 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_B/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_C/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | # WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=True, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=True, 33 | LOSS_WEIGHT=0.5, 34 | LOSS_MU=0.2, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 57 | 
MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | OUTPUT_DIR=osp.join( 66 | '/data/Outputs/model_logs/dl_lib_playground', 67 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 68 | ) 69 | 70 | 71 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 72 | def __init__(self): 73 | super(DynamicSemanticSegmentationConfig, self).__init__() 74 | self._register_configuration(_config_dict) 75 | 76 | 77 | config = DynamicSemanticSegmentationConfig() 78 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16.SmallGate.Dynamic_C/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=16, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(13)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict(BASE_LR=0.05, ), 51 | IMS_PER_BATCH=8, 52 | CHECKPOINT_PERIOD=5000, 53 | GRAD_CLIP=5.0, 54 | ), 55 | INPUT=dict( 56 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 
2048, ), 57 | MIN_SIZE_TRAIN_SAMPLING="choice", 58 | MAX_SIZE_TRAIN=4096, 59 | MIN_SIZE_TEST=1024, 60 | MAX_SIZE_TEST=2048, 61 | # FIX_SIZE_FOR_FLOPS=[768, 768], 62 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 63 | CROP_PAD=dict(SIZE=[768, 768], ), 64 | ), 65 | TEST=dict( 66 | AUG=dict( 67 | ENABLED=False, 68 | MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 69 | MAX_SIZE=4096, 70 | FLIP=True, 71 | ), 72 | PRECISE_BN=dict(ENABLED=True), 73 | ), 74 | OUTPUT_DIR=osp.join( 75 | '/data/Outputs/model_logs/dl_lib_playground', 76 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 77 | ) 78 | 79 | 80 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 81 | def __init__(self): 82 | super(DynamicSemanticSegmentationConfig, self).__init__() 83 | self._register_configuration(_config_dict) 84 | 85 | 86 | config = DynamicSemanticSegmentationConfig() 87 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer16/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer33.ImageNet/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from dl_lib.configs.segm_config import SemanticSegmentationConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="/data/dl_lib_modelzoo/dynamicmodel/Dynamic-L33B4-A74-convert-seg.pth", 8 | CAL_FLOPS=True, 9 | BACKBONE=dict( 10 | CELL_TYPE=['sep_conv_3x3', 'skip_connect'], 11 | LAYER_NUM=33, 12 | CELL_NUM_LIST=[2, 3, 4] + [4 for _ in range(30)], 13 | INIT_CHANNEL=64, 14 | MAX_STRIDE=32, 15 | SEPT_STEM=True, 16 | NORM="nnSyncBN", 17 | DROP_PROB=0.0, 18 | ), 19 | GATE=dict( 20 | GATE_ON=True, 21 | GATE_INIT_BIAS=1.5, 22 | SMALL_GATE=False, 23 | ), 24 | SEM_SEG_HEAD=dict( 25 | IN_FEATURES=['layer_0', 'layer_1', 'layer_2', 'layer_3'], 26 | NUM_CLASSES=19, 27 | IGNORE_VALUE=255, 28 | NORM="nnSyncBN", 29 | LOSS_WEIGHT=1.0, 30 | ), 31 | BUDGET=dict( 32 | CONSTRAIN=False, 33 | LOSS_WEIGHT=0.0, 34 | LOSS_MU=0.0, 35 | FLOPS_ALL=26300.0, 36 | UNUPDATE_RATE=0.4, 37 | WARM_UP=True, 38 | ), 39 | ), 40 | DATASETS=dict( 41 | TRAIN=("cityscapes_fine_sem_seg_train", ), 42 | TEST=("cityscapes_fine_sem_seg_val", ), 43 | ), 44 | SOLVER=dict( 45 | 
LR_SCHEDULER=dict( 46 | NAME="PolyLR", 47 | POLY_POWER=0.9, 48 | MAX_ITER=190000, 49 | ), 50 | OPTIMIZER=dict( 51 | BASE_LR=0.02, 52 | GATE_LR_MULTI=2.5, 53 | ), 54 | IMS_PER_BATCH=8, 55 | CHECKPOINT_PERIOD=5000, 56 | GRAD_CLIP=5.0, 57 | ), 58 | INPUT=dict( 59 | MIN_SIZE_TRAIN=(512, 768, 1024, 1280, 1536, 2048, ), 60 | MIN_SIZE_TRAIN_SAMPLING="choice", 61 | MAX_SIZE_TRAIN=4096, 62 | MIN_SIZE_TEST=1024, 63 | MAX_SIZE_TEST=2048, 64 | # FIX_SIZE_FOR_FLOPS=[768,768], 65 | FIX_SIZE_FOR_FLOPS=[1024, 2048], 66 | CROP_PAD=dict(SIZE=[768, 768], ), 67 | ), 68 | TEST=dict( 69 | AUG=dict( 70 | ENABLED=False, 71 | MIN_SIZES=(512, 768, 1024, 1280, 1536, 2048, ), 72 | MAX_SIZE=4096, 73 | FLIP=True, 74 | ), 75 | PRECISE_BN=dict(ENABLED=True), 76 | ), 77 | OUTPUT_DIR=osp.join( 78 | '/data/Outputs/model_logs/dl_lib_playground', 79 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1]), 80 | ) 81 | 82 | 83 | class DynamicSemanticSegmentationConfig(SemanticSegmentationConfig): 84 | def __init__(self): 85 | super(DynamicSemanticSegmentationConfig, self).__init__() 86 | self._register_configuration(_config_dict) 87 | 88 | 89 | config = DynamicSemanticSegmentationConfig() 90 | -------------------------------------------------------------------------------- /playground/Dynamic/Seg.Layer33.ImageNet/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from dl_lib.layers import ShapeSpec 4 | from dl_lib.modeling.backbone import Backbone 5 | from dl_lib.modeling.dynamic_arch.dynamic_backbone import build_dynamic_backbone 6 | from dl_lib.modeling.meta_arch.dynamic4seg import (DynamicNet4Seg, 7 | SemSegDecoderHead) 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN), 19 | height=cfg.INPUT.FIX_SIZE_FOR_FLOPS[0], 20 | width=cfg.INPUT.FIX_SIZE_FOR_FLOPS[1]) 21 | 22 | backbone = build_dynamic_backbone(cfg, input_shape) 23 | assert isinstance(backbone, Backbone) 24 | return backbone 25 | 26 | 27 | def build_sem_seg_head(cfg, input_shape=None): 28 | return SemSegDecoderHead(cfg, input_shape) 29 | 30 | 31 | def build_model(cfg): 32 | cfg.build_backbone = build_backbone 33 | cfg.build_sem_seg_head = build_sem_seg_head 34 | model = DynamicNet4Seg(cfg) 35 | logger = logging.getLogger(__name__) 36 | logger.info("Model:\n{}".format(model)) 37 | return model 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | 4 | import glob 5 | import os 6 | 7 | import torch 8 | from setuptools import find_packages, setup 9 | from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension 10 | 11 | torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] 12 | assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" 13 | 14 | 15 | def get_extensions(): 16 | this_dir = os.path.dirname(os.path.abspath(__file__)) 17 | extensions_dir = os.path.join(this_dir, "dl_lib", "layers") 18 | 19 | main_source = os.path.join(extensions_dir, "vision.cpp") 20 | sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp")) 21 | source_cuda = glob.glob(os.path.join( 22 | extensions_dir, "**", "*.cu")) + glob.glob( 23 | os.path.join(extensions_dir, "*.cu")) 24 | 25 | sources = [main_source] + sources 26 | extension = CppExtension 27 | 28 | extra_compile_args = {"cxx": []} 29 | define_macros = [] 30 | 31 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv( 32 | "FORCE_CUDA", "0") == "1": 33 | extension = CUDAExtension 34 | sources += source_cuda 35 | define_macros += [("WITH_CUDA", None)] 36 | extra_compile_args["nvcc"] = [ 37 | "-DCUDA_HAS_FP16=1", 38 | "-D__CUDA_NO_HALF_OPERATORS__", 39 | "-D__CUDA_NO_HALF_CONVERSIONS__", 40 | "-D__CUDA_NO_HALF2_OPERATORS__", 41 | ] 42 | 43 | # It's better if pytorch can do this by default .. 44 | CC = os.environ.get("CC", None) 45 | if CC is not None: 46 | extra_compile_args["nvcc"].append("-ccbin={}".format(CC)) 47 | 48 | include_dirs = [extensions_dir] 49 | 50 | ext_modules = [ 51 | extension( 52 | "dl_lib._C", 53 | sources, 54 | include_dirs=include_dirs, 55 | define_macros=define_macros, 56 | extra_compile_args=extra_compile_args, 57 | ) 58 | ] 59 | 60 | return ext_modules 61 | 62 | 63 | cur_dir = os.getcwd() 64 | with open("tools/dl_train", "w") as dl_lib_train: 65 | head = f"#!/bin/bash\n\nexport OMP_NUM_THREADS=1\n" 66 | dl_lib_train.write( 67 | head + f"python3 {os.path.join(cur_dir, 'tools', 'train_net.py')} $@") 68 | with open("tools/dl_test", "w") as dl_lib_test: 69 | dl_lib_test.write( 70 | head + f"python3 {os.path.join(cur_dir, 'tools', 'test_net.py')} $@") 71 | 72 | setup( 73 | name="dl_lib", 74 | version="0.1", 75 | author="Yanwei Li", 76 | url="https://github.com/yanwei-li/Dynamic-Routing", 77 | description="Deep Learning lib (dl_lib) is a " 78 | "platform for object detection based on Detectron2.", 79 | packages=find_packages(exclude=("configs", "tests")), 80 | python_requires=">=3.6", 81 | install_requires=[ 82 | "termcolor>=1.1", 83 | "Pillow>=6.0", 84 | "tabulate", 85 | "cloudpickle", 86 | "matplotlib", 87 | "tqdm>4.29.0", 88 | "Shapely", 89 | "tensorboard", 90 | "portalocker", 91 | "pycocotools", 92 | "easydict", 93 | "imagesize", 94 | ], 95 | extras_require={"all": ["shapely", "psutil"]}, 96 | ext_modules=get_extensions(), 97 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 98 | scripts=["tools/dl_train", "tools/dl_test"], 99 | ) 100 | -------------------------------------------------------------------------------- /tools/benchmark.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | A script to benchmark builtin models. 4 | 5 | Note: this script has an extra dependency of psutil. 
6 | """ 7 | 8 | import itertools 9 | import logging 10 | 11 | import torch 12 | import tqdm 13 | from torch.nn.parallel import DistributedDataParallel 14 | 15 | import psutil 16 | from dl_lib.checkpoint import DetectionCheckpointer 17 | from dl_lib.config import get_cfg 18 | from dl_lib.data import (DatasetFromList, build_detection_test_loader, 19 | build_detection_train_loader) 20 | from dl_lib.engine import SimpleTrainer, default_argument_parser, hooks, launch 21 | from dl_lib.modeling import build_model 22 | from dl_lib.solver import build_optimizer 23 | from dl_lib.utils import comm 24 | from dl_lib.utils.events import CommonMetricPrinter 25 | from dl_lib.utils.logger import setup_logger 26 | from dl_lib.utils.timer import Timer 27 | 28 | logger = logging.getLogger("dl_lib") 29 | 30 | 31 | def setup(args): 32 | cfg = get_cfg() 33 | cfg.merge_from_file(args.config_file) 34 | cfg.SOLVER.BASE_LR = 0.001 # Avoid NaNs. Not useful in this script anyway. 35 | cfg.merge_from_list(args.opts) 36 | cfg.freeze() 37 | setup_logger(distributed_rank=comm.get_rank()) 38 | return cfg 39 | 40 | 41 | def benchmark_data(args): 42 | cfg = setup(args) 43 | 44 | dataloader = build_detection_train_loader(cfg) 45 | 46 | timer = Timer() 47 | itr = iter(dataloader) 48 | for i in range(10): # warmup 49 | next(itr) 50 | if i == 0: 51 | startup_time = timer.seconds() 52 | timer = Timer() 53 | max_iter = 1000 54 | for _ in tqdm.trange(max_iter): 55 | next(itr) 56 | logger.info("{} iters ({} images) in {} seconds.".format( 57 | max_iter, max_iter * cfg.SOLVER.IMS_PER_BATCH, timer.seconds())) 58 | logger.info("Startup time: {} seconds".format(startup_time)) 59 | vram = psutil.virtual_memory() 60 | logger.info("RAM Usage: {:.2f}/{:.2f} GB".format( 61 | (vram.total - vram.available) / 1024**3, vram.total / 1024**3)) 62 | 63 | 64 | def benchmark_train(args): 65 | cfg = setup(args) 66 | model = build_model(cfg) 67 | logger.info("Model:\n{}".format(model)) 68 | if comm.get_world_size() > 1: 69 | model = DistributedDataParallel(model, 70 | device_ids=[comm.get_local_rank()], 71 | broadcast_buffers=False) 72 | optimizer = build_optimizer(cfg, model) 73 | checkpointer = DetectionCheckpointer(model, optimizer=optimizer) 74 | checkpointer.load(cfg.MODEL.WEIGHTS) 75 | 76 | cfg.defrost() 77 | cfg.DATALOADER.NUM_WORKERS = 0 78 | data_loader = build_detection_train_loader(cfg) 79 | dummy_data = list(itertools.islice(data_loader, 100)) 80 | 81 | def f(): 82 | while True: 83 | yield from DatasetFromList(dummy_data, copy=False) 84 | 85 | max_iter = 400 86 | trainer = SimpleTrainer(model, f(), optimizer) 87 | trainer.register_hooks([ 88 | hooks.IterationTimer(), 89 | hooks.PeriodicWriter([CommonMetricPrinter(max_iter)]) 90 | ]) 91 | trainer.train(1, max_iter) 92 | 93 | 94 | @torch.no_grad() 95 | def benchmark_eval(args): 96 | cfg = setup(args) 97 | model = build_model(cfg) 98 | model.eval() 99 | logger.info("Model:\n{}".format(model)) 100 | DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS) 101 | 102 | cfg.defrost() 103 | cfg.DATALOADER.NUM_WORKERS = 0 104 | data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) 105 | dummy_data = list(itertools.islice(data_loader, 100)) 106 | 107 | def f(): 108 | while True: 109 | yield from DatasetFromList(dummy_data, copy=False) 110 | 111 | for _ in range(5): # warmup 112 | model(dummy_data[0]) 113 | 114 | max_iter = 400 115 | timer = Timer() 116 | with tqdm.tqdm(total=max_iter) as pbar: 117 | for idx, d in enumerate(f()): 118 | if idx == max_iter: 119 | break 120 | model(d) 121 | 
pbar.update() 122 | logger.info("{} iters in {} seconds.".format(max_iter, timer.seconds())) 123 | 124 | 125 | if __name__ == "__main__": 126 | parser = default_argument_parser() 127 | parser.add_argument("--task", 128 | choices=["train", "eval", "data"], 129 | required=True) 130 | args = parser.parse_args() 131 | assert not args.eval_only 132 | 133 | if args.task == "data": 134 | f = benchmark_data 135 | elif args.task == "train": 136 | """ 137 | Note: training speed may not be representative. 138 | The training cost of a R-CNN model varies with the content of the data 139 | and the quality of the model. 140 | """ 141 | f = benchmark_train 142 | elif args.task == "eval": 143 | f = benchmark_eval 144 | # only benchmark single-GPU inference. 145 | assert args.num_gpus == 1 and args.num_machines == 1 146 | launch(f, 147 | args.num_gpus, 148 | args.num_machines, 149 | args.machine_rank, 150 | args.dist_url, 151 | args=(args, )) 152 | -------------------------------------------------------------------------------- /tools/rm_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | import re 7 | 8 | from colorama import Fore, Style 9 | 10 | 11 | def remove_parser(): 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument("--start-iter", 14 | "-s", 15 | type=int, 16 | default=0, 17 | help="start iter to remove") 18 | parser.add_argument("--end-iter", 19 | "-e", 20 | type=int, 21 | default=0, 22 | help="end iter to remove") 23 | parser.add_argument("--prefix", 24 | "-p", 25 | type=str, 26 | default="model_", 27 | help="prefix of model to remove") 28 | parser.add_argument("--dir", 29 | "-d", 30 | type=str, 31 | default="/data/Outputs", 32 | help="dir to remove pth model") 33 | parser.add_argument("--real", 34 | "-r", 35 | action="store_true", 36 | help="really delete or just show what you will delete") 37 | return parser 38 | 39 | 40 | def remove_files(args): 41 | start = args.start_iter 42 | end = args.end_iter 43 | prefix = args.prefix 44 | for folder, _, files in os.walk(args.dir): 45 | # l = [x for x in f if x.endswith(".pth")] 46 | models = [ 47 | f for f in files if re.search(prefix + r"[0123456789]*\.pth", f) 48 | ] 49 | delete = [ 50 | os.path.join(folder, model) for model in models 51 | if start <= int(model[len(prefix):-len(".pth")]) <= end 52 | ] 53 | if delete: 54 | for f in delete: 55 | if args.real: 56 | print(f"remove {f}") 57 | os.remove(f) 58 | else: 59 | print(f"you may remove {f}") 60 | if not args.real: 61 | print(Fore.RED + f"use --real parameter to really delete models" + 62 | Style.RESET_ALL) 63 | 64 | 65 | def main(): 66 | args = remove_parser().parse_args() 67 | remove_files(args) 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Detection Training Script. 4 | 5 | This scripts reads a given config file and runs the training or evaluation. 6 | It is an entry point that is made to train standard models in dl_lib. 7 | 8 | In order to let one script support training of many models, 9 | this script contains logic that are specific to these built-in models and therefore 10 | may not be suitable for your own project. 
11 | For example, your research project perhaps only needs a single "evaluator". 12 | 13 | Therefore, we recommend you to use dl_lib as an library and take 14 | this file as an example of how to use the library. 15 | You may want to write your own script with your datasets and other customizations. 16 | """ 17 | import logging 18 | import os 19 | import sys 20 | sys.path.insert(0, '.') # noqa: E402 21 | 22 | from collections import OrderedDict 23 | import torch 24 | from colorama import Fore, Style 25 | 26 | import dl_lib.utils.comm as comm 27 | from config import config 28 | from dl_lib.checkpoint import DetectionCheckpointer 29 | from dl_lib.data import MetadataCatalog 30 | from dl_lib.engine import (CustomizedTrainer, default_argument_parser, 31 | default_setup, hooks, launch) 32 | from dl_lib.evaluation import (CityscapesEvaluator, DatasetEvaluators, 33 | PascalVOCDetectionEvaluator, SemSegEvaluator, 34 | verify_results) 35 | from dl_lib.modeling import SemanticSegmentorWithTTA 36 | from net import build_model 37 | 38 | 39 | class Trainer(CustomizedTrainer): 40 | """ 41 | We use the "CustomizedTrainer" which contains a number pre-defined logic for 42 | standard training workflow. They may not work for you, especially if you 43 | are working on a new research project. In that case you can use the cleaner 44 | "SimpleTrainer", or write your own training loop. 45 | """ 46 | @classmethod 47 | def build_evaluator(cls, cfg, dataset_name, output_folder=None): 48 | """ 49 | Create evaluator(s) for a given dataset. 50 | This uses the special metadata "evaluator_type" associated with each builtin dataset. 51 | For your own dataset, you can simply create an evaluator manually in your 52 | script and do not have to worry about the hacky if-else logic here. 53 | """ 54 | if output_folder is None: 55 | output_folder = os.path.join(cfg.OUTPUT_DIR, "inference") 56 | evaluator_list = [] 57 | evaluator_type = MetadataCatalog.get(dataset_name).evaluator_type 58 | if evaluator_type in ["sem_seg", "coco_panoptic_seg"]: 59 | evaluator_list.append( 60 | SemSegEvaluator( 61 | dataset_name, 62 | distributed=True, 63 | num_classes=cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 64 | ignore_label=cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE, 65 | output_dir=output_folder, 66 | )) 67 | elif evaluator_type == "cityscapes": 68 | assert ( 69 | torch.cuda.device_count() >= comm.get_rank() 70 | ), "CityscapesEvaluator currently do not work with multiple machines." 
71 | return CityscapesEvaluator(dataset_name) 72 | elif evaluator_type == "pascal_voc": 73 | return PascalVOCDetectionEvaluator(dataset_name) 74 | if hasattr(cfg, "EVALUATORS"): 75 | for evaluator in cfg.EVALUATORS: 76 | evaluator_list.append( 77 | evaluator(dataset_name, True, output_folder, dump=True)) 78 | if len(evaluator_list) == 0: 79 | raise NotImplementedError( 80 | "no Evaluator for the dataset {} with the type {}".format( 81 | dataset_name, evaluator_type)) 82 | elif len(evaluator_list) == 1: 83 | return evaluator_list[0] 84 | return DatasetEvaluators(evaluator_list) 85 | 86 | @classmethod 87 | def test_with_TTA(cls, cfg, model): 88 | logger = logging.getLogger("dl_lib.trainer") 89 | # In the end of training, run an evaluation with TTA 90 | logger.info("Running inference with test-time augmentation ...") 91 | model = SemanticSegmentorWithTTA(cfg, model) 92 | evaluators = [ 93 | cls.build_evaluator(cfg, 94 | name, 95 | output_folder=os.path.join( 96 | cfg.OUTPUT_DIR, "inference_TTA")) 97 | for name in cfg.DATASETS.TEST 98 | ] 99 | res = cls.test(cfg, model, evaluators) 100 | res = OrderedDict({k + "_TTA": v for k, v in res.items()}) 101 | return res 102 | 103 | 104 | def main(args): 105 | config.merge_from_list(args.opts) 106 | cfg, logger = default_setup(config, args) 107 | model = build_model(cfg) 108 | logger.info(f"Model structure: {model}") 109 | file_sys = os.statvfs(cfg.OUTPUT_DIR) 110 | free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30 111 | # We assume that a single dumped model is 700Mb 112 | eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // 113 | cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10 114 | if eval_space_Gb > free_space_Gb: 115 | logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) " 116 | f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}") 117 | if args.eval_only: 118 | DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load( 119 | cfg.MODEL.WEIGHTS, resume=args.resume) 120 | res = Trainer.test(cfg, model) 121 | if comm.is_main_process(): 122 | verify_results(cfg, res) 123 | if cfg.TEST.AUG.ENABLED: 124 | res.update(Trainer.test_with_TTA(cfg, model)) 125 | return res 126 | """ 127 | If you'd like to do anything fancier than the standard training logic, 128 | consider writing your own training loop or subclassing the trainer. 129 | """ 130 | trainer = Trainer(cfg, model) 131 | trainer.resume_or_load(resume=args.resume) 132 | if cfg.TEST.AUG.ENABLED: 133 | trainer.register_hooks([ 134 | hooks.EvalHook(0, 135 | lambda: trainer.test_with_TTA(cfg, trainer.model)) 136 | ]) 137 | 138 | return trainer.train() 139 | 140 | 141 | if __name__ == "__main__": 142 | args = default_argument_parser().parse_args() 143 | print("soft link to {}".format(config.OUTPUT_DIR)) 144 | config.link_log() 145 | print("Command Line Args:", args) 146 | launch( 147 | main, 148 | args.num_gpus, 149 | num_machines=args.num_machines, 150 | machine_rank=args.machine_rank, 151 | dist_url=args.dist_url, 152 | args=(args, ), 153 | ) 154 | --------------------------------------------------------------------------------
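
The module docstring of `tools/train_net.py` above recommends using dl_lib as a library and treating that script only as an example. The snippet below is a minimal sketch of that library-style pattern for one playground experiment; it is not part of the repository, and the playground directory, checkpoint path, and output folder are placeholder assumptions. It deliberately skips the distributed `launch()` and `default_setup()` steps that `train_net.py` performs.

```python
# Library-style usage sketch (not part of the repository). Assumptions:
# run from the repository root after `python3 setup.py build develop`,
# with a trained checkpoint available at the placeholder path below.
import sys

# train_net.py resolves `config` and `net` from the working directory, so
# point sys.path at the playground experiment to load instead.
sys.path.insert(0, "playground/Dynamic/Seg.Layer16")

from config import config                    # DynamicSemanticSegmentationConfig
from net import build_model                  # assembles DynamicNet4Seg from the config
from dl_lib.checkpoint import DetectionCheckpointer
from dl_lib.evaluation import SemSegEvaluator

# Override options the same way train_net.py does, through merge_from_list.
config.merge_from_list(["MODEL.WEIGHTS", "/path/to/your/save_dir/ckpt.pth"])

model = build_model(config)
DetectionCheckpointer(model, save_dir=config.OUTPUT_DIR).resume_or_load(
    config.MODEL.WEIGHTS, resume=False)
model.eval()

# As the build_evaluator docstring suggests, an evaluator can be constructed
# directly instead of going through the evaluator_type if/else logic.
evaluator = SemSegEvaluator(
    "cityscapes_fine_sem_seg_val",
    distributed=False,  # single process here, since launch() is skipped
    num_classes=config.MODEL.SEM_SEG_HEAD.NUM_CLASSES,
    ignore_label=config.MODEL.SEM_SEG_HEAD.IGNORE_VALUE,
    output_dir="./inference",
)
# Trainer.test(config, model, [evaluator]) would then mirror the --eval-only
# path in train_net.py's main(), minus the distributed setup.
```

This mirrors how every playground experiment pairs a `config.py` with a `net.py`: the `net.py` shown above injects `build_backbone` and `build_sem_seg_head` into the config before constructing `DynamicNet4Seg`, so any script that imports the pair gets the same model the `dl_train`/`dl_test` wrappers would build.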