├── README.md ├── classification ├── .gitignore ├── .gitmodules ├── README.md ├── data │ └── README.md ├── datasets │ ├── __init__.py │ ├── cvfunctional.py │ ├── cvtransforms.py │ ├── dataloader_imagenet_dct.py │ ├── dataset_imagenet_dct.py │ └── vision.py ├── install_libjpegturbo.sh ├── main │ ├── __init__.py │ ├── imagenet_mobilenetv2_upscaled_static.py │ └── imagenet_resnet_upscaled_static.py ├── models │ ├── __init__.py │ ├── imagenet │ │ ├── __init__.py │ │ ├── mobilenetv2.py │ │ └── resnet.py │ └── utils.py ├── pretrained │ └── README.md ├── requirements.txt ├── scripts │ └── resnet_upscaled_static.sh └── utils │ ├── __init__.py │ ├── eval.py │ ├── init_weights.py │ ├── logger.py │ ├── misc.py │ ├── progress │ ├── .gitignore │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.rst │ ├── demo.gif │ ├── progress │ │ ├── __init__.py │ │ ├── bar.py │ │ ├── counter.py │ │ ├── helpers.py │ │ └── spinner.py │ ├── setup.py │ └── test_progress.py │ └── visualize.py └── segmentation ├── .gitignore ├── .isort.cfg ├── .style.yapf ├── .travis.yml ├── README.md ├── configs ├── faster_rcnn_r50_fpn_1x_static_24_wofreeze.py ├── faster_rcnn_r50_fpn_1x_static_64_wofreeze.py ├── mask_rcnn_r50_rpn_1x_DCT_static_24_wofreeze.py ├── mask_rcnn_r50_rpn_1x_DCT_static_64_wofreeze.py └── mean_std.py ├── data └── coco │ └── README.md ├── docker └── Dockerfile ├── docs ├── DATA_PIPELINE.md ├── GETTING_STARTED.md ├── INSTALL.md ├── MODEL_ZOO.md ├── ROBUSTNESS_BENCHMARKING.md └── TECHNICAL_DETAILS.md ├── install_libjpegturbo.sh ├── mmdet ├── __init__.py ├── apis │ ├── __init__.py │ ├── env.py │ ├── inference.py │ ├── mean_std_cal.py │ └── train.py ├── core │ ├── __init__.py │ ├── anchor │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── anchor_target.py │ │ ├── guided_anchor_target.py │ │ ├── point_generator.py │ │ └── point_target.py │ ├── bbox │ │ ├── __init__.py │ │ ├── assign_sampling.py │ │ ├── assigners │ │ │ ├── __init__.py │ │ │ ├── approx_max_iou_assigner.py │ │ │ ├── assign_result.py │ │ │ ├── base_assigner.py │ │ │ ├── max_iou_assigner.py │ │ │ └── point_assigner.py │ │ ├── bbox_target.py │ │ ├── geometry.py │ │ ├── samplers │ │ │ ├── __init__.py │ │ │ ├── base_sampler.py │ │ │ ├── combined_sampler.py │ │ │ ├── instance_balanced_pos_sampler.py │ │ │ ├── iou_balanced_neg_sampler.py │ │ │ ├── ohem_sampler.py │ │ │ ├── pseudo_sampler.py │ │ │ ├── random_sampler.py │ │ │ └── sampling_result.py │ │ └── transforms.py │ ├── evaluation │ │ ├── __init__.py │ │ ├── bbox_overlaps.py │ │ ├── class_names.py │ │ ├── coco_utils.py │ │ ├── eval_hooks.py │ │ ├── mean_ap.py │ │ └── recall.py │ ├── fp16 │ │ ├── __init__.py │ │ ├── decorators.py │ │ ├── hooks.py │ │ └── utils.py │ ├── mask │ │ ├── __init__.py │ │ ├── mask_target.py │ │ └── utils.py │ ├── post_processing │ │ ├── __init__.py │ │ ├── bbox_nms.py │ │ └── merge_augs.py │ └── utils │ │ ├── __init__.py │ │ ├── dist_utils.py │ │ └── misc.py ├── datasets │ ├── __init__.py │ ├── builder.py │ ├── cityscapes.py │ ├── coco.py │ ├── custom.py │ ├── dataset_wrappers.py │ ├── extra_aug.py │ ├── loader │ │ ├── __init__.py │ │ ├── build_loader.py │ │ └── sampler.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── compose.py │ │ ├── dct_channel_index.py │ │ ├── formating.py │ │ ├── formatingDCT.py │ │ ├── loading.py │ │ ├── test_aug.py │ │ ├── transforms.py │ │ └── transformsDCT.py │ ├── registry.py │ ├── transforms.py │ ├── voc.py │ ├── wider_face.py │ └── xml_style.py ├── models │ ├── __init__.py │ ├── anchor_heads │ │ ├── __init__.py │ │ ├── anchor_head.py │ │ ├── fcos_head.py │ │ 
├── ga_retina_head.py │ │ ├── ga_rpn_head.py │ │ ├── guided_anchor_head.py │ │ ├── reppoints_head.py │ │ ├── retina_head.py │ │ ├── rpn_head.py │ │ └── ssd_head.py │ ├── backbones │ │ ├── __init__.py │ │ ├── gate.py │ │ ├── gumbel.py │ │ ├── hrnet.py │ │ ├── resnet.py │ │ ├── resnetDCT.py │ │ ├── resnetDCT_dynamic.py │ │ ├── resnet_dynamic.py │ │ ├── resnet_static.py │ │ ├── resnext.py │ │ └── ssd_vgg.py │ ├── bbox_heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ ├── convfc_bbox_head.py │ │ └── double_bbox_head.py │ ├── builder.py │ ├── detectors │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cascade_rcnn.py │ │ ├── double_head_rcnn.py │ │ ├── fast_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── grid_rcnn.py │ │ ├── htc.py │ │ ├── mask_rcnn.py │ │ ├── mask_scoring_rcnn.py │ │ ├── reppoints_detector.py │ │ ├── retinanet.py │ │ ├── rpn.py │ │ ├── single_stage.py │ │ ├── test_mixins.py │ │ └── two_stage.py │ ├── losses │ │ ├── __init__.py │ │ ├── accuracy.py │ │ ├── balanced_l1_loss.py │ │ ├── cross_entropy_loss.py │ │ ├── focal_loss.py │ │ ├── ghm_loss.py │ │ ├── iou_loss.py │ │ ├── mse_loss.py │ │ ├── smooth_l1_loss.py │ │ └── utils.py │ ├── mask_heads │ │ ├── __init__.py │ │ ├── fcn_mask_head.py │ │ ├── fused_semantic_head.py │ │ ├── grid_head.py │ │ ├── htc_mask_head.py │ │ └── maskiou_head.py │ ├── necks │ │ ├── __init__.py │ │ ├── bfp.py │ │ ├── fpn.py │ │ └── hrfpn.py │ ├── plugins │ │ ├── __init__.py │ │ ├── generalized_attention.py │ │ └── non_local.py │ ├── registry.py │ ├── roi_extractors │ │ ├── __init__.py │ │ └── single_level.py │ ├── shared_heads │ │ ├── __init__.py │ │ └── res_layer.py │ └── utils │ │ ├── __init__.py │ │ ├── conv_module.py │ │ ├── conv_ws.py │ │ ├── norm.py │ │ ├── scale.py │ │ └── weight_init.py ├── ops │ ├── __init__.py │ ├── context_block.py │ ├── dcn │ │ ├── __init__.py │ │ ├── deform_conv.py │ │ ├── deform_pool.py │ │ └── src │ │ │ ├── deform_conv_cuda.cpp │ │ │ ├── deform_conv_cuda_kernel.cu │ │ │ ├── deform_pool_cuda.cpp │ │ │ └── deform_pool_cuda_kernel.cu │ ├── masked_conv │ │ ├── __init__.py │ │ ├── masked_conv.py │ │ └── src │ │ │ ├── masked_conv2d_cuda.cpp │ │ │ └── masked_conv2d_kernel.cu │ ├── nms │ │ ├── __init__.py │ │ ├── nms_wrapper.py │ │ └── src │ │ │ ├── nms_cpu.cpp │ │ │ ├── nms_cuda.cpp │ │ │ ├── nms_kernel.cu │ │ │ └── soft_nms_cpu.pyx │ ├── roi_align │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_align.py │ │ └── src │ │ │ ├── roi_align_cuda.cpp │ │ │ └── roi_align_kernel.cu │ ├── roi_pool │ │ ├── __init__.py │ │ ├── gradcheck.py │ │ ├── roi_pool.py │ │ └── src │ │ │ ├── roi_pool_cuda.cpp │ │ │ └── roi_pool_kernel.cu │ └── sigmoid_focal_loss │ │ ├── __init__.py │ │ ├── sigmoid_focal_loss.py │ │ └── src │ │ ├── sigmoid_focal_loss.cpp │ │ └── sigmoid_focal_loss_cuda.cu └── utils │ ├── __init__.py │ ├── draw_inputgate.py │ ├── flops_counter.py │ ├── plot_dct.py │ ├── registry.py │ └── transfer_model.py ├── requirements.txt ├── results └── segmentation_result.jpg ├── setup.py ├── setup_env.sh ├── tests ├── requirements.txt └── test_utils.py ├── tools └── test.py └── work_dirs └── README.md /README.md: -------------------------------------------------------------------------------- 1 | # Notice: This repository is deprecated, please use https://github.com/calmevtime/DCTNet. 2 | 3 | # Learning in the Frequency Domain 4 | 5 | 6 | This is the source code for the CVPR'20 paper entitled "Learning in the Frequency Domain" (https://arxiv.org/abs/2002.12416). 
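The input representation is built from DCT coefficients decoded directly out of the JPEG bitstream rather than from RGB pixels. A minimal sketch of reading such coefficients with [jpeg2dct](https://github.com/uber-research/jpeg2dct) (the file name is a placeholder, and the shapes in the comments assume a 4:2:0-subsampled JPEG):

```python
from jpeg2dct.numpy import load

# Decode a JPEG only up to its DCT coefficients; no inverse DCT or
# YCbCr-to-RGB conversion is performed. For an HxW image with 4:2:0
# chroma subsampling, dct_y has shape (H/8, W/8, 64), while dct_cb and
# dct_cr have shape (H/16, W/16, 64): one 64-entry 8x8 block per position.
dct_y, dct_cb, dct_cr = load('example.jpg')  # placeholder path
```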
7 | 8 | ## Highlights 9 | * We propose a method of learning in the frequency domain (using DCT coefficients as input), which requires little modification to the existing CNN models that take RGB input. We validate our method on ResNet-50 and MobileNetV2 for the image classification task and Mask R-CNN for the instance segmentation task. 10 | * We show that learning in the frequency domain better preserves image information in the pre-processing stage than the conventional spatial downsampling approach (spatially resizing the images to 224×224, the default input size of most CNN models) and consequently achieves improved accuracy, i.e., +1.41% on ResNet-50 and +0.66% on MobileNetV2 for the ImageNet classification task, and +0.8% on Mask R-CNN for both object detection and instance segmentation tasks. 11 | * We analyze the spectral bias from the frequency perspective and show that the CNN models are more sensitive to low-frequency channels than to high-frequency channels, similar to the human visual system (HVS). 12 | * We propose a learning-based dynamic channel selection method to identify the trivial frequency components for static removal during inference. Experimental results on ResNet-50 show that one can prune up to 87.5% of the frequency channels using the proposed channel selection method with no or little accuracy degradation in the ImageNet classification task. 13 | * To the best of our knowledge, this is the first work that explores learning in the frequency domain for object detection and instance segmentation. Experimental results on Mask R-CNN show that learning in the frequency domain can achieve a 0.8% average precision improvement for the instance segmentation task on the COCO dataset. 14 | 15 | Please refer to the [image classification](classification) and [instance segmentation](segmentation) sections for more details. 16 | -------------------------------------------------------------------------------- /classification/.gitignore: -------------------------------------------------------------------------------- 1 | # tmp dirs and files 2 | .idea 3 | checkpoint 4 | checkpoints 5 | cifar-debug.py 6 | test.eps 7 | dev 8 | monitor.py 9 | exp 10 | jpeg2dct 11 | libjpeg-turbo-2.0.3 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | env/ 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | parts/ 33 | sdist/ 34 | var/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *,cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # IPython Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # dotenv 91 | .env 92 | 93 | # virtualenv 94 | venv/ 95 | ENV/ 96 | 97 | # Spyder project settings 98 | .spyderproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | *events 104 | 105 | -------------------------------------------------------------------------------- /classification/.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "utils/progress"] 2 | path = utils/progress 3 | url = https://github.com/verigak/progress.git 4 | -------------------------------------------------------------------------------- /classification/README.md: -------------------------------------------------------------------------------- 1 | # Learning in the Frequency Domain 2 | Classification on ImageNet with PyTorch. 3 | 4 | ## Prerequisites 5 | * PyTorch-compatible GPU 6 | * Python 3.7 7 | * PyTorch >= 1.2.0 8 | * opencv-python 4.1.1 9 | * libjpeg-turbo 2.0.3 10 | * [jpeg2dct](https://github.com/uber-research/jpeg2dct) 11 | 12 | ## Install 13 | * Install [PyTorch](http://pytorch.org/) 14 | 15 | * Clone this repo recursively 16 | ``` 17 | git clone --recursive https://github.com/calmevtime1990/supp 18 | ``` 19 | 20 | * Install required packages 21 | ``` 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | * Install [libjpeg-turbo](http://www.linuxfromscratch.org/blfs/view/svn/general/libjpeg.html) 26 | ``` 27 | bash install_libjpegturbo.sh 28 | ``` 29 | 30 | * Download pretrained [models][1] and extract to [`pretrained`](pretrained). The folder structure should look like this: 31 | ``` 32 | pretrained 33 | ├── resnet50dct_upscaled_static_24 34 | │   ├── log.txt 35 | │   └── model_best.pth.tar 36 | └── resnet50dct_upscaled_static_64 37 | ├── log.txt 38 | └── model_best.pth.tar 39 | ``` 40 | * Prepare datasets 41 | It is recommended to symlink the dataset root to [`data`](data). The folder structure should look like this: 42 | ``` 43 | data 44 | ├── train 45 | ├── val 46 | └── README.md 47 | ``` 48 | 49 | ## Evaluation 50 | Run [`resnet_upscaled_static.sh`](scripts/resnet_upscaled_static.sh) to start testing. Change `--data $imagenet_dir` to point to the location of the ImageNet dataset.
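Alternatively, the validation loader can be driven directly from Python. A minimal sketch, assuming `model` already holds a checkpoint-loaded network and that the loader yields `(inputs, targets)` batches (see [`main/imagenet_resnet_upscaled_static.py`](main/imagenet_resnet_upscaled_static.py) for the complete evaluation loop):

```python
import torch
from argparse import Namespace
from datasets.dataloader_imagenet_dct import valloader_upscaled_static
from utils.eval import accuracy

# Mirror the flags consumed by valloader_upscaled_static.
args = Namespace(data='./data', subset='24', test_batch=256, workers=16)
val_loader = valloader_upscaled_static(args, model='resnet')

model.eval()
top1_sum = top5_sum = seen = 0
with torch.no_grad():
    for inputs, targets in val_loader:
        outputs = model(inputs)
        prec1, prec5 = accuracy(outputs, targets, topk=(1, 5))
        top1_sum += prec1.item() * targets.size(0)
        top5_sum += prec5.item() * targets.size(0)
        seen += targets.size(0)
print('Top-1: {:.3f}  Top-5: {:.3f}'.format(top1_sum / seen, top5_sum / seen))
```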
51 | ### Testing the proposed model with 24 channels 52 | ``` 53 | bash scripts/resnet_upscaled_static.sh 24 54 | ``` 55 | 56 | ### Testing the proposed model with 64 channels 57 | ``` 58 | bash scripts/resnet_upscaled_static.sh 64 59 | ``` 60 | 61 | ## Results 62 | ### Performance of the proposed model - ResNet-50 63 | | ResNet-50 | #Channels | Size Per Channel | Top-1 | Top-5 | Normalized Input Size | 64 | |:--------------:|:---------:|:----------------:|:------:|:------:|:---------------------:| 65 | | RGB | 3 | 224x224 | 75.780 | 92.650 | 1.0 | 66 | | [DCT-24 (ours)][2] | 24 | 56x56 | 76.792 | 93.254 | 0.5 | 67 | | [DCT-64 (ours)][3] | 64 | 56x56 | 77.160 | 93.474 | 1.3 | 68 | 69 | ### Performance of the proposed model - MobileNetV2 70 | | MobileNetV2 | #Channels | Size Per Channel | Top-1 | Top-5 | 71 | |:-------------:|:---------:|:----------------:|:------:|:------:| 72 | | RGB | 3 | 224x224 | 71.702 | 90.415 | 73 | | [DCT-24 (ours)][4] | 24 | 112x112 | 72.364 | 90.606 | 74 | | [DCT-32 (ours)][5] | 32 | 112x112 | 72.282 | 90.592 | 75 | 76 | [1]: https://drive.google.com/open?id=1Wl9JDY1zJPtkcRBpVojQ9ERtxul7k9rt 77 | [2]: https://drive.google.com/open?id=1eZosbUN1-MwdIOk6ANrca049kztV-Gv2 78 | [3]: https://drive.google.com/open?id=1eluAU8gngxMFL7ti73qv7vyrTujhGZA9 79 | [4]: https://drive.google.com/open?id=1wkAFUDDj6wUVKLuFjlPvvaHTOsROOaRk 80 | [5]: https://drive.google.com/open?id=1QaDqumx9j4cGgcJuQVzYSIPiaClOKZj3 81 | -------------------------------------------------------------------------------- /classification/data/README.md: -------------------------------------------------------------------------------- 1 | ## This folder should contain the ImageNet dataset. You can download the [`dataset`](http://image-net.org/download) here. 2 | -------------------------------------------------------------------------------- /classification/datasets/dataloader_imagenet_dct.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import torch 4 | from datasets.dataset_imagenet_dct import ImageFolderDCT 5 | import datasets.cvtransforms as transforms 6 | from datasets import train_y_mean, train_y_std, train_cb_mean, train_cb_std, \ 7 | train_cr_mean, train_cr_std 8 | from datasets import train_y_mean_upscaled, train_y_std_upscaled, train_cb_mean_upscaled, train_cb_std_upscaled, \ 9 | train_cr_mean_upscaled, train_cr_std_upscaled 10 | from datasets import train_dct_subset_mean, train_dct_subset_std 11 | from datasets import train_upscaled_static_mean, train_upscaled_static_std 12 | 13 | def valloader_upscaled_static(args, model='mobilenet'): 14 | valdir = os.path.join(args.data, 'val') 15 | 16 | if model == 'mobilenet': 17 | input_size1 = 1024 18 | input_size2 = 896 19 | elif model == 'resnet': 20 | input_size1 = 512 21 | input_size2 = 448 22 | else: 23 | raise NotImplementedError 24 | if int(args.subset) == 0 or int(args.subset) == 192: 25 | transform = transforms.Compose([ 26 | transforms.Resize(input_size1), 27 | transforms.CenterCrop(input_size2), 28 | transforms.Upscale(upscale_factor=2), 29 | transforms.TransformUpscaledDCT(), 30 | transforms.ToTensorDCT(), 31 | transforms.Aggregate(), 32 | transforms.NormalizeDCT( 33 | train_upscaled_static_mean, 34 | train_upscaled_static_std, 35 | ) 36 | ]) 37 | else: 38 | transform = transforms.Compose([ 39 | transforms.Resize(input_size1), 40 | transforms.CenterCrop(input_size2), 41 | transforms.Upscale(upscale_factor=2), 42 | transforms.TransformUpscaledDCT(), 43 |
transforms.ToTensorDCT(), 44 | transforms.SubsetDCT(channels=args.subset), 45 | transforms.Aggregate(), 46 | transforms.NormalizeDCT( 47 | train_upscaled_static_mean, 48 | train_upscaled_static_std, 49 | channels=args.subset 50 | ) 51 | ]) 52 | 53 | val_loader = torch.utils.data.DataLoader( 54 | ImageFolderDCT(valdir, transform), 55 | batch_size=args.test_batch, shuffle=False, 56 | num_workers=args.workers, pin_memory=True) 57 | 58 | return val_loader 59 | 60 | -------------------------------------------------------------------------------- /classification/datasets/vision.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data as data 4 | 5 | 6 | class VisionDataset(data.Dataset): 7 | _repr_indent = 4 8 | 9 | def __init__(self, root, transforms=None, transform=None, target_transform=None): 10 | if isinstance(root, torch._six.string_classes): 11 | root = os.path.expanduser(root) 12 | self.root = root 13 | 14 | has_transforms = transforms is not None 15 | has_separate_transform = transform is not None or target_transform is not None 16 | if has_transforms and has_separate_transform: 17 | raise ValueError("Only transforms or transform/target_transform can " 18 | "be passed as argument") 19 | 20 | # for backwards-compatibility 21 | self.transform = transform 22 | self.target_transform = target_transform 23 | 24 | if has_separate_transform: 25 | transforms = StandardTransform(transform, target_transform) 26 | self.transforms = transforms 27 | 28 | def __getitem__(self, index): 29 | raise NotImplementedError 30 | 31 | def __len__(self): 32 | raise NotImplementedError 33 | 34 | def __repr__(self): 35 | head = "Dataset " + self.__class__.__name__ 36 | body = ["Number of datapoints: {}".format(self.__len__())] 37 | if self.root is not None: 38 | body.append("Root location: {}".format(self.root)) 39 | body += self.extra_repr().splitlines() 40 | if hasattr(self, "transforms") and self.transforms is not None: 41 | body += [repr(self.transforms)] 42 | lines = [head] + [" " * self._repr_indent + line for line in body] 43 | return '\n'.join(lines) 44 | 45 | def _format_transform_repr(self, transform, head): 46 | lines = transform.__repr__().splitlines() 47 | return (["{}{}".format(head, lines[0])] + 48 | ["{}{}".format(" " * len(head), line) for line in lines[1:]]) 49 | 50 | def extra_repr(self): 51 | return "" 52 | 53 | 54 | class StandardTransform(object): 55 | def __init__(self, transform=None, target_transform=None): 56 | self.transform = transform 57 | self.target_transform = target_transform 58 | 59 | def __call__(self, input, target): 60 | if self.transform is not None: 61 | input = self.transform(input) 62 | if self.target_transform is not None: 63 | target = self.target_transform(target) 64 | return input, target 65 | 66 | def _format_transform_repr(self, transform, head): 67 | lines = transform.__repr__().splitlines() 68 | return (["{}{}".format(head, lines[0])] + 69 | ["{}{}".format(" " * len(head), line) for line in lines[1:]]) 70 | 71 | def __repr__(self): 72 | body = [self.__class__.__name__] 73 | if self.transform is not None: 74 | body += self._format_transform_repr(self.transform, 75 | "Transform: ") 76 | if self.target_transform is not None: 77 | body += self._format_transform_repr(self.target_transform, 78 | "Target transform: ") 79 | 80 | return '\n'.join(body) 81 | -------------------------------------------------------------------------------- /classification/install_libjpegturbo.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install libjpeg-turbo 4 | sudo apt install -y nasm 5 | sudo apt install -y cmake 6 | sudo apt install -y libsm6 libxext6 libxrender-dev 7 | 8 | wget https://downloads.sourceforge.net/libjpeg-turbo/libjpeg-turbo-2.0.3.tar.gz 9 | tar xvf libjpeg-turbo-2.0.3.tar.gz 10 | cd libjpeg-turbo-2.0.3 11 | 12 | mkdir build 13 | cd build 14 | 15 | cmake -DCMAKE_INSTALL_PREFIX=/usr \ 16 | -DCMAKE_BUILD_TYPE=RELEASE \ 17 | -DENABLE_STATIC=FALSE \ 18 | -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ 19 | -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib \ 20 | .. 21 | make 22 | sudo make install 23 | 24 | -------------------------------------------------------------------------------- /classification/main/__init__.py: -------------------------------------------------------------------------------- 1 | subset_channel_index = { 2 | '6': 3 | [ 4 | [0,1, 5 | 8,9], 6 | [0], 7 | [0] 8 | ], 9 | '24': 10 | [ 11 | [0, 1, 2, 3, 12 | 8, 9, 10, 11, 13 | 16, 17, 18, 19, 14 | 24, 25, 26, 27], 15 | [0, 1, 16 | 8, 9], 17 | [0, 1, 18 | 8, 9] 19 | ], 20 | # '24': 21 | # [ 22 | # [0, 1, 2, 3, 4, 5 23 | # 8, 9, 10, 24 | # 16, 17, 18, 25 | # 24, 26 | # 32], 27 | # [0, 1, 3, 28 | # 8, 29 | # 24], 30 | # [0, 1, 3, 31 | # 8, 32 | # 24] 33 | # ], 34 | '32': 35 | [ 36 | [0, 1, 2, 3, 4, 37 | 8, 9, 10, 11, 12, 38 | 16, 17, 18, 19, 20, 39 | 24, 25, 26, 27, 40 | 32, 33, 34], 41 | [0, 1, 2, 42 | 8, 9], 43 | [0, 1, 2, 44 | 8, 9] 45 | ], 46 | '48': 47 | [ 48 | [0, 1, 2, 3, 4, 5, 49 | 8, 9, 10, 11, 12, 13, 50 | 16, 17, 18, 19, 20, 21, 51 | 24, 25, 26, 27, 28, 29, 52 | 32, 33, 34, 35, 53 | 40, 41, 42, 43], 54 | [0, 1, 2, 55 | 8, 9, 10, 56 | 16, 17], 57 | [0, 1, 2, 58 | 8, 9, 10, 59 | 16, 17] 60 | ], 61 | '64': 62 | [ 63 | [0, 1, 2, 3, 4, 5, 6, 64 | 8, 9, 10, 11, 12, 13, 14, 65 | 16, 17, 18, 19, 20, 21, 66 | 24, 25, 26, 27, 28, 29, 67 | 32, 33, 34, 35, 36, 37, 68 | 40, 41, 42, 43, 44, 45, 69 | 48, 49, 50, 51, 52, 53], 70 | [0, 1, 2, 71 | 8, 9, 10, 72 | 16, 17, 73 | 24, 25], 74 | [0, 1, 2, 75 | 8, 9, 10, 76 | 16, 17, 77 | 24, 25], 78 | ] 79 | } 80 | -------------------------------------------------------------------------------- /classification/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PSCLab-ASU/Learning-in-the-Frequency-Domain/a1e620fd12a023c8288135050e76574a431888bd/classification/models/__init__.py -------------------------------------------------------------------------------- /classification/models/imagenet/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .resnet import * 4 | from .mobilenetv2 import * 5 | 6 | -------------------------------------------------------------------------------- /classification/models/utils.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | 5 | def constant_init(module, val, bias=0): 6 | nn.init.constant_(module.weight, val) 7 | if hasattr(module, 'bias') and module.bias is not None: 8 | nn.init.constant_(module.bias, bias) 9 | 10 | 11 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 12 | assert distribution in ['uniform', 'normal'] 13 | if distribution == 'uniform': 14 | nn.init.xavier_uniform_(module.weight, gain=gain) 15 | else: 16 | nn.init.xavier_normal_(module.weight, gain=gain) 17 | 
if hasattr(module, 'bias') and module.bias is not None: 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def normal_init(module, mean=0, std=1, bias=0): 22 | nn.init.normal_(module.weight, mean, std) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def uniform_init(module, a=0, b=1, bias=0): 28 | nn.init.uniform_(module.weight, a, b) 29 | if hasattr(module, 'bias') and module.bias is not None: 30 | nn.init.constant_(module.bias, bias) 31 | 32 | 33 | def kaiming_init(module, 34 | a=0, 35 | mode='fan_out', 36 | nonlinearity='relu', 37 | bias=0, 38 | distribution='normal'): 39 | assert distribution in ['uniform', 'normal'] 40 | if distribution == 'uniform': 41 | nn.init.kaiming_uniform_( 42 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 43 | else: 44 | nn.init.kaiming_normal_( 45 | module.weight, a=a, mode=mode, nonlinearity=nonlinearity) 46 | if hasattr(module, 'bias') and module.bias is not None: 47 | nn.init.constant_(module.bias, bias) 48 | 49 | 50 | def caffe2_xavier_init(module, bias=0): 51 | # `XavierFill` in Caffe2 corresponds to `kaiming_uniform_` in PyTorch 52 | # Acknowledgment to FAIR's internal code 53 | kaiming_init( 54 | module, 55 | a=1, 56 | mode='fan_in', 57 | nonlinearity='leaky_relu', 58 | distribution='uniform') 59 | 60 | 61 | def get_upsample_filter(size): 62 | """Make a 2D bilinear kernel suitable for upsampling""" 63 | factor = (size + 1) // 2 64 | if size % 2 == 1: 65 | center = factor - 1 66 | else: 67 | center = factor - 0.5 68 | og = np.ogrid[:size, :size] 69 | filter = (1 - abs(og[0] - center) / factor) * \ 70 | (1 - abs(og[1] - center) / factor) 71 | return torch.from_numpy(filter).float() 72 | -------------------------------------------------------------------------------- /classification/pretrained/README.md: -------------------------------------------------------------------------------- 1 | ## This folder should contain pretrained models. You can download the pretrained [`models`](https://drive.google.com/open?id=1GlImzw3_PRNFgieS-VsNWZRGqq-xGoKS) here. 
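A minimal sketch of restoring one of these checkpoints; the constructor arguments and the `'state_dict'` key are assumptions (see `models/imagenet/resnet.py` and the evaluation script for the exact interface):

```python
import torch
from models.imagenet import ResNetDCT_Upscaled_Static  # architecture name from scripts/resnet_upscaled_static.sh

model = ResNetDCT_Upscaled_Static(channels=24)  # assumed signature
ckpt = torch.load('resnet50dct_upscaled_static_24/model_best.pth.tar', map_location='cpu')
# Checkpoints trained with DataParallel may carry a 'module.' prefix.
state_dict = {k.replace('module.', '', 1): v for k, v in ckpt['state_dict'].items()}
model.load_state_dict(state_dict)
model.eval()
```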
2 | -------------------------------------------------------------------------------- /classification/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | Pillow 3 | matplotlib 4 | jpeg2dct 5 | scikit-learn 6 | PyTurboJPEG 7 | tensorboardX 8 | opencv-python 9 | lmdb 10 | pyarrow 11 | scikit-image 12 | -------------------------------------------------------------------------------- /classification/scripts/resnet_upscaled_static.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | subset=$1 3 | subset=${subset:-0} 4 | echo "subset: $subset" 5 | python main/imagenet_resnet_upscaled_static.py -j 16 --gpu-id 0,1,2,3 --arch ResNetDCT_Upscaled_Static --subset $subset --resume pretrained/resnet50dct_upscaled_static_$subset/model_best.pth.tar --data ./data --evaluate -------------------------------------------------------------------------------- /classification/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Useful utils 2 | """ 3 | from .misc import * 4 | from .logger import * 5 | from .visualize import * 6 | from .eval import * 7 | # progress bar 8 | import os, sys 9 | sys.path.append(os.path.join(os.path.dirname(__file__), "progress")) 10 | from .progress.progress.bar import Bar as Bar 11 | 12 | -------------------------------------------------------------------------------- /classification/utils/eval.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | __all__ = ['accuracy'] 4 | 5 | def accuracy(output, target, topk=(1,)): 6 | """Computes the accuracy@k for the specified values of k""" 7 | maxk = max(topk) 8 | batch_size = target.size(0) 9 | 10 | _, pred = output.topk(maxk, 1, True, True) 11 | pred = pred.t() 12 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 13 | 14 | res = [] 15 | for k in topk: 16 | correct_k = correct[:k].view(-1).float().sum(0) 17 | res.append(correct_k.mul_(100.0 / batch_size)) 18 | return res -------------------------------------------------------------------------------- /classification/utils/init_weights.py: -------------------------------------------------------------------------------- 1 | from torch.nn import init 2 | 3 | def weights_init_normal(m): 4 | classname = m.__class__.__name__ 5 | # print(classname) 6 | if classname.find('Conv') != -1: 7 | init.normal_(m.weight.data, 0.0, 0.02) 8 | elif classname.find('Linear') != -1: 9 | init.normal_(m.weight.data, 0.0, 0.02) 10 | elif classname.find('BatchNorm2d') != -1: 11 | init.normal_(m.weight.data, 1.0, 0.02) 12 | init.constant_(m.bias.data, 0.0) 13 | 14 | def weights_init_xavier(m): 15 | classname = m.__class__.__name__ 16 | # print(classname) 17 | if classname.find('Conv') != -1: 18 | init.xavier_normal_(m.weight.data, gain=1) 19 | elif classname.find('Linear') != -1: 20 | init.xavier_normal_(m.weight.data, gain=1) 21 | elif classname.find('BatchNorm2d') != -1: 22 | init.normal_(m.weight.data, 1.0, 0.02) 23 | init.constant_(m.bias.data, 0.0) 24 | 25 | 26 | def weights_init_kaiming(m): 27 | classname = m.__class__.__name__ 28 | # print(classname) 29 | if classname.find('Conv') != -1: 30 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 31 | elif classname.find('Linear') != -1: 32 | init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') 33 | elif classname.find('BatchNorm2d') != -1: 34 | init.normal_(m.weight.data, 1.0, 0.02) 35 |
init.constant_(m.bias.data, 0.0) 36 | 37 | 38 | def weights_init_orthogonal(m): 39 | classname = m.__class__.__name__ 40 | print(classname) 41 | if classname.find('Conv') != -1: 42 | init.orthogonal_(m.weight.data, gain=1) 43 | elif classname.find('Linear') != -1: 44 | init.orthogonal_(m.weight.data, gain=1) 45 | elif classname.find('BatchNorm2d') != -1: 46 | init.normal_(m.weight.data, 1.0, 0.02) 47 | init.constant_(m.bias.data, 0.0) 48 | 49 | 50 | def weights_init(net, init_type='normal'): 51 | print('initialization method [%s]' % init_type) 52 | if init_type == 'normal': 53 | net.apply(weights_init_normal) 54 | elif init_type == 'xavier': 55 | net.apply(weights_init_xavier) 56 | elif init_type == 'kaiming': 57 | net.apply(weights_init_kaiming) 58 | elif init_type == 'orthogonal': 59 | net.apply(weights_init_orthogonal) 60 | else: 61 | raise NotImplementedError('initialization method [%s] is not implemented' % init_type) 62 | -------------------------------------------------------------------------------- /classification/utils/progress/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.egg-info 3 | build/ 4 | dist/ 5 | -------------------------------------------------------------------------------- /classification/utils/progress/LICENSE: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 Giorgos Verigakis 2 | # 3 | # Permission to use, copy, modify, and distribute this software for any 4 | # purpose with or without fee is hereby granted, provided that the above 5 | # copyright notice and this permission notice appear in all copies. 6 | # 7 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /classification/utils/progress/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst LICENSE 2 | -------------------------------------------------------------------------------- /classification/utils/progress/README.rst: -------------------------------------------------------------------------------- 1 | Easy progress reporting for Python 2 | ================================== 3 | 4 | |pypi| 5 | 6 | |demo| 7 | 8 | .. |pypi| image:: https://img.shields.io/pypi/v/progress.svg 9 | .. |demo| image:: https://raw.github.com/verigak/progress/master/demo.gif 10 | :alt: Demo 11 | 12 | Bars 13 | ---- 14 | 15 | There are 7 progress bars to choose from: 16 | 17 | - ``Bar`` 18 | - ``ChargingBar`` 19 | - ``FillingSquaresBar`` 20 | - ``FillingCirclesBar`` 21 | - ``IncrementalBar`` 22 | - ``PixelBar`` 23 | - ``ShadyBar`` 24 | 25 | To use them, just call ``next`` to advance and ``finish`` to finish: 26 | 27 | .. code-block:: python
28 | 29 | from progress.bar import Bar 30 | 31 | bar = Bar('Processing', max=20) 32 | for i in range(20): 33 | # Do some work 34 | bar.next() 35 | bar.finish() 36 | 37 | The result will be a bar like the following: :: 38 | 39 | Processing |############# | 42/100 40 | 41 | To simplify the common case where the work is done in an iterator, you can 42 | use the ``iter`` method: 43 | 44 | .. code-block:: python 45 | 46 | for i in Bar('Processing').iter(it): 47 | # Do some work 48 | 49 | Progress bars are very customizable; you can change their width, their fill 50 | character, their suffix and more: 51 | 52 | .. code-block:: python 53 | 54 | bar = Bar('Loading', fill='@', suffix='%(percent)d%%') 55 | 56 | This will produce a bar like the following: :: 57 | 58 | Loading |@@@@@@@@@@@@@ | 42% 59 | 60 | You can use a number of template arguments in ``message`` and ``suffix``: 61 | 62 | ========== ================================ 63 | Name Value 64 | ========== ================================ 65 | index current value 66 | max maximum value 67 | remaining max - index 68 | progress index / max 69 | percent progress * 100 70 | avg simple moving average time per item (in seconds) 71 | elapsed elapsed time in seconds 72 | elapsed_td elapsed as a timedelta (useful for printing as a string) 73 | eta avg * remaining 74 | eta_td eta as a timedelta (useful for printing as a string) 75 | ========== ================================ 76 | 77 | Instead of passing all configuration options on instantiation, you can create 78 | your custom subclass: 79 | 80 | .. code-block:: python 81 | 82 | class FancyBar(Bar): 83 | message = 'Loading' 84 | fill = '*' 85 | suffix = '%(percent).1f%% - %(eta)ds' 86 | 87 | You can also override any of the arguments or create your own: 88 | 89 | .. code-block:: python 90 | 91 | class SlowBar(Bar): 92 | suffix = '%(remaining_hours)d hours remaining' 93 | @property 94 | def remaining_hours(self): 95 | return self.eta // 3600 96 | 97 | 98 | Spinners 99 | ======== 100 | 101 | For actions with an unknown number of steps you can use a spinner: 102 | 103 | .. code-block:: python 104 | 105 | from progress.spinner import Spinner 106 | 107 | spinner = Spinner('Loading ') 108 | while state != 'FINISHED': 109 | # Do some work 110 | spinner.next() 111 | 112 | There are 5 predefined spinners: 113 | 114 | - ``Spinner`` 115 | - ``PieSpinner`` 116 | - ``MoonSpinner`` 117 | - ``LineSpinner`` 118 | - ``PixelSpinner`` 119 | 120 | 121 | Other 122 | ===== 123 | 124 | There are a number of other classes available too; please check the source or 125 | subclass one of them to create your own.
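For example, the counters in ``progress.counter`` follow the same protocol as the bars (a small sketch using only names from this package):

.. code-block:: python

    from progress.counter import Countdown

    # ``iter`` picks up ``max`` from ``len(...)`` automatically, and the
    # counter prints the number of remaining items as the work is consumed.
    for item in Countdown('Remaining ').iter(range(100)):
        pass  # do some work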
126 | 127 | 128 | License 129 | ======= 130 | 131 | progress is licensed under ISC 132 | -------------------------------------------------------------------------------- /classification/utils/progress/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PSCLab-ASU/Learning-in-the-Frequency-Domain/a1e620fd12a023c8288135050e76574a431888bd/classification/utils/progress/demo.gif -------------------------------------------------------------------------------- /classification/utils/progress/progress/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 Giorgos Verigakis 2 | # 3 | # Permission to use, copy, modify, and distribute this software for any 4 | # purpose with or without fee is hereby granted, provided that the above 5 | # copyright notice and this permission notice appear in all copies. 6 | # 7 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | 15 | from __future__ import division 16 | 17 | from collections import deque 18 | from datetime import timedelta 19 | from math import ceil 20 | from sys import stderr 21 | from time import time 22 | 23 | 24 | __version__ = '1.3' 25 | 26 | 27 | class Infinite(object): 28 | file = stderr 29 | sma_window = 10 # Simple Moving Average window 30 | 31 | def __init__(self, *args, **kwargs): 32 | self.index = 0 33 | self.start_ts = time() 34 | self.avg = 0 35 | self._ts = self.start_ts 36 | self._xput = deque(maxlen=self.sma_window) 37 | for key, val in kwargs.items(): 38 | setattr(self, key, val) 39 | 40 | def __getitem__(self, key): 41 | if key.startswith('_'): 42 | return None 43 | return getattr(self, key, None) 44 | 45 | @property 46 | def elapsed(self): 47 | return int(time() - self.start_ts) 48 | 49 | @property 50 | def elapsed_td(self): 51 | return timedelta(seconds=self.elapsed) 52 | 53 | def update_avg(self, n, dt): 54 | if n > 0: 55 | self._xput.append(dt / n) 56 | self.avg = sum(self._xput) / len(self._xput) 57 | 58 | def update(self): 59 | pass 60 | 61 | def start(self): 62 | pass 63 | 64 | def finish(self): 65 | pass 66 | 67 | def next(self, n=1): 68 | now = time() 69 | dt = now - self._ts 70 | self.update_avg(n, dt) 71 | self._ts = now 72 | self.index = self.index + n 73 | self.update() 74 | 75 | def iter(self, it): 76 | try: 77 | for x in it: 78 | yield x 79 | self.next() 80 | finally: 81 | self.finish() 82 | 83 | 84 | class Progress(Infinite): 85 | def __init__(self, *args, **kwargs): 86 | super(Progress, self).__init__(*args, **kwargs) 87 | self.max = kwargs.get('max', 100) 88 | 89 | @property 90 | def eta(self): 91 | return int(ceil(self.avg * self.remaining)) 92 | 93 | @property 94 | def eta_td(self): 95 | return timedelta(seconds=self.eta) 96 | 97 | @property 98 | def percent(self): 99 | return self.progress * 100 100 | 101 | @property 102 | def progress(self): 103 | return min(1, self.index / self.max) 104 | 105 | @property 106 | def remaining(self): 107 | return max(self.max - self.index, 0) 108 | 109 | def start(self): 
110 | self.update() 111 | 112 | def goto(self, index): 113 | incr = index - self.index 114 | self.next(incr) 115 | 116 | def iter(self, it): 117 | try: 118 | self.max = len(it) 119 | except TypeError: 120 | pass 121 | 122 | try: 123 | for x in it: 124 | yield x 125 | self.next() 126 | finally: 127 | self.finish() 128 | -------------------------------------------------------------------------------- /classification/utils/progress/progress/bar.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2012 Giorgos Verigakis 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | 17 | from __future__ import unicode_literals 18 | from . import Progress 19 | from .helpers import WritelnMixin 20 | 21 | 22 | class Bar(WritelnMixin, Progress): 23 | width = 32 24 | message = '' 25 | suffix = '%(index)d/%(max)d' 26 | bar_prefix = ' |' 27 | bar_suffix = '| ' 28 | empty_fill = ' ' 29 | fill = '#' 30 | hide_cursor = True 31 | 32 | def update(self): 33 | filled_length = int(self.width * self.progress) 34 | empty_length = self.width - filled_length 35 | 36 | message = self.message % self 37 | bar = self.fill * filled_length 38 | empty = self.empty_fill * empty_length 39 | suffix = self.suffix % self 40 | line = ''.join([message, self.bar_prefix, bar, empty, self.bar_suffix, 41 | suffix]) 42 | self.writeln(line) 43 | 44 | 45 | class ChargingBar(Bar): 46 | suffix = '%(percent)d%%' 47 | bar_prefix = ' ' 48 | bar_suffix = ' ' 49 | empty_fill = '∙' 50 | fill = '█' 51 | 52 | 53 | class FillingSquaresBar(ChargingBar): 54 | empty_fill = '▢' 55 | fill = '▣' 56 | 57 | 58 | class FillingCirclesBar(ChargingBar): 59 | empty_fill = '◯' 60 | fill = '◉' 61 | 62 | 63 | class IncrementalBar(Bar): 64 | phases = (' ', '▏', '▎', '▍', '▌', '▋', '▊', '▉', '█') 65 | 66 | def update(self): 67 | nphases = len(self.phases) 68 | filled_len = self.width * self.progress 69 | nfull = int(filled_len) # Number of full chars 70 | phase = int((filled_len - nfull) * nphases) # Phase of last char 71 | nempty = self.width - nfull # Number of empty chars 72 | 73 | message = self.message % self 74 | bar = self.phases[-1] * nfull 75 | current = self.phases[phase] if phase > 0 else '' 76 | empty = self.empty_fill * max(0, nempty - len(current)) 77 | suffix = self.suffix % self 78 | line = ''.join([message, self.bar_prefix, bar, current, empty, 79 | self.bar_suffix, suffix]) 80 | self.writeln(line) 81 | 82 | 83 | class PixelBar(IncrementalBar): 84 | phases = ('⡀', '⡄', '⡆', '⡇', '⣇', '⣧', '⣷', '⣿') 85 | 86 | 87 | class ShadyBar(IncrementalBar): 88 | phases = (' ', '░', '▒', '▓', '█') 89 | -------------------------------------------------------------------------------- /classification/utils/progress/progress/counter.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2012 Giorgos Verigakis 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | 17 | from __future__ import unicode_literals 18 | from . import Infinite, Progress 19 | from .helpers import WriteMixin 20 | 21 | 22 | class Counter(WriteMixin, Infinite): 23 | message = '' 24 | hide_cursor = True 25 | 26 | def update(self): 27 | self.write(str(self.index)) 28 | 29 | 30 | class Countdown(WriteMixin, Progress): 31 | hide_cursor = True 32 | 33 | def update(self): 34 | self.write(str(self.remaining)) 35 | 36 | 37 | class Stack(WriteMixin, Progress): 38 | phases = (' ', '▁', '▂', '▃', '▄', '▅', '▆', '▇', '█') 39 | hide_cursor = True 40 | 41 | def update(self): 42 | nphases = len(self.phases) 43 | i = min(nphases - 1, int(self.progress * nphases)) 44 | self.write(self.phases[i]) 45 | 46 | 47 | class Pie(Stack): 48 | phases = ('○', '◔', '◑', '◕', '●') 49 | -------------------------------------------------------------------------------- /classification/utils/progress/progress/helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2012 Giorgos Verigakis 2 | # 3 | # Permission to use, copy, modify, and distribute this software for any 4 | # purpose with or without fee is hereby granted, provided that the above 5 | # copyright notice and this permission notice appear in all copies. 6 | # 7 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
14 | 15 | from __future__ import print_function 16 | 17 | 18 | HIDE_CURSOR = '\x1b[?25l' 19 | SHOW_CURSOR = '\x1b[?25h' 20 | 21 | 22 | class WriteMixin(object): 23 | hide_cursor = False 24 | 25 | def __init__(self, message=None, **kwargs): 26 | super(WriteMixin, self).__init__(**kwargs) 27 | self._width = 0 28 | if message: 29 | self.message = message 30 | 31 | if self.file.isatty(): 32 | if self.hide_cursor: 33 | print(HIDE_CURSOR, end='', file=self.file) 34 | print(self.message, end='', file=self.file) 35 | self.file.flush() 36 | 37 | def write(self, s): 38 | if self.file.isatty(): 39 | b = '\b' * self._width 40 | c = s.ljust(self._width) 41 | print(b + c, end='', file=self.file) 42 | self._width = max(self._width, len(s)) 43 | self.file.flush() 44 | 45 | def finish(self): 46 | if self.file.isatty() and self.hide_cursor: 47 | print(SHOW_CURSOR, end='', file=self.file) 48 | 49 | 50 | class WritelnMixin(object): 51 | hide_cursor = False 52 | 53 | def __init__(self, message=None, **kwargs): 54 | super(WritelnMixin, self).__init__(**kwargs) 55 | if message: 56 | self.message = message 57 | 58 | if self.file.isatty() and self.hide_cursor: 59 | print(HIDE_CURSOR, end='', file=self.file) 60 | 61 | def clearln(self): 62 | if self.file.isatty(): 63 | print('\r\x1b[K', end='', file=self.file) 64 | 65 | def writeln(self, line): 66 | if self.file.isatty(): 67 | self.clearln() 68 | print(line, end='', file=self.file) 69 | self.file.flush() 70 | 71 | def finish(self): 72 | if self.file.isatty(): 73 | print(file=self.file) 74 | if self.hide_cursor: 75 | print(SHOW_CURSOR, end='', file=self.file) 76 | 77 | 78 | from signal import signal, SIGINT 79 | from sys import exit 80 | 81 | 82 | class SigIntMixin(object): 83 | """Registers a signal handler that calls finish on SIGINT""" 84 | 85 | def __init__(self, *args, **kwargs): 86 | super(SigIntMixin, self).__init__(*args, **kwargs) 87 | signal(SIGINT, self._sigint_handler) 88 | 89 | def _sigint_handler(self, signum, frame): 90 | self.finish() 91 | exit(0) 92 | -------------------------------------------------------------------------------- /classification/utils/progress/progress/spinner.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Copyright (c) 2012 Giorgos Verigakis 4 | # 5 | # Permission to use, copy, modify, and distribute this software for any 6 | # purpose with or without fee is hereby granted, provided that the above 7 | # copyright notice and this permission notice appear in all copies. 8 | # 9 | # THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 | # WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 | # MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 | # ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 | # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 | # ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 | # OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 | 17 | from __future__ import unicode_literals 18 | from . 
import Infinite 19 | from .helpers import WriteMixin 20 | 21 | 22 | class Spinner(WriteMixin, Infinite): 23 | message = '' 24 | phases = ('-', '\\', '|', '/') 25 | hide_cursor = True 26 | 27 | def update(self): 28 | i = self.index % len(self.phases) 29 | self.write(self.phases[i]) 30 | 31 | 32 | class PieSpinner(Spinner): 33 | phases = ['◷', '◶', '◵', '◴'] 34 | 35 | 36 | class MoonSpinner(Spinner): 37 | phases = ['◑', '◒', '◐', '◓'] 38 | 39 | 40 | class LineSpinner(Spinner): 41 | phases = ['⎺', '⎻', '⎼', '⎽', '⎼', '⎻'] 42 | 43 | class PixelSpinner(Spinner): 44 | phases = ['⣾','⣷', '⣯', '⣟', '⡿', '⢿', '⣻', '⣽'] 45 | -------------------------------------------------------------------------------- /classification/utils/progress/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | import progress 6 | 7 | 8 | setup( 9 | name='progress', 10 | version=progress.__version__, 11 | description='Easy to use progress bars', 12 | long_description=open('README.rst').read(), 13 | author='Giorgos Verigakis', 14 | author_email='verigak@gmail.com', 15 | url='http://github.com/verigak/progress/', 16 | license='ISC', 17 | packages=['progress'], 18 | classifiers=[ 19 | 'Environment :: Console', 20 | 'Intended Audience :: Developers', 21 | 'License :: OSI Approved :: ISC License (ISCL)', 22 | 'Programming Language :: Python :: 2.6', 23 | 'Programming Language :: Python :: 2.7', 24 | 'Programming Language :: Python :: 3.3', 25 | 'Programming Language :: Python :: 3.4', 26 | 'Programming Language :: Python :: 3.5', 27 | 'Programming Language :: Python :: 3.6', 28 | ] 29 | ) 30 | -------------------------------------------------------------------------------- /classification/utils/progress/test_progress.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | import random 6 | import time 7 | 8 | from progress.bar import (Bar, ChargingBar, FillingSquaresBar, 9 | FillingCirclesBar, IncrementalBar, PixelBar, 10 | ShadyBar) 11 | from progress.spinner import (Spinner, PieSpinner, MoonSpinner, LineSpinner, 12 | PixelSpinner) 13 | from progress.counter import Counter, Countdown, Stack, Pie 14 | 15 | 16 | def sleep(): 17 | t = 0.01 18 | t += t * random.uniform(-0.1, 0.1) # Add some variance 19 | time.sleep(t) 20 | 21 | 22 | for bar_cls in (Bar, ChargingBar, FillingSquaresBar, FillingCirclesBar): 23 | suffix = '%(index)d/%(max)d [%(elapsed)d / %(eta)d / %(eta_td)s]' 24 | bar = bar_cls(bar_cls.__name__, suffix=suffix) 25 | for i in bar.iter(range(200)): 26 | sleep() 27 | 28 | for bar_cls in (IncrementalBar, PixelBar, ShadyBar): 29 | suffix = '%(percent)d%% [%(elapsed_td)s / %(eta)d / %(eta_td)s]' 30 | bar = bar_cls(bar_cls.__name__, suffix=suffix) 31 | for i in bar.iter(range(200)): 32 | sleep() 33 | 34 | for spin in (Spinner, PieSpinner, MoonSpinner, LineSpinner, PixelSpinner): 35 | for i in spin(spin.__name__ + ' ').iter(range(100)): 36 | sleep() 37 | print() 38 | 39 | for singleton in (Counter, Countdown, Stack, Pie): 40 | for i in singleton(singleton.__name__ + ' ').iter(range(100)): 41 | sleep() 42 | print() 43 | 44 | bar = IncrementalBar('Random', suffix='%(index)d') 45 | for i in range(100): 46 | bar.goto(random.randint(0, 100)) 47 | sleep() 48 | bar.finish() 49 | -------------------------------------------------------------------------------- /segmentation/.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # cython generated cpp 107 | mmdet/ops/nms/src/soft_nms_cpu.cpp 108 | mmdet/version.py 109 | .vscode 110 | .idea 111 | 112 | # custom 113 | *.pkl 114 | *.pkl.json 115 | *.log.json 116 | 117 | 118 | -------------------------------------------------------------------------------- /segmentation/.isort.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 79 3 | multi_line_output = 0 4 | known_first_party = mmdet 5 | known_third_party = mmcv,numpy,matplotlib,pycocotools,six,seaborn,terminaltables,torch,torchvision 6 | no_lines_before = STDLIB,LOCALFOLDER 7 | default_section = THIRDPARTY -------------------------------------------------------------------------------- /segmentation/.style.yapf: -------------------------------------------------------------------------------- 1 | [style] 2 | BASED_ON_STYLE = pep8 3 | BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true 4 | SPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true 5 | -------------------------------------------------------------------------------- /segmentation/.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: python 3 | 4 | python: 5 | - "3.5" 6 | - "3.6" 7 | - "3.7" 8 | 9 | env: CUDA=9.2.148-1 CUDA_SHORT=9.2 UBUNTU_VERSION=ubuntu1604 10 | 11 | # Ref to CUDA installation in Travis: https://github.com/jeremad/cuda-travis 12 | before_install: 13 | - INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb 14 | - wget http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER} 15 | - sudo dpkg -i ${INSTALLER} 16 | - wget https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub 17 | - sudo 
apt-key add 7fa2af80.pub 18 | - sudo apt update -qq 19 | - sudo apt install -y cuda-${CUDA_SHORT/./-} cuda-cufft-dev-${CUDA_SHORT/./-} 20 | - sudo apt clean 21 | - CUDA_HOME=/usr/local/cuda-${CUDA_SHORT} 22 | - LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${CUDA_HOME}/include:${LD_LIBRARY_PATH} 23 | - PATH=${CUDA_HOME}/bin:${PATH} 24 | 25 | install: 26 | - pip install Cython 27 | - pip install -r requirements.txt 28 | - pip install -r tests/requirements.txt 29 | 30 | script: 31 | - flake8 . 32 | - isort -rc --check-only --diff mmdet/ tools/ tests/ 33 | - yapf -r -d --style .style.yapf mmdet/ tools/ tests/ 34 | - python setup.py check -m -s 35 | - python setup.py build_ext --inplace 36 | - coverage run --source mmdet -m py.test tests -v --doctest-modules 37 | 38 | after_success: 39 | - coverage report -------------------------------------------------------------------------------- /segmentation/data/coco/README.md: -------------------------------------------------------------------------------- 1 | ## This folder should contain the COCO2017 dataset. You can download the [`dataset`](http://cocodataset.org/#download) here. 2 | -------------------------------------------------------------------------------- /segmentation/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG PYTORCH="1.1.0" 2 | ARG CUDA="10.0" 3 | ARG CUDNN="7.5" 4 | 5 | FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel 6 | 7 | RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender-dev libxext6 \ 8 | && apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | # Install mmdetection 12 | RUN conda install cython -y && conda clean --all 13 | RUN git clone https://github.com/open-mmlab/mmdetection.git /mmdetection 14 | WORKDIR /mmdetection 15 | RUN pip install --no-cache-dir -e . 16 | -------------------------------------------------------------------------------- /segmentation/docs/DATA_PIPELINE.md: -------------------------------------------------------------------------------- 1 | ## Data preparation pipeline 2 | 3 | The data preparation pipeline and the dataset are decoupled. Usually a dataset 4 | defines how to process the annotations and a data pipeline defines all the steps to prepare a data dict. 5 | A pipeline consists of a sequence of operations. Each operation takes a dict as input and also outputs a dict for the next transform. 6 | 7 | We present a classical pipeline in the following figure. The blue blocks are pipeline operations. As the pipeline runs, each operator can add new keys (marked as green) to the result dict or update the existing keys (marked as orange). 8 | ![pipeline figure](../demo/data_pipeline.png) 9 | 10 | The operations are categorized into data loading, pre-processing, formatting and test-time augmentation. 11 | 12 | Here is a pipeline example for Faster R-CNN.
13 | ```python 14 | img_norm_cfg = dict( 15 | mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) 16 | train_pipeline = [ 17 | dict(type='LoadImageFromFile'), 18 | dict(type='LoadAnnotations', with_bbox=True), 19 | dict(type='Resize', img_scale=(1333, 800), keep_ratio=True), 20 | dict(type='RandomFlip', flip_ratio=0.5), 21 | dict(type='Normalize', **img_norm_cfg), 22 | dict(type='Pad', size_divisor=32), 23 | dict(type='DefaultFormatBundle'), 24 | dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']), 25 | ] 26 | test_pipeline = [ 27 | dict(type='LoadImageFromFile'), 28 | dict( 29 | type='MultiScaleFlipAug', 30 | img_scale=(1333, 800), 31 | flip=False, 32 | transforms=[ 33 | dict(type='Resize', keep_ratio=True), 34 | dict(type='RandomFlip'), 35 | dict(type='Normalize', **img_norm_cfg), 36 | dict(type='Pad', size_divisor=32), 37 | dict(type='ImageToTensor', keys=['img']), 38 | dict(type='Collect', keys=['img']), 39 | ]) 40 | ] 41 | ``` 42 | 43 | For each operation, we list the related dict fields that are added/updated/removed. 44 | 45 | ### Data loading 46 | 47 | `LoadImageFromFile` 48 | - add: img, img_shape, ori_shape 49 | 50 | `LoadAnnotations` 51 | - add: gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg, bbox_fields, mask_fields 52 | 53 | `LoadProposals` 54 | - add: proposals 55 | 56 | ### Pre-processing 57 | 58 | `Resize` 59 | - add: scale, scale_idx, pad_shape, scale_factor, keep_ratio 60 | - update: img, img_shape, *bbox_fields, *mask_fields 61 | 62 | `RandomFlip` 63 | - add: flip 64 | - update: img, *bbox_fields, *mask_fields 65 | 66 | `Pad` 67 | - add: pad_fixed_size, pad_size_divisor 68 | - update: img, pad_shape, *mask_fields 69 | 70 | `RandomCrop` 71 | - update: img, pad_shape, gt_bboxes, gt_labels, gt_masks, *bbox_fields 72 | 73 | `Normalize` 74 | - add: img_norm_cfg 75 | - update: img 76 | 77 | `SegResizeFlipPadRescale` 78 | - update: gt_semantic_seg 79 | 80 | `PhotoMetricDistortion` 81 | - update: img 82 | 83 | `Expand` 84 | - update: img, gt_bboxes 85 | 86 | `MinIoURandomCrop` 87 | - update: img, gt_bboxes, gt_labels 88 | 89 | `Corrupt` 90 | - update: img 91 | 92 | ### Formatting 93 | 94 | `ToTensor` 95 | - update: specified by `keys`. 96 | 97 | `ImageToTensor` 98 | - update: specified by `keys`. 99 | 100 | `Transpose` 101 | - update: specified by `keys`. 102 | 103 | `ToDataContainer` 104 | - update: specified by `fields`. 
105 | 106 | `DefaultFormatBundle` 107 | - update: img, proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg 108 | 109 | `Collect` 110 | - add: img_meta (the keys of img_meta are specified by `meta_keys`) 111 | - remove: all other keys except for those specified by `keys` 112 | 113 | ### Test-time augmentation 114 | 115 | `MultiScaleFlipAug` -------------------------------------------------------------------------------- /segmentation/install_libjpegturbo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Install libjpeg-turbo 4 | sudo apt install -y nasm 5 | sudo apt install -y cmake 6 | sudo apt install -y libsm6 libxext6 libxrender-dev 7 | 8 | wget https://downloads.sourceforge.net/libjpeg-turbo/libjpeg-turbo-2.0.3.tar.gz 9 | tar xvf libjpeg-turbo-2.0.3.tar.gz && 10 | cd libjpeg-turbo-2.0.3 && 11 | 12 | mkdir build && 13 | cd build && 14 | 15 | cmake -DCMAKE_INSTALL_PREFIX=/usr \ 16 | -DCMAKE_BUILD_TYPE=RELEASE \ 17 | -DENABLE_STATIC=FALSE \ 18 | -DCMAKE_INSTALL_DOCDIR=/usr/share/doc/libjpeg-turbo-2.0.3 \ 19 | -DCMAKE_INSTALL_DEFAULT_LIBDIR=lib \ 20 | .. && 21 | make && 22 | sudo make install  # installing under /usr requires root 23 | 24 | -------------------------------------------------------------------------------- /segmentation/mmdet/__init__.py: -------------------------------------------------------------------------------- 1 | from .version import __version__, short_version 2 | 3 | __all__ = ['__version__', 'short_version'] 4 | -------------------------------------------------------------------------------- /segmentation/mmdet/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .env import get_root_logger, init_dist, set_random_seed 2 | from .inference import (inference_detector, init_detector, show_result, 3 | show_result_pyplot) 4 | from .train import train_detector 5 | 6 | __all__ = [ 7 | 'init_dist', 'get_root_logger', 'set_random_seed', 'train_detector', 8 | 'init_detector', 'inference_detector', 'show_result', 'show_result_pyplot' 9 | ] 10 | -------------------------------------------------------------------------------- /segmentation/mmdet/apis/env.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import random 4 | import subprocess 5 | 6 | import numpy as np 7 | import torch 8 | import torch.distributed as dist 9 | import torch.multiprocessing as mp 10 | from mmcv.runner import get_dist_info 11 | 12 | 13 | def init_dist(launcher, backend='nccl', **kwargs): 14 | if mp.get_start_method(allow_none=True) is None: 15 | mp.set_start_method('spawn') 16 | if launcher == 'pytorch': 17 | _init_dist_pytorch(backend, **kwargs) 18 | elif launcher == 'mpi': 19 | _init_dist_mpi(backend, **kwargs) 20 | elif launcher == 'slurm': 21 | _init_dist_slurm(backend, **kwargs) 22 | else: 23 | raise ValueError('Invalid launcher type: {}'.format(launcher)) 24 | 25 | 26 | def _init_dist_pytorch(backend, **kwargs): 27 | # TODO: use local_rank instead of rank % num_gpus 28 | rank = int(os.environ['RANK']) 29 | num_gpus = torch.cuda.device_count() 30 | torch.cuda.set_device(rank % num_gpus) 31 | dist.init_process_group(backend=backend, **kwargs) 32 | 33 | 34 | def _init_dist_mpi(backend, **kwargs): 35 | raise NotImplementedError 36 | 37 | 38 | def _init_dist_slurm(backend, port=29500, **kwargs): 39 | proc_id = int(os.environ['SLURM_PROCID']) 40 | ntasks = int(os.environ['SLURM_NTASKS']) 41 | node_list = os.environ['SLURM_NODELIST']
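# Each task pins the GPU matching proc_id modulo the local GPU count; the
# first hostname in SLURM_NODELIST, resolved via `scontrol`, becomes the
# rendezvous MASTER_ADDR exported below.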
42 | num_gpus = torch.cuda.device_count() 43 | torch.cuda.set_device(proc_id % num_gpus) 44 | addr = subprocess.getoutput( 45 | 'scontrol show hostname {} | head -n1'.format(node_list)) 46 | os.environ['MASTER_PORT'] = str(port) 47 | os.environ['MASTER_ADDR'] = addr 48 | os.environ['WORLD_SIZE'] = str(ntasks) 49 | os.environ['RANK'] = str(proc_id) 50 | dist.init_process_group(backend=backend) 51 | 52 | 53 | def set_random_seed(seed): 54 | random.seed(seed) 55 | np.random.seed(seed) 56 | torch.manual_seed(seed) 57 | torch.cuda.manual_seed_all(seed) 58 | 59 | 60 | def get_root_logger(log_level=logging.INFO): 61 | logger = logging.getLogger() 62 | if not logger.hasHandlers(): 63 | logging.basicConfig( 64 | format='%(asctime)s - %(levelname)s - %(message)s', 65 | level=log_level) 66 | rank, _ = get_dist_info() 67 | if rank != 0: 68 | logger.setLevel('ERROR') 69 | return logger 70 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor import * # noqa: F401, F403 2 | from .bbox import * # noqa: F401, F403 3 | from .evaluation import * # noqa: F401, F403 4 | from .fp16 import * # noqa: F401, F403 5 | from .mask import * # noqa: F401, F403 6 | from .post_processing import * # noqa: F401, F403 7 | from .utils import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/anchor/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_target import anchor_inside_flags, anchor_target 3 | from .guided_anchor_target import ga_loc_target, ga_shape_target 4 | from .point_generator import PointGenerator 5 | from .point_target import point_target 6 | 7 | __all__ = [ 8 | 'AnchorGenerator', 'anchor_target', 'anchor_inside_flags', 'ga_loc_target', 9 | 'ga_shape_target', 'PointGenerator', 'point_target' 10 | ] 11 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/anchor/anchor_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AnchorGenerator(object): 5 | 6 | def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): 7 | self.base_size = base_size 8 | self.scales = torch.Tensor(scales) 9 | self.ratios = torch.Tensor(ratios) 10 | self.scale_major = scale_major 11 | self.ctr = ctr 12 | self.base_anchors = self.gen_base_anchors() 13 | 14 | @property 15 | def num_base_anchors(self): 16 | return self.base_anchors.size(0) 17 | 18 | def gen_base_anchors(self): 19 | w = self.base_size 20 | h = self.base_size 21 | if self.ctr is None: 22 | x_ctr = 0.5 * (w - 1) 23 | y_ctr = 0.5 * (h - 1) 24 | else: 25 | x_ctr, y_ctr = self.ctr 26 | 27 | h_ratios = torch.sqrt(self.ratios) 28 | w_ratios = 1 / h_ratios 29 | if self.scale_major: 30 | ws = (w * w_ratios[:, None] * self.scales[None, :]).view(-1) 31 | hs = (h * h_ratios[:, None] * self.scales[None, :]).view(-1) 32 | else: 33 | ws = (w * self.scales[:, None] * w_ratios[None, :]).view(-1) 34 | hs = (h * self.scales[:, None] * h_ratios[None, :]).view(-1) 35 | 36 | # yapf: disable 37 | base_anchors = torch.stack( 38 | [ 39 | x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), 40 | x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) 41 | ], 42 | dim=-1).round() 43 | # yapf: enable 44 | 45 | return 
base_anchors 46 | 47 | def _meshgrid(self, x, y, row_major=True): 48 | xx = x.repeat(len(y)) 49 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 50 | if row_major: 51 | return xx, yy 52 | else: 53 | return yy, xx 54 | 55 | def grid_anchors(self, featmap_size, stride=16, device='cuda'): 56 | base_anchors = self.base_anchors.to(device) 57 | 58 | feat_h, feat_w = featmap_size 59 | shift_x = torch.arange(0, feat_w, device=device) * stride 60 | shift_y = torch.arange(0, feat_h, device=device) * stride 61 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 62 | shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1) 63 | shifts = shifts.type_as(base_anchors) 64 | # first feat_w elements correspond to the first row of shifts 65 | # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get 66 | # shifted anchors (K, A, 4), reshape to (K*A, 4) 67 | 68 | all_anchors = base_anchors[None, :, :] + shifts[:, None, :] 69 | all_anchors = all_anchors.view(-1, 4) 70 | # first A rows correspond to A anchors of (0, 0) in feature map, 71 | # then (0, 1), (0, 2), ... 72 | return all_anchors 73 | 74 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 75 | feat_h, feat_w = featmap_size 76 | valid_h, valid_w = valid_size 77 | assert valid_h <= feat_h and valid_w <= feat_w 78 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 79 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 80 | valid_x[:valid_w] = 1 81 | valid_y[:valid_h] = 1 82 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 83 | valid = valid_xx & valid_yy 84 | valid = valid[:, None].expand( 85 | valid.size(0), self.num_base_anchors).contiguous().view(-1) 86 | return valid 87 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/anchor/point_generator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class PointGenerator(object): 5 | 6 | def _meshgrid(self, x, y, row_major=True): 7 | xx = x.repeat(len(y)) 8 | yy = y.view(-1, 1).repeat(1, len(x)).view(-1) 9 | if row_major: 10 | return xx, yy 11 | else: 12 | return yy, xx 13 | 14 | def grid_points(self, featmap_size, stride=16, device='cuda'): 15 | feat_h, feat_w = featmap_size 16 | shift_x = torch.arange(0., feat_w, device=device) * stride 17 | shift_y = torch.arange(0., feat_h, device=device) * stride 18 | shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) 19 | stride = shift_x.new_full((shift_xx.shape[0], ), stride) 20 | shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1) 21 | all_points = shifts.to(device) 22 | return all_points 23 | 24 | def valid_flags(self, featmap_size, valid_size, device='cuda'): 25 | feat_h, feat_w = featmap_size 26 | valid_h, valid_w = valid_size 27 | assert valid_h <= feat_h and valid_w <= feat_w 28 | valid_x = torch.zeros(feat_w, dtype=torch.uint8, device=device) 29 | valid_y = torch.zeros(feat_h, dtype=torch.uint8, device=device) 30 | valid_x[:valid_w] = 1 31 | valid_y[:valid_h] = 1 32 | valid_xx, valid_yy = self._meshgrid(valid_x, valid_y) 33 | valid = valid_xx & valid_yy 34 | return valid 35 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner 2 | from .bbox_target import bbox_target 3 | from .geometry import bbox_overlaps 4 | from .samplers import (BaseSampler, 
CombinedSampler, 5 | InstanceBalancedPosSampler, IoUBalancedNegSampler, 6 | PseudoSampler, RandomSampler, SamplingResult) 7 | from .transforms import (bbox2delta, bbox2result, bbox2roi, bbox_flip, 8 | bbox_mapping, bbox_mapping_back, delta2bbox, 9 | distance2bbox, roi2bbox) 10 | 11 | from .assign_sampling import ( # isort:skip, avoid recursive imports 12 | assign_and_sample, build_assigner, build_sampler) 13 | 14 | __all__ = [ 15 | 'bbox_overlaps', 'BaseAssigner', 'MaxIoUAssigner', 'AssignResult', 16 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 17 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 18 | 'SamplingResult', 'build_assigner', 'build_sampler', 'assign_and_sample', 19 | 'bbox2delta', 'delta2bbox', 'bbox_flip', 'bbox_mapping', 20 | 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result', 21 | 'distance2bbox', 'bbox_target' 22 | ] 23 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/assign_sampling.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from . import assigners, samplers 4 | 5 | 6 | def build_assigner(cfg, **kwargs): 7 | if isinstance(cfg, assigners.BaseAssigner): 8 | return cfg 9 | elif isinstance(cfg, dict): 10 | return mmcv.runner.obj_from_dict(cfg, assigners, default_args=kwargs) 11 | else: 12 | raise TypeError('Invalid type {} for building an assigner'.format( 13 | type(cfg))) 14 | 15 | 16 | def build_sampler(cfg, **kwargs): 17 | if isinstance(cfg, samplers.BaseSampler): 18 | return cfg 19 | elif isinstance(cfg, dict): 20 | return mmcv.runner.obj_from_dict(cfg, samplers, default_args=kwargs) 21 | else: 22 | raise TypeError('Invalid type {} for building a sampler'.format( 23 | type(cfg))) 24 | 25 | 26 | def assign_and_sample(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg): 27 | bbox_assigner = build_assigner(cfg.assigner) 28 | bbox_sampler = build_sampler(cfg.sampler) 29 | assign_result = bbox_assigner.assign(bboxes, gt_bboxes, gt_bboxes_ignore, 30 | gt_labels) 31 | sampling_result = bbox_sampler.sample(assign_result, bboxes, gt_bboxes, 32 | gt_labels) 33 | return assign_result, sampling_result 34 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .approx_max_iou_assigner import ApproxMaxIoUAssigner 2 | from .assign_result import AssignResult 3 | from .base_assigner import BaseAssigner 4 | from .max_iou_assigner import MaxIoUAssigner 5 | from .point_assigner import PointAssigner 6 | 7 | __all__ = [ 8 | 'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult', 9 | 'PointAssigner' 10 | ] 11 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/assigners/assign_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class AssignResult(object): 5 | 6 | def __init__(self, num_gts, gt_inds, max_overlaps, labels=None): 7 | self.num_gts = num_gts 8 | self.gt_inds = gt_inds 9 | self.max_overlaps = max_overlaps 10 | self.labels = labels 11 | 12 | def add_gt_(self, gt_labels): 13 | self_inds = torch.arange( 14 | 1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device) 15 | self.gt_inds = torch.cat([self_inds, self.gt_inds]) 16 | self.max_overlaps = torch.cat( 17 |
[self.max_overlaps.new_ones(self.num_gts), self.max_overlaps]) 18 | if self.labels is not None: 19 | self.labels = torch.cat([gt_labels, self.labels]) 20 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/assigners/base_assigner.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | 4 | class BaseAssigner(metaclass=ABCMeta): 5 | 6 | @abstractmethod 7 | def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None): 8 | pass 9 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/bbox_target.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..utils import multi_apply 4 | from .transforms import bbox2delta 5 | 6 | 7 | def bbox_target(pos_bboxes_list, 8 | neg_bboxes_list, 9 | pos_gt_bboxes_list, 10 | pos_gt_labels_list, 11 | cfg, 12 | reg_classes=1, 13 | target_means=[.0, .0, .0, .0], 14 | target_stds=[1.0, 1.0, 1.0, 1.0], 15 | concat=True): 16 | labels, label_weights, bbox_targets, bbox_weights = multi_apply( 17 | bbox_target_single, 18 | pos_bboxes_list, 19 | neg_bboxes_list, 20 | pos_gt_bboxes_list, 21 | pos_gt_labels_list, 22 | cfg=cfg, 23 | reg_classes=reg_classes, 24 | target_means=target_means, 25 | target_stds=target_stds) 26 | 27 | if concat: 28 | labels = torch.cat(labels, 0) 29 | label_weights = torch.cat(label_weights, 0) 30 | bbox_targets = torch.cat(bbox_targets, 0) 31 | bbox_weights = torch.cat(bbox_weights, 0) 32 | return labels, label_weights, bbox_targets, bbox_weights 33 | 34 | 35 | def bbox_target_single(pos_bboxes, 36 | neg_bboxes, 37 | pos_gt_bboxes, 38 | pos_gt_labels, 39 | cfg, 40 | reg_classes=1, 41 | target_means=[.0, .0, .0, .0], 42 | target_stds=[1.0, 1.0, 1.0, 1.0]): 43 | num_pos = pos_bboxes.size(0) 44 | num_neg = neg_bboxes.size(0) 45 | num_samples = num_pos + num_neg 46 | labels = pos_bboxes.new_zeros(num_samples, dtype=torch.long) 47 | label_weights = pos_bboxes.new_zeros(num_samples) 48 | bbox_targets = pos_bboxes.new_zeros(num_samples, 4) 49 | bbox_weights = pos_bboxes.new_zeros(num_samples, 4) 50 | if num_pos > 0: 51 | labels[:num_pos] = pos_gt_labels 52 | pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight 53 | label_weights[:num_pos] = pos_weight 54 | pos_bbox_targets = bbox2delta(pos_bboxes, pos_gt_bboxes, target_means, 55 | target_stds) 56 | bbox_targets[:num_pos, :] = pos_bbox_targets 57 | bbox_weights[:num_pos, :] = 1 58 | if num_neg > 0: 59 | label_weights[-num_neg:] = 1.0 60 | 61 | return labels, label_weights, bbox_targets, bbox_weights 62 | 63 | 64 | def expand_target(bbox_targets, bbox_weights, labels, num_classes): 65 | bbox_targets_expand = bbox_targets.new_zeros( 66 | (bbox_targets.size(0), 4 * num_classes)) 67 | bbox_weights_expand = bbox_weights.new_zeros( 68 | (bbox_weights.size(0), 4 * num_classes)) 69 | for i in torch.nonzero(labels > 0).squeeze(-1): 70 | start, end = labels[i] * 4, (labels[i] + 1) * 4 71 | bbox_targets_expand[i, start:end] = bbox_targets[i, :] 72 | bbox_weights_expand[i, start:end] = bbox_weights[i, :] 73 | return bbox_targets_expand, bbox_weights_expand 74 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/geometry.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou', 
is_aligned=False): 5 | """Calculate overlap between two sets of bboxes. 6 | 7 | If ``is_aligned`` is ``False``, then calculate the ious between each bbox 8 | of bboxes1 and bboxes2, otherwise the ious between each aligned pair of 9 | bboxes1 and bboxes2. 10 | 11 | Args: 12 | bboxes1 (Tensor): shape (m, 4) 13 | bboxes2 (Tensor): shape (n, 4), if is_aligned is ``True``, then m and n 14 | must be equal. 15 | mode (str): "iou" (intersection over union) or "iof" (intersection over 16 | foreground). 17 | 18 | Returns: 19 | ious (Tensor): shape (m, n) if is_aligned == False else shape (m, 1) 20 | """ 21 | 22 | assert mode in ['iou', 'iof'] 23 | 24 | rows = bboxes1.size(0) 25 | cols = bboxes2.size(0) 26 | if is_aligned: 27 | assert rows == cols 28 | 29 | if rows * cols == 0: 30 | return bboxes1.new(rows, 1) if is_aligned else bboxes1.new(rows, cols) 31 | 32 | if is_aligned: 33 | lt = torch.max(bboxes1[:, :2], bboxes2[:, :2]) # [rows, 2] 34 | rb = torch.min(bboxes1[:, 2:], bboxes2[:, 2:]) # [rows, 2] 35 | 36 | wh = (rb - lt + 1).clamp(min=0) # [rows, 2] 37 | overlap = wh[:, 0] * wh[:, 1] 38 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 39 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 40 | 41 | if mode == 'iou': 42 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 43 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 44 | ious = overlap / (area1 + area2 - overlap) 45 | else: 46 | ious = overlap / area1 47 | else: 48 | lt = torch.max(bboxes1[:, None, :2], bboxes2[:, :2]) # [rows, cols, 2] 49 | rb = torch.min(bboxes1[:, None, 2:], bboxes2[:, 2:]) # [rows, cols, 2] 50 | 51 | wh = (rb - lt + 1).clamp(min=0) # [rows, cols, 2] 52 | overlap = wh[:, :, 0] * wh[:, :, 1] 53 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 54 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 55 | 56 | if mode == 'iou': 57 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 58 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 59 | ious = overlap / (area1[:, None] + area2 - overlap) 60 | else: 61 | ious = overlap / (area1[:, None]) 62 | 63 | return ious 64 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base_sampler import BaseSampler 2 | from .combined_sampler import CombinedSampler 3 | from .instance_balanced_pos_sampler import InstanceBalancedPosSampler 4 | from .iou_balanced_neg_sampler import IoUBalancedNegSampler 5 | from .ohem_sampler import OHEMSampler 6 | from .pseudo_sampler import PseudoSampler 7 | from .random_sampler import RandomSampler 8 | from .sampling_result import SamplingResult 9 | 10 | __all__ = [ 11 | 'BaseSampler', 'PseudoSampler', 'RandomSampler', 12 | 'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler', 13 | 'OHEMSampler', 'SamplingResult' 14 | ] 15 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/base_sampler.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | import torch 4 | 5 | from .sampling_result import SamplingResult 6 | 7 | 8 | class BaseSampler(metaclass=ABCMeta): 9 | 10 | def __init__(self, 11 | num, 12 | pos_fraction, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | self.num = num 17 | self.pos_fraction = pos_fraction 18 | self.neg_pos_ub = neg_pos_ub 19 | self.add_gt_as_proposals = add_gt_as_proposals 20 | self.pos_sampler = self 21 | self.neg_sampler = self 22 | 23 |
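# A sampler acts as its own positive and negative sampler by default;
# CombinedSampler overrides these two attributes with dedicated sub-samplers.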
@abstractmethod 24 | def _sample_pos(self, assign_result, num_expected, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def _sample_neg(self, assign_result, num_expected, **kwargs): 29 | pass 30 | 31 | def sample(self, 32 | assign_result, 33 | bboxes, 34 | gt_bboxes, 35 | gt_labels=None, 36 | **kwargs): 37 | """Sample positive and negative bboxes. 38 | 39 | This is a simple implementation of bbox sampling given candidates, 40 | assigning results and ground truth bboxes. 41 | 42 | Args: 43 | assign_result (:obj:`AssignResult`): Bbox assigning results. 44 | bboxes (Tensor): Boxes to be sampled from. 45 | gt_bboxes (Tensor): Ground truth bboxes. 46 | gt_labels (Tensor, optional): Class labels of ground truth bboxes. 47 | 48 | Returns: 49 | :obj:`SamplingResult`: Sampling result. 50 | """ 51 | bboxes = bboxes[:, :4] 52 | 53 | gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8) 54 | if self.add_gt_as_proposals: 55 | bboxes = torch.cat([gt_bboxes, bboxes], dim=0) 56 | assign_result.add_gt_(gt_labels) 57 | gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8) 58 | gt_flags = torch.cat([gt_ones, gt_flags]) 59 | 60 | num_expected_pos = int(self.num * self.pos_fraction) 61 | pos_inds = self.pos_sampler._sample_pos( 62 | assign_result, num_expected_pos, bboxes=bboxes, **kwargs) 63 | # We found that sampled indices have duplicated items occasionally. 64 | # (may be a bug of PyTorch) 65 | pos_inds = pos_inds.unique() 66 | num_sampled_pos = pos_inds.numel() 67 | num_expected_neg = self.num - num_sampled_pos 68 | if self.neg_pos_ub >= 0: 69 | _pos = max(1, num_sampled_pos) 70 | neg_upper_bound = int(self.neg_pos_ub * _pos) 71 | if num_expected_neg > neg_upper_bound: 72 | num_expected_neg = neg_upper_bound 73 | neg_inds = self.neg_sampler._sample_neg( 74 | assign_result, num_expected_neg, bboxes=bboxes, **kwargs) 75 | neg_inds = neg_inds.unique() 76 | 77 | return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 78 | assign_result, gt_flags) 79 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | from ..assign_sampling import build_sampler 2 | from .base_sampler import BaseSampler 3 | 4 | 5 | class CombinedSampler(BaseSampler): 6 | 7 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 8 | super(CombinedSampler, self).__init__(**kwargs) 9 | self.pos_sampler = build_sampler(pos_sampler, **kwargs) 10 | self.neg_sampler = build_sampler(neg_sampler, **kwargs) 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .random_sampler import RandomSampler 5 | 6 | 7 | class InstanceBalancedPosSampler(RandomSampler): 8 | 9 | def _sample_pos(self, assign_result, num_expected, **kwargs): 10 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 11 | if pos_inds.numel() != 0: 12 | pos_inds = pos_inds.squeeze(1) 13 | if pos_inds.numel() <= num_expected: 14 | return pos_inds 15 | else: 16 | unique_gt_inds = assign_result.gt_inds[pos_inds].unique() 17 | num_gts = len(unique_gt_inds) 18 | num_per_gt = int(round(num_expected / 
float(num_gts)) + 1) 19 | sampled_inds = [] 20 | for i in unique_gt_inds: 21 | inds = torch.nonzero(assign_result.gt_inds == i.item()) 22 | if inds.numel() != 0: 23 | inds = inds.squeeze(1) 24 | else: 25 | continue 26 | if len(inds) > num_per_gt: 27 | inds = self.random_choice(inds, num_per_gt) 28 | sampled_inds.append(inds) 29 | sampled_inds = torch.cat(sampled_inds) 30 | if len(sampled_inds) < num_expected: 31 | num_extra = num_expected - len(sampled_inds) 32 | extra_inds = np.array( 33 | list(set(pos_inds.cpu()) - set(sampled_inds.cpu()))) 34 | if len(extra_inds) > num_extra: 35 | extra_inds = self.random_choice(extra_inds, num_extra) 36 | extra_inds = torch.from_numpy(extra_inds).to( 37 | assign_result.gt_inds.device).long() 38 | sampled_inds = torch.cat([sampled_inds, extra_inds]) 39 | elif len(sampled_inds) > num_expected: 40 | sampled_inds = self.random_choice(sampled_inds, num_expected) 41 | return sampled_inds 42 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from ..transforms import bbox2roi 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class OHEMSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | context, 13 | neg_pos_ub=-1, 14 | add_gt_as_proposals=True, 15 | **kwargs): 16 | super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub, 17 | add_gt_as_proposals) 18 | if not hasattr(context, 'num_stages'): 19 | self.bbox_roi_extractor = context.bbox_roi_extractor 20 | self.bbox_head = context.bbox_head 21 | else: 22 | self.bbox_roi_extractor = context.bbox_roi_extractor[ 23 | context.current_stage] 24 | self.bbox_head = context.bbox_head[context.current_stage] 25 | 26 | def hard_mining(self, inds, num_expected, bboxes, labels, feats): 27 | with torch.no_grad(): 28 | rois = bbox2roi([bboxes]) 29 | bbox_feats = self.bbox_roi_extractor( 30 | feats[:self.bbox_roi_extractor.num_inputs], rois) 31 | cls_score, _ = self.bbox_head(bbox_feats) 32 | loss = self.bbox_head.loss( 33 | cls_score=cls_score, 34 | bbox_pred=None, 35 | labels=labels, 36 | label_weights=cls_score.new_ones(cls_score.size(0)), 37 | bbox_targets=None, 38 | bbox_weights=None, 39 | reduction_override='none')['loss_cls'] 40 | _, topk_loss_inds = loss.topk(num_expected) 41 | return inds[topk_loss_inds] 42 | 43 | def _sample_pos(self, 44 | assign_result, 45 | num_expected, 46 | bboxes=None, 47 | feats=None, 48 | **kwargs): 49 | # Sample some hard positive samples 50 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 51 | if pos_inds.numel() != 0: 52 | pos_inds = pos_inds.squeeze(1) 53 | if pos_inds.numel() <= num_expected: 54 | return pos_inds 55 | else: 56 | return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds], 57 | assign_result.labels[pos_inds], feats) 58 | 59 | def _sample_neg(self, 60 | assign_result, 61 | num_expected, 62 | bboxes=None, 63 | feats=None, 64 | **kwargs): 65 | # Sample some hard negative samples 66 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 67 | if neg_inds.numel() != 0: 68 | neg_inds = neg_inds.squeeze(1) 69 | if len(neg_inds) <= num_expected: 70 | return neg_inds 71 | else: 72 | return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds], 73 | assign_result.labels[neg_inds], feats) 74 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/pseudo_sampler.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .base_sampler import BaseSampler 4 | from .sampling_result import SamplingResult 5 | 6 | 7 | class PseudoSampler(BaseSampler): 8 | 9 | def __init__(self, **kwargs): 10 | pass 11 | 12 | def _sample_pos(self, **kwargs): 13 | raise NotImplementedError 14 | 15 | def _sample_neg(self, **kwargs): 16 | raise NotImplementedError 17 | 18 | def sample(self, assign_result, bboxes, gt_bboxes, **kwargs): 19 | pos_inds = torch.nonzero( 20 | assign_result.gt_inds > 0).squeeze(-1).unique() 21 | neg_inds = torch.nonzero( 22 | assign_result.gt_inds == 0).squeeze(-1).unique() 23 | gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8) 24 | sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes, 25 | assign_result, gt_flags) 26 | return sampling_result 27 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .base_sampler import BaseSampler 5 | 6 | 7 | class RandomSampler(BaseSampler): 8 | 9 | def __init__(self, 10 | num, 11 | pos_fraction, 12 | neg_pos_ub=-1, 13 | add_gt_as_proposals=True, 14 | **kwargs): 15 | super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub, 16 | add_gt_as_proposals) 17 | 18 | @staticmethod 19 | def random_choice(gallery, num): 20 | """Randomly select some elements from the gallery. 21 | 22 | It seems that PyTorch's implementation is slower than numpy, so we use 23 | numpy to randperm the indices. 24 | """ 25 | assert len(gallery) >= num 26 | if isinstance(gallery, list): 27 | gallery = np.array(gallery) 28 | cands = np.arange(len(gallery)) 29 | np.random.shuffle(cands) 30 | rand_inds = cands[:num] 31 | if not isinstance(gallery, np.ndarray): 32 | rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device) 33 | return gallery[rand_inds] 34 | 35 | def _sample_pos(self, assign_result, num_expected, **kwargs): 36 | """Randomly sample some positive samples.""" 37 | pos_inds = torch.nonzero(assign_result.gt_inds > 0) 38 | if pos_inds.numel() != 0: 39 | pos_inds = pos_inds.squeeze(1) 40 | if pos_inds.numel() <= num_expected: 41 | return pos_inds 42 | else: 43 | return self.random_choice(pos_inds, num_expected) 44 | 45 | def _sample_neg(self, assign_result, num_expected, **kwargs): 46 | """Randomly sample some negative samples.""" 47 | neg_inds = torch.nonzero(assign_result.gt_inds == 0) 48 | if neg_inds.numel() != 0: 49 | neg_inds = neg_inds.squeeze(1) 50 | if len(neg_inds) <= num_expected: 51 | return neg_inds 52 | else: 53 | return self.random_choice(neg_inds, num_expected) 54 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/bbox/samplers/sampling_result.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class SamplingResult(object): 5 | 6 | def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result, 7 | gt_flags): 8 | self.pos_inds = pos_inds 9 | self.neg_inds = neg_inds 10 | self.pos_bboxes = bboxes[pos_inds] 11 | self.neg_bboxes = bboxes[neg_inds] 12 | self.pos_is_gt = gt_flags[pos_inds] 13 | 14 | self.num_gts = gt_bboxes.shape[0] 15 | self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1 16 | self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds, :] 17 | if
assign_result.labels is not None: 18 | self.pos_gt_labels = assign_result.labels[pos_inds] 19 | else: 20 | self.pos_gt_labels = None 21 | 22 | @property 23 | def bboxes(self): 24 | return torch.cat([self.pos_bboxes, self.neg_bboxes]) 25 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | from .class_names import (coco_classes, dataset_aliases, get_classes, 2 | imagenet_det_classes, imagenet_vid_classes, 3 | voc_classes) 4 | from .coco_utils import coco_eval, fast_eval_recall, results2json 5 | from .eval_hooks import (CocoDistEvalmAPHook, CocoDistEvalRecallHook, 6 | DistEvalHook, DistEvalmAPHook) 7 | from .mean_ap import average_precision, eval_map, print_map_summary 8 | from .recall import (eval_recalls, plot_iou_recall, plot_num_recall, 9 | print_recall_summary) 10 | 11 | __all__ = [ 12 | 'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes', 13 | 'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval', 14 | 'fast_eval_recall', 'results2json', 'DistEvalHook', 'DistEvalmAPHook', 15 | 'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision', 16 | 'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary', 17 | 'plot_num_recall', 'plot_iou_recall' 18 | ] 19 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/evaluation/bbox_overlaps.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def bbox_overlaps(bboxes1, bboxes2, mode='iou'): 5 | """Calculate the ious between each bbox of bboxes1 and bboxes2. 6 | 7 | Args: 8 | bboxes1(ndarray): shape (n, 4) 9 | bboxes2(ndarray): shape (k, 4) 10 | mode(str): iou (intersection over union) or iof (intersection 11 | over foreground) 12 | 13 | Returns: 14 | ious(ndarray): shape (n, k) 15 | """ 16 | 17 | assert mode in ['iou', 'iof'] 18 | 19 | bboxes1 = bboxes1.astype(np.float32) 20 | bboxes2 = bboxes2.astype(np.float32) 21 | rows = bboxes1.shape[0] 22 | cols = bboxes2.shape[0] 23 | ious = np.zeros((rows, cols), dtype=np.float32) 24 | if rows * cols == 0: 25 | return ious 26 | exchange = False 27 | if bboxes1.shape[0] > bboxes2.shape[0]: 28 | bboxes1, bboxes2 = bboxes2, bboxes1 29 | ious = np.zeros((cols, rows), dtype=np.float32) 30 | exchange = True 31 | area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * ( 32 | bboxes1[:, 3] - bboxes1[:, 1] + 1) 33 | area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * ( 34 | bboxes2[:, 3] - bboxes2[:, 1] + 1) 35 | for i in range(bboxes1.shape[0]): 36 | x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) 37 | y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) 38 | x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) 39 | y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) 40 | overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( 41 | y_end - y_start + 1, 0) 42 | if mode == 'iou': 43 | union = area1[i] + area2 - overlap 44 | else: 45 | union = area1[i] if not exchange else area2 46 | ious[i, :] = overlap / union 47 | if exchange: 48 | ious = ious.T 49 | return ious 50 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/fp16/__init__.py: -------------------------------------------------------------------------------- 1 | from .decorators import auto_fp16, force_fp32 2 | from .hooks import Fp16OptimizerHook, wrap_fp16_model 3 | 4 | __all__ = ['auto_fp16', 
'force_fp32', 'Fp16OptimizerHook', 'wrap_fp16_model'] 5 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/fp16/utils.py: -------------------------------------------------------------------------------- 1 | from collections import abc 2 | 3 | import numpy as np 4 | import torch 5 | 6 | 7 | def cast_tensor_type(inputs, src_type, dst_type): 8 | if isinstance(inputs, torch.Tensor): 9 | return inputs.to(dst_type) 10 | elif isinstance(inputs, str): 11 | return inputs 12 | elif isinstance(inputs, np.ndarray): 13 | return inputs 14 | elif isinstance(inputs, abc.Mapping): 15 | return type(inputs)({ 16 | k: cast_tensor_type(v, src_type, dst_type) 17 | for k, v in inputs.items() 18 | }) 19 | elif isinstance(inputs, abc.Iterable): 20 | return type(inputs)( 21 | cast_tensor_type(item, src_type, dst_type) for item in inputs) 22 | else: 23 | return inputs 24 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/mask/__init__.py: -------------------------------------------------------------------------------- 1 | from .mask_target import mask_target 2 | from .utils import split_combined_polys 3 | 4 | __all__ = ['split_combined_polys', 'mask_target'] 5 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/mask/mask_target.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | import numpy as np 3 | import torch 4 | from torch.nn.modules.utils import _pair 5 | 6 | 7 | def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list, 8 | cfg): 9 | cfg_list = [cfg for _ in range(len(pos_proposals_list))] 10 | mask_targets = map(mask_target_single, pos_proposals_list, 11 | pos_assigned_gt_inds_list, gt_masks_list, cfg_list) 12 | mask_targets = torch.cat(list(mask_targets)) 13 | return mask_targets 14 | 15 | 16 | def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg): 17 | mask_size = _pair(cfg.mask_size) 18 | num_pos = pos_proposals.size(0) 19 | mask_targets = [] 20 | if num_pos > 0: 21 | proposals_np = pos_proposals.cpu().numpy() 22 | pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy() 23 | for i in range(num_pos): 24 | gt_mask = gt_masks[pos_assigned_gt_inds[i]] 25 | bbox = proposals_np[i, :].astype(np.int32) 26 | x1, y1, x2, y2 = bbox 27 | w = np.maximum(x2 - x1 + 1, 1) 28 | h = np.maximum(y2 - y1 + 1, 1) 29 | # mask is uint8 both before and after resizing 30 | # mask_size (h, w) to (w, h) 31 | target = mmcv.imresize(gt_mask[y1:y1 + h, x1:x1 + w], 32 | mask_size[::-1]) 33 | mask_targets.append(target) 34 | mask_targets = torch.from_numpy(np.stack(mask_targets)).float().to( 35 | pos_proposals.device) 36 | else: 37 | mask_targets = pos_proposals.new_zeros((0, ) + mask_size) 38 | return mask_targets 39 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/mask/utils.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | 4 | def split_combined_polys(polys, poly_lens, polys_per_mask): 5 | """Split the combined 1-D polys into masks. 6 | 7 | A mask is represented as a list of polys, and a poly is represented as 8 | a 1-D array. In dataset, all masks are concatenated into a single 1-D 9 | tensor. Here we need to split the tensor into original representations. 
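    For example (illustrative numbers): with ``poly_lens = [[10, 8, 6]]`` and
    ``polys_per_mask = [[2, 1]]``, the 24 concatenated values of one image are
    split into two masks, the first holding two polys (10 and 8 values) and
    the second a single poly (6 values).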
10 | 11 | Args: 12 | polys (list): a list (length = image num) of 1-D tensors 13 | poly_lens (list): a list (length = image num) of poly length 14 | polys_per_mask (list): a list (length = image num) of poly number 15 | of each mask 16 | 17 | Returns: 18 | list: a list (length = image num) of list (length = mask num) of 19 | list (length = poly num) of numpy array 20 | """ 21 | mask_polys_list = [] 22 | for img_id in range(len(polys)): 23 | polys_single = polys[img_id] 24 | polys_lens_single = poly_lens[img_id].tolist() 25 | polys_per_mask_single = polys_per_mask[img_id].tolist() 26 | 27 | split_polys = mmcv.slice_list(polys_single, polys_lens_single) 28 | mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single) 29 | mask_polys_list.append(mask_polys) 30 | return mask_polys_list 31 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/post_processing/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_nms import multiclass_nms 2 | from .merge_augs import (merge_aug_bboxes, merge_aug_masks, 3 | merge_aug_proposals, merge_aug_scores) 4 | 5 | __all__ = [ 6 | 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 7 | 'merge_aug_scores', 'merge_aug_masks' 8 | ] 9 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/post_processing/bbox_nms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.ops.nms import nms_wrapper 4 | 5 | 6 | def multiclass_nms(multi_bboxes, 7 | multi_scores, 8 | score_thr, 9 | nms_cfg, 10 | max_num=-1, 11 | score_factors=None): 12 | """NMS for multi-class bboxes. 13 | 14 | Args: 15 | multi_bboxes (Tensor): shape (n, #class*4) or (n, 4) 16 | multi_scores (Tensor): shape (n, #class) 17 | score_thr (float): bbox threshold, bboxes with scores lower than it 18 | will not be considered. 19 | nms_cfg (dict): NMS config, e.g. ``dict(type='nms', iou_thr=0.5)``. 20 | max_num (int): if there are more than max_num bboxes after NMS, 21 | only top max_num will be kept; a non-positive value keeps all. 22 | score_factors (Tensor): The factors multiplied to scores before 23 | applying NMS 24 | 25 | Returns: 26 | tuple: (bboxes, labels), tensors of shape (k, 5) and (k, ). Labels 27 | are 0-based.
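    Example (hypothetical tensors; class 0 is treated as background and
        skipped):
        >>> # 100 shared candidate boxes scored over 1 background + 80 classes
        >>> bboxes, labels = multiclass_nms(
        ...     multi_bboxes,  # shape (100, 4)
        ...     multi_scores,  # shape (100, 81)
        ...     score_thr=0.05,
        ...     nms_cfg=dict(type='nms', iou_thr=0.5),
        ...     max_num=100)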
28 | """ 29 | num_classes = multi_scores.shape[1] 30 | bboxes, labels = [], [] 31 | nms_cfg_ = nms_cfg.copy() 32 | nms_type = nms_cfg_.pop('type', 'nms') 33 | nms_op = getattr(nms_wrapper, nms_type) 34 | for i in range(1, num_classes): 35 | cls_inds = multi_scores[:, i] > score_thr 36 | if not cls_inds.any(): 37 | continue 38 | # get bboxes and scores of this class 39 | if multi_bboxes.shape[1] == 4: 40 | _bboxes = multi_bboxes[cls_inds, :] 41 | else: 42 | _bboxes = multi_bboxes[cls_inds, i * 4:(i + 1) * 4] 43 | _scores = multi_scores[cls_inds, i] 44 | if score_factors is not None: 45 | _scores *= score_factors[cls_inds] 46 | cls_dets = torch.cat([_bboxes, _scores[:, None]], dim=1) 47 | cls_dets, _ = nms_op(cls_dets, **nms_cfg_) 48 | cls_labels = multi_bboxes.new_full((cls_dets.shape[0], ), 49 | i - 1, 50 | dtype=torch.long) 51 | bboxes.append(cls_dets) 52 | labels.append(cls_labels) 53 | if bboxes: 54 | bboxes = torch.cat(bboxes) 55 | labels = torch.cat(labels) 56 | if bboxes.shape[0] > max_num: 57 | _, inds = bboxes[:, -1].sort(descending=True) 58 | inds = inds[:max_num] 59 | bboxes = bboxes[inds] 60 | labels = labels[inds] 61 | else: 62 | bboxes = multi_bboxes.new_zeros((0, 5)) 63 | labels = multi_bboxes.new_zeros((0, ), dtype=torch.long) 64 | 65 | return bboxes, labels 66 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/post_processing/merge_augs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from mmdet.ops import nms 5 | from ..bbox import bbox_mapping_back 6 | 7 | 8 | def merge_aug_proposals(aug_proposals, img_metas, rpn_test_cfg): 9 | """Merge augmented proposals (multiscale, flip, etc.) 10 | 11 | Args: 12 | aug_proposals (list[Tensor]): proposals from different testing 13 | schemes, shape (n, 5). Note that they are not rescaled to the 14 | original image size. 15 | img_metas (list[dict]): image info including "shape_scale" and "flip". 16 | rpn_test_cfg (dict): rpn test config. 17 | 18 | Returns: 19 | Tensor: shape (n, 4), proposals corresponding to original image scale. 20 | """ 21 | recovered_proposals = [] 22 | for proposals, img_info in zip(aug_proposals, img_metas): 23 | img_shape = img_info['img_shape'] 24 | scale_factor = img_info['scale_factor'] 25 | flip = img_info['flip'] 26 | _proposals = proposals.clone() 27 | _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape, 28 | scale_factor, flip) 29 | recovered_proposals.append(_proposals) 30 | aug_proposals = torch.cat(recovered_proposals, dim=0) 31 | merged_proposals, _ = nms(aug_proposals, rpn_test_cfg.nms_thr) 32 | scores = merged_proposals[:, 4] 33 | _, order = scores.sort(0, descending=True) 34 | num = min(rpn_test_cfg.max_num, merged_proposals.shape[0]) 35 | order = order[:num] 36 | merged_proposals = merged_proposals[order, :] 37 | return merged_proposals 38 | 39 | 40 | def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg): 41 | """Merge augmented detection bboxes and scores. 42 | 43 | Args: 44 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 45 | aug_scores (list[Tensor] or None): shape (n, #class) 46 | img_shapes (list[Tensor]): shape (3, ). 47 | rcnn_test_cfg (dict): rcnn test config. 
48 | 49 | Returns: 50 | tuple: (bboxes, scores); only bboxes if ``aug_scores`` is None. 51 | """ 52 | recovered_bboxes = [] 53 | for bboxes, img_info in zip(aug_bboxes, img_metas): 54 | img_shape = img_info[0]['img_shape'] 55 | scale_factor = img_info[0]['scale_factor'] 56 | flip = img_info[0]['flip'] 57 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 58 | recovered_bboxes.append(bboxes) 59 | bboxes = torch.stack(recovered_bboxes).mean(dim=0) 60 | if aug_scores is None: 61 | return bboxes 62 | else: 63 | scores = torch.stack(aug_scores).mean(dim=0) 64 | return bboxes, scores 65 | 66 | 67 | def merge_aug_scores(aug_scores): 68 | """Merge augmented bbox scores.""" 69 | if isinstance(aug_scores[0], torch.Tensor): 70 | return torch.mean(torch.stack(aug_scores), dim=0) 71 | else: 72 | return np.mean(aug_scores, axis=0) 73 | 74 | 75 | def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None): 76 | """Merge augmented mask prediction. 77 | 78 | Args: 79 | aug_masks (list[ndarray]): shape (n, #class, h, w) 80 | img_metas (list[list[dict]]): image info including "flip". 81 | rcnn_test_cfg (dict): rcnn test config. 82 | 83 | Returns: 84 | ndarray: merged masks, same shape as a single element of aug_masks. 85 | """ 86 | recovered_masks = [ 87 | mask if not img_info[0]['flip'] else mask[..., ::-1] 88 | for mask, img_info in zip(aug_masks, img_metas) 89 | ] 90 | if weights is None: 91 | merged_masks = np.mean(recovered_masks, axis=0) 92 | else: 93 | merged_masks = np.average( 94 | np.array(recovered_masks), axis=0, weights=np.array(weights)) 95 | return merged_masks 96 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .dist_utils import DistOptimizerHook, allreduce_grads 2 | from .misc import multi_apply, tensor2imgs, unmap 3 | 4 | __all__ = [ 5 | 'allreduce_grads', 'DistOptimizerHook', 'tensor2imgs', 'unmap', 6 | 'multi_apply' 7 | ] 8 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/utils/dist_utils.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | 3 | import torch.distributed as dist 4 | from mmcv.runner import OptimizerHook 5 | from torch._utils import (_flatten_dense_tensors, _take_tensors, 6 | _unflatten_dense_tensors) 7 | 8 | 9 | def _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1): 10 | if bucket_size_mb > 0: 11 | bucket_size_bytes = bucket_size_mb * 1024 * 1024 12 | buckets = _take_tensors(tensors, bucket_size_bytes) 13 | else: 14 | buckets = OrderedDict() 15 | for tensor in tensors: 16 | tp = tensor.type() 17 | if tp not in buckets: 18 | buckets[tp] = [] 19 | buckets[tp].append(tensor) 20 | buckets = buckets.values() 21 | 22 | for bucket in buckets: 23 | flat_tensors = _flatten_dense_tensors(bucket) 24 | dist.all_reduce(flat_tensors) 25 | flat_tensors.div_(world_size) 26 | for tensor, synced in zip( 27 | bucket, _unflatten_dense_tensors(flat_tensors, bucket)): 28 | tensor.copy_(synced) 29 | 30 | 31 | def allreduce_grads(params, coalesce=True, bucket_size_mb=-1): 32 | grads = [ 33 | param.grad.data for param in params 34 | if param.requires_grad and param.grad is not None 35 | ] 36 | world_size = dist.get_world_size() 37 | if coalesce: 38 | _allreduce_coalesced(grads, world_size, bucket_size_mb) 39 | else: 40 | for tensor in grads: 41 | dist.all_reduce(tensor.div_(world_size)) 42 | 43 | 44 | class DistOptimizerHook(OptimizerHook): 45 | 46 | def
__init__(self, grad_clip=None, coalesce=True, bucket_size_mb=-1): 47 | self.grad_clip = grad_clip 48 | self.coalesce = coalesce 49 | self.bucket_size_mb = bucket_size_mb 50 | 51 | def after_train_iter(self, runner): 52 | runner.optimizer.zero_grad() 53 | runner.outputs['loss'].backward() 54 | allreduce_grads(runner.model.parameters(), self.coalesce, 55 | self.bucket_size_mb) 56 | if self.grad_clip is not None: 57 | self.clip_grads(runner.model.parameters()) 58 | runner.optimizer.step() 59 | -------------------------------------------------------------------------------- /segmentation/mmdet/core/utils/misc.py: -------------------------------------------------------------------------------- 1 | from functools import partial 2 | 3 | import mmcv 4 | import numpy as np 5 | from six.moves import map, zip 6 | 7 | 8 | def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True): 9 | num_imgs = tensor.size(0) 10 | mean = np.array(mean, dtype=np.float32) 11 | std = np.array(std, dtype=np.float32) 12 | imgs = [] 13 | for img_id in range(num_imgs): 14 | img = tensor[img_id, ...].cpu().numpy().transpose(1, 2, 0) 15 | img = mmcv.imdenormalize( 16 | img, mean, std, to_bgr=to_rgb).astype(np.uint8) 17 | imgs.append(np.ascontiguousarray(img)) 18 | return imgs 19 | 20 | 21 | def multi_apply(func, *args, **kwargs): 22 | pfunc = partial(func, **kwargs) if kwargs else func 23 | map_results = map(pfunc, *args) 24 | return tuple(map(list, zip(*map_results))) 25 | 26 | 27 | def unmap(data, count, inds, fill=0): 28 | """Unmap a subset of items (data) back to the original set of items (of 29 | size count).""" 30 | if data.dim() == 1: 31 | ret = data.new_full((count, ), fill) 32 | ret[inds] = data 33 | else: 34 | new_size = (count, ) + data.size()[1:] 35 | ret = data.new_full(new_size, fill) 36 | ret[inds, :] = data 37 | return ret 38 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .builder import build_dataset 2 | from .cityscapes import CityscapesDataset 3 | from .coco import CocoDataset 4 | from .custom import CustomDataset 5 | from .dataset_wrappers import ConcatDataset, RepeatDataset 6 | from .extra_aug import ExtraAugmentation 7 | from .loader import DistributedGroupSampler, GroupSampler, build_dataloader 8 | from .registry import DATASETS 9 | from .voc import VOCDataset 10 | from .wider_face import WIDERFaceDataset 11 | from .xml_style import XMLDataset 12 | 13 | __all__ = [ 14 | 'CustomDataset', 'XMLDataset', 'CocoDataset', 'VOCDataset', 15 | 'CityscapesDataset', 'GroupSampler', 'DistributedGroupSampler', 16 | 'build_dataloader', 'ConcatDataset', 'RepeatDataset', 'ExtraAugmentation', 17 | 'WIDERFaceDataset', 'DATASETS', 'build_dataset' 18 | ] 19 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/builder.py: -------------------------------------------------------------------------------- 1 | import copy 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .dataset_wrappers import ConcatDataset, RepeatDataset 5 | from .registry import DATASETS 6 | 7 | 8 | def _concat_dataset(cfg, default_args=None): 9 | ann_files = cfg['ann_file'] 10 | img_prefixes = cfg.get('img_prefix', None) 11 | seg_prefixes = cfg.get('seg_prefix', None) 12 | proposal_files = cfg.get('proposal_file', None) 13 | 14 | datasets = [] 15 | num_dset = len(ann_files) 16 | for i in range(num_dset): 17 | data_cfg
= copy.deepcopy(cfg) 18 | data_cfg['ann_file'] = ann_files[i] 19 | if isinstance(img_prefixes, (list, tuple)): 20 | data_cfg['img_prefix'] = img_prefixes[i] 21 | if isinstance(seg_prefixes, (list, tuple)): 22 | data_cfg['seg_prefix'] = seg_prefixes[i] 23 | if isinstance(proposal_files, (list, tuple)): 24 | data_cfg['proposal_file'] = proposal_files[i] 25 | datasets.append(build_dataset(data_cfg, default_args)) 26 | 27 | return ConcatDataset(datasets) 28 | 29 | 30 | def build_dataset(cfg, default_args=None): 31 | if isinstance(cfg, (list, tuple)): 32 | dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg]) 33 | elif cfg['type'] == 'RepeatDataset': 34 | dataset = RepeatDataset( 35 | build_dataset(cfg['dataset'], default_args), cfg['times']) 36 | elif isinstance(cfg['ann_file'], (list, tuple)): 37 | dataset = _concat_dataset(cfg, default_args) 38 | else: 39 | dataset = build_from_cfg(cfg, DATASETS, default_args) 40 | 41 | return dataset 42 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/cityscapes.py: -------------------------------------------------------------------------------- 1 | from .coco import CocoDataset 2 | from .registry import DATASETS 3 | 4 | 5 | @DATASETS.register_module 6 | class CityscapesDataset(CocoDataset): 7 | 8 | CLASSES = ('person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 9 | 'bicycle') 10 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/dataset_wrappers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 3 | 4 | from .registry import DATASETS 5 | 6 | 7 | @DATASETS.register_module 8 | class ConcatDataset(_ConcatDataset): 9 | """A wrapper of concatenated dataset. 10 | 11 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 12 | concat the group flag for image aspect ratio. 13 | 14 | Args: 15 | datasets (list[:obj:`Dataset`]): A list of datasets. 16 | """ 17 | 18 | def __init__(self, datasets): 19 | super(ConcatDataset, self).__init__(datasets) 20 | self.CLASSES = datasets[0].CLASSES 21 | if hasattr(datasets[0], 'flag'): 22 | flags = [] 23 | for i in range(0, len(datasets)): 24 | flags.append(datasets[i].flag) 25 | self.flag = np.concatenate(flags) 26 | 27 | 28 | @DATASETS.register_module 29 | class RepeatDataset(object): 30 | """A wrapper of repeated dataset. 31 | 32 | The length of repeated dataset will be `times` larger than the original 33 | dataset. This is useful when the data loading time is long but the dataset 34 | is small. Using RepeatDataset can reduce the data loading time between 35 | epochs. 36 | 37 | Args: 38 | dataset (:obj:`Dataset`): The dataset to be repeated. 39 | times (int): Repeat times. 
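    Example (illustrative; ``small_dataset`` stands for any constructed
        dataset):
        >>> wrapped = RepeatDataset(small_dataset, times=5)
        >>> assert len(wrapped) == 5 * len(small_dataset)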
40 | """ 41 | 42 | def __init__(self, dataset, times): 43 | self.dataset = dataset 44 | self.times = times 45 | self.CLASSES = dataset.CLASSES 46 | if hasattr(self.dataset, 'flag'): 47 | self.flag = np.tile(self.dataset.flag, times) 48 | 49 | self._ori_len = len(self.dataset) 50 | 51 | def __getitem__(self, idx): 52 | return self.dataset[idx % self._ori_len] 53 | 54 | def __len__(self): 55 | return self.times * self._ori_len 56 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/loader/__init__.py: -------------------------------------------------------------------------------- 1 | from .build_loader import build_dataloader 2 | from .sampler import DistributedGroupSampler, GroupSampler 3 | 4 | __all__ = ['GroupSampler', 'DistributedGroupSampler', 'build_dataloader'] 5 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/loader/build_loader.py: -------------------------------------------------------------------------------- 1 | import platform 2 | from functools import partial 3 | 4 | from mmcv.parallel import collate 5 | from mmcv.runner import get_dist_info 6 | from torch.utils.data import DataLoader 7 | 8 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler 9 | 10 | if platform.system() != 'Windows': 11 | # https://github.com/pytorch/pytorch/issues/973 12 | import resource 13 | rlimit = resource.getrlimit(resource.RLIMIT_NOFILE) 14 | resource.setrlimit(resource.RLIMIT_NOFILE, (4096, rlimit[1])) 15 | 16 | 17 | def build_dataloader(dataset, 18 | imgs_per_gpu, 19 | workers_per_gpu, 20 | num_gpus=1, 21 | dist=True, 22 | **kwargs): 23 | shuffle = kwargs.get('shuffle', True) 24 | if dist: 25 | rank, world_size = get_dist_info() 26 | if shuffle: 27 | sampler = DistributedGroupSampler(dataset, imgs_per_gpu, 28 | world_size, rank) 29 | else: 30 | sampler = DistributedSampler( 31 | dataset, world_size, rank, shuffle=False) 32 | batch_size = imgs_per_gpu 33 | num_workers = workers_per_gpu 34 | else: 35 | sampler = GroupSampler(dataset, imgs_per_gpu) if shuffle else None 36 | batch_size = num_gpus * imgs_per_gpu 37 | num_workers = num_gpus * workers_per_gpu 38 | 39 | data_loader = DataLoader( 40 | dataset, 41 | batch_size=batch_size, 42 | sampler=sampler, 43 | num_workers=num_workers, 44 | collate_fn=partial(collate, samples_per_gpu=imgs_per_gpu), 45 | pin_memory=False, 46 | **kwargs) 47 | 48 | return data_loader 49 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | from .compose import Compose 2 | from .formating import (Collect, ImageToTensor, ToDataContainer, ToTensor, 3 | Transpose, to_tensor) 4 | from .formatingDCT import DefaultFormatBundleDCT 5 | from .loading import LoadAnnotations, LoadImageFromFile, LoadProposals 6 | from .test_aug import MultiScaleFlipAug 7 | from .transforms import (Expand, MinIoURandomCrop, Normalize, Pad, 8 | PhotoMetricDistortion, RandomCrop, RandomFlip, Resize, 9 | SegResizeFlipPadRescale) 10 | from .transformsDCT import ToDCT, ToDCTUpscaledStatic 11 | 12 | 13 | __all__ = [ 14 | 'Compose', 'to_tensor', 'ToTensor', 'ImageToTensor', 'ToDataContainer', 15 | 'Transpose', 'Collect', 'LoadAnnotations', 'LoadImageFromFile', 16 | 'LoadProposals', 'MultiScaleFlipAug', 'Resize', 'RandomFlip', 'Pad', 17 | 'RandomCrop', 'Normalize', 'SegResizeFlipPadRescale', 
'MinIoURandomCrop', 18 | 'Expand', 'PhotoMetricDistortion', 'ToDCT', 'ToDCTUpscaledStatic', 'DefaultFormatBundleDCT' 19 | ] 20 | 21 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/pipelines/compose.py: -------------------------------------------------------------------------------- 1 | import collections 2 | 3 | from mmdet.utils import build_from_cfg 4 | from ..registry import PIPELINES 5 | 6 | 7 | @PIPELINES.register_module 8 | class Compose(object): 9 | 10 | def __init__(self, transforms): 11 | assert isinstance(transforms, collections.abc.Sequence) 12 | self.transforms = [] 13 | for transform in transforms: 14 | if isinstance(transform, dict): 15 | transform = build_from_cfg(transform, PIPELINES) 16 | self.transforms.append(transform) 17 | elif callable(transform): 18 | self.transforms.append(transform) 19 | else: 20 | raise TypeError('transform must be callable or a dict') 21 | 22 | def __call__(self, data): 23 | for t in self.transforms: 24 | data = t(data) 25 | if data is None: 26 | return None 27 | return data 28 | 29 | def __repr__(self): 30 | format_string = self.__class__.__name__ + '(' 31 | for t in self.transforms: 32 | format_string += '\n' 33 | format_string += ' {0}'.format(t) 34 | format_string += '\n)' 35 | return format_string 36 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/pipelines/dct_channel_index.py: -------------------------------------------------------------------------------- 1 | dct_channel_index = { 2 | 24: 3 | [ 4 | [0, 1, 2, 3, 5 | 8, 9, 10, 11, 6 | 16, 17, 18, 19, 7 | 24, 25, 26, 27], 8 | [0, 1, 9 | 8, 9], 10 | [0, 1, 11 | 8, 9] 12 | ], 13 | 32: 14 | [ 15 | [0, 1, 2, 3, 4, 16 | 8, 9, 10, 11, 12, 17 | 16, 17, 18, 19, 20, 18 | 24, 25, 26, 27, 19 | 32, 33, 34], 20 | [0, 1, 2, 21 | 8, 9], 22 | [0, 1, 2, 23 | 8, 9] 24 | ], 25 | 48: 26 | [ 27 | [0, 1, 2, 3, 4, 5, 28 | 8, 9, 10, 11, 12, 13, 29 | 16, 17, 18, 19, 20, 21, 30 | 24, 25, 26, 27, 28, 29, 31 | 32, 33, 34, 35, 32 | 40, 41, 42, 43], 33 | [0, 1, 2, 34 | 8, 9, 10, 35 | 16, 17], 36 | [0, 1, 2, 37 | 8, 9, 10, 38 | 16, 17] 39 | ], 40 | 64: 41 | [ 42 | [0, 1, 2, 3, 4, 5, 6, 43 | 8, 9, 10, 11, 12, 13, 14, 44 | 16, 17, 18, 19, 20, 21, 45 | 24, 25, 26, 27, 28, 29, 46 | 32, 33, 34, 35, 36, 37, 47 | 40, 41, 42, 43, 44, 45, 48 | 48, 49, 50, 51, 52, 53], 49 | [0, 1, 2, 50 | 8, 9, 10, 51 | 16, 17, 52 | 24, 25], 53 | [0, 1, 2, 54 | 8, 9, 10, 55 | 16, 17, 56 | 24, 25], 57 | ] 58 | } 59 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/pipelines/test_aug.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | from ..registry import PIPELINES 4 | from .compose import Compose 5 | 6 | 7 | @PIPELINES.register_module 8 | class MultiScaleFlipAug(object): 9 | 10 | def __init__(self, transforms, img_scale, flip=False): 11 | self.transforms = Compose(transforms) 12 | self.img_scale = img_scale if isinstance(img_scale, 13 | list) else [img_scale] 14 | assert mmcv.is_list_of(self.img_scale, tuple) 15 | self.flip = flip 16 | 17 | def __call__(self, results): 18 | aug_data = [] 19 | flip_aug = [False, True] if self.flip else [False] 20 | for scale in self.img_scale: 21 | for flip in flip_aug: 22 | _results = results.copy() 23 | _results['scale'] = scale 24 | _results['flip'] = flip 25 | data = self.transforms(_results) 26 | aug_data.append(data) 27 | # list of dict to dict of list 28 | 
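# e.g. 2 scales x 2 flips produce 4 augmented dicts; regrouping by key keeps each field's augmented variants together for test-time aggregation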
aug_data_dict = {key: [] for key in aug_data[0]} 29 | for data in aug_data: 30 | for key, val in data.items(): 31 | aug_data_dict[key].append(val) 32 | return aug_data_dict 33 | 34 | def __repr__(self): 35 | repr_str = self.__class__.__name__ 36 | repr_str += '(transforms={}, img_scale={}, flip={})'.format( 37 | self.transforms, self.img_scale, self.flip) 38 | return repr_str 39 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/pipelines/transformsDCT.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | # import mmcv  # unused 3 | import numpy as np 4 | # from imagecorruptions import corrupt  # unused; avoids a hard dependency on imagecorruptions 5 | # from numpy import random  # unused 6 | 7 | # from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps  # unused 8 | from ..registry import PIPELINES 9 | from turbojpeg import TurboJPEG 10 | from jpeg2dct.numpy import loads  # 'load' was imported here but never used 11 | from mmdet.datasets.pipelines.dct_channel_index import dct_channel_index 12 | # from mmdet.utils.plot_dct import plot_dct  # only needed for the debug call below 13 | 14 | INTER_MODE = {'NEAREST': cv2.INTER_NEAREST, 'BILINEAR': cv2.INTER_LINEAR, 'BICUBIC': cv2.INTER_CUBIC} 15 | PAD_MOD = {'constant': cv2.BORDER_CONSTANT, 16 | 'edge': cv2.BORDER_REPLICATE, 17 | 'reflect': cv2.BORDER_DEFAULT, 18 | 'symmetric': cv2.BORDER_REFLECT 19 | } 20 | 21 | @PIPELINES.register_module 22 | class ToDCT(object): 23 | def __init__(self): 24 | self.jpeg = TurboJPEG('/usr/lib/libturbojpeg.so') 25 | 26 | def __call__(self, results): 27 | img = np.ascontiguousarray(results['img'], dtype="uint8") 28 | img_encode = self.jpeg.encode(img, quality=100, jpeg_subsample=2) 29 | dct_y, dct_cb, dct_cr = loads(img_encode)  # per-plane DCT coefficients, one 64-dim vector per 8x8 block 30 | results['dct_y'] = dct_y 31 | results['dct_cb'] = dct_cb 32 | results['dct_cr'] = dct_cr 33 | return results 34 | 35 | @PIPELINES.register_module 36 | class ToDCTUpscaledStatic(object): 37 | def __init__(self, channels=None, is_test=False, interpolation='BILINEAR'): 38 | self.jpeg = TurboJPEG('/usr/lib/libturbojpeg.so') 39 | self.channels = channels 40 | self.is_test = is_test 41 | self.interpolation = interpolation 42 | 43 | if channels and channels != 192: 44 | self.subset_channel_index = dct_channel_index 45 | self.subset_y = self.subset_channel_index[channels][0] 46 | self.subset_cb = self.subset_channel_index[channels][1] 47 | self.subset_cr = self.subset_channel_index[channels][2] 48 | 49 | def __call__(self, results): 50 | h, w = results['img'].shape[:-1] 51 | if self.is_test: 52 | results['img_raw'] = results['img'] 53 | img_raw_4x = cv2.resize(results['img'], dsize=(w*2, h*2), interpolation=INTER_MODE[self.interpolation]) 54 | img_raw_8x = cv2.resize(results['img'], dsize=(w*4, h*4), interpolation=INTER_MODE[self.interpolation]) 55 | img_4x = np.ascontiguousarray(img_raw_4x, dtype="uint8") 56 | img_8x = np.ascontiguousarray(img_raw_8x, dtype="uint8") 57 | img_encode_4x = self.jpeg.encode(img_4x, quality=100, jpeg_subsample=2) 58 | img_encode_8x = self.jpeg.encode(img_8x, quality=100, jpeg_subsample=2) 59 | dct_y, _, _ = loads(img_encode_4x)  # Y DCT blocks from the 2x-upscaled image 60 | _, dct_cb, dct_cr = loads(img_encode_8x)  # Cb/Cr DCT blocks from the 4x-upscaled image (compensates 4:2:0 chroma subsampling) 61 | 62 | 63 | # plot_dct(dct_y, results['img_info']['filename'])  # leftover debug visualization; disabled so every sample is not plotted 64 | 65 | if self.channels == 192: 66 | results['img'] = np.concatenate((dct_y, dct_cb, dct_cr), axis=2) 67 | else: 68 | results['img'] = np.concatenate((dct_y[:, :, self.subset_y], dct_cb[:, :, self.subset_cb], 69 | dct_cr[:, :, self.subset_cr]), axis=2) 70 | 71 | 72 | return results 73 | --------------------------------------------------------------------------------
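A minimal usage sketch for the DCT transforms above, assuming standard mmdet pipeline keys; this fragment is illustrative, not the project's shipped config (see segmentation/configs/ for the real ones), and `channels` must be a key of dct_channel_index:

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    # Re-encode as quality-100 JPEG at 2x (Y) / 4x (Cb, Cr) scale and keep a
    # static subset of 24 DCT channels (16 Y + 4 Cb + 4 Cr per dct_channel_index[24]).
    dict(type='ToDCTUpscaledStatic', channels=24),
    dict(type='DefaultFormatBundleDCT'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]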
/segmentation/mmdet/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | PIPELINES = Registry('pipeline') 5 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/voc.py: -------------------------------------------------------------------------------- 1 | from .registry import DATASETS 2 | from .xml_style import XMLDataset 3 | 4 | 5 | @DATASETS.register_module 6 | class VOCDataset(XMLDataset): 7 | 8 | CLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 9 | 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 10 | 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 11 | 'tvmonitor') 12 | 13 | def __init__(self, **kwargs): 14 | super(VOCDataset, self).__init__(**kwargs) 15 | if 'VOC2007' in self.img_prefix: 16 | self.year = 2007 17 | elif 'VOC2012' in self.img_prefix: 18 | self.year = 2012 19 | else: 20 | raise ValueError('Cannot infer dataset year from img_prefix') 21 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/wider_face.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | 6 | from .registry import DATASETS 7 | from .xml_style import XMLDataset 8 | 9 | 10 | @DATASETS.register_module 11 | class WIDERFaceDataset(XMLDataset): 12 | """ 13 | Reader for the WIDER Face dataset in PASCAL VOC format. 14 | Conversion scripts can be found in 15 | https://github.com/sovrasov/wider-face-pascal-voc-annotations 16 | """ 17 | CLASSES = ('face', ) 18 | 19 | def __init__(self, **kwargs): 20 | super(WIDERFaceDataset, self).__init__(**kwargs) 21 | 22 | def load_annotations(self, ann_file): 23 | img_infos = [] 24 | img_ids = mmcv.list_from_file(ann_file) 25 | for img_id in img_ids: 26 | filename = '{}.jpg'.format(img_id) 27 | xml_path = osp.join(self.img_prefix, 'Annotations', 28 | '{}.xml'.format(img_id)) 29 | tree = ET.parse(xml_path) 30 | root = tree.getroot() 31 | size = root.find('size') 32 | width = int(size.find('width').text) 33 | height = int(size.find('height').text) 34 | folder = root.find('folder').text 35 | img_infos.append( 36 | dict( 37 | id=img_id, 38 | filename=osp.join(folder, filename), 39 | width=width, 40 | height=height)) 41 | 42 | return img_infos 43 | -------------------------------------------------------------------------------- /segmentation/mmdet/datasets/xml_style.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import xml.etree.ElementTree as ET 3 | 4 | import mmcv 5 | import numpy as np 6 | 7 | from .custom import CustomDataset 8 | from .registry import DATASETS 9 | 10 | 11 | @DATASETS.register_module 12 | class XMLDataset(CustomDataset): 13 | 14 | def __init__(self, min_size=None, **kwargs): 15 | super(XMLDataset, self).__init__(**kwargs) 16 | self.cat2label = {cat: i + 1 for i, cat in enumerate(self.CLASSES)} 17 | self.min_size = min_size 18 | 19 | def load_annotations(self, ann_file): 20 | img_infos = [] 21 | img_ids = mmcv.list_from_file(ann_file) 22 | for img_id in img_ids: 23 | filename = 'JPEGImages/{}.jpg'.format(img_id) 24 | xml_path = osp.join(self.img_prefix, 'Annotations', 25 | '{}.xml'.format(img_id)) 26 | tree = ET.parse(xml_path) 27 | root = tree.getroot() 28 | size = root.find('size') 29 | width 
= int(size.find('width').text) 30 | height = int(size.find('height').text) 31 | img_infos.append( 32 | dict(id=img_id, filename=filename, width=width, height=height)) 33 | return img_infos 34 | 35 | def get_ann_info(self, idx): 36 | img_id = self.img_infos[idx]['id'] 37 | xml_path = osp.join(self.img_prefix, 'Annotations', 38 | '{}.xml'.format(img_id)) 39 | tree = ET.parse(xml_path) 40 | root = tree.getroot() 41 | bboxes = [] 42 | labels = [] 43 | bboxes_ignore = [] 44 | labels_ignore = [] 45 | for obj in root.findall('object'): 46 | name = obj.find('name').text 47 | label = self.cat2label[name] 48 | difficult = int(obj.find('difficult').text) 49 | bnd_box = obj.find('bndbox') 50 | bbox = [ 51 | int(bnd_box.find('xmin').text), 52 | int(bnd_box.find('ymin').text), 53 | int(bnd_box.find('xmax').text), 54 | int(bnd_box.find('ymax').text) 55 | ] 56 | ignore = False 57 | if self.min_size: 58 | assert not self.test_mode 59 | w = bbox[2] - bbox[0] 60 | h = bbox[3] - bbox[1] 61 | if w < self.min_size or h < self.min_size: 62 | ignore = True 63 | if difficult or ignore: 64 | bboxes_ignore.append(bbox) 65 | labels_ignore.append(label) 66 | else: 67 | bboxes.append(bbox) 68 | labels.append(label) 69 | if not bboxes: 70 | bboxes = np.zeros((0, 4)) 71 | labels = np.zeros((0, )) 72 | else: 73 | bboxes = np.array(bboxes, ndmin=2) - 1 74 | labels = np.array(labels) 75 | if not bboxes_ignore: 76 | bboxes_ignore = np.zeros((0, 4)) 77 | labels_ignore = np.zeros((0, )) 78 | else: 79 | bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1 80 | labels_ignore = np.array(labels_ignore) 81 | ann = dict( 82 | bboxes=bboxes.astype(np.float32), 83 | labels=labels.astype(np.int64), 84 | bboxes_ignore=bboxes_ignore.astype(np.float32), 85 | labels_ignore=labels_ignore.astype(np.int64)) 86 | return ann 87 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_heads import * # noqa: F401,F403 2 | from .backbones import * # noqa: F401,F403 3 | from .bbox_heads import * # noqa: F401,F403 4 | from .builder import (build_backbone, build_detector, build_head, build_loss, 5 | build_neck, build_roi_extractor, build_shared_head) 6 | from .detectors import * # noqa: F401,F403 7 | from .losses import * # noqa: F401,F403 8 | from .mask_heads import * # noqa: F401,F403 9 | from .necks import * # noqa: F401,F403 10 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 11 | ROI_EXTRACTORS, SHARED_HEADS) 12 | from .roi_extractors import * # noqa: F401,F403 13 | from .shared_heads import * # noqa: F401,F403 14 | 15 | __all__ = [ 16 | 'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES', 17 | 'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor', 18 | 'build_shared_head', 'build_head', 'build_loss', 'build_detector' 19 | ] 20 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/anchor_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_head import AnchorHead 2 | from .fcos_head import FCOSHead 3 | from .ga_retina_head import GARetinaHead 4 | from .ga_rpn_head import GARPNHead 5 | from .guided_anchor_head import FeatureAdaption, GuidedAnchorHead 6 | from .reppoints_head import RepPointsHead 7 | from .retina_head import RetinaHead 8 | from .rpn_head import RPNHead 9 | from .ssd_head import SSDHead 10 | 11 | 
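# Note: each head in this package registers itself under the HEADS registry via @HEADS.register_module, so configs select heads by class name; models/__init__.py re-exports them through `from .anchor_heads import *`.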
__all__ = [ 12 | 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption', 'RPNHead', 13 | 'GARPNHead', 'RetinaHead', 'GARetinaHead', 'SSDHead', 'FCOSHead', 14 | 'RepPointsHead' 15 | ] 16 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/anchor_heads/retina_head.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | from mmcv.cnn import normal_init 4 | 5 | from ..registry import HEADS 6 | from ..utils import ConvModule, bias_init_with_prob 7 | from .anchor_head import AnchorHead 8 | 9 | 10 | @HEADS.register_module 11 | class RetinaHead(AnchorHead): 12 | 13 | def __init__(self, 14 | num_classes, 15 | in_channels, 16 | stacked_convs=4, 17 | octave_base_scale=4, 18 | scales_per_octave=3, 19 | conv_cfg=None, 20 | norm_cfg=None, 21 | **kwargs): 22 | self.stacked_convs = stacked_convs 23 | self.octave_base_scale = octave_base_scale 24 | self.scales_per_octave = scales_per_octave 25 | self.conv_cfg = conv_cfg 26 | self.norm_cfg = norm_cfg 27 | octave_scales = np.array( 28 | [2**(i / scales_per_octave) for i in range(scales_per_octave)]) 29 | anchor_scales = octave_scales * octave_base_scale 30 | super(RetinaHead, self).__init__( 31 | num_classes, in_channels, anchor_scales=anchor_scales, **kwargs) 32 | 33 | def _init_layers(self): 34 | self.relu = nn.ReLU(inplace=True) 35 | self.cls_convs = nn.ModuleList() 36 | self.reg_convs = nn.ModuleList() 37 | for i in range(self.stacked_convs): 38 | chn = self.in_channels if i == 0 else self.feat_channels 39 | self.cls_convs.append( 40 | ConvModule( 41 | chn, 42 | self.feat_channels, 43 | 3, 44 | stride=1, 45 | padding=1, 46 | conv_cfg=self.conv_cfg, 47 | norm_cfg=self.norm_cfg)) 48 | self.reg_convs.append( 49 | ConvModule( 50 | chn, 51 | self.feat_channels, 52 | 3, 53 | stride=1, 54 | padding=1, 55 | conv_cfg=self.conv_cfg, 56 | norm_cfg=self.norm_cfg)) 57 | self.retina_cls = nn.Conv2d( 58 | self.feat_channels, 59 | self.num_anchors * self.cls_out_channels, 60 | 3, 61 | padding=1) 62 | self.retina_reg = nn.Conv2d( 63 | self.feat_channels, self.num_anchors * 4, 3, padding=1) 64 | 65 | def init_weights(self): 66 | for m in self.cls_convs: 67 | normal_init(m.conv, std=0.01) 68 | for m in self.reg_convs: 69 | normal_init(m.conv, std=0.01) 70 | bias_cls = bias_init_with_prob(0.01) 71 | normal_init(self.retina_cls, std=0.01, bias=bias_cls) 72 | normal_init(self.retina_reg, std=0.01) 73 | 74 | def forward_single(self, x): 75 | cls_feat = x 76 | reg_feat = x 77 | for cls_conv in self.cls_convs: 78 | cls_feat = cls_conv(cls_feat) 79 | for reg_conv in self.reg_convs: 80 | reg_feat = reg_conv(reg_feat) 81 | cls_score = self.retina_cls(cls_feat) 82 | bbox_pred = self.retina_reg(reg_feat) 83 | return cls_score, bbox_pred 84 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | from .hrnet import HRNet 2 | from .resnetDCT import ResNetDCT, make_res_layer 3 | from .resnet import ResNet 4 | from .resnet_static import ResNetUpscaledStatic 5 | from .resnet_dynamic import ResNetUpscaledDynamic 6 | from .resnetDCT_dynamic import ResNetDCT_Dynamic 7 | from .resnext import ResNeXt 8 | from .ssd_vgg import SSDVGG 9 | 10 | __all__ = ['ResNet', 'ResNetDCT', 'ResNetUpscaledStatic', 'ResNetUpscaledDynamic', 11 | 'ResNetDCT_Dynamic', 'make_res_layer', 'ResNeXt', 'SSDVGG','HRNet'] 12 | 
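A hedged usage sketch for the backbone registry above; the constructor kwargs are typical mmdet ResNet-style assumptions, not necessarily the exact ResNetUpscaledStatic signature (the real values live in segmentation/configs/*_static_*_wofreeze.py):

from mmdet.models import build_backbone

# 'type' selects the class registered under BACKBONES; the remaining keys are
# forwarded to its constructor (assumed kwargs, shown for illustration only).
backbone = build_backbone(dict(
    type='ResNetUpscaledStatic',
    depth=50,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    style='pytorch'))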
-------------------------------------------------------------------------------- /segmentation/mmdet/models/backbones/gumbel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch.autograd import Variable 4 | 5 | class GumbleSoftmax(torch.nn.Module): 6 | def __init__(self, hard=False): 7 | super(GumbleSoftmax, self).__init__() 8 | self.hard = hard 9 | self.gpu = torch.cuda.is_available()  # fix: self.gpu was read in sample_gumbel() but never set 10 | def sample_gumbel(self, shape, eps=1e-10): 11 | """Sample from Gumbel(0, 1), i.e. -log(-log(U)) with U ~ Uniform(0, 1).""" 12 | noise = torch.rand(shape) 13 | noise.add_(eps).log_().neg_() 14 | noise.add_(eps).log_().neg_() 15 | if self.gpu: 16 | return noise.cuda() 17 | else: 18 | return noise 19 | 20 | def sample_gumbel_like(self, template_tensor, eps=1e-10): 21 | uniform_samples_tensor = template_tensor.clone().uniform_() 22 | gumble_samples_tensor = - torch.log(eps - torch.log(uniform_samples_tensor + eps)) 23 | return gumble_samples_tensor 24 | 25 | def gumbel_softmax_sample(self, logits, temperature): 26 | """Draw a sample from the Gumbel-Softmax distribution.""" 27 | dim = 2  # softmax over the class dim; the original `logits.size(2)` only worked when that dim happened to have size 2 28 | gumble_samples_tensor = self.sample_gumbel_like(logits.data) 29 | gumble_trick_log_prob_samples = logits + gumble_samples_tensor 30 | soft_samples = F.softmax(gumble_trick_log_prob_samples / temperature, dim) 31 | return soft_samples 32 | 33 | def gumbel_softmax(self, logits, temperature, hard=False): 34 | """Sample from the Gumbel-Softmax distribution and optionally discretize. 35 | Args: 36 | logits: [batch_size, n_class] unnormalized log-probs 37 | temperature: non-negative scalar 38 | hard: if True, take argmax, but differentiate w.r.t. soft sample y 39 | Returns: 40 | [batch_size, n_class] sample from the Gumbel-Softmax distribution. 41 | If hard=True, then the returned sample will be one-hot, otherwise it will 42 | be a probability distribution that sums to 1 across classes 43 | """ 44 | y = self.gumbel_softmax_sample(logits, temperature) 45 | if hard: 46 | # block layer 47 | # _, max_value_indexes = y.data.max(1, keepdim=True) 48 | # y_hard = logits.data.clone().zero_().scatter_(1, max_value_indexes, 1) 49 | # block channel 50 | _, max_value_indexes = y.data.max(2, keepdim=True) 51 | y_hard = logits.data.clone().zero_().scatter_(2, max_value_indexes, 1) 52 | y = Variable(y_hard - y.data) + y 53 | return y 54 | 55 | def forward(self, logits, temp=1, force_hard=False): 56 | # note: the original hard-coded temperature=1 below, silently ignoring `temp`; it is now passed through 57 | 58 | if self.training and not force_hard: 59 | return self.gumbel_softmax(logits, temperature=temp, hard=False) 60 | else: 61 | return self.gumbel_softmax(logits, temperature=temp, hard=True) -------------------------------------------------------------------------------- /segmentation/mmdet/models/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_head import BBoxHead 2 | from .convfc_bbox_head import ConvFCBBoxHead, SharedFCBBoxHead 3 | from .double_bbox_head import DoubleConvFCBBoxHead 4 | 5 | __all__ = [ 6 | 'BBoxHead', 'ConvFCBBoxHead', 'SharedFCBBoxHead', 'DoubleConvFCBBoxHead' 7 | ] 8 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/builder.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from mmdet.utils import build_from_cfg 4 | from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS, 5 | ROI_EXTRACTORS, SHARED_HEADS) 6 | 7 | 8 | def build(cfg, registry,
default_args=None): 9 | if isinstance(cfg, list): 10 | modules = [ 11 | build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg 12 | ] 13 | return nn.Sequential(*modules) 14 | else: 15 | return build_from_cfg(cfg, registry, default_args) 16 | 17 | 18 | def build_backbone(cfg): 19 | return build(cfg, BACKBONES) 20 | 21 | 22 | def build_neck(cfg): 23 | return build(cfg, NECKS) 24 | 25 | 26 | def build_roi_extractor(cfg): 27 | return build(cfg, ROI_EXTRACTORS) 28 | 29 | 30 | def build_shared_head(cfg): 31 | return build(cfg, SHARED_HEADS) 32 | 33 | 34 | def build_head(cfg): 35 | return build(cfg, HEADS) 36 | 37 | 38 | def build_loss(cfg): 39 | return build(cfg, LOSSES) 40 | 41 | 42 | def build_detector(cfg, train_cfg=None, test_cfg=None): 43 | return build(cfg, DETECTORS, dict(train_cfg=train_cfg, test_cfg=test_cfg)) 44 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseDetector 2 | from .cascade_rcnn import CascadeRCNN 3 | from .double_head_rcnn import DoubleHeadRCNN 4 | from .fast_rcnn import FastRCNN 5 | from .faster_rcnn import FasterRCNN 6 | from .fcos import FCOS 7 | from .grid_rcnn import GridRCNN 8 | from .htc import HybridTaskCascade 9 | from .mask_rcnn import MaskRCNN 10 | from .mask_scoring_rcnn import MaskScoringRCNN 11 | from .reppoints_detector import RepPointsDetector 12 | from .retinanet import RetinaNet 13 | from .rpn import RPN 14 | from .single_stage import SingleStageDetector 15 | from .two_stage import TwoStageDetector 16 | 17 | __all__ = [ 18 | 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN', 19 | 'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 20 | 'DoubleHeadRCNN', 'RetinaNet', 'FCOS', 'GridRCNN', 'MaskScoringRCNN', 21 | 'RepPointsDetector' 22 | ] 23 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/fast_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FastRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | bbox_roi_extractor, 11 | bbox_head, 12 | train_cfg, 13 | test_cfg, 14 | neck=None, 15 | shared_head=None, 16 | mask_roi_extractor=None, 17 | mask_head=None, 18 | pretrained=None): 19 | super(FastRCNN, self).__init__( 20 | backbone=backbone, 21 | neck=neck, 22 | shared_head=shared_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | pretrained=pretrained) 30 | 31 | def forward_test(self, imgs, img_metas, proposals, **kwargs): 32 | for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]: 33 | if not isinstance(var, list): 34 | raise TypeError('{} must be a list, but got {}'.format( 35 | name, type(var))) 36 | 37 | num_augs = len(imgs) 38 | if num_augs != len(img_metas): 39 | raise ValueError( 40 | 'num of augmentations ({}) != num of image meta ({})'.format( 41 | len(imgs), len(img_metas))) 42 | # TODO: remove the restriction of imgs_per_gpu == 1 when prepared 43 | imgs_per_gpu = imgs[0].size(0) 44 | assert imgs_per_gpu == 1 45 | 46 | if num_augs == 1: 47 | return self.simple_test(imgs[0], img_metas[0], proposals[0], 48 | **kwargs) 
49 | else: 50 | return self.aug_test(imgs, img_metas, proposals, **kwargs) 51 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/faster_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FasterRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | train_cfg, 14 | test_cfg, 15 | neck=None, 16 | shared_head=None, 17 | pretrained=None): 18 | super(FasterRCNN, self).__init__( 19 | backbone=backbone, 20 | neck=neck, 21 | shared_head=shared_head, 22 | rpn_head=rpn_head, 23 | bbox_roi_extractor=bbox_roi_extractor, 24 | bbox_head=bbox_head, 25 | train_cfg=train_cfg, 26 | test_cfg=test_cfg, 27 | pretrained=pretrained) 28 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/fcos.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class FCOS(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .two_stage import TwoStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class MaskRCNN(TwoStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | rpn_head, 11 | bbox_roi_extractor, 12 | bbox_head, 13 | mask_roi_extractor, 14 | mask_head, 15 | train_cfg, 16 | test_cfg, 17 | neck=None, 18 | shared_head=None, 19 | pretrained=None): 20 | super(MaskRCNN, self).__init__( 21 | backbone=backbone, 22 | neck=neck, 23 | shared_head=shared_head, 24 | rpn_head=rpn_head, 25 | bbox_roi_extractor=bbox_roi_extractor, 26 | bbox_head=bbox_head, 27 | mask_roi_extractor=mask_roi_extractor, 28 | mask_head=mask_head, 29 | train_cfg=train_cfg, 30 | test_cfg=test_cfg, 31 | pretrained=pretrained) 32 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/reppoints_detector.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms 4 | from ..registry import DETECTORS 5 | from .single_stage import SingleStageDetector 6 | 7 | 8 | @DETECTORS.register_module 9 | class RepPointsDetector(SingleStageDetector): 10 | """RepPoints: Point Set Representation for Object Detection. 11 | 12 | This detector is the implementation of: 13 | - RepPoints detector (https://arxiv.org/pdf/1904.11490) 14 | """ 15 | 16 | def __init__(self, 17 | backbone, 18 | neck, 19 | bbox_head, 20 | train_cfg=None, 21 | test_cfg=None, 22 | pretrained=None): 23 | super(RepPointsDetector, 24 | self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg, 25 | pretrained) 26 | 27 | def merge_aug_results(self, aug_bboxes, aug_scores, img_metas): 28 | """Merge augmented detection bboxes and scores. 
29 | 30 | Args: 31 | aug_bboxes (list[Tensor]): shape (n, 4*#class) 32 | aug_scores (list[Tensor] or None): shape (n, #class) 33 | img_metas (list[list[dict]]): meta info per augmentation, with 'img_shape', 'scale_factor' and 'flip' keys. 34 | 35 | Returns: 36 | tuple: (bboxes, scores) 37 | """ 38 | recovered_bboxes = [] 39 | for bboxes, img_info in zip(aug_bboxes, img_metas): 40 | img_shape = img_info[0]['img_shape'] 41 | scale_factor = img_info[0]['scale_factor'] 42 | flip = img_info[0]['flip'] 43 | bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip) 44 | recovered_bboxes.append(bboxes) 45 | bboxes = torch.cat(recovered_bboxes, dim=0) 46 | if aug_scores is None: 47 | return bboxes 48 | else: 49 | scores = torch.cat(aug_scores, dim=0) 50 | return bboxes, scores 51 | 52 | def aug_test(self, imgs, img_metas, rescale=False): 53 | # recompute feats to save memory 54 | feats = self.extract_feats(imgs) 55 | 56 | aug_bboxes = [] 57 | aug_scores = [] 58 | for x, img_meta in zip(feats, img_metas): 59 | # only one image in the batch 60 | outs = self.bbox_head(x) 61 | bbox_inputs = outs + (img_meta, self.test_cfg, False, False) 62 | det_bboxes, det_scores = self.bbox_head.get_bboxes(*bbox_inputs)[0] 63 | aug_bboxes.append(det_bboxes) 64 | aug_scores.append(det_scores) 65 | 66 | # after merging, bboxes will be rescaled to the original image size 67 | merged_bboxes, merged_scores = self.merge_aug_results( 68 | aug_bboxes, aug_scores, img_metas) 69 | det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores, 70 | self.test_cfg.score_thr, 71 | self.test_cfg.nms, 72 | self.test_cfg.max_per_img) 73 | 74 | if rescale: 75 | _det_bboxes = det_bboxes 76 | else: 77 | _det_bboxes = det_bboxes.clone() 78 | _det_bboxes[:, :4] *= img_metas[0][0]['scale_factor'] 79 | bbox_results = bbox2result(_det_bboxes, det_labels, 80 | self.bbox_head.num_classes) 81 | return bbox_results 82 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/detectors/retinanet.py: -------------------------------------------------------------------------------- 1 | from ..registry import DETECTORS 2 | from .single_stage import SingleStageDetector 3 | 4 | 5 | @DETECTORS.register_module 6 | class RetinaNet(SingleStageDetector): 7 | 8 | def __init__(self, 9 | backbone, 10 | neck, 11 | bbox_head, 12 | train_cfg=None, 13 | test_cfg=None, 14 | pretrained=None): 15 | super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg, 16 | test_cfg, pretrained) 17 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import Accuracy, accuracy 2 | from .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss 3 | from .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy, 4 | cross_entropy, mask_cross_entropy) 5 | from .focal_loss import FocalLoss, sigmoid_focal_loss 6 | from .ghm_loss import GHMC, GHMR 7 | from .iou_loss import BoundedIoULoss, IoULoss, bounded_iou_loss, iou_loss 8 | from .mse_loss import MSELoss, mse_loss 9 | from .smooth_l1_loss import SmoothL1Loss, smooth_l1_loss 10 | from .utils import reduce_loss, weight_reduce_loss, weighted_loss 11 | 12 | __all__ = [ 13 | 'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy', 14 | 'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss', 15 | 'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss', 16 | 'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss',
'bounded_iou_loss', 17 | 'IoULoss', 'BoundedIoULoss', 'GHMC', 'GHMR', 'reduce_loss', 18 | 'weight_reduce_loss', 'weighted_loss' 19 | ] 20 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | def accuracy(pred, target, topk=1): 5 | assert isinstance(topk, (int, tuple)) 6 | if isinstance(topk, int): 7 | topk = (topk, ) 8 | return_single = True 9 | else: 10 | return_single = False 11 | 12 | maxk = max(topk) 13 | _, pred_label = pred.topk(maxk, dim=1) 14 | pred_label = pred_label.t() 15 | correct = pred_label.eq(target.view(1, -1).expand_as(pred_label)) 16 | 17 | res = [] 18 | for k in topk: 19 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 20 | res.append(correct_k.mul_(100.0 / pred.size(0))) 21 | return res[0] if return_single else res 22 | 23 | 24 | class Accuracy(nn.Module): 25 | 26 | def __init__(self, topk=(1, )): 27 | super().__init__() 28 | self.topk = topk 29 | 30 | def forward(self, pred, target): 31 | return accuracy(pred, target, self.topk) 32 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from ..registry import LOSSES 6 | from .utils import weighted_loss 7 | 8 | 9 | @weighted_loss 10 | def balanced_l1_loss(pred, 11 | target, 12 | beta=1.0, 13 | alpha=0.5, 14 | gamma=1.5, 15 | reduction='mean'): 16 | assert beta > 0 17 | assert pred.size() == target.size() and target.numel() > 0 18 | 19 | diff = torch.abs(pred - target) 20 | b = np.e**(gamma / alpha) - 1 21 | loss = torch.where( 22 | diff < beta, alpha / b * 23 | (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff, 24 | gamma * diff + gamma / b - alpha * beta) 25 | 26 | return loss 27 | 28 | 29 | @LOSSES.register_module 30 | class BalancedL1Loss(nn.Module): 31 | """Balanced L1 Loss 32 | 33 | arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019) 34 | """ 35 | 36 | def __init__(self, 37 | alpha=0.5, 38 | gamma=1.5, 39 | beta=1.0, 40 | reduction='mean', 41 | loss_weight=1.0): 42 | super(BalancedL1Loss, self).__init__() 43 | self.alpha = alpha 44 | self.gamma = gamma 45 | self.beta = beta 46 | self.reduction = reduction 47 | self.loss_weight = loss_weight 48 | 49 | def forward(self, 50 | pred, 51 | target, 52 | weight=None, 53 | avg_factor=None, 54 | reduction_override=None, 55 | **kwargs): 56 | assert reduction_override in (None, 'none', 'mean', 'sum') 57 | reduction = ( 58 | reduction_override if reduction_override else self.reduction) 59 | loss_bbox = self.loss_weight * balanced_l1_loss( 60 | pred, 61 | target, 62 | weight, 63 | alpha=self.alpha, 64 | gamma=self.gamma, 65 | beta=self.beta, 66 | reduction=reduction, 67 | avg_factor=avg_factor, 68 | **kwargs) 69 | return loss_bbox 70 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from mmdet.ops import sigmoid_focal_loss as _sigmoid_focal_loss 5 | from ..registry import LOSSES 6 | from .utils import weight_reduce_loss 7 | 8 | 9 | # This method is only for debugging 10 | def 
py_sigmoid_focal_loss(pred, 11 | target, 12 | weight=None, 13 | gamma=2.0, 14 | alpha=0.25, 15 | reduction='mean', 16 | avg_factor=None): 17 | pred_sigmoid = pred.sigmoid() 18 | target = target.type_as(pred) 19 | pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target) 20 | focal_weight = (alpha * target + (1 - alpha) * 21 | (1 - target)) * pt.pow(gamma) 22 | loss = F.binary_cross_entropy_with_logits( 23 | pred, target, reduction='none') * focal_weight 24 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 25 | return loss 26 | 27 | 28 | def sigmoid_focal_loss(pred, 29 | target, 30 | weight=None, 31 | gamma=2.0, 32 | alpha=0.25, 33 | reduction='mean', 34 | avg_factor=None): 35 | # Function.apply does not accept keyword arguments, so the decorator 36 | # "weighted_loss" is not applicable 37 | loss = _sigmoid_focal_loss(pred, target, gamma, alpha) 38 | # TODO: find a proper way to handle the shape of weight 39 | if weight is not None: 40 | weight = weight.view(-1, 1) 41 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 42 | return loss 43 | 44 | 45 | @LOSSES.register_module 46 | class FocalLoss(nn.Module): 47 | 48 | def __init__(self, 49 | use_sigmoid=True, 50 | gamma=2.0, 51 | alpha=0.25, 52 | reduction='mean', 53 | loss_weight=1.0): 54 | super(FocalLoss, self).__init__() 55 | assert use_sigmoid is True, 'Only sigmoid focal loss supported now.' 56 | self.use_sigmoid = use_sigmoid 57 | self.gamma = gamma 58 | self.alpha = alpha 59 | self.reduction = reduction 60 | self.loss_weight = loss_weight 61 | 62 | def forward(self, 63 | pred, 64 | target, 65 | weight=None, 66 | avg_factor=None, 67 | reduction_override=None): 68 | assert reduction_override in (None, 'none', 'mean', 'sum') 69 | reduction = ( 70 | reduction_override if reduction_override else self.reduction) 71 | if self.use_sigmoid: 72 | loss_cls = self.loss_weight * sigmoid_focal_loss( 73 | pred, 74 | target, 75 | weight, 76 | gamma=self.gamma, 77 | alpha=self.alpha, 78 | reduction=reduction, 79 | avg_factor=avg_factor) 80 | else: 81 | raise NotImplementedError 82 | return loss_cls 83 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/mse_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | mse_loss = weighted_loss(F.mse_loss) 8 | 9 | 10 | @LOSSES.register_module 11 | class MSELoss(nn.Module): 12 | 13 | def __init__(self, reduction='mean', loss_weight=1.0): 14 | super().__init__() 15 | self.reduction = reduction 16 | self.loss_weight = loss_weight 17 | 18 | def forward(self, pred, target, weight=None, avg_factor=None): 19 | loss = self.loss_weight * mse_loss( 20 | pred, 21 | target, 22 | weight, 23 | reduction=self.reduction, 24 | avg_factor=avg_factor) 25 | return loss 26 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from ..registry import LOSSES 5 | from .utils import weighted_loss 6 | 7 | 8 | @weighted_loss 9 | def smooth_l1_loss(pred, target, beta=1.0): 10 | assert beta > 0 11 | assert pred.size() == target.size() and target.numel() > 0 12 | diff = torch.abs(pred - target) 13 | loss = torch.where(diff < beta, 0.5 * diff * diff / beta, 14 
| diff - 0.5 * beta) 15 | return loss 16 | 17 | 18 | @LOSSES.register_module 19 | class SmoothL1Loss(nn.Module): 20 | 21 | def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0): 22 | super(SmoothL1Loss, self).__init__() 23 | self.beta = beta 24 | self.reduction = reduction 25 | self.loss_weight = loss_weight 26 | 27 | def forward(self, 28 | pred, 29 | target, 30 | weight=None, 31 | avg_factor=None, 32 | reduction_override=None, 33 | **kwargs): 34 | assert reduction_override in (None, 'none', 'mean', 'sum') 35 | reduction = ( 36 | reduction_override if reduction_override else self.reduction) 37 | loss_bbox = self.loss_weight * smooth_l1_loss( 38 | pred, 39 | target, 40 | weight, 41 | beta=self.beta, 42 | reduction=reduction, 43 | avg_factor=avg_factor, 44 | **kwargs) 45 | return loss_bbox 46 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/losses/utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch.nn.functional as F 4 | 5 | 6 | def reduce_loss(loss, reduction): 7 | """Reduce loss as specified. 8 | 9 | Args: 10 | loss (Tensor): Elementwise loss tensor. 11 | reduction (str): Options are "none", "mean" and "sum". 12 | 13 | Return: 14 | Tensor: Reduced loss tensor. 15 | """ 16 | reduction_enum = F._Reduction.get_enum(reduction) 17 | # none: 0, elementwise_mean:1, sum: 2 18 | if reduction_enum == 0: 19 | return loss 20 | elif reduction_enum == 1: 21 | return loss.mean() 22 | elif reduction_enum == 2: 23 | return loss.sum() 24 | 25 | 26 | def weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None): 27 | """Apply element-wise weight and reduce loss. 28 | 29 | Args: 30 | loss (Tensor): Element-wise loss. 31 | weight (Tensor): Element-wise weights. 32 | reduction (str): Same as built-in losses of PyTorch. 33 | avg_factor (float): Average factor used when computing the mean of losses. 34 | 35 | Returns: 36 | Tensor: Processed loss values. 37 | """ 38 | # if weight is specified, apply element-wise weight 39 | if weight is not None: 40 | loss = loss * weight 41 | 42 | # if avg_factor is not specified, just reduce the loss 43 | if avg_factor is None: 44 | loss = reduce_loss(loss, reduction) 45 | else: 46 | # if reduction is mean, then average the loss by avg_factor 47 | if reduction == 'mean': 48 | loss = loss.sum() / avg_factor 49 | # if reduction is 'none', then do nothing, otherwise raise an error 50 | elif reduction != 'none': 51 | raise ValueError('avg_factor cannot be used with reduction="sum"') 52 | return loss 53 | 54 | 55 | def weighted_loss(loss_func): 56 | """Create a weighted version of a given loss function. 57 | 58 | To use this decorator, the loss function must have the signature like 59 | `loss_func(pred, target, **kwargs)`. The function only needs to compute 60 | element-wise loss without any reduction. This decorator will add weight 61 | and reduction arguments to the function. The decorated function will have 62 | the signature like `loss_func(pred, target, weight=None, reduction='mean', 63 | avg_factor=None, **kwargs)`. 64 | 65 | :Example: 66 | 67 | >>> @weighted_loss 68 | >>> def l1_loss(pred, target): 69 | >>> return (pred - target).abs() 70 | 71 | >>> pred = torch.Tensor([0, 2, 3]) 72 | >>> target = torch.Tensor([1, 1, 1]) 73 | >>> weight = torch.Tensor([1, 0, 1]) 74 | 75 | >>> l1_loss(pred, target) 76 | tensor(1.3333) 77 | >>> l1_loss(pred, target, weight) 78 | tensor(1.)
79 | >>> l1_loss(pred, target, reduction='none') 80 | tensor([1., 1., 2.]) 81 | >>> l1_loss(pred, target, weight, avg_factor=2) 82 | tensor(1.5000) 83 | """ 84 | 85 | @functools.wraps(loss_func) 86 | def wrapper(pred, 87 | target, 88 | weight=None, 89 | reduction='mean', 90 | avg_factor=None, 91 | **kwargs): 92 | # get element-wise loss 93 | loss = loss_func(pred, target, **kwargs) 94 | loss = weight_reduce_loss(loss, weight, reduction, avg_factor) 95 | return loss 96 | 97 | return wrapper 98 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/mask_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcn_mask_head import FCNMaskHead 2 | from .fused_semantic_head import FusedSemanticHead 3 | from .grid_head import GridHead 4 | from .htc_mask_head import HTCMaskHead 5 | from .maskiou_head import MaskIoUHead 6 | 7 | __all__ = [ 8 | 'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead', 9 | 'MaskIoUHead' 10 | ] 11 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/mask_heads/htc_mask_head.py: -------------------------------------------------------------------------------- 1 | from ..registry import HEADS 2 | from ..utils import ConvModule 3 | from .fcn_mask_head import FCNMaskHead 4 | 5 | 6 | @HEADS.register_module 7 | class HTCMaskHead(FCNMaskHead): 8 | 9 | def __init__(self, *args, **kwargs): 10 | super(HTCMaskHead, self).__init__(*args, **kwargs) 11 | self.conv_res = ConvModule( 12 | self.conv_out_channels, 13 | self.conv_out_channels, 14 | 1, 15 | conv_cfg=self.conv_cfg, 16 | norm_cfg=self.norm_cfg) 17 | 18 | def init_weights(self): 19 | super(HTCMaskHead, self).init_weights() 20 | self.conv_res.init_weights() 21 | 22 | def forward(self, x, res_feat=None, return_logits=True, return_feat=True): 23 | if res_feat is not None: 24 | res_feat = self.conv_res(res_feat) 25 | x = x + res_feat 26 | for conv in self.convs: 27 | x = conv(x) 28 | res_feat = x 29 | outs = [] 30 | if return_logits: 31 | x = self.upsample(x) 32 | if self.upsample_method == 'deconv': 33 | x = self.relu(x) 34 | mask_pred = self.conv_logits(x) 35 | outs.append(mask_pred) 36 | if return_feat: 37 | outs.append(res_feat) 38 | return outs if len(outs) > 1 else outs[0] 39 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .bfp import BFP 2 | from .fpn import FPN 3 | from .hrfpn import HRFPN 4 | 5 | __all__ = ['FPN', 'BFP', 'HRFPN'] 6 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/necks/hrfpn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn.weight_init import caffe2_xavier_init 5 | from torch.utils.checkpoint import checkpoint 6 | 7 | from ..registry import NECKS 8 | from ..utils import ConvModule 9 | 10 | 11 | @NECKS.register_module 12 | class HRFPN(nn.Module): 13 | """HRFPN (High Resolution Feature Pyramids) 14 | 15 | arXiv: https://arxiv.org/abs/1904.04514 16 | 17 | Args: 18 | in_channels (list): number of channels for each branch. 19 | out_channels (int): output channels of feature pyramids. 20 | num_outs (int): number of output stages.
21 | pooling_type (str): pooling for generating feature pyramids 22 | from {MAX, AVG}. 23 | conv_cfg (dict): dictionary to construct and config conv layer. 24 | norm_cfg (dict): dictionary to construct and config norm layer. 25 | with_cp (bool): Use checkpoint or not. Using checkpoint will save some 26 | memory while slowing down the training speed. 27 | stride (int): stride of 3x3 convolutional layers 28 | """ 29 | 30 | def __init__(self, 31 | in_channels, 32 | out_channels, 33 | num_outs=5, 34 | pooling_type='AVG', 35 | conv_cfg=None, 36 | norm_cfg=None, 37 | with_cp=False, 38 | stride=1): 39 | super(HRFPN, self).__init__() 40 | assert isinstance(in_channels, list) 41 | self.in_channels = in_channels 42 | self.out_channels = out_channels 43 | self.num_ins = len(in_channels) 44 | self.num_outs = num_outs 45 | self.with_cp = with_cp 46 | self.conv_cfg = conv_cfg 47 | self.norm_cfg = norm_cfg 48 | 49 | self.reduction_conv = ConvModule( 50 | sum(in_channels), 51 | out_channels, 52 | kernel_size=1, 53 | conv_cfg=self.conv_cfg, 54 | activation=None) 55 | 56 | self.fpn_convs = nn.ModuleList() 57 | for i in range(self.num_outs): 58 | self.fpn_convs.append( 59 | ConvModule( 60 | out_channels, 61 | out_channels, 62 | kernel_size=3, 63 | padding=1, 64 | stride=stride, 65 | conv_cfg=self.conv_cfg, 66 | activation=None)) 67 | 68 | if pooling_type == 'MAX': 69 | self.pooling = F.max_pool2d 70 | else: 71 | self.pooling = F.avg_pool2d 72 | 73 | def init_weights(self): 74 | for m in self.modules(): 75 | if isinstance(m, nn.Conv2d): 76 | caffe2_xavier_init(m) 77 | 78 | def forward(self, inputs): 79 | assert len(inputs) == self.num_ins 80 | outs = [inputs[0]] 81 | for i in range(1, self.num_ins): 82 | outs.append( 83 | F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear')) 84 | out = torch.cat(outs, dim=1) 85 | if out.requires_grad and self.with_cp: 86 | out = checkpoint(self.reduction_conv, out) 87 | else: 88 | out = self.reduction_conv(out) 89 | outs = [out] 90 | for i in range(1, self.num_outs): 91 | outs.append(self.pooling(out, kernel_size=2**i, stride=2**i)) 92 | outputs = [] 93 | 94 | for i in range(self.num_outs): 95 | if outs[i].requires_grad and self.with_cp: 96 | tmp_out = checkpoint(self.fpn_convs[i], outs[i]) 97 | else: 98 | tmp_out = self.fpn_convs[i](outs[i]) 99 | outputs.append(tmp_out) 100 | return tuple(outputs) 101 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/plugins/__init__.py: -------------------------------------------------------------------------------- 1 | from .generalized_attention import GeneralizedAttention 2 | from .non_local import NonLocal2D 3 | 4 | __all__ = ['NonLocal2D', 'GeneralizedAttention'] 5 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmdet.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | ROI_EXTRACTORS = Registry('roi_extractor') 6 | SHARED_HEADS = Registry('shared_head') 7 | HEADS = Registry('head') 8 | LOSSES = Registry('loss') 9 | DETECTORS = Registry('detector') 10 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/roi_extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from .single_level import SingleRoIExtractor 2 | 3 | __all__ = ['SingleRoIExtractor'] 4 | 
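A small shape-level sketch of the HRFPN neck defined above; the in_channels are the typical HRNet-W32 branch widths, assumed here purely for illustration:

import torch
from mmdet.models.necks import HRFPN

neck = HRFPN(in_channels=[32, 64, 128, 256], out_channels=256, num_outs=5)
neck.init_weights()
# Four branches with 2x scale gaps, highest resolution first.
feats = [torch.randn(1, c, 64 // 2**i, 64 // 2**i)
         for i, c in enumerate([32, 64, 128, 256])]
outs = neck(feats)
# Branches are upsampled to the top resolution, concatenated (480 channels),
# reduced to 256 by a 1x1 conv, then average-pooled into 5 pyramid levels.
print([tuple(o.shape) for o in outs])
# -> [(1, 256, 64, 64), (1, 256, 32, 32), (1, 256, 16, 16), (1, 256, 8, 8), (1, 256, 4, 4)]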
-------------------------------------------------------------------------------- /segmentation/mmdet/models/shared_heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .res_layer import ResLayer 2 | 3 | __all__ = ['ResLayer'] 4 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/shared_heads/res_layer.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import torch.nn as nn 4 | from mmcv.cnn import constant_init, kaiming_init 5 | from mmcv.runner import load_checkpoint 6 | 7 | from mmdet.core import auto_fp16 8 | from ..backbones import ResNet, make_res_layer  # fix: ResNet.arch_settings is used below; importing only ResNetDCT left ResNet undefined 9 | from ..registry import SHARED_HEADS 10 | 11 | 12 | @SHARED_HEADS.register_module 13 | class ResLayer(nn.Module): 14 | 15 | def __init__(self, 16 | depth, 17 | stage=3, 18 | stride=2, 19 | dilation=1, 20 | style='pytorch', 21 | norm_cfg=dict(type='BN', requires_grad=True), 22 | norm_eval=True, 23 | with_cp=False, 24 | dcn=None): 25 | super(ResLayer, self).__init__() 26 | self.norm_eval = norm_eval 27 | self.norm_cfg = norm_cfg 28 | self.stage = stage 29 | self.fp16_enabled = False 30 | block, stage_blocks = ResNet.arch_settings[depth] 31 | stage_block = stage_blocks[stage] 32 | planes = 64 * 2**stage 33 | inplanes = 64 * 2**(stage - 1) * block.expansion 34 | 35 | res_layer = make_res_layer( 36 | block, 37 | inplanes, 38 | planes, 39 | stage_block, 40 | stride=stride, 41 | dilation=dilation, 42 | style=style, 43 | with_cp=with_cp, 44 | norm_cfg=self.norm_cfg, 45 | dcn=dcn) 46 | self.add_module('layer{}'.format(stage + 1), res_layer) 47 | 48 | def init_weights(self, pretrained=None): 49 | if isinstance(pretrained, str): 50 | logger = logging.getLogger() 51 | load_checkpoint(self, pretrained, strict=False, logger=logger) 52 | elif pretrained is None: 53 | for m in self.modules(): 54 | if isinstance(m, nn.Conv2d): 55 | kaiming_init(m) 56 | elif isinstance(m, nn.BatchNorm2d): 57 | constant_init(m, 1) 58 | else: 59 | raise TypeError('pretrained must be a str or None') 60 | 61 | @auto_fp16() 62 | def forward(self, x): 63 | res_layer = getattr(self, 'layer{}'.format(self.stage + 1)) 64 | out = res_layer(x) 65 | return out 66 | 67 | def train(self, mode=True): 68 | super(ResLayer, self).train(mode) 69 | if self.norm_eval: 70 | for m in self.modules(): 71 | if isinstance(m, nn.BatchNorm2d): 72 | m.eval() 73 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_module import ConvModule, build_conv_layer 2 | from .conv_ws import ConvWS2d, conv_ws_2d 3 | from .norm import build_norm_layer 4 | from .scale import Scale 5 | from .weight_init import (bias_init_with_prob, kaiming_init, normal_init, 6 | uniform_init, xavier_init) 7 | 8 | __all__ = [ 9 | 'conv_ws_2d', 'ConvWS2d', 'build_conv_layer', 'ConvModule', 10 | 'build_norm_layer', 'xavier_init', 'normal_init', 'uniform_init', 11 | 'kaiming_init', 'bias_init_with_prob', 'Scale' 12 | ] 13 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/utils/conv_ws.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | def conv_ws_2d(input, 6 | weight, 7 | bias=None, 8 | stride=1, 9 | padding=0, 10 |
dilation=1, 11 | groups=1, 12 | eps=1e-5): 13 | c_in = weight.size(0) 14 | weight_flat = weight.view(c_in, -1) 15 | mean = weight_flat.mean(dim=1, keepdim=True).view(c_in, 1, 1, 1) 16 | std = weight_flat.std(dim=1, keepdim=True).view(c_in, 1, 1, 1) 17 | weight = (weight - mean) / (std + eps) 18 | return F.conv2d(input, weight, bias, stride, padding, dilation, groups) 19 | 20 | 21 | class ConvWS2d(nn.Conv2d): 22 | 23 | def __init__(self, 24 | in_channels, 25 | out_channels, 26 | kernel_size, 27 | stride=1, 28 | padding=0, 29 | dilation=1, 30 | groups=1, 31 | bias=True, 32 | eps=1e-5): 33 | super(ConvWS2d, self).__init__( 34 | in_channels, 35 | out_channels, 36 | kernel_size, 37 | stride=stride, 38 | padding=padding, 39 | dilation=dilation, 40 | groups=groups, 41 | bias=bias) 42 | self.eps = eps 43 | 44 | def forward(self, x): 45 | return conv_ws_2d(x, self.weight, self.bias, self.stride, self.padding, 46 | self.dilation, self.groups, self.eps) 47 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/utils/norm.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | norm_cfg = { 4 | # format: layer_type: (abbreviation, module) 5 | 'BN': ('bn', nn.BatchNorm2d), 6 | 'SyncBN': ('bn', nn.SyncBatchNorm), 7 | 'GN': ('gn', nn.GroupNorm), 8 | # and potentially 'SN' 9 | } 10 | 11 | 12 | def build_norm_layer(cfg, num_features, postfix=''): 13 | """ Build normalization layer 14 | 15 | Args: 16 | cfg (dict): cfg should contain: 17 | type (str): identify norm layer type. 18 | layer args: args needed to instantiate a norm layer. 19 | requires_grad (bool): [optional] whether the layer parameters require gradient updates 20 | num_features (int): number of channels from input. 21 | postfix (int, str): appended to the norm abbreviation to 22 | create the layer name.
27 | """ 28 | assert isinstance(cfg, dict) and 'type' in cfg 29 | cfg_ = cfg.copy() 30 | 31 | layer_type = cfg_.pop('type') 32 | if layer_type not in norm_cfg: 33 | raise KeyError('Unrecognized norm type {}'.format(layer_type)) 34 | else: 35 | abbr, norm_layer = norm_cfg[layer_type] 36 | if norm_layer is None: 37 | raise NotImplementedError 38 | 39 | assert isinstance(postfix, (int, str)) 40 | name = abbr + str(postfix) 41 | 42 | requires_grad = cfg_.pop('requires_grad', True) 43 | cfg_.setdefault('eps', 1e-5) 44 | if layer_type != 'GN': 45 | layer = norm_layer(num_features, **cfg_) 46 | if layer_type == 'SyncBN': 47 | layer._specify_ddp_gpu_num(1) 48 | else: 49 | assert 'num_groups' in cfg_ 50 | layer = norm_layer(num_channels=num_features, **cfg_) 51 | 52 | for param in layer.parameters(): 53 | param.requires_grad = requires_grad 54 | 55 | return name, layer 56 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/utils/scale.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Scale(nn.Module):  # learnable scalar multiplier 6 | 7 | def __init__(self, scale=1.0): 8 | super(Scale, self).__init__() 9 | self.scale = nn.Parameter(torch.tensor(scale, dtype=torch.float)) 10 | 11 | def forward(self, x): 12 | return x * self.scale 13 | -------------------------------------------------------------------------------- /segmentation/mmdet/models/utils/weight_init.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | def xavier_init(module, gain=1, bias=0, distribution='normal'): 6 | assert distribution in ['uniform', 'normal'] 7 | if distribution == 'uniform': 8 | nn.init.xavier_uniform_(module.weight, gain=gain) 9 | else: 10 | nn.init.xavier_normal_(module.weight, gain=gain) 11 | if hasattr(module, 'bias') and module.bias is not None: 12 | nn.init.constant_(module.bias, bias) 13 | 14 | 15 | def normal_init(module, mean=0, std=1, bias=0): 16 | nn.init.normal_(module.weight, mean, std) 17 | if hasattr(module, 'bias') and module.bias is not None: 18 | nn.init.constant_(module.bias, bias) 19 | 20 | 21 | def uniform_init(module, a=0, b=1, bias=0): 22 | nn.init.uniform_(module.weight, a, b) 23 | if hasattr(module, 'bias') and module.bias is not None: 24 | nn.init.constant_(module.bias, bias) 25 | 26 | 27 | def kaiming_init(module, 28 | mode='fan_out', 29 | nonlinearity='relu', 30 | bias=0, 31 | distribution='normal'): 32 | assert distribution in ['uniform', 'normal'] 33 | if distribution == 'uniform': 34 | nn.init.kaiming_uniform_( 35 | module.weight, mode=mode, nonlinearity=nonlinearity) 36 | else: 37 | nn.init.kaiming_normal_( 38 | module.weight, mode=mode, nonlinearity=nonlinearity) 39 | if hasattr(module, 'bias') and module.bias is not None: 40 | nn.init.constant_(module.bias, bias) 41 | 42 | 43 | def bias_init_with_prob(prior_prob): 44 | """Initialize conv/fc bias value according to a given probability.""" 45 | bias_init = float(-np.log((1 - prior_prob) / prior_prob)) 46 | return bias_init 47 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from .context_block import ContextBlock 2 | from .dcn import (DeformConv, DeformConvPack, DeformRoIPooling, 3 | DeformRoIPoolingPack, ModulatedDeformConv, 4 | ModulatedDeformConvPack, ModulatedDeformRoIPoolingPack,
5 | deform_conv, deform_roi_pooling, modulated_deform_conv) 6 | from .masked_conv import MaskedConv2d 7 | from .nms import nms, soft_nms 8 | from .roi_align import RoIAlign, roi_align 9 | from .roi_pool import RoIPool, roi_pool 10 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 11 | 12 | __all__ = [ 13 | 'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'RoIPool', 'roi_pool', 14 | 'DeformConv', 'DeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 15 | 'ModulatedDeformRoIPoolingPack', 'ModulatedDeformConv', 16 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 17 | 'deform_roi_pooling', 'SigmoidFocalLoss', 'sigmoid_focal_loss', 18 | 'MaskedConv2d', 'ContextBlock' 19 | ] 20 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/dcn/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | from .deform_pool import (DeformRoIPooling, DeformRoIPoolingPack, 5 | ModulatedDeformRoIPoolingPack, deform_roi_pooling) 6 | 7 | __all__ = [ 8 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 9 | 'ModulatedDeformConvPack', 'DeformRoIPooling', 'DeformRoIPoolingPack', 10 | 'ModulatedDeformRoIPoolingPack', 'deform_conv', 'modulated_deform_conv', 11 | 'deform_roi_pooling' 12 | ] 13 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/masked_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .masked_conv import MaskedConv2d, masked_conv2d 2 | 3 | __all__ = ['masked_conv2d', 'MaskedConv2d'] 4 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/masked_conv/masked_conv.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Function 6 | from torch.autograd.function import once_differentiable 7 | from torch.nn.modules.utils import _pair 8 | 9 | from . 
import masked_conv2d_cuda 10 | 11 | 12 | class MaskedConv2dFunction(Function): 13 | 14 | @staticmethod 15 | def forward(ctx, features, mask, weight, bias, padding=0, stride=1): 16 | assert mask.dim() == 3 and mask.size(0) == 1 17 | assert features.dim() == 4 and features.size(0) == 1 18 | assert features.size()[2:] == mask.size()[1:] 19 | pad_h, pad_w = _pair(padding) 20 | stride_h, stride_w = _pair(stride) 21 | if stride_h != 1 or stride_w != 1: 22 | raise ValueError( 23 | 'Stride must be 1 in masked_conv2d currently.') 24 | if not features.is_cuda: 25 | raise NotImplementedError 26 | 27 | out_channel, in_channel, kernel_h, kernel_w = weight.size() 28 | 29 | batch_size = features.size(0) 30 | out_h = int( 31 | math.floor((features.size(2) + 2 * pad_h - 32 | (kernel_h - 1) - 1) / stride_h + 1)) 33 | out_w = int( 34 | math.floor((features.size(3) + 2 * pad_w - 35 | (kernel_w - 1) - 1) / stride_w + 1)) 36 | mask_inds = torch.nonzero(mask[0] > 0) 37 | output = features.new_zeros(batch_size, out_channel, out_h, out_w) 38 | if mask_inds.numel() > 0: 39 | mask_h_idx = mask_inds[:, 0].contiguous() 40 | mask_w_idx = mask_inds[:, 1].contiguous() 41 | data_col = features.new_zeros(in_channel * kernel_h * kernel_w, 42 | mask_inds.size(0)) 43 | masked_conv2d_cuda.masked_im2col_forward(features, mask_h_idx, 44 | mask_w_idx, kernel_h, 45 | kernel_w, pad_h, pad_w, 46 | data_col) 47 | 48 | masked_output = torch.addmm(1, bias[:, None], 1, 49 | weight.view(out_channel, -1), data_col) 50 | masked_conv2d_cuda.masked_col2im_forward(masked_output, mask_h_idx, 51 | mask_w_idx, out_h, out_w, 52 | out_channel, output) 53 | return output 54 | 55 | @staticmethod 56 | @once_differentiable 57 | def backward(ctx, grad_output): 58 | return (None, ) * 5 59 | 60 | 61 | masked_conv2d = MaskedConv2dFunction.apply 62 | 63 | 64 | class MaskedConv2d(nn.Conv2d): 65 | """A MaskedConv2d which inherits the official Conv2d. 66 | 67 | The masked forward doesn't implement the backward function and only 68 | supports stride 1 currently.
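Example (a minimal sketch; shapes and the CUDA device are illustrative):
>>> conv = MaskedConv2d(16, 32, 3, padding=1).cuda()
>>> x = torch.randn(1, 16, 40, 40, device='cuda')
>>> mask = (torch.rand(1, 40, 40, device='cuda') > 0.5).float()
>>> out = conv(x, mask)  # computes outputs only at the masked positions
>>> dense = conv(x)  # mask=None falls back to the ordinary Conv2d forward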
69 | """ 70 | 71 | def __init__(self, 72 | in_channels, 73 | out_channels, 74 | kernel_size, 75 | stride=1, 76 | padding=0, 77 | dilation=1, 78 | groups=1, 79 | bias=True): 80 | super(MaskedConv2d, 81 | self).__init__(in_channels, out_channels, kernel_size, stride, 82 | padding, dilation, groups, bias) 83 | 84 | def forward(self, input, mask=None): 85 | if mask is None: # fallback to the normal Conv2d 86 | return super(MaskedConv2d, self).forward(input) 87 | else: 88 | return masked_conv2d(input, mask, self.weight, self.bias, 89 | self.padding) 90 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/masked_conv/src/masked_conv2d_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int MaskedIm2colForwardLaucher(const at::Tensor im, const int height, 7 | const int width, const int channels, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w, 10 | const at::Tensor mask_h_idx, 11 | const at::Tensor mask_w_idx, const int mask_cnt, 12 | at::Tensor col); 13 | 14 | int MaskedCol2imForwardLaucher(const at::Tensor col, const int height, 15 | const int width, const int channels, 16 | const at::Tensor mask_h_idx, 17 | const at::Tensor mask_w_idx, const int mask_cnt, 18 | at::Tensor im); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int masked_im2col_forward_cuda(const at::Tensor im, const at::Tensor mask_h_idx, 28 | const at::Tensor mask_w_idx, const int kernel_h, 29 | const int kernel_w, const int pad_h, 30 | const int pad_w, at::Tensor col) { 31 | CHECK_INPUT(im); 32 | CHECK_INPUT(mask_h_idx); 33 | CHECK_INPUT(mask_w_idx); 34 | CHECK_INPUT(col); 35 | // im: (n, ic, h, w), kernel size (kh, kw) 36 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 37 | 38 | int channels = im.size(1); 39 | int height = im.size(2); 40 | int width = im.size(3); 41 | int mask_cnt = mask_h_idx.size(0); 42 | 43 | MaskedIm2colForwardLaucher(im, height, width, channels, kernel_h, kernel_w, 44 | pad_h, pad_w, mask_h_idx, mask_w_idx, mask_cnt, 45 | col); 46 | 47 | return 1; 48 | } 49 | 50 | int masked_col2im_forward_cuda(const at::Tensor col, 51 | const at::Tensor mask_h_idx, 52 | const at::Tensor mask_w_idx, int height, 53 | int width, int channels, at::Tensor im) { 54 | CHECK_INPUT(col); 55 | CHECK_INPUT(mask_h_idx); 56 | CHECK_INPUT(mask_w_idx); 57 | CHECK_INPUT(im); 58 | // im: (n, ic, h, w), kernel size (kh, kw) 59 | // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) 60 | 61 | int mask_cnt = mask_h_idx.size(0); 62 | 63 | MaskedCol2imForwardLaucher(col, height, width, channels, mask_h_idx, 64 | mask_w_idx, mask_cnt, im); 65 | 66 | return 1; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("masked_im2col_forward", &masked_im2col_forward_cuda, 71 | "masked_im2col forward (CUDA)"); 72 | m.def("masked_col2im_forward", &masked_col2im_forward_cuda, 73 | "masked_col2im forward (CUDA)"); 74 | } -------------------------------------------------------------------------------- /segmentation/mmdet/ops/nms/__init__.py: -------------------------------------------------------------------------------- 1 | from .nms_wrapper import nms, soft_nms 2 | 3 | __all__ = ['nms', 'soft_nms'] 4 |
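A minimal usage sketch for the wrappers defined below (box coordinates and scores are illustrative; each row is [x1, y1, x2, y2, score]):

import numpy as np
from mmdet.ops import nms, soft_nms

dets = np.array([[10, 10, 50, 50, 0.9],
                 [12, 12, 52, 52, 0.8],
                 [100, 100, 140, 140, 0.7]], dtype=np.float32)
kept, inds = nms(dets, iou_thr=0.5)  # suppresses the second, heavily overlapping box
new_dets, keep = soft_nms(dets, iou_thr=0.5, method='gaussian')  # decays scores instead of discarding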
-------------------------------------------------------------------------------- /segmentation/mmdet/ops/nms/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from . import nms_cpu, nms_cuda 5 | from .soft_nms_cpu import soft_nms_cpu 6 | 7 | 8 | def nms(dets, iou_thr, device_id=None): 9 | """Dispatch to either CPU or GPU NMS implementations. 10 | 11 | The input can be either a torch tensor or a numpy array. GPU NMS is used 12 | if the input is a gpu tensor or, for a numpy input, if device_id is 13 | specified; otherwise CPU NMS is used. The returned type always matches the input. 14 | 15 | Arguments: 16 | dets (torch.Tensor or np.ndarray): bboxes with scores. 17 | iou_thr (float): IoU threshold for NMS. 18 | device_id (int, optional): when `dets` is a numpy array, if `device_id` 19 | is None, then cpu nms is used, otherwise gpu_nms will be used. 20 | 21 | Returns: 22 | tuple: kept bboxes and indices, which always have the same data type 23 | as the input. 24 | """ 25 | # convert dets (tensor or numpy array) to tensor 26 | if isinstance(dets, torch.Tensor): 27 | is_numpy = False 28 | dets_th = dets 29 | elif isinstance(dets, np.ndarray): 30 | is_numpy = True 31 | device = 'cpu' if device_id is None else 'cuda:{}'.format(device_id) 32 | dets_th = torch.from_numpy(dets).to(device) 33 | else: 34 | raise TypeError( 35 | 'dets must be either a Tensor or numpy array, but got {}'.format( 36 | type(dets))) 37 | 38 | # execute cpu or cuda nms 39 | if dets_th.shape[0] == 0: 40 | inds = dets_th.new_zeros(0, dtype=torch.long) 41 | else: 42 | if dets_th.is_cuda: 43 | inds = nms_cuda.nms(dets_th, iou_thr) 44 | else: 45 | inds = nms_cpu.nms(dets_th, iou_thr) 46 | 47 | if is_numpy: 48 | inds = inds.cpu().numpy() 49 | return dets[inds, :], inds 50 | 51 | 52 | def soft_nms(dets, iou_thr, method='linear', sigma=0.5, min_score=1e-3): 53 | if isinstance(dets, torch.Tensor): 54 | is_tensor = True 55 | dets_np = dets.detach().cpu().numpy() 56 | elif isinstance(dets, np.ndarray): 57 | is_tensor = False 58 | dets_np = dets 59 | else: 60 | raise TypeError( 61 | 'dets must be either a Tensor or numpy array, but got {}'.format( 62 | type(dets))) 63 | 64 | method_codes = {'linear': 1, 'gaussian': 2} 65 | if method not in method_codes: 66 | raise ValueError('Invalid method for SoftNMS: {}'.format(method)) 67 | new_dets, inds = soft_nms_cpu( 68 | dets_np, 69 | iou_thr, 70 | method=method_codes[method], 71 | sigma=sigma, 72 | min_score=min_score) 73 | 74 | if is_tensor: 75 | return dets.new_tensor(new_dets), dets.new_tensor( 76 | inds, dtype=torch.long) 77 | else: 78 | return new_dets.astype(np.float32), inds.astype(np.int64) 79 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/nms/src/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
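// Greedy NMS on CPU: boxes are visited in descending score order, and each
// kept box suppresses every later box whose IoU with it reaches the threshold.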
2 | #include <torch/extension.h> 3 | 4 | template <typename scalar_t> 5 | at::Tensor nms_cpu_kernel(const at::Tensor& dets, const float threshold) { 6 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 7 | 8 | if (dets.numel() == 0) { 9 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 10 | } 11 | 12 | auto x1_t = dets.select(1, 0).contiguous(); 13 | auto y1_t = dets.select(1, 1).contiguous(); 14 | auto x2_t = dets.select(1, 2).contiguous(); 15 | auto y2_t = dets.select(1, 3).contiguous(); 16 | auto scores = dets.select(1, 4).contiguous(); 17 | 18 | at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1); 19 | 20 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 21 | 22 | auto ndets = dets.size(0); 23 | at::Tensor suppressed_t = 24 | at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU)); 25 | 26 | auto suppressed = suppressed_t.data<uint8_t>(); 27 | auto order = order_t.data<int64_t>(); 28 | auto x1 = x1_t.data<scalar_t>(); 29 | auto y1 = y1_t.data<scalar_t>(); 30 | auto x2 = x2_t.data<scalar_t>(); 31 | auto y2 = y2_t.data<scalar_t>(); 32 | auto areas = areas_t.data<scalar_t>(); 33 | 34 | for (int64_t _i = 0; _i < ndets; _i++) { 35 | auto i = order[_i]; 36 | if (suppressed[i] == 1) continue; 37 | auto ix1 = x1[i]; 38 | auto iy1 = y1[i]; 39 | auto ix2 = x2[i]; 40 | auto iy2 = y2[i]; 41 | auto iarea = areas[i]; 42 | 43 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 44 | auto j = order[_j]; 45 | if (suppressed[j] == 1) continue; 46 | auto xx1 = std::max(ix1, x1[j]); 47 | auto yy1 = std::max(iy1, y1[j]); 48 | auto xx2 = std::min(ix2, x2[j]); 49 | auto yy2 = std::min(iy2, y2[j]); 50 | 51 | auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1); 52 | auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1); 53 | auto inter = w * h; 54 | auto ovr = inter / (iarea + areas[j] - inter); 55 | if (ovr >= threshold) suppressed[j] = 1; 56 | } 57 | } 58 | return at::nonzero(suppressed_t == 0).squeeze(1); 59 | } 60 | 61 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 62 | at::Tensor result; 63 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] { 64 | result = nms_cpu_kernel<scalar_t>(dets, threshold); 65 | }); 66 | return result; 67 | } 68 | 69 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 70 | m.def("nms", &nms, "non-maximum suppression"); 71 | } -------------------------------------------------------------------------------- /segmentation/mmdet/ops/nms/src/nms_cuda.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
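// CUDA dispatch shim: verifies the boxes live on the GPU, short-circuits the
// empty case on the host, then forwards to the nms_cuda kernel launcher.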
2 | #include <torch/extension.h> 3 | 4 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 5 | 6 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 7 | 8 | at::Tensor nms(const at::Tensor& dets, const float threshold) { 9 | CHECK_CUDA(dets); 10 | if (dets.numel() == 0) 11 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 12 | return nms_cuda(dets, threshold); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("nms", &nms, "non-maximum suppression"); 17 | } -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_align/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_align import RoIAlign, roi_align 2 | 3 | __all__ = ['roi_align', 'RoIAlign'] 4 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_align/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import numpy as np 5 | import torch 6 | from torch.autograd import gradcheck 7 | 8 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 9 | from roi_align import RoIAlign # noqa: E402, isort:skip 10 | 11 | feat_size = 15 12 | spatial_scale = 1.0 / 8 13 | img_size = feat_size / spatial_scale 14 | num_imgs = 2 15 | num_rois = 20 16 | 17 | batch_ind = np.random.randint(num_imgs, size=(num_rois, 1)) 18 | rois = np.random.rand(num_rois, 4) * img_size * 0.5 19 | rois[:, 2:] += img_size * 0.5 20 | rois = np.hstack((batch_ind, rois)) 21 | 22 | feat = torch.randn( 23 | num_imgs, 16, feat_size, feat_size, requires_grad=True, device='cuda:0') 24 | rois = torch.from_numpy(rois).float().cuda() 25 | inputs = (feat, rois) 26 | print('Gradcheck for roi align...') 27 | test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3) 28 | print(test) 29 | test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3) 30 | print(test) 31 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_align/roi_align.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | from torch.nn.modules.utils import _pair 5 | 6 | from . 
import roi_align_cuda 7 | 8 | 9 | class RoIAlignFunction(Function): 10 | 11 | @staticmethod 12 | def forward(ctx, features, rois, out_size, spatial_scale, sample_num=0): 13 | out_h, out_w = _pair(out_size) 14 | assert isinstance(out_h, int) and isinstance(out_w, int) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sample_num = sample_num 17 | ctx.save_for_backward(rois) 18 | ctx.feature_size = features.size() 19 | 20 | batch_size, num_channels, data_height, data_width = features.size() 21 | num_rois = rois.size(0) 22 | 23 | output = features.new_zeros(num_rois, num_channels, out_h, out_w) 24 | if features.is_cuda: 25 | roi_align_cuda.forward(features, rois, out_h, out_w, spatial_scale, 26 | sample_num, output) 27 | else: 28 | raise NotImplementedError 29 | 30 | return output 31 | 32 | @staticmethod 33 | @once_differentiable 34 | def backward(ctx, grad_output): 35 | feature_size = ctx.feature_size 36 | spatial_scale = ctx.spatial_scale 37 | sample_num = ctx.sample_num 38 | rois = ctx.saved_tensors[0] 39 | assert (feature_size is not None and grad_output.is_cuda) 40 | 41 | batch_size, num_channels, data_height, data_width = feature_size 42 | out_w = grad_output.size(3) 43 | out_h = grad_output.size(2) 44 | 45 | grad_input = grad_rois = None 46 | if ctx.needs_input_grad[0]: 47 | grad_input = rois.new_zeros(batch_size, num_channels, data_height, 48 | data_width) 49 | roi_align_cuda.backward(grad_output.contiguous(), rois, out_h, 50 | out_w, spatial_scale, sample_num, 51 | grad_input) 52 | 53 | return grad_input, grad_rois, None, None, None 54 | 55 | 56 | roi_align = RoIAlignFunction.apply 57 | 58 | 59 | class RoIAlign(nn.Module): 60 | 61 | def __init__(self, 62 | out_size, 63 | spatial_scale, 64 | sample_num=0, 65 | use_torchvision=False): 66 | super(RoIAlign, self).__init__() 67 | 68 | self.out_size = _pair(out_size) 69 | self.spatial_scale = float(spatial_scale) 70 | self.sample_num = int(sample_num) 71 | self.use_torchvision = use_torchvision 72 | 73 | def forward(self, features, rois): 74 | if self.use_torchvision: 75 | from torchvision.ops import roi_align as tv_roi_align 76 | return tv_roi_align(features, rois, self.out_size, 77 | self.spatial_scale, self.sample_num) 78 | else: 79 | return roi_align(features, rois, self.out_size, self.spatial_scale, 80 | self.sample_num) 81 | 82 | def __repr__(self): 83 | format_str = self.__class__.__name__ 84 | format_str += '(out_size={}, spatial_scale={}, sample_num={}'.format( 85 | self.out_size, self.spatial_scale, self.sample_num) 86 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 87 | return format_str 88 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_align/src/roi_align_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int sample_num, 8 | const int channels, const int height, 9 | const int width, const int num_rois, 10 | const int pooled_height, const int pooled_width, 11 | at::Tensor output); 12 | 13 | int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 14 | const float spatial_scale, const int sample_num, 15 | const int channels, const int height, 16 | const int width, const int num_rois, 17 | const int pooled_height, const int pooled_width, 18 | at::Tensor bottom_grad); 19 | 20 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), 
#x, " must be a CUDAtensor ") 21 | #define CHECK_CONTIGUOUS(x) \ 22 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 23 | #define CHECK_INPUT(x) \ 24 | CHECK_CUDA(x); \ 25 | CHECK_CONTIGUOUS(x) 26 | 27 | int roi_align_forward_cuda(at::Tensor features, at::Tensor rois, 28 | int pooled_height, int pooled_width, 29 | float spatial_scale, int sample_num, 30 | at::Tensor output) { 31 | CHECK_INPUT(features); 32 | CHECK_INPUT(rois); 33 | CHECK_INPUT(output); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int num_channels = features.size(1); 45 | int data_height = features.size(2); 46 | int data_width = features.size(3); 47 | 48 | ROIAlignForwardLaucher(features, rois, spatial_scale, sample_num, 49 | num_channels, data_height, data_width, num_rois, 50 | pooled_height, pooled_width, output); 51 | 52 | return 1; 53 | } 54 | 55 | int roi_align_backward_cuda(at::Tensor top_grad, at::Tensor rois, 56 | int pooled_height, int pooled_width, 57 | float spatial_scale, int sample_num, 58 | at::Tensor bottom_grad) { 59 | CHECK_INPUT(top_grad); 60 | CHECK_INPUT(rois); 61 | CHECK_INPUT(bottom_grad); 62 | 63 | // Number of ROIs 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | if (size_rois != 5) { 67 | printf("wrong roi size\n"); 68 | return 0; 69 | } 70 | 71 | int num_channels = bottom_grad.size(1); 72 | int data_height = bottom_grad.size(2); 73 | int data_width = bottom_grad.size(3); 74 | 75 | ROIAlignBackwardLaucher(top_grad, rois, spatial_scale, sample_num, 76 | num_channels, data_height, data_width, num_rois, 77 | pooled_height, pooled_width, bottom_grad); 78 | 79 | return 1; 80 | } 81 | 82 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 83 | m.def("forward", &roi_align_forward_cuda, "Roi_Align forward (CUDA)"); 84 | m.def("backward", &roi_align_backward_cuda, "Roi_Align backward (CUDA)"); 85 | } 86 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_pool/__init__.py: -------------------------------------------------------------------------------- 1 | from .roi_pool import RoIPool, roi_pool 2 | 3 | __all__ = ['roi_pool', 'RoIPool'] 4 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_pool/gradcheck.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import torch 5 | from torch.autograd import gradcheck 6 | 7 | sys.path.append(osp.abspath(osp.join(__file__, '../../'))) 8 | from roi_pool import RoIPool # noqa: E402, isort:skip 9 | 10 | feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda() 11 | rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55], 12 | [1, 67, 40, 110, 120]]).cuda() 13 | inputs = (feat, rois) 14 | print('Gradcheck for roi pooling...') 15 | test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3) 16 | print(test) 17 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_pool/roi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from . 
import roi_pool_cuda 8 | 9 | 10 | class RoIPoolFunction(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, features, rois, out_size, spatial_scale): 14 | assert features.is_cuda 15 | out_h, out_w = _pair(out_size) 16 | assert isinstance(out_h, int) and isinstance(out_w, int) 17 | ctx.save_for_backward(rois) 18 | num_channels = features.size(1) 19 | num_rois = rois.size(0) 20 | out_shape = (num_rois, num_channels, out_h, out_w) 21 | output = features.new_zeros(out_shape) 22 | argmax = features.new_zeros(out_shape, dtype=torch.int) 23 | roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale, 24 | output, argmax) 25 | ctx.spatial_scale = spatial_scale 26 | ctx.feature_size = features.size() 27 | ctx.argmax = argmax 28 | 29 | return output 30 | 31 | @staticmethod 32 | @once_differentiable 33 | def backward(ctx, grad_output): 34 | assert grad_output.is_cuda 35 | spatial_scale = ctx.spatial_scale 36 | feature_size = ctx.feature_size 37 | argmax = ctx.argmax 38 | rois = ctx.saved_tensors[0] 39 | assert feature_size is not None 40 | 41 | grad_input = grad_rois = None 42 | if ctx.needs_input_grad[0]: 43 | grad_input = grad_output.new_zeros(feature_size) 44 | roi_pool_cuda.backward(grad_output.contiguous(), rois, argmax, 45 | spatial_scale, grad_input) 46 | 47 | return grad_input, grad_rois, None, None 48 | 49 | 50 | roi_pool = RoIPoolFunction.apply 51 | 52 | 53 | class RoIPool(nn.Module): 54 | 55 | def __init__(self, out_size, spatial_scale, use_torchvision=False): 56 | super(RoIPool, self).__init__() 57 | 58 | self.out_size = _pair(out_size) 59 | self.spatial_scale = float(spatial_scale) 60 | self.use_torchvision = use_torchvision 61 | 62 | def forward(self, features, rois): 63 | if self.use_torchvision: 64 | from torchvision.ops import roi_pool as tv_roi_pool 65 | return tv_roi_pool(features, rois, self.out_size, 66 | self.spatial_scale) 67 | else: 68 | return roi_pool(features, rois, self.out_size, self.spatial_scale) 69 | 70 | def __repr__(self): 71 | format_str = self.__class__.__name__ 72 | format_str += '(out_size={}, spatial_scale={}'.format( 73 | self.out_size, self.spatial_scale) 74 | format_str += ', use_torchvision={})'.format(self.use_torchvision) 75 | return format_str 76 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/roi_pool/src/roi_pool_cuda.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | 3 | #include <cmath> 4 | #include <vector> 5 | 6 | int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois, 7 | const float spatial_scale, const int channels, 8 | const int height, const int width, const int num_rois, 9 | const int pooled_h, const int pooled_w, 10 | at::Tensor output, at::Tensor argmax); 11 | 12 | int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois, 13 | const at::Tensor argmax, const float spatial_scale, 14 | const int batch_size, const int channels, 15 | const int height, const int width, 16 | const int num_rois, const int pooled_h, 17 | const int pooled_w, at::Tensor bottom_grad); 18 | 19 | #define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ") 20 | #define CHECK_CONTIGUOUS(x) \ 21 | AT_CHECK(x.is_contiguous(), #x, " must be contiguous ") 22 | #define CHECK_INPUT(x) \ 23 | CHECK_CUDA(x); \ 24 | CHECK_CONTIGUOUS(x) 25 | 26 | int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois, 27 | int pooled_height, int pooled_width, 28 | float spatial_scale, at::Tensor output, 29 | at::Tensor argmax) { 
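// Validate inputs and launch the forward kernel; each ROI row is expected to
// be (batch_idx, x1, y1, x2, y2), hence the size_rois == 5 check below.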
30 | CHECK_INPUT(features); 31 | CHECK_INPUT(rois); 32 | CHECK_INPUT(output); 33 | CHECK_INPUT(argmax); 34 | 35 | // Number of ROIs 36 | int num_rois = rois.size(0); 37 | int size_rois = rois.size(1); 38 | 39 | if (size_rois != 5) { 40 | printf("wrong roi size\n"); 41 | return 0; 42 | } 43 | 44 | int channels = features.size(1); 45 | int height = features.size(2); 46 | int width = features.size(3); 47 | 48 | ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height, width, 49 | num_rois, pooled_height, pooled_width, output, argmax); 50 | 51 | return 1; 52 | } 53 | 54 | int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois, 55 | at::Tensor argmax, float spatial_scale, 56 | at::Tensor bottom_grad) { 57 | CHECK_INPUT(top_grad); 58 | CHECK_INPUT(rois); 59 | CHECK_INPUT(argmax); 60 | CHECK_INPUT(bottom_grad); 61 | 62 | int pooled_height = top_grad.size(2); 63 | int pooled_width = top_grad.size(3); 64 | int num_rois = rois.size(0); 65 | int size_rois = rois.size(1); 66 | 67 | if (size_rois != 5) { 68 | printf("wrong roi size\n"); 69 | return 0; 70 | } 71 | int batch_size = bottom_grad.size(0); 72 | int channels = bottom_grad.size(1); 73 | int height = bottom_grad.size(2); 74 | int width = bottom_grad.size(3); 75 | 76 | ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size, 77 | channels, height, width, num_rois, pooled_height, 78 | pooled_width, bottom_grad); 79 | 80 | return 1; 81 | } 82 | 83 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 84 | m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)"); 85 | m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)"); 86 | } 87 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/sigmoid_focal_loss/__init__.py: -------------------------------------------------------------------------------- 1 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss 2 | 3 | __all__ = ['SigmoidFocalLoss', 'sigmoid_focal_loss'] 4 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/sigmoid_focal_loss/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.autograd import Function 3 | from torch.autograd.function import once_differentiable 4 | 5 | from . 
import sigmoid_focal_loss_cuda 6 | 7 | 8 | class SigmoidFocalLossFunction(Function): 9 | 10 | @staticmethod 11 | def forward(ctx, input, target, gamma=2.0, alpha=0.25): 12 | ctx.save_for_backward(input, target) 13 | num_classes = input.shape[1] 14 | ctx.num_classes = num_classes 15 | ctx.gamma = gamma 16 | ctx.alpha = alpha 17 | 18 | loss = sigmoid_focal_loss_cuda.forward(input, target, num_classes, 19 | gamma, alpha) 20 | return loss 21 | 22 | @staticmethod 23 | @once_differentiable 24 | def backward(ctx, d_loss): 25 | input, target = ctx.saved_tensors 26 | num_classes = ctx.num_classes 27 | gamma = ctx.gamma 28 | alpha = ctx.alpha 29 | d_loss = d_loss.contiguous() 30 | d_input = sigmoid_focal_loss_cuda.backward(input, target, d_loss, 31 | num_classes, gamma, alpha) 32 | return d_input, None, None, None, None 33 | 34 | 35 | sigmoid_focal_loss = SigmoidFocalLossFunction.apply 36 | 37 | 38 | # TODO: remove this module 39 | class SigmoidFocalLoss(nn.Module): 40 | 41 | def __init__(self, gamma, alpha): 42 | super(SigmoidFocalLoss, self).__init__() 43 | self.gamma = gamma 44 | self.alpha = alpha 45 | 46 | def forward(self, logits, targets): 47 | assert logits.is_cuda 48 | loss = sigmoid_focal_loss(logits, targets, self.gamma, self.alpha) 49 | return loss.sum() 50 | 51 | def __repr__(self): 52 | tmpstr = self.__class__.__name__ + '(gamma={}, alpha={})'.format( 53 | self.gamma, self.alpha) 54 | return tmpstr 55 | -------------------------------------------------------------------------------- /segmentation/mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss.cpp: -------------------------------------------------------------------------------- 1 | // modified from 2 | // https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/maskrcnn_benchmark/csrc/SigmoidFocalLoss.h 3 | #include <torch/extension.h> 4 | 5 | at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits, 6 | const at::Tensor &targets, 7 | const int num_classes, 8 | const float gamma, const float alpha); 9 | 10 | at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits, 11 | const at::Tensor &targets, 12 | const at::Tensor &d_losses, 13 | const int num_classes, 14 | const float gamma, const float alpha); 15 | 16 | // Interface for Python 17 | at::Tensor SigmoidFocalLoss_forward(const at::Tensor &logits, 18 | const at::Tensor &targets, 19 | const int num_classes, const float gamma, 20 | const float alpha) { 21 | if (logits.type().is_cuda()) { 22 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, 23 | alpha); 24 | } 25 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 26 | } 27 | 28 | at::Tensor SigmoidFocalLoss_backward(const at::Tensor &logits, 29 | const at::Tensor &targets, 30 | const at::Tensor &d_losses, 31 | const int num_classes, const float gamma, 32 | const float alpha) { 33 | if (logits.type().is_cuda()) { 34 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, 35 | num_classes, gamma, alpha); 36 | } 37 | AT_ERROR("SigmoidFocalLoss is not implemented on the CPU"); 38 | } 39 | 40 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 41 | m.def("forward", &SigmoidFocalLoss_forward, 42 | "SigmoidFocalLoss forward (CUDA)"); 43 | m.def("backward", &SigmoidFocalLoss_backward, 44 | "SigmoidFocalLoss backward (CUDA)"); 45 | } 46 | -------------------------------------------------------------------------------- /segmentation/mmdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .flops_counter import get_model_complexity_info 2 | 
from .registry import Registry, build_from_cfg 3 | 4 | __all__ = ['Registry', 'build_from_cfg', 'get_model_complexity_info'] 5 | -------------------------------------------------------------------------------- /segmentation/mmdet/utils/draw_inputgate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import seaborn as sns 3 | 4 | def draw_inputgate(gate_np): 5 | gate_activations_dist = np.average(gate_np.squeeze(), axis=0) 6 | g = sns.barplot(x=list(range(192)), y=gate_activations_dist) 7 | g.set_xlabel("Channel Index", fontsize=14) 8 | g.set_ylabel("Probability of Gates Enabled", fontsize=11) 9 | # plt.setp(g.get_xticklabels(), rotation=45) 10 | xticks = g.get_xticks() 11 | xticks_new = np.concatenate((xticks[:4], xticks[8:12], xticks[16:20], xticks[24:28], xticks[32:36], xticks[64:68], 12 | xticks[72:76], xticks[128:132], xticks[136:140])) 13 | for n, label in enumerate(g.xaxis.get_ticklabels()): 14 | if n not in xticks_new: 15 | label.set_visible(False) 16 | g.xaxis.set_tick_params(labelsize=6) 17 | np.save('input_gate.npy', gate_activations_dist) 18 | g.figure.savefig('inputgate.png') 19 | g.figure.savefig('inputgate.pdf') 20 | 21 | 22 | def zigZag(arr):  # group entries by anti-diagonal, alternating direction (JPEG zig-zag order) 23 | rows, columns = len(arr), len(arr[0]) 24 | result = [[] for i in range(rows + columns - 1)] 25 | 26 | for i in range(rows): 27 | for j in range(columns): 28 | s = i + j 29 | if s % 2 == 0: 30 | 31 | # add at the beginning of the diagonal 32 | result[s].insert(0, arr[i][j]) 33 | else: 34 | 35 | # add at the end of the diagonal 36 | result[s].append(arr[i][j]) 37 | return result 38 | 39 | def draw_from_npy(filename): 40 | import matplotlib.pyplot as plt 41 | gate_activations_dist = np.load(filename) 42 | y = gate_activations_dist[:64].reshape((8, 8)) 43 | cb = gate_activations_dist[64:128].reshape((8, 8)) 44 | cr = gate_activations_dist[128:].reshape((8, 8)) 45 | 46 | plt.figure(1, figsize = (32, 32)) 47 | plt.subplot(411) 48 | ax = sns.heatmap(y, linewidth=0.5, cmap="OrRd", square=True) 49 | 50 | plt.subplot(412) 51 | ax = sns.heatmap(cb, linewidth=0.5, cmap="OrRd", square=True) 52 | 53 | plt.subplot(413) 54 | ax = sns.heatmap(cr, linewidth=0.5, cmap="OrRd", square=True) 55 | 56 | plt.subplot(414) 57 | list_a = list(np.arange(64)) 58 | list_b = [x for sublist in zigZag(np.asarray(list_a).reshape((8, 8))) for x in sublist] 59 | list_c = [list_b.index(m) for m in list_a] 60 | ax = sns.heatmap(np.asarray(list_c).reshape((8, 8)), linewidth=0.5, cmap="OrRd", square=True, annot=True, annot_kws={"size": 18}) 61 | # ax = sns.heatmap(np.arange(64).reshape((8, 8)), linewidth=0.5, cmap="OrRd", square=True, annot=True, annot_kws={"size": 18}) 62 | # plt.show() 63 | plt.savefig('heatmap.svg') 64 | print('heatmap saved.') 65 | 66 | if __name__ == '__main__': 67 | # main() 68 | draw_from_npy('/mnt/kai/work/code/dctDet/input_gate.npy') -------------------------------------------------------------------------------- /segmentation/mmdet/utils/plot_dct.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | filelist = ['000000120420.jpg', '000000166287.jpg', '000000166391.jpg', '000000212559.jpg', '000000286994.jpg', 6 | '000000300659.jpg', '000000438862.jpg', '000000460347.jpg', '000000509735.jpg'] 7 | 8 | def unblockshaped(arr, h, w): 9 | """ 10 | Return an array of shape (h, w) where 11 | h * w = arr.size 12 | 13 | If arr is of shape (n, nrows, ncols), i.e. n subblocks of shape (nrows, ncols), 14 | then the returned array preserves the "physical" layout of the subblocks.
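Example (illustrative):
>>> a = np.arange(16).reshape(4, 2, 2)  # four 2x2 blocks
>>> unblockshaped(a, 4, 4).shape
(4, 4)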
15 | """ 16 | n, nrows, ncols = arr.shape 17 | return (arr.reshape(h//nrows, -1, nrows, ncols) 18 | .swapaxes(1,2) 19 | .reshape(h, w)) 20 | 21 | def dct_flatten_2d(img): 22 | height, width, channel = img.shape 23 | N = int(math.sqrt(channel)) 24 | height_resized, width_resized = height * N, width * N 25 | 26 | # Regroup the N*N flattened DCT coefficients of each position into N x N spatial blocks 27 | img = img.reshape((height, width, N, N)).reshape(-1, N, N).astype(dtype='float32') 28 | img_resized = unblockshaped(img, height_resized, width_resized) 29 | return img_resized 30 | 31 | 32 | def plot_dct(img, filename): 33 | if filename in filelist: 34 | dct = dct_flatten_2d(img) 35 | plt.figure() 36 | plt.imshow(dct, cmap='gray', vmax=np.max(dct) * 0.01, vmin=0) 37 | plt.title("8x8 DCTs of the image") 38 | plt.savefig(filename) -------------------------------------------------------------------------------- /segmentation/mmdet/utils/registry.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import mmcv 4 | 5 | 6 | class Registry(object): 7 | 8 | def __init__(self, name): 9 | self._name = name 10 | self._module_dict = dict() 11 | 12 | def __repr__(self): 13 | format_str = self.__class__.__name__ + '(name={}, items={})'.format( 14 | self._name, list(self._module_dict.keys())) 15 | return format_str 16 | 17 | @property 18 | def name(self): 19 | return self._name 20 | 21 | @property 22 | def module_dict(self): 23 | return self._module_dict 24 | 25 | def get(self, key): 26 | return self._module_dict.get(key, None) 27 | 28 | def _register_module(self, module_class): 29 | """Register a module. 30 | 31 | Args: 32 | module_class (type): Module class to be registered. 33 | """ 34 | if not inspect.isclass(module_class): 35 | raise TypeError('module must be a class, but got {}'.format( 36 | type(module_class))) 37 | module_name = module_class.__name__ 38 | if module_name in self._module_dict: 39 | raise KeyError('{} is already registered in {}'.format( 40 | module_name, self.name)) 41 | self._module_dict[module_name] = module_class 42 | 43 | def register_module(self, cls): 44 | self._register_module(cls) 45 | return cls 46 | 47 | 48 | def build_from_cfg(cfg, registry, default_args=None): 49 | """Build a module from config dict. 50 | 51 | Args: 52 | cfg (dict): Config dict. It should at least contain the key "type". 53 | registry (:obj:`Registry`): The registry to search the type from. 54 | default_args (dict, optional): Default initialization arguments. 55 | 56 | Returns: 57 | obj: The constructed object.
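Example (a self-contained sketch; the registry and class names are illustrative):
>>> MODELS = Registry('models')
>>> @MODELS.register_module
... class MyModule(object):
...     def __init__(self, scale=1):
...         self.scale = scale
>>> m = build_from_cfg(dict(type='MyModule', scale=2), MODELS)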
58 | """ 59 | assert isinstance(cfg, dict) and 'type' in cfg 60 | assert isinstance(default_args, dict) or default_args is None 61 | args = cfg.copy() 62 | obj_type = args.pop('type') 63 | if mmcv.is_str(obj_type): 64 | obj_cls = registry.get(obj_type) 65 | if obj_cls is None: 66 | raise KeyError('{} is not in the {} registry'.format( 67 | obj_type, registry.name)) 68 | elif inspect.isclass(obj_type): 69 | obj_cls = obj_type 70 | else: 71 | raise TypeError('type must be a str or valid type, but got {}'.format( 72 | type(obj_type))) 73 | if default_args is not None: 74 | for name, value in default_args.items(): 75 | args.setdefault(name, value) 76 | return obj_cls(**args) 77 | -------------------------------------------------------------------------------- /segmentation/mmdet/utils/transfer_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from collections import OrderedDict 3 | 4 | file_path = '../../pretrained/model_best.pth.tar' 5 | checkpoint = torch.load(file_path)['state_dict'] 6 | new_checkpoint = OrderedDict() 7 | 8 | prefix = ['model.0', 'model.1', 'model.2', 'model.3'] 9 | for k, v in checkpoint.items(): 10 | if any(word in k for word in prefix): 11 | k = k[7:]  # drop the 'module.' prefix added by DataParallel 12 | new_k = k.replace('model.' + k.split('.')[1], 'layer' + str(int(k.split('.')[1]) + 1))  # rename 'model.N.*' to 'layer{N+1}.*' 13 | new_checkpoint[new_k] = v 14 | 15 | torch.save(new_checkpoint, '../../pretrained/resnet50_192.pth.tar') 16 | -------------------------------------------------------------------------------- /segmentation/requirements.txt: -------------------------------------------------------------------------------- 1 | mmcv>=0.2.10 2 | numpy 3 | matplotlib 4 | six 5 | terminaltables 6 | pycocotools 7 | torch>=1.1 8 | torchvision 9 | imagecorruptions 10 | jpeg2dct 11 | PyTurboJPEG -------------------------------------------------------------------------------- /segmentation/results/segmentation_result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PSCLab-ASU/Learning-in-the-Frequency-Domain/a1e620fd12a023c8288135050e76574a431888bd/segmentation/results/segmentation_result.jpg -------------------------------------------------------------------------------- /segmentation/setup_env.sh: -------------------------------------------------------------------------------- 1 | # Configure environment 2 | sudo apt install -y unzip 3 | mkdir -p /mnt/dataset/coco 4 | cd /mnt/dataset/coco 5 | wget http://images.cocodataset.org/zips/train2017.zip 6 | wget http://images.cocodataset.org/zips/val2017.zip 7 | wget http://images.cocodataset.org/zips/test2017.zip 8 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip 9 | unzip train2017.zip 10 | unzip val2017.zip 11 | unzip test2017.zip 12 | unzip annotations_trainval2017.zip 13 | rm train2017.zip 14 | rm val2017.zip 15 | rm test2017.zip 16 | 17 | cd /mnt/kai/work/code 18 | git clone https://github.com/calmevtime/dctDet 19 | mkdir -p /mnt/kai/work/code/dctDet/data/ 20 | cd /mnt/kai/work/code/dctDet/data/ 21 | ln -s /mnt/dataset/coco/ /mnt/kai/work/code/dctDet/data/ 22 | -------------------------------------------------------------------------------- /segmentation/tests/requirements.txt: -------------------------------------------------------------------------------- 1 | isort 2 | flake8 3 | yapf 4 | pytest-cov 5 | codecov -------------------------------------------------------------------------------- /segmentation/tests/test_utils.py: 
-------------------------------------------------------------------------------- 1 | import numpy.testing as npt 2 | 3 | from mmdet.utils.flops_counter import params_to_string 4 | 5 | 6 | def test_params_to_string(): 7 | npt.assert_equal(params_to_string(1e9), '1000.0 M') 8 | npt.assert_equal(params_to_string(2e5), '200.0 k') 9 | npt.assert_equal(params_to_string(3e-9), '3e-09') 10 | -------------------------------------------------------------------------------- /segmentation/work_dirs/README.md: -------------------------------------------------------------------------------- 1 | ## This folder should contain pretrained models. You can download the pretrained [`models`](https://drive.google.com/open?id=1UKmNORizsulH9E4awxjBR4fAlW1KlC5s) here. 2 | --------------------------------------------------------------------------------