├── images
├── BiB-vis.PNG
├── BiB-pipeline.png
└── BiB-results.PNG
├── wetectron
├── utils
│   ├── README.md
│   ├── __init__.py
│   ├── collect_env.py
│   ├── cv2_util.py
│   ├── imports.py
│   ├── timer.py
│   ├── env.py
│   ├── registry.py
│   ├── logger.py
│   ├── miscellaneous.py
│   ├── model_zoo.py
│   ├── model_serialization.py
│   ├── metric_logger.py
│   └── comm.py
├── modeling
│   ├── __init__.py
│   ├── roi_heads
│   │   ├── __init__.py
│   │   ├── box_head
│   │   │   ├── __init__.py
│   │   │   ├── roi_box_predictors.py
│   │   │   └── box_head.py
│   │   ├── mask_head
│   │   │   ├── __init__.py
│   │   │   ├── roi_mask_predictors.py
│   │   │   ├── roi_mask_feature_extractors.py
│   │   │   └── mask_head.py
│   │   ├── keypoint_head
│   │   │   ├── __init__.py
│   │   │   ├── roi_keypoint_predictors.py
│   │   │   ├── roi_keypoint_feature_extractors.py
│   │   │   ├── keypoint_head.py
│   │   │   └── inference.py
│   │   ├── weak_head
│   │   │   └── __init__.py
│   │   └── roi_heads.py
│   ├── rpn
│   │   ├── retinanet
│   │   │   ├── __init__.py
│   │   │   └── loss.py
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── detector
│   │   ├── __init__.py
│   │   └── detectors.py
│   ├── backbone
│   │   ├── __init__.py
│   │   ├── backbone.py
│   │   └── fpn.py
│   ├── utils.py
│   ├── registry.py
│   ├── balanced_positive_negative_sampler.py
│   ├── box_coder.py
│   └── make_layers.py
├── structures
│   ├── __init__.py
│   └── image_list.py
├── engine
│   └── __init__.py
├── data
│   ├── __init__.py
│   ├── samplers
│   │   ├── __init__.py
│   │   ├── iteration_based_batch_sampler.py
│   │   ├── distributed.py
│   │   └── grouped_batch_sampler.py
│   ├── transforms
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── transforms.py
│   ├── datasets
│   │   ├── evaluation
│   │   │   ├── coco
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   └── voc
│   │   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   ├── list_dataset.py
│   │   └── concat_dataset.py
│   ├── collate_batch.py
│   └── README.md
├── layers
│   ├── dcn
│   │   ├── __init__.py
│   │   └── deform_pool_func.py
│   ├── nms.py
│   ├── smooth_l1_loss.py
│   ├── _utils.py
│   ├── batch_norm.py
│   ├── __init__.py
│   ├── roi_pool.py
│   ├── roi_align.py
│   └── sigmoid_focal_loss.py
├── __init__.py
├── config
│   └── __init__.py
├── solver
│   ├── __init__.py
│   ├── lr_scheduler.py
│   └── build.py
└── csrc
│   ├── cpu
│   ├── vision.h
│   └── nms_cpu.cpp
│   ├── nms.h
│   ├── SigmoidFocalLoss.h
│   ├── vision.cpp
│   ├── ROIPool.h
│   ├── ROIAlign.h
│   ├── deform_pool.h
│   ├── cuda
│   ├── deform_pool_cuda.cu
│   └── nms.cu
│   └── deform_conv.h
├── active_strategy
├── utils
│   └── __init__.py
└── __init__.py
├── configs
├── README.md
├── coco
│   ├── V_16_coco14.yaml
│   ├── V_16_coco17.yaml
│   ├── V_16_coco14_point.yaml
│   ├── V_16_coco14_scribble.yaml
│   └── V_16_coco14_active.yaml
└── voc
│   ├── V_16_voc07.yaml
│   ├── V_16_voc12.yaml
│   ├── V_16_voc0712.yaml
│   └── V_16_voc07_active.yaml
├── .gitignore
├── launch_mist_coco14.sh
├── launch_mist_voc07.sh
├── inference_scripts
├── run_inference_coco14_val.sh
├── run_inference_voc07_test.sh
├── run_inference_coco14_train.sh
└── run_inference_voc07_trainval.sh
├── summarize_performance.sh
└── setup.py
/images/BiB-vis.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huyvvo/BiB/HEAD/images/BiB-vis.PNG
--------------------------------------------------------------------------------
/images/BiB-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huyvvo/BiB/HEAD/images/BiB-pipeline.png
--------------------------------------------------------------------------------
/images/BiB-results.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huyvvo/BiB/HEAD/images/BiB-results.PNG
--------------------------------------------------------------------------------
/wetectron/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
--------------------------------------------------------------------------------
/wetectron/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/structures/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/active_strategy/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/engine/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | 
--------------------------------------------------------------------------------
/active_strategy/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/wetectron/data/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
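# A minimal usage sketch for the loader factory re-exported below. The
# signature make_data_loader(cfg, is_train, is_distributed, start_iter) is
# assumed from the maskrcnn-benchmark lineage of this code, and the 4-tuple
# batch layout follows the BatchCollator in collate_batch.py:
#
#   from wetectron.config import cfg
#   from wetectron.data import make_data_loader
#
#   cfg.merge_from_file("configs/voc/V_16_voc07.yaml")
#   loader = make_data_loader(cfg, is_train=True, is_distributed=False)
#   for images, targets, rois, img_ids in loader:
#       pass  # one training batch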
6 | from .build import make_data_loader
7 | 
--------------------------------------------------------------------------------
/wetectron/layers/dcn/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # 
6 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn)
7 | # 
8 | 
--------------------------------------------------------------------------------
/wetectron/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | # from .rpn import build_rpn
7 | 
--------------------------------------------------------------------------------
/wetectron/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | from .detectors import build_detection_model
7 | 
--------------------------------------------------------------------------------
/wetectron/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | from .backbone import build_backbone
7 | from . import fbnet
8 | from . import vgg16
--------------------------------------------------------------------------------
/wetectron/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/wetectron/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/weak_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/configs/README.md:
--------------------------------------------------------------------------------
1 | # Configuration files
2 | 
3 | All the configs for supervised methods are removed. You can find them at the maskrcnn-benchmark [configs](https://github.com/facebookresearch/maskrcnn-benchmark/tree/master/configs).
4 | 
5 | Since we made changes in many places in this code, the performance of supervised models may also be affected.
6 | To test their performance, we recommend using [Detectron2](https://github.com/facebookresearch/detectron2) or the original [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark).
7 | 
--------------------------------------------------------------------------------
/wetectron/layers/nms.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | from wetectron import _C
7 | from apex import amp
8 | 
9 | # Only valid with fp32 inputs - give AMP the hint
10 | nms = amp.float_function(_C.nms)
11 | 
12 | # nms.__doc__ = """
13 | # This function performs Non-maximum suppression"""
14 | 
--------------------------------------------------------------------------------
/wetectron/config/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from .defaults import _C as cfg
11 | 
--------------------------------------------------------------------------------
/wetectron/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
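# A minimal usage sketch for the factory defined below; the config file is
# one of this repository's own, and any config whose MODEL.META_ARCHITECTURE
# is "GeneralizedRCNN" resolves to the same meta-architecture:
#
#   from wetectron.config import cfg
#   from wetectron.modeling.detector import build_detection_model
#
#   cfg.merge_from_file("configs/coco/V_16_coco14.yaml")
#   model = build_detection_model(cfg)  # -> a GeneralizedRCNN instance
#   model.to("cuda")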
6 | from .generalized_rcnn import GeneralizedRCNN
7 | 
8 | 
9 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN}
10 | 
11 | 
12 | def build_detection_model(cfg):
13 |     meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
14 |     return meta_arch(cfg)
15 | 
--------------------------------------------------------------------------------
/wetectron/modeling/utils.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | """
7 | Miscellaneous utility functions
8 | """
9 | 
10 | import torch
11 | 
12 | 
13 | def cat(tensors, dim=0):
14 |     """
15 |     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
16 |     """
17 |     assert isinstance(tensors, (list, tuple))
18 |     if len(tensors) == 1:
19 |         return tensors[0]
20 |     return torch.cat(tensors, dim)
21 | 
--------------------------------------------------------------------------------
/wetectron/solver/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from .build import make_optimizer, make_cdb_optimizer
11 | from .build import make_lr_scheduler, make_lr_cdb_scheduler
12 | from .lr_scheduler import WarmupMultiStepLR
13 | 
--------------------------------------------------------------------------------
/wetectron/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
8 | from .distributed import DistributedSampler, WeightedDistributedSampler
9 | from .grouped_batch_sampler import GroupedBatchSampler
10 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
11 | 
12 | __all__ = ["WeightedDistributedSampler", "DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
13 | 
--------------------------------------------------------------------------------
/wetectron/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from .transforms import Compose
11 | from .transforms import Resize
12 | from .transforms import RandomHorizontalFlip
13 | from .transforms import ToTensor
14 | from .transforms import Normalize
15 | 
16 | from .build import build_transforms
17 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__*
2 | *.so
3 | *.nfs*
4 | 
5 | apex/*
6 | build/*
7 | outputs/*
8 | wetectron.egg-info/*
9 | 
10 | # compilation and distribution
11 | __pycache__
12 | _ext
13 | *.pyc
14 | *.so
15 | maskrcnn_benchmark.egg-info/
16 | wetectron.egg-info/
17 | build/
18 | proposal
19 | datasets
20 | !wetectron/data/datasets
21 | dist/
22 | cache/
23 | apex/
24 | cocoapi/
25 | output/
26 | runs/
27 | configs/weak_old
28 | configs/voc_ignore
29 | outputs
30 | 
31 | 
32 | # pytorch/python/numpy formats
33 | *.pth
34 | *.pkl
35 | *.npy
36 | 
37 | # ipython/jupyter notebooks
38 | *.ipynb
39 | **/.ipynb_checkpoints/
40 | 
41 | # Editor temporaries
42 | *.swn
43 | *.swo
44 | *.swp
45 | *~
46 | 
47 | # Pycharm editor settings
48 | .idea
49 | 
50 | # vscode editor settings
51 | .vscode
52 | 
53 | # MacOS
54 | .DS_Store
55 | 
--------------------------------------------------------------------------------
/wetectron/layers/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | import torch
7 | 
8 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True, reduction=True):
9 |     """
10 |     very similar to the smooth_l1_loss from pytorch, but with
11 |     the extra beta parameter
12 |     """
13 |     n = torch.abs(input - target)
14 |     cond = n < beta
15 |     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
16 |     if not reduction:
17 |         return loss
18 |     if size_average:
19 |         return loss.mean()
20 |     return loss.sum()
21 | 
--------------------------------------------------------------------------------
/launch_mist_coco14.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | source ~/.bashrc
8 | conda deactivate
9 | conda activate bib
10 | 
11 | NUM_GPUS="$1"
12 | OUTPUT_DIR="$2"
13 | 
14 | python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS tools/train_active_net.py \
15 |     --config-file "configs/coco/V_16_coco14.yaml" --use-tensorboard \
16 |     SOLVER.CHECKPOINT_PERIOD 20000 OUTPUT_DIR "$OUTPUT_DIR" \
17 |     SOLVER.ITER_SIZE "$((8/NUM_GPUS))" SOLVER.IMS_PER_BATCH "$NUM_GPUS" TEST.IMS_PER_BATCH "$((2*NUM_GPUS))"
18 | 
--------------------------------------------------------------------------------
/launch_mist_voc07.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | source ~/.bashrc
8 | conda deactivate
9 | conda activate bib
10 | 
11 | NUM_GPUS="$1"
12 | OUTPUT_DIR="$2"
13 | 
14 | python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS tools/train_active_net.py \
15 |     --config-file "configs/voc/V_16_voc07.yaml" --use-tensorboard \
16 |     SOLVER.CHECKPOINT_PERIOD 6000 OUTPUT_DIR "$OUTPUT_DIR" \
17 |     SOLVER.ITER_SIZE "$((8/NUM_GPUS))" SOLVER.IMS_PER_BATCH "$NUM_GPUS" TEST.IMS_PER_BATCH "$((2*NUM_GPUS))"
18 | 
--------------------------------------------------------------------------------
/wetectron/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | from .coco_eval import do_coco_evaluation
6 | 
7 | 
8 | def coco_evaluation(
9 |     dataset,
10 |     predictions,
11 |     output_folder,
12 |     box_only,
13 |     iou_types,
14 |     expected_results,
15 |     expected_results_sigma_tol,
16 | ):
17 |     return do_coco_evaluation(
18 |         dataset=dataset,
19 |         predictions=predictions,
20 |         box_only=box_only,
21 |         output_folder=output_folder,
22 |         iou_types=iou_types,
23 |         expected_results=expected_results,
24 |         expected_results_sigma_tol=expected_results_sigma_tol,
25 |     )
26 | 
--------------------------------------------------------------------------------
/wetectron/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
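# Typical use is a one-off environment dump at start-up, e.g. (sketch):
#
#   import logging
#   logging.getLogger(__name__).info("Environment:\n" + collect_env_info())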
10 | import PIL
11 | 
12 | from torch.utils.collect_env import get_pretty_env_info
13 | 
14 | 
15 | def get_pil_version():
16 |     return "\n Pillow ({})".format(PIL.__version__)
17 | 
18 | 
19 | def collect_env_info():
20 |     env_str = get_pretty_env_info()
21 |     env_str += get_pil_version()
22 |     return env_str
23 | 
--------------------------------------------------------------------------------
/wetectron/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // Code taken from https://github.com/NVlabs/wetectron
3 | //------------------------------------------------------------------------------
4 | 
5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | #pragma once
7 | #include <torch/extension.h>
8 | 
9 | 
10 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
11 |                                 const at::Tensor& rois,
12 |                                 const float spatial_scale,
13 |                                 const int pooled_height,
14 |                                 const int pooled_width,
15 |                                 const int sampling_ratio);
16 | 
17 | 
18 | at::Tensor nms_cpu(const at::Tensor& dets,
19 |                    const at::Tensor& scores,
20 |                    const float threshold);
21 | 
--------------------------------------------------------------------------------
/wetectron/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
8 | from .coco import COCODataset
9 | from .voc import PascalVOCDataset
10 | from .concat_dataset import ConcatDataset
11 | 
12 | def _isinstance(dataset, dataset_type):
13 |     if isinstance(dataset, ConcatDataset):
14 |         membership = [isinstance(dataset.datasets[i], dataset_type) for i in range(len(dataset.datasets))]
15 |         assert(membership.count(membership[0]) == len(membership))
16 |         return membership[0]
17 |     else:
18 |         return isinstance(dataset, dataset_type)
19 | 
20 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "_isinstance"]
21 | 
--------------------------------------------------------------------------------
/wetectron/modeling/registry.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
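# Sketch of how these registries are used elsewhere in the code base,
# assuming the dict-like Registry of wetectron.utils.registry whose
# register() can be used as a class decorator ("MISTPredictor" is the
# predictor name referenced by the configs in this repository):
#
#   from torch import nn
#   from wetectron.modeling import registry
#
#   @registry.ROI_WEAK_PREDICTOR.register("MISTPredictor")
#   class MISTPredictor(nn.Module):
#       ...
#
#   predictor_cls = registry.ROI_WEAK_PREDICTOR[cfg.MODEL.ROI_WEAK_HEAD.PREDICTOR]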
10 | 
11 | from wetectron.utils.registry import Registry
12 | 
13 | BACKBONES = Registry()
14 | RPN_HEADS = Registry()
15 | ROI_BOX_FEATURE_EXTRACTORS = Registry()
16 | ROI_BOX_PREDICTOR = Registry()
17 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry()
18 | ROI_KEYPOINT_PREDICTOR = Registry()
19 | ROI_MASK_FEATURE_EXTRACTORS = Registry()
20 | ROI_MASK_PREDICTOR = Registry()
21 | 
22 | ROI_WEAK_PREDICTOR = Registry()
23 | ROI_WEAK_LOSS = Registry()
--------------------------------------------------------------------------------
/wetectron/csrc/nms.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // Code taken from https://github.com/NVlabs/wetectron
3 | //------------------------------------------------------------------------------
4 | 
5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | #pragma once
7 | #include "cpu/vision.h"
8 | 
9 | #ifdef WITH_CUDA
10 | #include "cuda/vision.h"
11 | #endif
12 | 
13 | 
14 | at::Tensor nms(const at::Tensor& dets,
15 |                const at::Tensor& scores,
16 |                const float threshold) {
17 | 
18 |   if (dets.is_cuda()) {
19 | #ifdef WITH_CUDA
20 |     // TODO raise error if not compiled with CUDA
21 |     if (dets.numel() == 0)
22 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
23 |     auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
24 |     return nms_cuda(b, threshold);
25 | #else
26 |     AT_ERROR("Not compiled with GPU support");
27 | #endif
28 |   }
29 | 
30 |   at::Tensor result = nms_cpu(dets, scores, threshold);
31 |   return result;
32 | }
33 | 
--------------------------------------------------------------------------------
/wetectron/utils/cv2_util.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | """
10 | Module for cv2 utility functions and maintaining version compatibility
11 | between 3.x and 4.x
12 | """
13 | import cv2
14 | 
15 | 
16 | def findContours(*args, **kwargs):
17 |     """
18 |     Wraps cv2.findContours to maintain compatibility between versions
19 |     3 and 4
20 | 
21 |     Returns:
22 |         contours, hierarchy
23 |     """
24 |     if cv2.__version__.startswith('4'):
25 |         contours, hierarchy = cv2.findContours(*args, **kwargs)
26 |     elif cv2.__version__.startswith('3'):
27 |         _, contours, hierarchy = cv2.findContours(*args, **kwargs)
28 |     else:
29 |         raise AssertionError(
30 |             'cv2 must be either version 3 or 4 to call this method')
31 | 
32 |     return contours, hierarchy
33 | 
--------------------------------------------------------------------------------
/configs/coco/V_16_coco14.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     SCORE_THRESH: 0.0
10 |     NMS: 0.3
11 |   ROI_BOX_HEAD:
12 |     POOLER_METHOD: "ROIPool"
13 |     POOLER_RESOLUTION: 7
14 |     POOLER_SCALES: (0.125,)
15 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
16 |   ROI_WEAK_HEAD:
17 |     PREDICTOR: "MISTPredictor"
18 |     LOSS: "RoIRegLoss"
19 |     OICR_P: 0.15
20 |     REGRESS_ON: True
21 | DATASETS:
22 |   TRAIN: ("coco_2014_train",)
23 |   TEST: ("coco_2014_val",)
24 | PROPOSAL_FILES:
25 |   TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl',)
26 |   TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',)
27 | DATALOADER:
28 |   SIZE_DIVISIBILITY: 32
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
31 |   MAX_SIZE_TRAIN: 2000
32 |   MIN_SIZE_TEST: 800
33 |   MAX_SIZE_TEST: 2000
34 | SOLVER:
35 |   IMS_PER_BATCH: 8
36 |   BASE_LR: 0.01
37 |   WEIGHT_DECAY: 0.0001
38 |   STEPS: (90000, 120000)
39 |   MAX_ITER: 130000
40 |   CHECKPOINT_PERIOD: 10000
41 | TEST:
42 |   BBOX_AUG:
43 |     ENABLED: True
44 |     HEUR: "AVG"
45 |     H_FLIP: True
46 |     SCALES: (480, 576, 688, 864, 1000, 1200)
47 |     MAX_SIZE: 2000
48 |     SCALE_H_FLIP: True
--------------------------------------------------------------------------------
/wetectron/data/datasets/list_dataset.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
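# Usage sketch for the ListDataset defined below (hypothetical paths);
# every item pairs the image with a dummy whole-image BoxList target:
#
#   dataset = ListDataset(["/path/to/a.jpg", "/path/to/b.jpg"])
#   img, target = dataset[0]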
6 | """ 7 | Simple dataset class that wraps a list of path names 8 | """ 9 | 10 | from PIL import Image 11 | 12 | from wetectron.structures.bounding_box import BoxList 13 | 14 | 15 | class ListDataset(object): 16 | def __init__(self, image_lists, transforms=None): 17 | self.image_lists = image_lists 18 | self.transforms = transforms 19 | 20 | def __getitem__(self, item): 21 | img = Image.open(self.image_lists[item]).convert("RGB") 22 | 23 | # dummy target 24 | w, h = img.size 25 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 26 | 27 | if self.transforms is not None: 28 | img, target = self.transforms(img, target) 29 | 30 | return img, target 31 | 32 | def __len__(self): 33 | return len(self.image_lists) 34 | 35 | def get_img_info(self, item): 36 | """ 37 | Return the image dimensions for the image, without 38 | loading and pre-processing it 39 | """ 40 | pass 41 | -------------------------------------------------------------------------------- /wetectron/utils/imports.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | import torch 11 | 12 | if torch._six.PY3: 13 | import importlib 14 | import importlib.util 15 | import sys 16 | 17 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 18 | def import_file(module_name, file_path, make_importable=False): 19 | spec = importlib.util.spec_from_file_location(module_name, file_path) 20 | module = importlib.util.module_from_spec(spec) 21 | spec.loader.exec_module(module) 22 | if make_importable: 23 | sys.modules[module_name] = module 24 | return module 25 | else: 26 | import imp 27 | 28 | def import_file(module_name, file_path, make_importable=None): 29 | module = imp.load_source(module_name, file_path) 30 | return module 31 | -------------------------------------------------------------------------------- /configs/coco/V_16_coco17.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.0 10 | NMS: 0.3 11 | ROI_BOX_HEAD: 12 | POOLER_METHOD: "ROIPool" 13 | POOLER_RESOLUTION: 7 14 | POOLER_SCALES: (0.125,) 15 | FEATURE_EXTRACTOR: "VGG16.roi_head" 16 | ROI_WEAK_HEAD: 17 | PREDICTOR: "MISTPredictor" 18 | LOSS: "RoIRegLoss" 19 | OICR_P: 0.15 20 | REGRESS_ON: True 21 | DATASETS: 22 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 23 | TEST: ("coco_2014_minival",) 24 | PROPOSAL_FILES: 25 | TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl', 'proposal/MCG-coco_2014_valminusminival-boxes.pkl') 26 | TEST: ('proposal/MCG-coco_2014_minival-boxes.pkl',) 27 | DATALOADER: 28 | SIZE_DIVISIBILITY: 32 29 | INPUT: 30 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 31 | MAX_SIZE_TRAIN: 2000 32 | MIN_SIZE_TEST: 800 33 
| MAX_SIZE_TEST: 2000 34 | SOLVER: 35 | IMS_PER_BATCH: 8 36 | BASE_LR: 0.01 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (90000, 120000) 39 | MAX_ITER: 130000 40 | CHECKPOINT_PERIOD: 10000 41 | TEST: 42 | BBOX_AUG: 43 | ENABLED: True 44 | HEUR: "AVG" 45 | H_FLIP: True 46 | SCALES: (480, 576, 688, 864, 1000, 1200) 47 | MAX_SIZE: 2000 48 | SCALE_H_FLIP: True -------------------------------------------------------------------------------- /wetectron/layers/_utils.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import glob 7 | import os.path 8 | import torch 9 | from torch.utils.cpp_extension import load as load_ext 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | 12 | 13 | def _load_C_extensions(): 14 | this_dir = os.path.dirname(os.path.abspath(__file__)) 15 | this_dir = os.path.dirname(this_dir) 16 | this_dir = os.path.join(this_dir, "csrc") 17 | 18 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 19 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 21 | 22 | source = main_file + source_cpu 23 | 24 | extra_cflags = [] 25 | if torch.cuda.is_available() and CUDA_HOME is not None: 26 | source.extend(source_cuda) 27 | extra_cflags = ["-DWITH_CUDA"] 28 | source = [os.path.join(this_dir, s) for s in source] 29 | extra_include_paths = [this_dir] 30 | return load_ext( 31 | "torchvision", 32 | source, 33 | extra_cflags=extra_cflags, 34 | extra_include_paths=extra_include_paths, 35 | ) 36 | 37 | 38 | _C = _load_C_extensions() 39 | -------------------------------------------------------------------------------- /configs/coco/V_16_coco14_point.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | BATCH_SIZE_PER_IMAGE: 1024 10 | SCORE_THRESH: 0.0 11 | NMS: 0.3 12 | ROI_BOX_HEAD: 13 | POOLER_METHOD: "ROIPool" 14 | POOLER_RESOLUTION: 7 15 | POOLER_SCALES: (0.125,) 16 | FEATURE_EXTRACTOR: "VGG16.roi_head" 17 | ROI_WEAK_HEAD: 18 | PARTIAL_LABELS: 'point' 19 | ROI_LOSS_REFINE: True 20 | PREDICTOR: "MISTPredictor" 21 | LOSS: "RoIRegLoss" 22 | OICR_P: 0.15 23 | REGRESS_ON: True 24 | DATASETS: 25 | TRAIN: ("coco_2014_train_partial", ) 26 | TEST: ("coco_2014_val",) 27 | PROPOSAL_FILES: 28 | TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl', ) 29 | TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',) 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | INPUT: 33 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 34 | MAX_SIZE_TRAIN: 2000 35 | MIN_SIZE_TEST: 800 36 | MAX_SIZE_TEST: 2000 37 | SOLVER: 38 | IMS_PER_BATCH: 8 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (90000, 120000) 42 | MAX_ITER: 130000 43 | CHECKPOINT_PERIOD: 10000 44 | TEST: 45 | BBOX_AUG: 46 | ENABLED: True 47 | HEUR: "AVG" 48 | H_FLIP: True 49 | SCALES: (480, 576, 688, 864, 1000, 1200) 50 | MAX_SIZE: 2000 51 | SCALE_H_FLIP: True -------------------------------------------------------------------------------- /configs/coco/V_16_coco14_scribble.yaml: 
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     BATCH_SIZE_PER_IMAGE: 1024
10 |     SCORE_THRESH: 0.0
11 |     NMS: 0.3
12 |   ROI_BOX_HEAD:
13 |     POOLER_METHOD: "ROIPool"
14 |     POOLER_RESOLUTION: 7
15 |     POOLER_SCALES: (0.125,)
16 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
17 |   ROI_WEAK_HEAD:
18 |     PARTIAL_LABELS: 'scribble'
19 |     ROI_LOSS_REFINE: True
20 |     PREDICTOR: "MISTPredictor"
21 |     LOSS: "RoIRegLoss"
22 |     OICR_P: 0.15
23 |     REGRESS_ON: True
24 | DATASETS:
25 |   TRAIN: ("coco_2014_train_partial", )
26 |   TEST: ("coco_2014_val",)
27 | PROPOSAL_FILES:
28 |   TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl', )
29 |   TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',)
30 | DATALOADER:
31 |   SIZE_DIVISIBILITY: 32
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
34 |   MAX_SIZE_TRAIN: 2000
35 |   MIN_SIZE_TEST: 800
36 |   MAX_SIZE_TEST: 2000
37 | SOLVER:
38 |   IMS_PER_BATCH: 8
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (90000, 120000)
42 |   MAX_ITER: 130000
43 |   CHECKPOINT_PERIOD: 10000
44 | TEST:
45 |   BBOX_AUG:
46 |     ENABLED: True
47 |     HEUR: "AVG"
48 |     H_FLIP: True
49 |     SCALES: (480, 576, 688, 864, 1000, 1200)
50 |     MAX_SIZE: 2000
51 |     SCALE_H_FLIP: True
--------------------------------------------------------------------------------
/wetectron/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // Code taken from https://github.com/NVlabs/wetectron
3 | //------------------------------------------------------------------------------
4 | 
5 | #pragma once
6 | 
7 | #include "cpu/vision.h"
8 | 
9 | #ifdef WITH_CUDA
10 | #include "cuda/vision.h"
11 | #endif
12 | 
13 | // Interface for Python
14 | at::Tensor SigmoidFocalLoss_forward(
15 |     const at::Tensor& logits,
16 |     const at::Tensor& targets,
17 |     const int num_classes,
18 |     const float gamma,
19 |     const float alpha) {
20 |   if (logits.is_cuda()) {
21 | #ifdef WITH_CUDA
22 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
23 | #else
24 |     AT_ERROR("Not compiled with GPU support");
25 | #endif
26 |   }
27 |   AT_ERROR("Not implemented on the CPU");
28 | }
29 | 
30 | at::Tensor SigmoidFocalLoss_backward(
31 |     const at::Tensor& logits,
32 |     const at::Tensor& targets,
33 |     const at::Tensor& d_losses,
34 |     const int num_classes,
35 |     const float gamma,
36 |     const float alpha) {
37 |   if (logits.is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
--------------------------------------------------------------------------------
/wetectron/layers/batch_norm.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
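# Note on the forward pass below: with frozen statistics, batch norm is a
# fixed affine map, y = (x - running_mean) / sqrt(running_var) * weight + bias,
# which the code folds into y = x * scale + shift, where
# scale = weight * running_var.rsqrt() and shift = bias - running_mean * scale
# (no epsilon term, matching the implementation as written).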
6 | import torch
7 | from torch import nn
8 | 
9 | 
10 | class FrozenBatchNorm2d(nn.Module):
11 |     """
12 |     BatchNorm2d where the batch statistics and the affine parameters
13 |     are fixed
14 |     """
15 | 
16 |     def __init__(self, n):
17 |         super(FrozenBatchNorm2d, self).__init__()
18 |         self.register_buffer("weight", torch.ones(n))
19 |         self.register_buffer("bias", torch.zeros(n))
20 |         self.register_buffer("running_mean", torch.zeros(n))
21 |         self.register_buffer("running_var", torch.ones(n))
22 | 
23 |     def forward(self, x):
24 |         # Cast all fixed parameters to half() if necessary
25 |         if x.dtype == torch.float16:
26 |             self.weight = self.weight.half()
27 |             self.bias = self.bias.half()
28 |             self.running_mean = self.running_mean.half()
29 |             self.running_var = self.running_var.half()
30 | 
31 |         scale = self.weight * self.running_var.rsqrt()
32 |         bias = self.bias - self.running_mean * scale
33 |         scale = scale.reshape(1, -1, 1, 1)
34 |         bias = bias.reshape(1, -1, 1, 1)
35 |         return x * scale + bias
36 | 
--------------------------------------------------------------------------------
/inference_scripts/run_inference_coco14_val.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
5 | 
6 | source ~/.bashrc
7 | conda deactivate
8 | conda activate bib
9 | 
10 | echo "$(which conda)"
11 | echo "$(which python)"
12 | 
13 | if [ -z "$3" ]
14 | then
15 |     num_gpus=1
16 | else
17 |     num_gpus="$3"
18 | fi
19 | 
20 | if [ -z "$4" ]
21 | then
22 |     enable_aug="False"
23 | else
24 |     enable_aug="$4"
25 | fi
26 | 
27 | if [ -z "$5" ]
28 | then
29 |     test_per_gpu=2
30 | else
31 |     test_per_gpu="$5"
32 | fi
33 | 
34 | if [ -z "$6" ]
35 | then
36 |     PORT=12124 # master port must be below 65536
37 | else
38 |     PORT="$6"
39 | fi
40 | 
41 | exp_name="$1"
42 | model_name="$2"
43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT"
44 | 
45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \
46 |     --config-file "$exp_name"/config.yml \
47 |     MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \
48 |     OUTPUT_DIR "$exp_name" \
49 |     TEST.IMS_PER_BATCH $[2*$num_gpus] \
50 |     TEST.BBOX_AUG.ENABLED "$enable_aug" \
51 |     TEST.CONCAT_DATASETS True \
52 |     DATASETS.TEST '("coco_2014_val", )' \
53 |     PROPOSAL_FILES.TEST '("proposal/MCG-coco_2014_val-boxes.pkl", )'
54 | 
--------------------------------------------------------------------------------
/inference_scripts/run_inference_voc07_test.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
5 | 
6 | source ~/.bashrc
7 | conda deactivate
8 | conda activate bib
9 | 
10 | echo "$(which conda)"
11 | echo "$(which python)"
12 | 
13 | if [ -z "$3" ]
14 | then
15 |     num_gpus=1
16 | else
17 |     num_gpus="$3"
18 | fi
19 | 
20 | if [ -z "$4" ]
21 | then
22 |     enable_aug="False"
23 | else
24 |     enable_aug="$4"
25 | fi
26 | 
27 | if [ -z "$5" ]
28 | then
29 |     test_per_gpu=2
30 | else
31 |     test_per_gpu="$5"
32 | fi
33 | 
34 | if [ -z "$6" ]
35 | then
36 |     PORT=12124 # master port must be below 65536
37 | else
38 |     PORT="$6"
39 | fi
40 | 
41 | exp_name="$1"
42 | model_name="$2"
43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT"
44 | 
45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \
46 |     --config-file "$exp_name"/config.yml \
47 |     MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \
48 |     OUTPUT_DIR "$exp_name" \
49 |     TEST.IMS_PER_BATCH $[$test_per_gpu*$num_gpus] \
50 |     TEST.BBOX_AUG.ENABLED "$enable_aug" \
51 |     TEST.CONCAT_DATASETS "True" \
52 |     DATASETS.TEST '("voc_2007_test",)' \
53 |     PROPOSAL_FILES.TEST '("proposal/SS-voc_2007_test-boxes.pkl",)' \
54 |     ACTIVE.INPUT_FILE None
55 | 
--------------------------------------------------------------------------------
/wetectron/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | from wetectron.data import datasets
8 | from .coco import coco_evaluation
9 | from .voc import voc_evaluation
10 | 
11 | def evaluate(dataset, predictions, output_folder, task='det', **kwargs):
12 |     """
13 |     Evaluate dataset using different methods based on dataset type.
14 |     Args:
15 |         dataset: Dataset object
16 |         predictions(list[BoxList]): each item in the list represents the
17 |             prediction results for one image.
18 |         output_folder: output folder, to save evaluation files or results.
19 |         **kwargs: other args.
20 |     Returns:
21 |         evaluation result
22 |     """
23 |     args = dict(
24 |         dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
25 |     )
26 |     if datasets._isinstance(dataset, datasets.COCODataset) and "voc_2012" not in dataset.ann_file:
27 |         return coco_evaluation(**args)
28 |     elif datasets._isinstance(dataset, datasets.PascalVOCDataset) or "voc_2012" in dataset.ann_file:
29 |         args['task'] = task
30 |         return voc_evaluation(**args)
31 |     else:
32 |         dataset_name = dataset.__class__.__name__
33 |         raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
34 | 
--------------------------------------------------------------------------------
/wetectron/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
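# A minimal usage sketch (hypothetical sizes, standard torch samplers):
#
#   from torch.utils.data.sampler import BatchSampler, RandomSampler
#
#   sampler = RandomSampler(range(1000))
#   batches = BatchSampler(sampler, batch_size=8, drop_last=False)
#   # cycles over `batches` until exactly 30000 batches have been yielded
#   train_batches = IterationBasedBatchSampler(batches, num_iterations=30000)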
6 | from torch.utils.data.sampler import BatchSampler
7 | 
8 | 
9 | class IterationBasedBatchSampler(BatchSampler):
10 |     """
11 |     Wraps a BatchSampler, resampling from it until
12 |     a specified number of iterations have been sampled
13 |     """
14 | 
15 |     def __init__(self, batch_sampler, num_iterations, start_iter=0):
16 |         self.batch_sampler = batch_sampler
17 |         self.num_iterations = num_iterations
18 |         self.start_iter = start_iter
19 | 
20 |     def __iter__(self):
21 |         iteration = self.start_iter
22 |         while iteration <= self.num_iterations:
23 |             # if the underlying sampler has a set_epoch method, like
24 |             # DistributedSampler, used for making each process see
25 |             # a different split of the dataset, then set it
26 |             if hasattr(self.batch_sampler.sampler, "set_epoch"):
27 |                 self.batch_sampler.sampler.set_epoch(iteration)
28 |             for batch in self.batch_sampler:
29 |                 iteration += 1
30 |                 if iteration > self.num_iterations:
31 |                     break
32 |                 yield batch
33 | 
34 |     def __len__(self):
35 |         return self.num_iterations
36 | 
--------------------------------------------------------------------------------
/summarize_performance.sh:
--------------------------------------------------------------------------------
1 | # Read arguments
2 | 
3 | DATASET="$1" # 'voc07' or 'coco14'
4 | OUTPUT_DIR="$2"
5 | NUM_CYCLE="$3"
6 | BUDGET="$4"
7 | 
8 | echo "Average and standard deviation of the performance on "$OUTPUT_DIR":"
9 | if [[ "$DATASET" == "voc07" ]]
10 | then
11 |     for cycle in $(seq 1 $NUM_CYCLE)
12 |     do
13 |         echo -n "Cycle $cycle: "
14 |         find $OUTPUT_DIR -wholename "*ver*_"$((cycle * BUDGET))"_images/inference*test*/result.txt" -not -wholename "*bbox_aug/result*" \
15 |             -exec head -1 {} \; | awk '{m+=$2;n+=(($2)^2)}END{printf "%.1f +/- %.1f\n", 100*m/NR, 100*sqrt(n/(NR) - ((m/NR)^2) ) }'
16 |     done
17 | elif [[ "$DATASET" == "coco14" ]]
18 | then
19 |     echo "Cycle AP AP50"
20 |     for cycle in $(seq 1 $NUM_CYCLE)
21 |     do
22 |         echo -n "$cycle" " "
23 |         for f in $(find $OUTPUT_DIR -wholename "*cycle"$cycle"*val*/coco_results*" -not -wholename "*bbox_aug/coco*")
24 |         do
25 |             python -c "import torch; a = torch.load('"$f"'); print(a.results['bbox']['AP']*100)"
26 |         done | awk '{m+=$1;n+=(($1)^2 )}END{printf "%.1f +/- %.1f %-3s", m/NR, sqrt(n/NR - (m/NR)^2 ), " " }'
27 | 
28 |         for f in $(find $OUTPUT_DIR -wholename "*cycle"$cycle"*val*/coco_results*" -not -wholename "*bbox_aug/coco*")
29 |         do
30 |             python -c "import torch; a = torch.load('"$f"'); print(a.results['bbox']['AP50']*100)"
31 |         done | awk '{m+=$1;n+=(($1)^2 )}END{printf "%.1f +/- %.1f\n", m/NR, sqrt(n/NR - (m/NR)^2 ) }'
32 |     done
33 | else
34 |     echo "DATASET="$DATASET" is not supported!" >&2
35 |     exit 1
36 | fi
--------------------------------------------------------------------------------
/configs/voc/V_16_voc07.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     SCORE_THRESH: 0.0
10 |     NMS: 0.3
11 |   ROI_BOX_HEAD:
12 |     NUM_CLASSES: 21
13 |     POOLER_METHOD: "ROIPool"
14 |     POOLER_RESOLUTION: 7
15 |     POOLER_SCALES: (0.125,)
16 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
17 |   ROI_WEAK_HEAD:
18 |     PREDICTOR: "MISTPredictor"
19 |     LOSS: "RoIRegLoss"
20 |     OICR_P: 0.15
21 |     REGRESS_ON: True
22 | DB:
23 |   METHOD: 'concrete'
24 | DATASETS:
25 |   TRAIN: ("voc_2007_train", "voc_2007_val")
26 |   TEST: ("voc_2007_test",)
27 | PROPOSAL_FILES:
28 |   TRAIN: ('proposal/SS-voc_2007_train-boxes.pkl','proposal/SS-voc_2007_val-boxes.pkl')
29 |   TEST: ('proposal/SS-voc_2007_test-boxes.pkl',)
30 | DATALOADER:
31 |   SIZE_DIVISIBILITY: 32
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
34 |   MAX_SIZE_TRAIN: 2000
35 |   MIN_SIZE_TEST: 800
36 |   MAX_SIZE_TEST: 2000
37 | SOLVER:
38 |   IMS_PER_BATCH: 8
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   WARMUP_ITERS: 200
42 |   STEPS: (20000, 26700)
43 |   MAX_ITER: 30000
44 |   CHECKPOINT_PERIOD: 1000
45 | SOLVER_CDB:
46 |   BASE_LR: 0.0001
47 |   GAMMA: 3.0
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (20000, 26700)
50 | TEST:
51 |   BBOX_AUG:
52 |     ENABLED: True
53 |     HEUR: "AVG"
54 |     H_FLIP: True
55 |     SCALES: (480, 576, 688, 864, 1000, 1200)
56 |     MAX_SIZE: 2000
57 |     SCALE_H_FLIP: True
58 | SEED: 1234
--------------------------------------------------------------------------------
/configs/voc/V_16_voc12.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     SCORE_THRESH: 0.0
10 |     NMS: 0.3
11 |   ROI_BOX_HEAD:
12 |     NUM_CLASSES: 21
13 |     POOLER_METHOD: "ROIPool"
14 |     POOLER_RESOLUTION: 7
15 |     POOLER_SCALES: (0.125,)
16 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
17 |   ROI_WEAK_HEAD:
18 |     PREDICTOR: "MISTPredictor"
19 |     LOSS: "RoIRegLoss"
20 |     OICR_P: 0.15
21 |     REGRESS_ON: True
22 | DB:
23 |   METHOD: 'concrete'
24 | DATASETS:
25 |   TRAIN: ("voc_2012_train", "voc_2012_val")
26 |   TEST: ("voc_2007_test",)
27 | PROPOSAL_FILES:
28 |   TRAIN: ('proposal/SS-voc_2012_train-boxes.pkl','proposal/SS-voc_2012_val-boxes.pkl')
29 |   TEST: ('proposal/SS-voc_2007_test-boxes.pkl',)
30 | DATALOADER:
31 |   SIZE_DIVISIBILITY: 32
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
34 |   MAX_SIZE_TRAIN: 2000
35 |   MIN_SIZE_TEST: 800
36 |   MAX_SIZE_TEST: 2000
37 | SOLVER:
38 |   IMS_PER_BATCH: 16
39 |   BASE_LR: 0.02
40 |   WEIGHT_DECAY: 0.0001
41 |   WARMUP_ITERS: 200
42 |   STEPS: (20000, 26700)
43 |   MAX_ITER: 30000
44 |   CHECKPOINT_PERIOD: 1000
45 | SOLVER_CDB:
46 |   BASE_LR: 0.0001
47 |   GAMMA: 3.0
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (20000, 26700)
50 | TEST:
51 |   BBOX_AUG:
52 |     ENABLED: True
53 |     HEUR: "AVG"
54 |     H_FLIP: True
55 |     SCALES: (480, 576, 688, 864, 1000, 1200)
56 |     MAX_SIZE: 2000
57 |     SCALE_H_FLIP: True
58 | SEED: 1234
--------------------------------------------------------------------------------
/inference_scripts/run_inference_coco14_train.sh:
-------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code developed by Huy V. Vo and Oriane Simeoni 3 | # INRIA, Valeo.ai 4 | #------------------------------------------------------------------------------ 5 | 6 | source ~/.bashrc 7 | conda deactivate 8 | conda activate bib 9 | 10 | echo "$(which conda)" 11 | echo "$(which python)" 12 | 13 | if [ -z "$3" ] 14 | then 15 | num_gpus=1 16 | else 17 | num_gpus="$3" 18 | fi 19 | 20 | if [ -z "$4" ] 21 | then 22 | enable_aug="False" 23 | else 24 | enable_aug="$4" 25 | fi 26 | 27 | if [ -z "$5" ] 28 | then 29 | test_per_gpu=2 30 | else 31 | test_per_gpu="$5" 32 | fi 33 | 34 | if [ -z "$6" ] 35 | then 36 | PORT=82124 37 | else 38 | PORT="$6" 39 | fi 40 | 41 | exp_name="$1" 42 | model_name="$2" 43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT" 44 | 45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \ 46 | --config-file "$exp_name"/config.yml \ 47 | MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \ 48 | OUTPUT_DIR "$exp_name" \ 49 | TEST.IMS_PER_BATCH $[2*$num_gpus] \ 50 | TEST.BBOX_AUG.ENABLED "$enable_aug" \ 51 | TEST.RETURN_LOSS True \ 52 | TEST.CONCAT_DATASETS True \ 53 | DATASETS.TEST '("coco_2014_train", )' \ 54 | TEST.REMOVE_IMAGES_WITHOUT_ANNOTATIONS True \ 55 | TEST.RUN_EVALUATION False \ 56 | PROPOSAL_FILES.TEST '("proposal/MCG-coco_2014_train-boxes.pkl", )' 57 | -------------------------------------------------------------------------------- /inference_scripts/run_inference_voc07_trainval.sh: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code developed by Huy V. 
Vo and Oriane Simeoni 3 | # INRIA, Valeo.ai 4 | #------------------------------------------------------------------------------ 5 | 6 | source ~/.bashrc 7 | conda deactivate 8 | conda activate bib 9 | 10 | echo "$(which conda)" 11 | echo "$(which python)" 12 | 13 | if [ -z "$3" ] 14 | then 15 | num_gpus=1 16 | else 17 | num_gpus="$3" 18 | fi 19 | 20 | if [ -z "$4" ] 21 | then 22 | enable_aug="False" 23 | else 24 | enable_aug="$4" 25 | fi 26 | 27 | if [ -z "$5" ] 28 | then 29 | test_per_gpu=2 30 | else 31 | test_per_gpu="$5" 32 | fi 33 | 34 | if [ -z "$6" ] 35 | then 36 | PORT=29500 # default master port; must be a valid TCP port (<= 65535) 37 | else 38 | PORT="$6" 39 | fi 40 | 41 | exp_name="$1" 42 | model_name="$2" 43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT" 44 | 45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \ 46 | --config-file "$exp_name"/config.yml \ 47 | MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \ 48 | OUTPUT_DIR "$exp_name" \ 49 | TEST.IMS_PER_BATCH $((test_per_gpu*num_gpus)) \ 50 | TEST.BBOX_AUG.ENABLED "$enable_aug" \ 51 | TEST.CONCAT_DATASETS "True" \ 52 | TEST.RETURN_LOSS "True" \ 53 | DATASETS.TEST '("voc_2007_train", "voc_2007_val")' \ 54 | TEST.REMOVE_IMAGES_WITHOUT_ANNOTATIONS True \ 55 | PROPOSAL_FILES.TEST '("proposal/SS-voc_2007_train-boxes.pkl", "proposal/SS-voc_2007_val-boxes.pkl")' 56 | -------------------------------------------------------------------------------- /wetectron/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | from wetectron.structures.image_list import to_image_list 7 | 8 | 9 | class BatchCollator(object): 10 | """ 11 | From a list of samples from the dataset, 12 | returns the batched images and targets. 13 | This should be passed to the DataLoader 14 | """ 15 | 16 | def __init__(self, size_divisible=0): 17 | self.size_divisible = size_divisible 18 | 19 | def __call__(self, batch): 20 | transposed_batch = list(zip(*batch)) 21 | images = to_image_list(transposed_batch[0], self.size_divisible) 22 | targets = transposed_batch[1] 23 | if len(transposed_batch) == 3: 24 | img_ids = transposed_batch[2] 25 | return images, targets, img_ids 26 | elif len(transposed_batch) == 4: 27 | rois = transposed_batch[2] 28 | img_ids = transposed_batch[3] 29 | return images, targets, rois, img_ids 30 | else: 31 | raise ValueError('batch items must have 3 or 4 elements, got {}'.format(len(transposed_batch))) 32 | 33 | 34 | class BBoxAugCollator(object): 35 | """ 36 | From a list of samples from the dataset, 37 | returns the images and targets. 38 | Images should be converted to batched images in `im_detect_bbox_aug` 39 | """ 40 | 41 | def __call__(self, batch): 42 | return list(zip(*batch)) 43 | 44 | -------------------------------------------------------------------------------- /wetectron/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V.
Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | import logging 8 | 9 | from .voc_eval import do_voc_evaluation 10 | from .voc_eval import calc_detection_voc_prec_rec 11 | from .voc_eval_old import do_loc_evaluation 12 | from .voc_eval_old import do_voc_evaluation as do_voc_evaluation_old 13 | 14 | def voc_evaluation(dataset, predictions, output_folder, box_only, task='det', **_): 15 | logger = logging.getLogger("maskrcnn_benchmark.inference") 16 | if box_only: 17 | logger.warning("voc evaluation does not support box_only; ignoring it.") 18 | logger.info("performing voc evaluation; iou_types is ignored.") 19 | if task == 'det': 20 | return do_voc_evaluation( 21 | dataset=dataset, 22 | predictions=predictions, 23 | output_folder=output_folder, 24 | logger=logger, 25 | ) 26 | if task == 'det_old': 27 | return do_voc_evaluation_old( 28 | dataset=dataset, 29 | predictions=predictions, 30 | output_folder=output_folder, 31 | logger=logger, 32 | ) 33 | elif task == 'corloc': 34 | return do_loc_evaluation( 35 | dataset=dataset, 36 | predictions=predictions, 37 | output_folder=output_folder, 38 | logger=logger, 39 | ) 40 | else: 41 | raise ValueError("unknown task: {}".format(task)) 42 | -------------------------------------------------------------------------------- /configs/coco/V_16_coco14_active.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.0 10 | NMS: 0.3 11 | BG_IOU_THRESHOLD: 0.3 12 | ROI_BOX_HEAD: 13 | POOLER_METHOD: "ROIPool" 14 | POOLER_RESOLUTION: 7 15 | POOLER_SCALES: (0.125,) 16 | FEATURE_EXTRACTOR: "VGG16.roi_head" 17 | ROI_WEAK_HEAD: 18 | PREDICTOR: "MISTPredictor" 19 | LOSS: "RoIRegLoss" 20 | OICR_P: 0.15 21 | REGRESS_ON: True 22 | ACTIVE_LOSS: "RoIRegActiveLoss" 23 | ACTIVE_LOSS_WEIGHT: 1.0 24 | DATASETS: 25 | TRAIN: ("coco_2014_train",) 26 | TEST: ("coco_2014_val",) 27 | PROPOSAL_FILES: 28 | TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl',) 29 | TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',) 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | INPUT: 33 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 34 | MAX_SIZE_TRAIN: 2000 35 | MIN_SIZE_TEST: 800 36 | MAX_SIZE_TEST: 2000 37 | SOLVER: 38 | IMS_PER_BATCH: 8 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (90000, 120000) 42 | MAX_ITER: 130000 43 | CHECKPOINT_PERIOD: 10000 44 | TEST: 45 | BBOX_AUG: 46 | ENABLED: True 47 | HEUR: "AVG" 48 | H_FLIP: True 49 | SCALES: (480, 576, 688, 864, 1000, 1200) 50 | MAX_SIZE: 2000 51 | SCALE_H_FLIP: True 52 | SEED: 1234 53 | ACTIVE: 54 | INPUT_FILE: None 55 | PSEUDO_BOXES_FILE: None 56 | LOSS: 57 | STRONG_LOSS_ON_MIL: False 58 | IMG_STRONG_DET_WEIGHT: 0.0 59 | WEIGHTS_BBX_LOSS: 1.0 60 | WEIGHTED_SAMPLING: True -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | from torch import nn 6 | 7 | from wetectron import layers 8 | from wetectron.modeling import registry 9 |
10 | 11 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 12 | class KeypointRCNNPredictor(nn.Module): 13 | def __init__(self, cfg, in_channels): 14 | super(KeypointRCNNPredictor, self).__init__() 15 | input_features = in_channels 16 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 17 | deconv_kernel = 4 18 | self.kps_score_lowres = layers.ConvTranspose2d( 19 | input_features, 20 | num_keypoints, 21 | deconv_kernel, 22 | stride=2, 23 | padding=deconv_kernel // 2 - 1, 24 | ) 25 | nn.init.kaiming_normal_( 26 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 27 | ) 28 | nn.init.constant_(self.kps_score_lowres.bias, 0) 29 | self.up_scale = 2 30 | self.out_channels = num_keypoints 31 | 32 | def forward(self, x): 33 | x = self.kps_score_lowres(x) 34 | x = layers.interpolate( 35 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 36 | ) 37 | return x 38 | 39 | 40 | def make_roi_keypoint_predictor(cfg, in_channels): 41 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 42 | return func(cfg, in_channels) 43 | -------------------------------------------------------------------------------- /configs/voc/V_16_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.0 10 | NMS: 0.3 11 | ROI_BOX_HEAD: 12 | NUM_CLASSES: 21 13 | POOLER_METHOD: "ROIPool" 14 | POOLER_RESOLUTION: 7 15 | POOLER_SCALES: (0.125,) 16 | FEATURE_EXTRACTOR: "VGG16.roi_head" 17 | ROI_WEAK_HEAD: 18 | PREDICTOR: "MISTPredictor" 19 | LOSS: "RoIRegLoss" 20 | OICR_P: 0.15 21 | REGRESS_ON: True 22 | DB: 23 | METHOD: 'concrete' 24 | DATASETS: 25 | TRAIN: ("voc_2007_train", "voc_2007_val", "voc_2012_train", "voc_2012_val") 26 | TEST: ("voc_2007_test",) 27 | PROPOSAL_FILES: 28 | TRAIN: ('proposal/SS-voc_2007_train-boxes.pkl','proposal/SS-voc_2007_val-boxes.pkl', 'proposal/SS-voc_2012_train-boxes.pkl','proposal/SS-voc_2012_val-boxes.pkl') 29 | TEST: ('proposal/SS-voc_2007_test-boxes.pkl',) 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | INPUT: 33 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 34 | MAX_SIZE_TRAIN: 2000 35 | MIN_SIZE_TEST: 800 36 | MAX_SIZE_TEST: 2000 37 | SOLVER: 38 | IMS_PER_BATCH: 16 39 | BASE_LR: 0.02 40 | WEIGHT_DECAY: 0.0001 41 | WARMUP_ITERS: 200 42 | STEPS: (30000, 35000) 43 | MAX_ITER: 40000 44 | CHECKPOINT_PERIOD: 1000 45 | SOLVER_CDB: 46 | BASE_LR: 0.0001 47 | GAMMA: 3.0 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (30000, 35000) 50 | TEST: 51 | BBOX_AUG: 52 | ENABLED: True 53 | HEUR: "AVG" 54 | H_FLIP: True 55 | SCALES: (480, 576, 688, 864, 1000, 1200) 56 | MAX_SIZE: 2000 57 | SCALE_H_FLIP: True 58 | SEED: 1234 -------------------------------------------------------------------------------- /wetectron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | import torch 7 | 8 | from .batch_norm import FrozenBatchNorm2d 9 | from .misc import Conv2d 10 | from .misc import DFConv2d 11 | from .misc import ConvTranspose2d 12 | from .misc import BatchNorm2d 13 | from .misc import interpolate 14 | from .nms import nms 15 | from .roi_align import ROIAlign 16 | from .roi_align import roi_align 17 | from .roi_pool import ROIPool 18 | from .roi_pool import roi_pool 19 | from .smooth_l1_loss import smooth_l1_loss 20 | from .sigmoid_focal_loss import SigmoidFocalLoss 21 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 22 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 23 | from .dcn.deform_pool_func import deform_roi_pooling 24 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 25 | 26 | 27 | __all__ = [ 28 | "nms", 29 | "roi_align", 30 | "ROIAlign", 31 | "roi_pool", 32 | "ROIPool", 33 | "smooth_l1_loss", 34 | "Conv2d", 35 | "DFConv2d", 36 | "ConvTranspose2d", 37 | "interpolate", 38 | "BatchNorm2d", 39 | "FrozenBatchNorm2d", 40 | "SigmoidFocalLoss", 41 | 'deform_conv', 42 | 'modulated_deform_conv', 43 | 'DeformConv', 44 | 'ModulatedDeformConv', 45 | 'ModulatedDeformConvPack', 46 | 'deform_roi_pooling', 47 | 'DeformRoIPooling', 48 | 'DeformRoIPoolingPack', 49 | 'ModulatedDeformRoIPoolingPack', 50 | ] 51 | 52 | -------------------------------------------------------------------------------- /wetectron/utils/timer.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
10 | import time 11 | import datetime 12 | 13 | 14 | class Timer(object): 15 | def __init__(self): 16 | self.reset() 17 | 18 | @property 19 | def average_time(self): 20 | return self.total_time / self.calls if self.calls > 0 else 0.0 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.add(time.time() - self.start_time) 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | 34 | def add(self, time_diff): 35 | self.diff = time_diff 36 | self.total_time += self.diff 37 | self.calls += 1 38 | 39 | def reset(self): 40 | self.total_time = 0.0 41 | self.calls = 0 42 | self.start_time = 0.0 43 | self.diff = 0.0 44 | 45 | def avg_time_str(self): 46 | time_str = str(datetime.timedelta(seconds=self.average_time)) 47 | return time_str 48 | 49 | 50 | def get_time_str(time_diff): 51 | time_str = str(datetime.timedelta(seconds=time_diff)) 52 | return time_str 53 | -------------------------------------------------------------------------------- /wetectron/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #include "nms.h" 7 | #include "ROIAlign.h" 8 | #include "ROIPool.h" 9 | #include "SigmoidFocalLoss.h" 10 | #include "deform_conv.h" 11 | #include "deform_pool.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("nms", &nms, "non-maximum suppression"); 15 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 16 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 17 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 18 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 19 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 20 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 21 | // dcn-v2 22 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 23 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 24 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 25 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 26 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 27 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 28 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 29 | } -------------------------------------------------------------------------------- /wetectron/utils/env.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | #
-------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | import os 11 | from wetectron.utils.imports import import_file 12 | 13 | 14 | def setup_environment(): 15 | """Perform environment setup work. The default setup is a no-op, but this 16 | function allows the user to specify a Python source file that performs 17 | custom setup work that may be necessary to their computing environment. 18 | """ 19 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 20 | if custom_module_path: 21 | setup_custom_environment(custom_module_path) 22 | else: 23 | # The default setup is a no-op 24 | pass 25 | 26 | 27 | def setup_custom_environment(custom_module_path): 28 | """Load custom environment setup from a Python source file and run the setup 29 | function. 30 | """ 31 | module = import_file("wetectron.utils.env.custom_module", custom_module_path) 32 | assert hasattr(module, "setup_environment") and callable( 33 | module.setup_environment 34 | ), ( 35 | "Custom environment module defined in {} does not have the " 36 | "required callable attribute 'setup_environment'." 37 | ).format( 38 | custom_module_path 39 | ) 40 | module.setup_environment() 41 | 42 | 43 | # Force environment setup when this module is imported 44 | setup_environment() 45 | -------------------------------------------------------------------------------- /configs/voc/V_16_voc07_active.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.001 10 | NMS: 0.3 11 | BG_IOU_THRESHOLD: 0.3 12 | ROI_BOX_HEAD: 13 | NUM_CLASSES: 21 14 | POOLER_METHOD: "ROIPool" 15 | POOLER_RESOLUTION: 7 16 | POOLER_SCALES: (0.125,) 17 | FEATURE_EXTRACTOR: "VGG16.roi_head" 18 | ROI_WEAK_HEAD: 19 | PREDICTOR: "MISTPredictor" 20 | LOSS: "RoIRegLoss" 21 | OICR_P: 0.15 22 | REGRESS_ON: True 23 | ACTIVE_LOSS: "RoIRegActiveLoss" 24 | ACTIVE_LOSS_WEIGHT: 1.0 25 | DB: 26 | METHOD: 'none' #'concrete' or 'none' 27 | DATASETS: 28 | TRAIN: ("voc_2007_train", "voc_2007_val", ) 29 | TEST: ("voc_2007_test",) 30 | PROPOSAL_FILES: 31 | TRAIN: ('proposal/SS-voc_2007_train-boxes.pkl', 'proposal/SS-voc_2007_val-boxes.pkl', ) 32 | TEST: ('proposal/SS-voc_2007_test-boxes.pkl',) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | INPUT: 36 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 37 | MAX_SIZE_TRAIN: 2000 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 2000 40 | SOLVER: 41 | IMS_PER_BATCH: 1 42 | BASE_LR: 0.01 43 | WEIGHT_DECAY: 0.0001 44 | WARMUP_ITERS: 200 45 | STEPS: (20000, 26700) 46 | MAX_ITER: 30000 47 | CHECKPOINT_PERIOD: 2000 48 | SOLVER_CDB: 49 | BASE_LR: 0.0001 50 | GAMMA: 3.0 51 | WEIGHT_DECAY: 0.0001 52 | STEPS: (20000, 26700) 53 | TEST: 54 | IMS_PER_BATCH: 1 55 | BBOX_AUG: 56 | ENABLED: True 57 | HEUR: "AVG" 58 | H_FLIP: True 59 | SCALES: (480, 576, 688, 864, 1000, 1200) 60 | MAX_SIZE: 2000 61 | SCALE_H_FLIP: True 62 | SEED: 1234 63 | ACTIVE: 64 | INPUT_FILE: None 65 | PSEUDO_BOXES_FILE: None 66 | LOSS: 67 | STRONG_LOSS_ON_MIL: False 68 | IMG_STRONG_DET_WEIGHT: 0.0 69 | WEIGHTS_BBX_LOSS: 1.0 70 | WEIGHTED_SAMPLING: True 71 | 
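--------------------------------------------------------------------------------
A usage sketch for the YAML configs above: they are consumed through the yacs-based config system that this codebase inherits from maskrcnn-benchmark, with command-line KEY VALUE pairs (as in the launch and inference scripts) merged on top of the file. The import path and the override values below are assumptions for illustration, not taken from the repository:

# Hedged sketch: load a config file and apply script-style overrides (yacs API).
from wetectron.config import cfg  # assumed export, following maskrcnn-benchmark

cfg.merge_from_file("configs/voc/V_16_voc07_active.yaml")
# KEY VALUE overrides, mirroring the pairs passed by the shell scripts;
# the ACTIVE.INPUT_FILE path below is a hypothetical placeholder.
cfg.merge_from_list([
    "ACTIVE.INPUT_FILE", "outputs/bib_cycle1/active_images.pkl",
    "OUTPUT_DIR", "outputs/bib_cycle1",
])
cfg.freeze()
print(cfg.MODEL.ROI_WEAK_HEAD.PREDICTOR)  # expected: "MISTPredictor"
--------------------------------------------------------------------------------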
-------------------------------------------------------------------------------- /wetectron/utils/registry.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | 11 | 12 | def _register_generic(module_dict, module_name, module): 13 | assert module_name not in module_dict 14 | module_dict[module_name] = module 15 | 16 | 17 | class Registry(dict): 18 | ''' 19 | A helper class for managing module registration; it extends a dictionary 20 | and provides a register function. 21 | 22 | Eg. creating a registry: 23 | some_registry = Registry({"default": default_module}) 24 | 25 | There are two ways of registering new modules: 26 | 1): the normal way is just calling the register function: 27 | def foo(): 28 | ... 29 | some_registry.register("foo_module", foo) 30 | 2): used as a decorator when declaring the module: 31 | @some_registry.register("foo_module") 32 | @some_registry.register("foo_module_nickname") 33 | def foo(): 34 | ... 35 | 36 | Accessing a module is just like using a dictionary, eg: 37 | f = some_registry["foo_module"] 38 | ''' 39 | def __init__(self, *args, **kwargs): 40 | super(Registry, self).__init__(*args, **kwargs) 41 | 42 | def register(self, module_name, module=None): 43 | # used as function call 44 | if module is not None: 45 | _register_generic(self, module_name, module) 46 | return 47 | 48 | # used as decorator 49 | def register_fn(fn): 50 | _register_generic(self, module_name, fn) 51 | return fn 52 | 53 | return register_fn 54 | -------------------------------------------------------------------------------- /wetectron/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | #pragma once 7 | 8 | #include "cpu/vision.h" 9 | 10 | #ifdef WITH_CUDA 11 | #include "cuda/vision.h" 12 | #endif 13 | 14 | 15 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 16 | const at::Tensor& rois, 17 | const float spatial_scale, 18 | const int pooled_height, 19 | const int pooled_width) { 20 | if (input.is_cuda()) { 21 | #ifdef WITH_CUDA 22 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 23 | #else 24 | AT_ERROR("Not compiled with GPU support"); 25 | #endif 26 | } 27 | AT_ERROR("Not implemented on the CPU"); 28 | } 29 | 30 | at::Tensor ROIPool_backward(const at::Tensor& grad, 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const at::Tensor& argmax, 34 | const float spatial_scale, 35 | const int pooled_height, 36 | const int pooled_width, 37 | const int batch_size, 38 | const int channels, 39 | const int height, 40 | const int width) { 41 | if (grad.is_cuda()) { 42 | #ifdef WITH_CUDA 43 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 44 | #else 45 | AT_ERROR("Not compiled with GPU support"); 46 | #endif 47 | } 48 | AT_ERROR("Not implemented on the CPU"); 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /wetectron/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #pragma once 7 | 8 | #include "cpu/vision.h" 9 | 10 | #ifdef WITH_CUDA 11 | #include "cuda/vision.h" 12 | #endif 13 | 14 | // Interface for Python 15 | at::Tensor ROIAlign_forward(const at::Tensor& input, 16 | const at::Tensor& rois, 17 | const float spatial_scale, 18 | const int pooled_height, 19 | const int pooled_width, 20 | const int sampling_ratio) { 21 | if (input.is_cuda()) { 22 | #ifdef WITH_CUDA 23 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 24 | #else 25 | AT_ERROR("Not compiled with GPU support"); 26 | #endif 27 | } 28 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 29 | } 30 | 31 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int batch_size, 37 | const int channels, 38 | const int height, 39 | const int width, 40 | const int sampling_ratio) { 41 | if (grad.is_cuda()) { 42 | #ifdef WITH_CUDA 43 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 44 | #else 45 | AT_ERROR("Not compiled with GPU support"); 46 | #endif 47 | } 48 | AT_ERROR("Not implemented on the CPU"); 49 | } 50 | 51 | -------------------------------------------------------------------------------- /wetectron/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | #
Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | """ 7 | Utility functions manipulating the prediction layers 8 | """ 9 | 10 | from ..utils import cat 11 | 12 | import torch 13 | 14 | def permute_and_flatten(layer, N, A, C, H, W): 15 | layer = layer.view(N, -1, C, H, W) 16 | layer = layer.permute(0, 3, 4, 1, 2) 17 | layer = layer.reshape(N, -1, C) 18 | return layer 19 | 20 | 21 | def concat_box_prediction_layers(box_cls, box_regression): 22 | box_cls_flattened = [] 23 | box_regression_flattened = [] 24 | # for each feature level, permute the outputs to put them in the 25 | # same format as the labels. Note that the labels are computed for 26 | # all feature levels concatenated, so we keep the same representation 27 | # for the objectness and the box_regression 28 | for box_cls_per_level, box_regression_per_level in zip( 29 | box_cls, box_regression 30 | ): 31 | N, AxC, H, W = box_cls_per_level.shape 32 | Ax4 = box_regression_per_level.shape[1] 33 | A = Ax4 // 4 34 | C = AxC // A 35 | box_cls_per_level = permute_and_flatten( 36 | box_cls_per_level, N, A, C, H, W 37 | ) 38 | box_cls_flattened.append(box_cls_per_level) 39 | 40 | box_regression_per_level = permute_and_flatten( 41 | box_regression_per_level, N, A, 4, H, W 42 | ) 43 | box_regression_flattened.append(box_regression_per_level) 44 | # concatenate on the first dimension (representing the feature levels), to 45 | # take into account the way the labels were generated (with all feature maps 46 | # being concatenated as well) 47 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 48 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 49 | return box_cls, box_regression 50 | -------------------------------------------------------------------------------- /wetectron/utils/logger.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 12 | import logging 13 | import os 14 | import sys 15 | 16 | import time 17 | from tqdm import tqdm 18 | import io 19 | 20 | class TqdmToLogger(io.StringIO): 21 | """ 22 | Output stream for TQDM which will output to the logger module instead of 23 | stdout.
24 | """ 25 | logger = None 26 | level = None 27 | buf = '' 28 | def __init__(self,logger,level=None): 29 | super(TqdmToLogger, self).__init__() 30 | self.logger = logger 31 | self.level = level or logging.INFO 32 | def write(self,buf): 33 | self.buf = buf.strip('\r\n\t ') 34 | def flush(self): 35 | self.logger.log(self.level, self.buf) 36 | 37 | 38 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 39 | logging.basicConfig() 40 | logger = logging.getLogger(name) 41 | logger.setLevel(logging.DEBUG) 42 | list(map(logger.removeHandler, logger.handlers[:])) 43 | list(map(logger.root.removeHandler, logger.root.handlers[:])) 44 | # don't log results for the non-master process 45 | if distributed_rank > 0: 46 | return logger 47 | ch = logging.StreamHandler(stream=sys.stdout) 48 | ch.setLevel(logging.DEBUG) 49 | formatter = logging.Formatter("%(levelname)s: %(name)s %(filename)s.%(lineno)4d: %(message)s") 50 | ch.setFormatter(formatter) 51 | logger.addHandler(ch) 52 | 53 | if save_dir: 54 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 55 | fh.setLevel(logging.DEBUG) 56 | fh.setFormatter(formatter) 57 | logger.addHandler(fh) 58 | 59 | return logger 60 | -------------------------------------------------------------------------------- /wetectron/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #pragma once 7 | #include "cpu/vision.h" 8 | 9 | #ifdef WITH_CUDA 10 | #include "cuda/vision.h" 11 | #endif 12 | 13 | 14 | // Interface for Python 15 | void deform_psroi_pooling_forward( 16 | at::Tensor input, 17 | at::Tensor bbox, 18 | at::Tensor trans, 19 | at::Tensor out, 20 | at::Tensor top_count, 21 | const int no_trans, 22 | const float spatial_scale, 23 | const int output_dim, 24 | const int group_size, 25 | const int pooled_size, 26 | const int part_size, 27 | const int sample_per_part, 28 | const float trans_std) 29 | { 30 | if (input.is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_psroi_pooling_cuda_forward( 33 | input, bbox, trans, out, top_count, 34 | no_trans, spatial_scale, output_dim, group_size, 35 | pooled_size, part_size, sample_per_part, trans_std 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | void deform_psroi_pooling_backward( 46 | at::Tensor out_grad, 47 | at::Tensor input, 48 | at::Tensor bbox, 49 | at::Tensor trans, 50 | at::Tensor top_count, 51 | at::Tensor input_grad, 52 | at::Tensor trans_grad, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std) 61 | { 62 | if (input.is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return deform_psroi_pooling_cuda_backward( 65 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 66 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 67 | part_size, sample_per_part, trans_std 68 | ); 69 | #else 70 | AT_ERROR("Not compiled with GPU support"); 71 | #endif 72 | } 73 | AT_ERROR("Not implemented on the CPU"); 74 | } 75 | 
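--------------------------------------------------------------------------------
A usage sketch for wetectron/utils/logger.py above: setup_logger builds a DEBUG-level logger that writes to stdout and, when save_dir is given, also to save_dir/log.txt; TqdmToLogger redirects tqdm progress bars into that logger via its write/flush methods. A minimal example, assuming the output directory already exists; the logger name and loop body are illustrative only:

import logging
from tqdm import tqdm
from wetectron.utils.logger import setup_logger, TqdmToLogger

# rank 0 so handlers are attached; non-master ranks get a silent logger
logger = setup_logger("maskrcnn_benchmark.demo", "outputs/demo", 0)
tqdm_out = TqdmToLogger(logger, level=logging.INFO)

# tqdm writes its progress line through tqdm_out, so it lands in log.txt
# rather than raw stdout; mininterval throttles how often it is logged.
for _ in tqdm(range(1000), file=tqdm_out, mininterval=5):
    pass  # per-image work would go here
--------------------------------------------------------------------------------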
-------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from wetectron.modeling import registry 9 | from wetectron.modeling.poolers import Pooler 10 | 11 | from wetectron.layers import Conv2d 12 | 13 | 14 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 15 | class KeypointRCNNFeatureExtractor(nn.Module): 16 | def __init__(self, cfg, in_channels): 17 | super(KeypointRCNNFeatureExtractor, self).__init__() 18 | 19 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 20 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 21 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 22 | pooler = Pooler( 23 | output_size=(resolution, resolution), 24 | scales=scales, 25 | sampling_ratio=sampling_ratio, 26 | ) 27 | self.pooler = pooler 28 | 29 | input_features = in_channels 30 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 31 | next_feature = input_features 32 | self.blocks = [] 33 | for layer_idx, layer_features in enumerate(layers, 1): 34 | layer_name = "conv_fcn{}".format(layer_idx) 35 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 36 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 37 | nn.init.constant_(module.bias, 0) 38 | self.add_module(layer_name, module) 39 | next_feature = layer_features 40 | self.blocks.append(layer_name) 41 | self.out_channels = layer_features 42 | 43 | def forward(self, x, proposals): 44 | x = self.pooler(x, proposals) 45 | for layer_name in self.blocks: 46 | x = F.relu(getattr(self, layer_name)(x)) 47 | return x 48 | 49 | 50 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 51 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 52 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 53 | ] 54 | return func(cfg, in_channels) 55 | -------------------------------------------------------------------------------- /wetectron/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | import torch 7 | from torch import nn 8 | from torch.autograd import Function 9 | from torch.autograd.function import once_differentiable 10 | from torch.nn.modules.utils import _pair 11 | 12 | from wetectron import _C 13 | from apex import amp 14 | 15 | class _ROIPool(Function): 16 | @staticmethod 17 | def forward(ctx, input, roi, output_size, spatial_scale): 18 | ctx.output_size = _pair(output_size) 19 | ctx.spatial_scale = spatial_scale 20 | ctx.input_shape = input.size() 21 | output, argmax = _C.roi_pool_forward( 22 | input, roi, spatial_scale, output_size[0], output_size[1] 23 | ) 24 | ctx.save_for_backward(input, roi, argmax) 25 | return output 26 | 27 | @staticmethod 28 | @once_differentiable 29 | def backward(ctx, grad_output): 30 | input, rois, argmax = ctx.saved_tensors 31 | output_size = ctx.output_size 32 | spatial_scale = ctx.spatial_scale 33 | bs, ch, h, w = ctx.input_shape 34 | grad_input = _C.roi_pool_backward( 35 | grad_output, 36 | input, 37 | rois, 38 | argmax, 39 | spatial_scale, 40 | output_size[0], 41 | output_size[1], 42 | bs, 43 | ch, 44 | h, 45 | w, 46 | ) 47 | return grad_input, None, None, None 48 | 49 | 50 | roi_pool = _ROIPool.apply 51 | 52 | 53 | class ROIPool(nn.Module): 54 | def __init__(self, output_size, spatial_scale): 55 | super(ROIPool, self).__init__() 56 | self.output_size = output_size 57 | self.spatial_scale = spatial_scale 58 | 59 | @amp.float_function 60 | def forward(self, input, rois): 61 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 62 | 63 | def __repr__(self): 64 | tmpstr = self.__class__.__name__ + "(" 65 | tmpstr += "output_size=" + str(self.output_size) 66 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /wetectron/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | from bisect import bisect_right 11 | 12 | import torch 13 | 14 | 15 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 16 | # separating MultiStepLR with WarmupLR 17 | # but the current LRScheduler design doesn't allow it 18 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 19 | def __init__( 20 | self, 21 | optimizer, 22 | milestones, 23 | gamma=0.1, 24 | warmup_factor=1.0 / 3, 25 | warmup_iters=500, 26 | warmup_method="linear", 27 | last_epoch=-1, 28 | ): 29 | if not list(milestones) == sorted(milestones): 30 | raise ValueError( 31 | "Milestones should be a list of" " increasing integers. 
Got {}".format(milestones) 32 | 33 | ) 34 | 35 | if warmup_method not in ("constant", "linear"): 36 | raise ValueError( 37 | "Only 'constant' or 'linear' warmup_method accepted, " 38 | "got {}".format(warmup_method) 39 | ) 40 | self.milestones = milestones 41 | self.gamma = gamma 42 | self.warmup_factor = warmup_factor 43 | self.warmup_iters = warmup_iters 44 | self.warmup_method = warmup_method 45 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 46 | 47 | def get_lr(self): 48 | warmup_factor = 1 49 | if self.last_epoch < self.warmup_iters: 50 | if self.warmup_method == "constant": 51 | warmup_factor = self.warmup_factor 52 | elif self.warmup_method == "linear": 53 | alpha = float(self.last_epoch) / self.warmup_iters 54 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 55 | return [ 56 | base_lr 57 | * warmup_factor 58 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 59 | for base_lr in self.base_lrs 60 | ] 61 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 3 | # Nvidia Source Code License-NC 4 | # -------------------------------------------------------- 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #!/usr/bin/env python 7 | 8 | import glob 9 | import os 10 | 11 | import torch 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | from torch.utils.cpp_extension import CUDA_HOME 15 | from torch.utils.cpp_extension import CppExtension 16 | from torch.utils.cpp_extension import CUDAExtension 17 | 18 | requirements = ["torch", "torchvision"] 19 | 20 | 21 | def get_extensions(): 22 | this_dir = os.path.dirname(os.path.abspath(__file__)) 23 | extensions_dir = os.path.join(this_dir, "wetectron", "csrc") 24 | 25 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 26 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 27 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 28 | 29 | sources = main_file + source_cpu 30 | extension = CppExtension 31 | 32 | extra_compile_args = {"cxx": []} 33 | define_macros = [] 34 | 35 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 36 | extension = CUDAExtension 37 | sources += source_cuda 38 | define_macros += [("WITH_CUDA", None)] 39 | extra_compile_args["nvcc"] = [ 40 | "-DCUDA_HAS_FP16=1", 41 | "-D__CUDA_NO_HALF_OPERATORS__", 42 | "-D__CUDA_NO_HALF_CONVERSIONS__", 43 | "-D__CUDA_NO_HALF2_OPERATORS__", 44 | ] 45 | 46 | sources = [os.path.join(extensions_dir, s) for s in sources] 47 | 48 | include_dirs = [extensions_dir] 49 | 50 | ext_modules = [ 51 | extension( 52 | "wetectron._C", 53 | sources, 54 | include_dirs=include_dirs, 55 | define_macros=define_macros, 56 | extra_compile_args=extra_compile_args, 57 | ) 58 | ] 59 | 60 | return ext_modules 61 | 62 | 63 | setup( 64 | name="wetectron", 65 | version="1.0", 66 | author="jason718", 67 | url="https://github.com/nvlabs/wetectron/", 68 | description="weakly-supervised object detection in pytorch", 69 | packages=find_packages(exclude=("configs", "tests",)), 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | --------------------------------------------------------------------------------
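--------------------------------------------------------------------------------
A usage sketch for wetectron/solver/lr_scheduler.py above: WarmupMultiStepLR scales the base learning rate by a warmup factor (ramping linearly to 1 over warmup_iters iterations, or held constant) and then decays it by gamma at each milestone; it is stepped once per iteration, matching the SOLVER.MAX_ITER semantics of the configs. The toy model below is an assumption for illustration:

import torch
from wetectron.solver.lr_scheduler import WarmupMultiStepLR

model = torch.nn.Linear(10, 2)  # stand-in for the detector
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Values mirror SOLVER.STEPS and WARMUP_ITERS in the VOC07 config above.
scheduler = WarmupMultiStepLR(optimizer, milestones=[20000, 26700], gamma=0.1,
                              warmup_factor=1.0 / 3, warmup_iters=200)

for it in range(300):
    optimizer.step()
    scheduler.step()  # per-iteration stepping, not per-epoch
# After iteration 200 the lr is back at the base 0.01 until the first milestone.
--------------------------------------------------------------------------------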
/wetectron/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | import torch 6 | 7 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 8 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 9 | from .inference import make_roi_keypoint_post_processor 10 | from .loss import make_roi_keypoint_loss_evaluator 11 | 12 | 13 | class ROIKeypointHead(torch.nn.Module): 14 | def __init__(self, cfg, in_channels): 15 | super(ROIKeypointHead, self).__init__() 16 | self.cfg = cfg.clone() 17 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 18 | self.predictor = make_roi_keypoint_predictor( 19 | cfg, self.feature_extractor.out_channels) 20 | self.post_processor = make_roi_keypoint_post_processor(cfg) 21 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 22 | 23 | def forward(self, features, proposals, targets=None): 24 | """ 25 | Arguments: 26 | features (list[Tensor]): feature-maps from possibly several levels 27 | proposals (list[BoxList]): proposal boxes 28 | targets (list[BoxList], optional): the ground-truth targets. 29 | 30 | Returns: 31 | x (Tensor): the result of the feature extractor 32 | proposals (list[BoxList]): during training, the original proposals 33 | are returned. During testing, the predicted boxlists are returned 34 | with the `keypoints` field set 35 | losses (dict[Tensor]): During training, returns the losses for the 36 | head. During testing, returns an empty dict. 37 | """ 38 | if self.training: 39 | with torch.no_grad(): 40 | proposals = self.loss_evaluator.subsample(proposals, targets) 41 | 42 | x = self.feature_extractor(features, proposals) 43 | kp_logits = self.predictor(x) 44 | 45 | if not self.training: 46 | result = self.post_processor(kp_logits, proposals) 47 | return x, result, {} 48 | 49 | loss_kp = self.loss_evaluator(proposals, kp_logits) 50 | 51 | return x, proposals, dict(loss_kp=loss_kp) 52 | 53 | 54 | def build_roi_keypoint_head(cfg, in_channels): 55 | return ROIKeypointHead(cfg, in_channels) 56 | -------------------------------------------------------------------------------- /wetectron/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | import torch 7 | from torch import nn 8 | from torch.autograd import Function 9 | from torch.autograd.function import once_differentiable 10 | from torch.nn.modules.utils import _pair 11 | 12 | from wetectron import _C 13 | from apex import amp 14 | 15 | class _ROIAlign(Function): 16 | @staticmethod 17 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 18 | ctx.save_for_backward(roi) 19 | ctx.output_size = _pair(output_size) 20 | ctx.spatial_scale = spatial_scale 21 | ctx.sampling_ratio = sampling_ratio 22 | ctx.input_shape = input.size() 23 | output = _C.roi_align_forward( 24 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 25 | ) 26 | return output 27 | 28 | @staticmethod 29 | @once_differentiable 30 | def backward(ctx, grad_output): 31 | rois, = ctx.saved_tensors 32 | output_size = ctx.output_size 33 | spatial_scale = ctx.spatial_scale 34 | sampling_ratio = ctx.sampling_ratio 35 | bs, ch, h, w = ctx.input_shape 36 | grad_input = _C.roi_align_backward( 37 | grad_output, 38 | rois, 39 | spatial_scale, 40 | output_size[0], 41 | output_size[1], 42 | bs, 43 | ch, 44 | h, 45 | w, 46 | sampling_ratio, 47 | ) 48 | return grad_input, None, None, None, None 49 | 50 | 51 | roi_align = _ROIAlign.apply 52 | 53 | class ROIAlign(nn.Module): 54 | def __init__(self, output_size, spatial_scale, sampling_ratio): 55 | super(ROIAlign, self).__init__() 56 | self.output_size = output_size 57 | self.spatial_scale = spatial_scale 58 | self.sampling_ratio = sampling_ratio 59 | 60 | @amp.float_function 61 | def forward(self, input, rois): 62 | return roi_align( 63 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 64 | ) 65 | 66 | def __repr__(self): 67 | tmpstr = self.__class__.__name__ + "(" 68 | tmpstr += "output_size=" + str(self.output_size) 69 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 70 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 71 | tmpstr += ")" 72 | return tmpstr 73 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from wetectron.layers import Conv2d 10 | from wetectron.layers import ConvTranspose2d 11 | from wetectron.modeling import registry 12 | 13 | 14 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 15 | class MaskRCNNC4Predictor(nn.Module): 16 | def __init__(self, cfg, in_channels): 17 | super(MaskRCNNC4Predictor, self).__init__() 18 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 19 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 20 | num_inputs = in_channels 21 | 22 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 23 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 24 | 25 | for name, param in self.named_parameters(): 26 | if "bias" in name: 27 | nn.init.constant_(param, 0) 28 | elif "weight" in name: 29 | # Caffe2 implementation uses MSRAFill, which in fact 30 | # corresponds to kaiming_normal_ in PyTorch 31 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 32 | 33 | def forward(self, x): 34 | x = F.relu(self.conv5_mask(x)) 35 | return self.mask_fcn_logits(x) 36 | 37 | 38 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 39 | class MaskRCNNConv1x1Predictor(nn.Module): 40 | def __init__(self, cfg, in_channels): 41 | super(MaskRCNNConv1x1Predictor, self).__init__() 42 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 43 | num_inputs = in_channels 44 | 45 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 46 | 47 | for name, param in self.named_parameters(): 48 | if "bias" in name: 49 | nn.init.constant_(param, 0) 50 | elif "weight" in name: 51 | # Caffe2 implementation uses MSRAFill, which in fact 52 | # corresponds to kaiming_normal_ in PyTorch 53 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 54 | 55 | def forward(self, x): 56 | return self.mask_fcn_logits(x) 57 | 58 | 59 | def make_roi_mask_predictor(cfg, in_channels): 60 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 61 | return func(cfg, in_channels) 62 | -------------------------------------------------------------------------------- /wetectron/solver/build.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
12 | import torch 13 | 14 | from .lr_scheduler import WarmupMultiStepLR 15 | 16 | def make_optimizer(cfg, model): 17 | params = [] 18 | for key, value in model.named_parameters(): 19 | if not value.requires_grad: 20 | continue 21 | lr = cfg.SOLVER.BASE_LR 22 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 23 | if "bias" in key: 24 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 25 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 26 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 27 | 28 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 29 | 30 | return optimizer 31 | 32 | def make_cdb_optimizer(cfg, model): 33 | params = [] 34 | for key, value in model.named_parameters(): 35 | if not value.requires_grad: 36 | continue 37 | lr = cfg.SOLVER_CDB.BASE_LR 38 | weight_decay = cfg.SOLVER_CDB.WEIGHT_DECAY 39 | if "bias" in key: 40 | lr = cfg.SOLVER_CDB.BASE_LR * cfg.SOLVER_CDB.BIAS_LR_FACTOR 41 | weight_decay = cfg.SOLVER_CDB.WEIGHT_DECAY_BIAS 42 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 43 | 44 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER_CDB.MOMENTUM) 45 | 46 | return optimizer 47 | 48 | def make_lr_scheduler(cfg, optimizer): 49 | return WarmupMultiStepLR( 50 | optimizer, 51 | cfg.SOLVER.STEPS, 52 | cfg.SOLVER.GAMMA, 53 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 54 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 55 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 56 | last_epoch=cfg.SOLVER.LAST_EPOCH, 57 | ) 58 | 59 | def make_lr_cdb_scheduler(cfg, optimizer): 60 | return WarmupMultiStepLR( 61 | optimizer, 62 | cfg.SOLVER_CDB.STEPS, 63 | cfg.SOLVER_CDB.GAMMA, 64 | warmup_factor=cfg.SOLVER_CDB.WARMUP_FACTOR, 65 | warmup_iters=cfg.SOLVER_CDB.WARMUP_ITERS, 66 | warmup_method=cfg.SOLVER_CDB.WARMUP_METHOD, 67 | last_epoch=cfg.SOLVER.LAST_EPOCH, 68 | ) 69 | -------------------------------------------------------------------------------- /wetectron/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
8 | import bisect 9 | 10 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 11 | from wetectron.data import datasets 12 | 13 | class ConcatDataset(_ConcatDataset): 14 | """ 15 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 16 | method for querying the sizes of the image 17 | """ 18 | 19 | def __getitem__(self, idx): 20 | dataset_idx, sample_idx = self.get_idxs(idx) 21 | img, target, rois, _ = self.datasets[dataset_idx][sample_idx] 22 | return img, target, rois, idx 23 | 24 | def get_categories(self): 25 | return self.datasets[0].get_categories() 26 | 27 | def get_idxs(self, idx): 28 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 29 | if dataset_idx == 0: 30 | sample_idx = idx 31 | else: 32 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 33 | return dataset_idx, sample_idx 34 | 35 | def get_img_info(self, idx): 36 | dataset_idx, sample_idx = self.get_idxs(idx) 37 | return self.datasets[dataset_idx].get_img_info(sample_idx) 38 | 39 | def get_active_images(self): 40 | return self.datasets[0].get_active_images() 41 | 42 | def is_active(self, idx): 43 | dataset_idx, sample_idx = self.get_idxs(idx) 44 | return self.datasets[dataset_idx].is_active(sample_idx) 45 | 46 | def get_active_sampling_weight(self, idx): 47 | dataset_idx, sample_idx = self.get_idxs(idx) 48 | return self.datasets[dataset_idx].get_active_sampling_weight(sample_idx) 49 | 50 | def get_weak_instance_weight(self, idx): 51 | dataset_idx, sample_idx = self.get_idxs(idx) 52 | return self.datasets[dataset_idx].get_weak_instance_weight(sample_idx) 53 | 54 | def has_pseudo_gt(self, idx): 55 | dataset_idx, sample_idx = self.get_idxs(idx) 56 | return self.datasets[dataset_idx].has_pseudo_gt(sample_idx) 57 | 58 | # Methods that only apply on a ConcatDataset of datasets.PascalVOCDataset 59 | def get_groundtruth(self, idx): 60 | dataset_idx, sample_idx = self.get_idxs(idx) 61 | return self.datasets[dataset_idx].get_groundtruth(sample_idx) 62 | 63 | def map_class_id_to_class_name(self, idx): 64 | dataset_idx, sample_idx = self.get_idxs(idx) 65 | return self.datasets[dataset_idx].map_class_id_to_class_name(sample_idx) -------------------------------------------------------------------------------- /wetectron/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | from torch.autograd.function import once_differentiable 9 | 10 | from wetectron import _C 11 | 12 | # TODO: Use JIT to replace CUDA implementation in the future. 
13 | class _SigmoidFocalLoss(Function): 14 | @staticmethod 15 | def forward(ctx, logits, targets, gamma, alpha): 16 | ctx.save_for_backward(logits, targets) 17 | num_classes = logits.shape[1] 18 | ctx.num_classes = num_classes 19 | ctx.gamma = gamma 20 | ctx.alpha = alpha 21 | 22 | losses = _C.sigmoid_focalloss_forward( 23 | logits, targets, num_classes, gamma, alpha 24 | ) 25 | return losses 26 | 27 | @staticmethod 28 | @once_differentiable 29 | def backward(ctx, d_loss): 30 | logits, targets = ctx.saved_tensors 31 | num_classes = ctx.num_classes 32 | gamma = ctx.gamma 33 | alpha = ctx.alpha 34 | d_loss = d_loss.contiguous() 35 | d_logits = _C.sigmoid_focalloss_backward( 36 | logits, targets, d_loss, num_classes, gamma, alpha 37 | ) 38 | return d_logits, None, None, None, None 39 | 40 | 41 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 42 | 43 | 44 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 45 | num_classes = logits.shape[1] 46 | gamma = gamma[0] 47 | alpha = alpha[0] 48 | dtype = targets.dtype 49 | device = targets.device 50 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 51 | 52 | t = targets.unsqueeze(1) 53 | p = torch.sigmoid(logits) 54 | term1 = (1 - p) ** gamma * torch.log(p) 55 | term2 = p ** gamma * torch.log(1 - p) 56 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 57 | 58 | 59 | class SigmoidFocalLoss(nn.Module): 60 | def __init__(self, gamma, alpha): 61 | super(SigmoidFocalLoss, self).__init__() 62 | self.gamma = gamma 63 | self.alpha = alpha 64 | 65 | def forward(self, logits, targets): 66 | device = logits.device 67 | if logits.is_cuda: 68 | loss_func = sigmoid_focal_loss_cuda 69 | else: 70 | loss_func = sigmoid_focal_loss_cpu 71 | 72 | loss = loss_func(logits, targets, self.gamma, self.alpha) 73 | return loss.sum() 74 | 75 | def __repr__(self): 76 | tmpstr = self.__class__.__name__ + "(" 77 | tmpstr += "gamma=" + str(self.gamma) 78 | tmpstr += ", alpha=" + str(self.alpha) 79 | tmpstr += ")" 80 | return tmpstr 81 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | from wetectron.modeling import registry 7 | from torch import nn 8 | 9 | 10 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 11 | class FastRCNNPredictor(nn.Module): 12 | def __init__(self, config, in_channels): 13 | super(FastRCNNPredictor, self).__init__() 14 | assert in_channels is not None 15 | 16 | num_inputs = in_channels 17 | 18 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 19 | self.avgpool = nn.AdaptiveAvgPool2d(1) 20 | self.cls_score = nn.Linear(num_inputs, num_classes) 21 | num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 22 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 23 | 24 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 25 | nn.init.constant_(self.cls_score.bias, 0) 26 | 27 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 28 | nn.init.constant_(self.bbox_pred.bias, 0) 29 | 30 | def forward(self, x): 31 | if x.dim() == 4: 32 | x = self.avgpool(x) 33 | x = x.view(x.size(0), -1) 34 | assert x.dim() == 2 35 | # x = self.avgpool(x) 36 | # x = x.view(x.size(0), -1) 37 | cls_logit = self.cls_score(x) 38 | bbox_pred = self.bbox_pred(x) 39 | return cls_logit, bbox_pred 40 | 41 | 42 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 43 | class FPNPredictor(nn.Module): 44 | def __init__(self, cfg, in_channels): 45 | super(FPNPredictor, self).__init__() 46 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 47 | representation_size = in_channels 48 | 49 | self.cls_score = nn.Linear(representation_size, num_classes) 50 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 51 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 52 | 53 | nn.init.normal_(self.cls_score.weight, std=0.01) 54 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 55 | for l in [self.cls_score, self.bbox_pred]: 56 | nn.init.constant_(l.bias, 0) 57 | 58 | def forward(self, x): 59 | if x.ndimension() == 4: 60 | assert list(x.shape[2:]) == [1, 1] 61 | x = x.view(x.size(0), -1) 62 | scores = self.cls_score(x) 63 | bbox_deltas = self.bbox_pred(x) 64 | 65 | return scores, bbox_deltas 66 | 67 | 68 | def make_roi_box_predictor(cfg, in_channels): 69 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 70 | return func(cfg, in_channels) 71 | -------------------------------------------------------------------------------- /wetectron/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 12 | import torch 13 | from . 
import transforms as T 14 | 15 | _imagenet_pca = { 16 | 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), 17 | 'eigvec': torch.Tensor([ 18 | [-0.5675, 0.7192, 0.4009], 19 | [-0.5808, -0.0045, -0.8140], 20 | [-0.5836, -0.6948, 0.4203], 21 | ]) 22 | } 23 | 24 | def build_transforms(cfg, is_train=True): 25 | if is_train: 26 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 27 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 28 | flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 29 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 30 | brightness = cfg.INPUT.BRIGHTNESS 31 | contrast = cfg.INPUT.CONTRAST 32 | saturation = cfg.INPUT.SATURATION 33 | hue = cfg.INPUT.HUE 34 | else: 35 | min_size = cfg.INPUT.MIN_SIZE_TEST 36 | max_size = cfg.INPUT.MAX_SIZE_TEST 37 | flip_horizontal_prob = 0.0 38 | flip_vertical_prob = 0.0 39 | brightness = 0.0 40 | contrast = 0.0 41 | saturation = 0.0 42 | hue = 0.0 43 | 44 | to_bgr255 = cfg.INPUT.TO_BGR255 45 | normalize_transform = T.Normalize( 46 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 47 | ) 48 | color_jitter = T.ColorJitter( 49 | brightness=brightness, 50 | contrast=contrast, 51 | saturation=saturation, 52 | hue=hue, 53 | ) 54 | 55 | if cfg.INPUT.PCA and is_train: 56 | transform = T.Compose( 57 | [ 58 | color_jitter, 59 | T.Resize(min_size, max_size), 60 | T.RandomHorizontalFlip(flip_horizontal_prob), 61 | T.RandomVerticalFlip(flip_vertical_prob), 62 | T.ToTensor(), 63 | T.Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']), 64 | normalize_transform, 65 | ] 66 | ) 67 | else: 68 | transform = T.Compose( 69 | [ 70 | color_jitter, 71 | T.Resize(min_size, max_size), 72 | T.RandomHorizontalFlip(flip_horizontal_prob), 73 | T.RandomVerticalFlip(flip_vertical_prob), 74 | T.ToTensor(), 75 | normalize_transform, 76 | ] 77 | ) 78 | 79 | return transform 80 | -------------------------------------------------------------------------------- /wetectron/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
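// The kernel below implements greedy non-maximum suppression: boxes are
// visited in descending score order, and any lower-scored box whose overlap
// with a kept box reaches the threshold is suppressed, where
//
//     IoU(a, b) = area(a ∩ b) / (area(a) + area(b) - area(a ∩ b)).
//
// The "+ 1" terms in the width/height computations follow the legacy
// integer-pixel convention in which a box [x1, x2] spans x2 - x1 + 1 pixels.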
6 |  #include "cpu/vision.h"
7 |  
8 |  
9 |  template <typename scalar_t>
10 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
11 |                           const at::Tensor& scores,
12 |                           const float threshold) {
13 |   AT_ASSERTM(!dets.is_cuda(), "dets must be a CPU tensor");
14 |   AT_ASSERTM(!scores.is_cuda(), "scores must be a CPU tensor");
15 |   AT_ASSERTM(dets.scalar_type() == scores.scalar_type(), "dets should have the same type as scores");
16 | 
17 |   if (dets.numel() == 0) {
18 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 |   }
20 | 
21 |   auto x1_t = dets.select(1, 0).contiguous();
22 |   auto y1_t = dets.select(1, 1).contiguous();
23 |   auto x2_t = dets.select(1, 2).contiguous();
24 |   auto y2_t = dets.select(1, 3).contiguous();
25 | 
26 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
27 | 
28 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
29 | 
30 |   auto ndets = dets.size(0);
31 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
32 | 
33 |   auto suppressed = suppressed_t.data_ptr<uint8_t>();
34 |   auto order = order_t.data_ptr<int64_t>();
35 |   auto x1 = x1_t.data_ptr<scalar_t>();
36 |   auto y1 = y1_t.data_ptr<scalar_t>();
37 |   auto x2 = x2_t.data_ptr<scalar_t>();
38 |   auto y2 = y2_t.data_ptr<scalar_t>();
39 |   auto areas = areas_t.data_ptr<scalar_t>();
40 | 
41 |   for (int64_t _i = 0; _i < ndets; _i++) {
42 |     auto i = order[_i];
43 |     if (suppressed[i] == 1)
44 |       continue;
45 |     auto ix1 = x1[i];
46 |     auto iy1 = y1[i];
47 |     auto ix2 = x2[i];
48 |     auto iy2 = y2[i];
49 |     auto iarea = areas[i];
50 | 
51 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
52 |       auto j = order[_j];
53 |       if (suppressed[j] == 1)
54 |         continue;
55 |       auto xx1 = std::max(ix1, x1[j]);
56 |       auto yy1 = std::max(iy1, y1[j]);
57 |       auto xx2 = std::min(ix2, x2[j]);
58 |       auto yy2 = std::min(iy2, y2[j]);
59 | 
60 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
61 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
62 |       auto inter = w * h;
63 |       auto ovr = inter / (iarea + areas[j] - inter);
64 |       if (ovr >= threshold)
65 |         suppressed[j] = 1;
66 |     }
67 |   }
68 |   return at::nonzero(suppressed_t == 0).squeeze(1);
69 | }
70 | 
71 | at::Tensor nms_cpu(const at::Tensor& dets,
72 |                    const at::Tensor& scores,
73 |                    const float threshold) {
74 |   at::Tensor result;
75 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
76 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
77 |   });
78 |   return result;
79 | }
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
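# A note on the FPN extractor defined below: it pools each proposal to a fixed
# resolution and then applies a stack of 3x3 convolutions named mask_fcn1,
# mask_fcn2, ..., each followed by ReLU. For example (an illustrative config
# value, not a prescribed one), cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS == (256,
# 256, 256, 256) yields four such convolutions with 256 output channels each.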
6 |  from torch import nn
7 |  from torch.nn import functional as F
8 |  
9 |  from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor
10 | from wetectron.modeling import registry
11 | from wetectron.modeling.poolers import Pooler
12 | from wetectron.modeling.make_layers import make_conv3x3
13 | 
14 | 
15 | registry.ROI_MASK_FEATURE_EXTRACTORS.register(
16 |     "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor
17 | )
18 | 
19 | 
20 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor")
21 | class MaskRCNNFPNFeatureExtractor(nn.Module):
22 |     """
23 |     Heads for FPN for classification
24 |     """
25 | 
26 |     def __init__(self, cfg, in_channels):
27 |         """
28 |         Arguments:
29 |             num_classes (int): number of output classes
30 |             input_size (int): number of channels of the input once it's flattened
31 |             representation_size (int): size of the intermediate representation
32 |         """
33 |         super(MaskRCNNFPNFeatureExtractor, self).__init__()
34 | 
35 |         resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
36 |         scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
37 |         sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
38 |         pooler = Pooler(
39 |             output_size=(resolution, resolution),
40 |             scales=scales,
41 |             sampling_ratio=sampling_ratio,
42 |         )
43 |         input_size = in_channels
44 |         self.pooler = pooler
45 | 
46 |         use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
47 |         layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
48 |         dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
49 | 
50 |         next_feature = input_size
51 |         self.blocks = []
52 |         for layer_idx, layer_features in enumerate(layers, 1):
53 |             layer_name = "mask_fcn{}".format(layer_idx)
54 |             module = make_conv3x3(
55 |                 next_feature, layer_features,
56 |                 dilation=dilation, stride=1, use_gn=use_gn
57 |             )
58 |             self.add_module(layer_name, module)
59 |             next_feature = layer_features
60 |             self.blocks.append(layer_name)
61 |         self.out_channels = layer_features
62 | 
63 |     def forward(self, x, proposals):
64 |         x = self.pooler(x, proposals)
65 | 
66 |         for layer_name in self.blocks:
67 |             x = F.relu(getattr(self, layer_name)(x))
68 | 
69 |         return x
70 | 
71 | 
72 | def make_roi_mask_feature_extractor(cfg, in_channels):
73 |     func = registry.ROI_MASK_FEATURE_EXTRACTORS[
74 |         cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR
75 |     ]
76 |     return func(cfg, in_channels)
77 | 
--------------------------------------------------------------------------------
/wetectron/data/README.md:
--------------------------------------------------------------------------------
1 |  # Setting Up Datasets
2 |  This file describes how to perform training on other datasets.
3 |  
4 |  Currently, only the Pascal VOC dataset can be loaded from its original format and evaluated with Pascal-style results.
5 |  
6 |  We expect the annotations from other datasets to be converted to COCO json format, and
7 |  the output will be in COCO style (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm).
8 |  
9 |  ## Creating Symlinks for PASCAL VOC
10 | 
11 | We assume that your symlinked `datasets/voc/VOC<year>` directory has the following structure:
12 | 
13 | ```
14 | VOC<year>
15 | |_ JPEGImages
16 | |  |_ <im-1>.jpg
17 | |  |_ ...
18 | |  |_ <im-N>.jpg
19 | |_ Annotations
20 | |  |_ pascal_train<year>.json (optional)
21 | |  |_ pascal_val<year>.json (optional)
22 | |  |_ pascal_test<year>.json (optional)
23 | |  |_ <im-1>.xml
24 | |  |_ ...
25 | |  |_ <im-N>.xml
26 | |_ VOCdevkit<year>
27 | ```
28 | 
29 | Create symlinks for `voc/VOC<year>`:
30 | 
31 | ```
32 | cd ~/github/maskrcnn-benchmark
33 | mkdir -p datasets/voc/VOC<year>
34 | ln -s /path/to/VOC<year> datasets/voc/VOC<year>
35 | ```
36 | Example configuration files for PASCAL VOC can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/).
37 | 
38 | ### PASCAL VOC Annotations in COCO Format
39 | To output COCO-style evaluation results, PASCAL VOC annotations in COCO json format are required and can be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip)
40 | via http://cocodataset.org/#external.
41 | 
42 | ## Creating Symlinks for Cityscapes
43 | 
44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure:
45 | 
46 | ```
47 | cityscapes
48 | |_ images
49 | |  |_ <im-1>.jpg
50 | |  |_ ...
51 | |  |_ <im-N>.jpg
52 | |_ annotations
53 | |  |_ instanceonly_gtFile_train.json
54 | |  |_ ...
55 | |_ raw
56 |    |_ gtFine
57 |    |_ ...
58 |    |_ README.md
59 | ```
60 | 
61 | Create symlinks for `cityscapes`:
62 | 
63 | ```
64 | cd ~/github/maskrcnn-benchmark
65 | mkdir -p datasets/cityscapes
66 | ln -s /path/to/cityscapes datasets/cityscapes
67 | ```
68 | 
69 | ### Steps to convert Cityscapes Annotations to COCO Format
70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required)
71 | 2. Extract it to /path/to/gtFine_trainvaltest
72 | ```
73 | cityscapes
74 | |_ gtFine_trainvaltest.zip
75 | |_ gtFine_trainvaltest
76 |    |_ gtFine
77 | ```
78 | 3. Run the commands below to convert the annotations
79 | 
80 | ```
81 | cd ~/github
82 | git clone https://github.com/mcordts/cityscapesScripts.git
83 | cd cityscapesScripts
84 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation
85 | python setup.py install
86 | cd ~/github/maskrcnn-benchmark
87 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/cityscapes --outdir /path/to/cityscapes/annotations
88 | ```
89 | 
90 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/).
--------------------------------------------------------------------------------
/wetectron/structures/image_list.py:
--------------------------------------------------------------------------------
1 |  #------------------------------------------------------------------------------
2 |  # Code taken from https://github.com/NVlabs/wetectron
3 |  #------------------------------------------------------------------------------
4 |  
5 |  # --------------------------------------------------------
6 |  # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 |  # Nvidia Source Code License-NC
8 |  # --------------------------------------------------------
9 |  # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from __future__ import division
11 | 
12 | import torch
13 | 
14 | 
15 | class ImageList(object):
16 |     """
17 |     Structure that holds a list of images (of possibly
18 |     varying sizes) as a single tensor.
19 |     This works by padding the images to the same size,
20 |     and storing in a field the original sizes of each image
21 |     """
22 | 
23 |     def __init__(self, tensors, image_sizes):
24 |         """
25 |         Arguments:
26 |             tensors (tensor)
27 |             image_sizes (list[tuple[int, int]])
28 |         """
29 |         self.tensors = tensors
30 |         self.image_sizes = image_sizes
31 | 
32 |     def to(self, *args, **kwargs):
33 |         cast_tensor = self.tensors.to(*args, **kwargs)
34 |         return ImageList(cast_tensor, self.image_sizes)
35 | 
36 | 
37 | def to_image_list(tensors, size_divisible=0):
38 |     """
39 |     tensors can be an ImageList, a torch.Tensor or
40 |     an iterable of Tensors. It can't be a numpy array.
41 |     When tensors is an iterable of Tensors, it pads
42 |     the Tensors with zeros so that they have the same
43 |     shape
44 |     """
45 |     if isinstance(tensors, torch.Tensor) and size_divisible > 0:
46 |         tensors = [tensors]
47 | 
48 |     if isinstance(tensors, ImageList):
49 |         return tensors
50 |     elif isinstance(tensors, torch.Tensor):
51 |         # single tensor shape can be inferred
52 |         if tensors.dim() == 3:
53 |             tensors = tensors[None]
54 |         assert tensors.dim() == 4
55 |         image_sizes = [tensor.shape[-2:] for tensor in tensors]
56 |         return ImageList(tensors, image_sizes)
57 |     elif isinstance(tensors, (tuple, list)):
58 |         max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))
59 | 
60 |         # TODO Ideally, just remove this and let the model handle arbitrary
61 |         # input sizes
62 |         if size_divisible > 0:
63 |             import math
64 | 
65 |             stride = size_divisible
66 |             max_size = list(max_size)
67 |             max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
68 |             max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
69 |             max_size = tuple(max_size)
70 | 
71 |         batch_shape = (len(tensors),) + max_size
72 |         batched_imgs = tensors[0].new(*batch_shape).zero_()
73 |         for img, pad_img in zip(tensors, batched_imgs):
74 |             pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
75 | 
76 |         image_sizes = [im.shape[-2:] for im in tensors]
77 | 
78 |         return ImageList(batched_imgs, image_sizes)
79 |     else:
80 |         raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))
--------------------------------------------------------------------------------
/wetectron/layers/dcn/deform_pool_func.py:
--------------------------------------------------------------------------------
1 |  #------------------------------------------------------------------------------
2 |  # Code taken from https://github.com/NVlabs/wetectron
3 |  #------------------------------------------------------------------------------
4 |  
5 |  import torch
6 |  from torch.autograd import Function
7 |  from torch.autograd.function import once_differentiable
8 |  
9 |  from wetectron import _C
10 | 
11 | 
12 | class DeformRoIPoolingFunction(Function):
13 | 
14 |     @staticmethod
15 |     def forward(
16 |         ctx,
17 |         data,
18 |         rois,
19 |         offset,
20 |         spatial_scale,
21 |         out_size,
22 |         out_channels,
23 |         no_trans,
24 |         group_size=1,
25 |         part_size=None,
26 |         sample_per_part=4,
27 |         trans_std=.0
28 |     ):
29 |         ctx.spatial_scale = spatial_scale
30 |         ctx.out_size = out_size
31 |         ctx.out_channels = out_channels
32 |         ctx.no_trans = no_trans
33 |         ctx.group_size = group_size
34 |         ctx.part_size = out_size if part_size is None else part_size
35 |         ctx.sample_per_part = sample_per_part
36 |         ctx.trans_std = trans_std
37 | 
38 |         assert 0.0 <= ctx.trans_std <= 1.0
39 |         if not data.is_cuda:
40 |             raise NotImplementedError
41 | 
42 |         n = rois.shape[0]
43 |         output = data.new_empty(n, out_channels, out_size, out_size)
44 | output_count = data.new_empty(n, out_channels, out_size, out_size) 45 | _C.deform_psroi_pooling_forward( 46 | data, 47 | rois, 48 | offset, 49 | output, 50 | output_count, 51 | ctx.no_trans, 52 | ctx.spatial_scale, 53 | ctx.out_channels, 54 | ctx.group_size, 55 | ctx.out_size, 56 | ctx.part_size, 57 | ctx.sample_per_part, 58 | ctx.trans_std 59 | ) 60 | 61 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 62 | ctx.save_for_backward(data, rois, offset) 63 | ctx.output_count = output_count 64 | 65 | return output 66 | 67 | @staticmethod 68 | @once_differentiable 69 | def backward(ctx, grad_output): 70 | if not grad_output.is_cuda: 71 | raise NotImplementedError 72 | 73 | data, rois, offset = ctx.saved_tensors 74 | output_count = ctx.output_count 75 | grad_input = torch.zeros_like(data) 76 | grad_rois = None 77 | grad_offset = torch.zeros_like(offset) 78 | 79 | _C.deform_psroi_pooling_backward( 80 | grad_output, 81 | data, 82 | rois, 83 | offset, 84 | output_count, 85 | grad_input, 86 | grad_offset, 87 | ctx.no_trans, 88 | ctx.spatial_scale, 89 | ctx.out_channels, 90 | ctx.group_size, 91 | ctx.out_size, 92 | ctx.part_size, 93 | ctx.sample_per_part, 94 | ctx.trans_std 95 | ) 96 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 97 | 98 | 99 | deform_roi_pooling = DeformRoIPoolingFunction.apply 100 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import torch 7 | from torch import nn 8 | 9 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 10 | from .roi_box_predictors import make_roi_box_predictor 11 | from .inference import make_roi_box_post_processor 12 | from .loss import make_roi_box_loss_evaluator 13 | 14 | 15 | class ROIBoxHead(torch.nn.Module): 16 | """ 17 | Generic Box Head class. 18 | """ 19 | 20 | def __init__(self, cfg, in_channels): 21 | super(ROIBoxHead, self).__init__() 22 | self.feature_extractor = make_roi_box_feature_extractor(cfg, in_channels) 23 | self.predictor = make_roi_box_predictor( 24 | cfg, self.feature_extractor.out_channels) 25 | self.post_processor = make_roi_box_post_processor(cfg) 26 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 27 | 28 | def forward(self, features, proposals, targets=None): 29 | """ 30 | Arguments: 31 | features (list[Tensor]): feature-maps from possibly several levels 32 | proposals (list[BoxList]): proposal boxes 33 | targets (list[BoxList], optional): the ground-truth targets. 34 | 35 | Returns: 36 | x (Tensor): the result of the feature extractor 37 | proposals (list[BoxList]): during training, the subsampled proposals 38 | are returned. During testing, the predicted boxlists are returned 39 | losses (dict[Tensor]): During training, returns the losses for the 40 | head. During testing, returns an empty dict. 
41 | """ 42 | if self.training: 43 | # Faster R-CNN subsamples during training the proposals with a fixed 44 | # positive / negative ratio 45 | with torch.no_grad(): 46 | proposals = self.loss_evaluator.subsample(proposals, targets) 47 | 48 | # extract features that will be fed to the final classifier. The 49 | # feature_extractor generally corresponds to the pooler + heads 50 | x = self.feature_extractor(features, proposals) 51 | # final classifier that converts the features into predictions 52 | class_logits, box_regression = self.predictor(x) 53 | 54 | if not self.training: 55 | result = self.post_processor((class_logits, box_regression), proposals) 56 | return x, result, {}, {} 57 | 58 | loss_classifier, loss_box_reg, accuracy_cls = self.loss_evaluator( 59 | [class_logits], [box_regression] 60 | ) 61 | 62 | return ( 63 | x, 64 | proposals, 65 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 66 | dict(accuracy_cls=accuracy_cls) 67 | ) 68 | 69 | 70 | def build_roi_box_head(cfg, in_channels): 71 | """ 72 | Constructs a new box head. 73 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 74 | and make it a parameter in the config 75 | """ 76 | return ROIBoxHead(cfg, in_channels) 77 | -------------------------------------------------------------------------------- /wetectron/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | from collections import OrderedDict 11 | 12 | from torch import nn 13 | 14 | from wetectron.modeling import registry 15 | from wetectron.modeling.make_layers import conv_with_kaiming_uniform 16 | from . import fpn as fpn_module 17 | from . 
import resnet
18 | 
19 | 
20 | @registry.BACKBONES.register("R-50-C4")
21 | @registry.BACKBONES.register("R-50-C5")
22 | @registry.BACKBONES.register("R-101-C4")
23 | @registry.BACKBONES.register("R-101-C5")
24 | def build_resnet_backbone(cfg):
25 |     body = resnet.ResNet(cfg)
26 |     model = nn.Sequential(OrderedDict([("body", body)]))
27 |     model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
28 |     return model
29 | 
30 | 
31 | @registry.BACKBONES.register("R-50-FPN")
32 | @registry.BACKBONES.register("R-101-FPN")
33 | @registry.BACKBONES.register("R-152-FPN")
34 | def build_resnet_fpn_backbone(cfg):
35 |     body = resnet.ResNet(cfg)
36 |     in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
37 |     out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
38 |     fpn = fpn_module.FPN(
39 |         in_channels_list=[
40 |             in_channels_stage2,
41 |             in_channels_stage2 * 2,
42 |             in_channels_stage2 * 4,
43 |             in_channels_stage2 * 8,
44 |         ],
45 |         out_channels=out_channels,
46 |         conv_block=conv_with_kaiming_uniform(
47 |             cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
48 |         ),
49 |         top_blocks=fpn_module.LastLevelMaxPool(),
50 |     )
51 |     model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))
52 |     model.out_channels = out_channels
53 |     return model
54 | 
55 | 
56 | @registry.BACKBONES.register("R-50-FPN-RETINANET")
57 | @registry.BACKBONES.register("R-101-FPN-RETINANET")
58 | def build_resnet_fpn_p3p7_backbone(cfg):
59 |     body = resnet.ResNet(cfg)
60 |     in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
61 |     out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
62 |     in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \
63 |         else out_channels
64 |     fpn = fpn_module.FPN(
65 |         in_channels_list=[
66 |             0,
67 |             in_channels_stage2 * 2,
68 |             in_channels_stage2 * 4,
69 |             in_channels_stage2 * 8,
70 |         ],
71 |         out_channels=out_channels,
72 |         conv_block=conv_with_kaiming_uniform(
73 |             cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
74 |         ),
75 |         top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels),
76 |     )
77 |     model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))
78 |     model.out_channels = out_channels
79 |     return model
80 | 
81 | 
82 | def build_backbone(cfg):
83 |     assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \
84 |         "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in registry".format(
85 |             cfg.MODEL.BACKBONE.CONV_BODY
86 |         )
87 |     return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg)
--------------------------------------------------------------------------------
/wetectron/utils/miscellaneous.py:
--------------------------------------------------------------------------------
1 |  #------------------------------------------------------------------------------
2 |  # Code adapted from https://github.com/NVlabs/wetectron
3 |  # by Huy V. Vo and Oriane Simeoni
4 |  # INRIA, Valeo.ai
5 |  #------------------------------------------------------------------------------
6 |  
7 |  # --------------------------------------------------------
8 |  # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
9 |  # Nvidia Source Code License-NC
10 | # --------------------------------------------------------
11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
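# A minimal sketch of how the seeding helper defined below is typically used
# at the start of a run (the call sites are illustrative, not code from this
# repository):
#
#     seed_all_rng(42)    # deterministic: seeds numpy, torch and python's random
#     seed_all_rng(None)  # non-deterministic: derives a strong seed from the
#                         # pid, the current time and os.urandom, then logs it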
12 | import errno
13 | import json
14 | import logging
15 | import os
16 | import shutil
17 | import socket
18 | import random
19 | import numpy as np
20 | import torch
21 | from datetime import datetime
22 | from pathlib import Path
23 | from .comm import is_main_process
24 | 
25 | 
26 | def get_run_name():
27 |     """ A unique name for each run """
28 |     return datetime.now().strftime(
29 |         '%b%d_%H_%M_%S') + '_' + socket.gethostname()
30 | 
31 | def copy_source_code(output_dir):
32 |     os.makedirs(output_dir)
33 |     p = Path(__file__).parents[2]
34 |     checklist = ['apex', 'build', 'configs', 'setup.py',
35 |                  'tools', 'wetectron', 'wetectron.egg-info']
36 |     except_list = ['LICENSE', 'outputs', 'README.md',
37 |                    'datasets', 'docs', 'proposal', 'notebooks']
38 |     Fs = os.listdir(p)
39 |     assert set(checklist).issubset(set(Fs))
40 |     to_copy = [el for el in Fs if el not in except_list]
41 |     # copy everything outside the exclusion list, preserving symlinks
42 |     for f in to_copy:
43 |         if Path(p,f).is_dir():
44 |             shutil.copytree(Path(p,f), Path(output_dir,f), symlinks=True)
45 |         else:
46 |             shutil.copyfile(Path(p,f), Path(output_dir,f), follow_symlinks=False)
47 | 
48 | def seed_all_rng(seed=None):
49 |     """
50 |     Set the random seed for the RNG in torch, numpy and python.
51 |     Args:
52 |         seed (int): if None, will use a strong random seed.
53 |     """
54 |     if seed is None:
55 |         seed = (
56 |             os.getpid()
57 |             + int(datetime.now().strftime("%S%f"))
58 |             + int.from_bytes(os.urandom(2), "big")
59 |         )
60 |         logger = logging.getLogger(__name__)
61 |         logger.info("Using a generated random seed {}".format(seed))
62 |     np.random.seed(seed)
63 |     torch.set_rng_state(torch.manual_seed(seed).get_state())
64 |     random.seed(seed)
65 | 
66 | def mkdir(path):
67 |     try:
68 |         os.makedirs(path)
69 |     except OSError as e:
70 |         if e.errno != errno.EEXIST:
71 |             raise
72 | 
73 | 
74 | def save_labels(dataset_list, output_dir):
75 |     if is_main_process():
76 |         logger = logging.getLogger(__name__)
77 | 
78 |         ids_to_labels = {}
79 |         for dataset in dataset_list:
80 |             if hasattr(dataset, 'categories'):
81 |                 ids_to_labels.update(dataset.categories)
82 |             else:
83 |                 logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format(
84 |                     dataset.__class__.__name__))
85 | 
86 |         if ids_to_labels:
87 |             labels_file = os.path.join(output_dir, 'labels.json')
88 |             logger.info("Saving labels mapping into {}".format(labels_file))
89 |             with open(labels_file, 'w') as f:
90 |                 json.dump(ids_to_labels, f, indent=2)
91 | 
92 | 
93 | def save_config(cfg, path):
94 |     if is_main_process():
95 |         with open(path, 'w') as f:
96 |             f.write(cfg.dump())
97 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/mask_head/mask_head.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
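# A small worked example for keep_only_positive_boxes() defined below (the
# labels are illustrative): if a BoxList carries labels [0, 2, 0, 1], the
# function keeps the boxes at positions 1 and 3 and also returns the boolean
# mask [False, True, False, True], which lets shared box features be
# subsampled the same way in ROIMaskHead.forward().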
6 |  import torch
7 |  from torch import nn
8 |  
9 |  from wetectron.structures.bounding_box import BoxList
10 | 
11 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor
12 | from .roi_mask_predictors import make_roi_mask_predictor
13 | from .inference import make_roi_mask_post_processor
14 | from .loss import make_roi_mask_loss_evaluator
15 | 
16 | 
17 | def keep_only_positive_boxes(boxes):
18 |     """
19 |     Given a set of BoxList containing the `labels` field,
20 |     return a set of BoxList for which `labels > 0`.
21 | 
22 |     Arguments:
23 |         boxes (list of BoxList)
24 |     """
25 |     assert isinstance(boxes, (list, tuple))
26 |     assert isinstance(boxes[0], BoxList)
27 |     assert boxes[0].has_field("labels")
28 |     positive_boxes = []
29 |     positive_inds = []
30 |     # collect, per image, the positive boxes and a boolean mask over all boxes
31 |     for boxes_per_image in boxes:
32 |         labels = boxes_per_image.get_field("labels")
33 |         inds_mask = labels > 0
34 |         inds = inds_mask.nonzero(as_tuple=False).squeeze(1)
35 |         positive_boxes.append(boxes_per_image[inds])
36 |         positive_inds.append(inds_mask)
37 |     return positive_boxes, positive_inds
38 | 
39 | 
40 | class ROIMaskHead(torch.nn.Module):
41 |     def __init__(self, cfg, in_channels):
42 |         super(ROIMaskHead, self).__init__()
43 |         self.cfg = cfg.clone()
44 |         self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels)
45 |         self.predictor = make_roi_mask_predictor(
46 |             cfg, self.feature_extractor.out_channels)
47 |         self.post_processor = make_roi_mask_post_processor(cfg)
48 |         self.loss_evaluator = make_roi_mask_loss_evaluator(cfg)
49 | 
50 |     def forward(self, features, proposals, targets=None):
51 |         """
52 |         Arguments:
53 |             features (list[Tensor]): feature-maps from possibly several levels
54 |             proposals (list[BoxList]): proposal boxes
55 |             targets (list[BoxList], optional): the ground-truth targets.
56 | 
57 |         Returns:
58 |             x (Tensor): the result of the feature extractor
59 |             proposals (list[BoxList]): during training, the original proposals
60 |                 are returned. During testing, the predicted boxlists are returned
61 |                 with the `mask` field set
62 |             losses (dict[Tensor]): During training, returns the losses for the
63 |                 head. During testing, returns an empty dict.
64 |         """
65 | 
66 |         if self.training:
67 |             # during training, only focus on positive boxes
68 |             all_proposals = proposals
69 |             proposals, positive_inds = keep_only_positive_boxes(proposals)
70 |         if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
71 |             x = features
72 |             x = x[torch.cat(positive_inds, dim=0)]
73 |         else:
74 |             x = self.feature_extractor(features, proposals)
75 |         mask_logits = self.predictor(x)
76 | 
77 |         if not self.training:
78 |             result = self.post_processor(mask_logits, proposals)
79 |             return x, result, {}
80 | 
81 |         loss_mask = self.loss_evaluator(proposals, mask_logits, targets)
82 | 
83 |         return x, all_proposals, dict(loss_mask=loss_mask)
84 | 
85 | 
86 | def build_roi_mask_head(cfg, in_channels):
87 |     return ROIMaskHead(cfg, in_channels)
--------------------------------------------------------------------------------
/wetectron/utils/model_zoo.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 |  # Nvidia Source Code License-NC
8 |  # --------------------------------------------------------
9 |  # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | import os
11 | import sys
12 | 
13 | try:
14 |     from torch.hub import _download_url_to_file
15 |     from torch.hub import urlparse
16 |     from torch.hub import HASH_REGEX
17 | except ImportError:
18 |     from torch.utils.model_zoo import _download_url_to_file
19 |     from torch.utils.model_zoo import urlparse
20 |     from torch.utils.model_zoo import HASH_REGEX
21 | 
22 | from wetectron.utils.comm import is_main_process
23 | from wetectron.utils.comm import synchronize
24 | 
25 | 
26 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py
27 | # but with a few improvements and modifications
28 | def cache_url(url, model_dir=None, progress=True):
29 |     r"""Loads the Torch serialized object at the given URL.
30 |     If the object is already present in `model_dir`, it's deserialized and
31 |     returned. The filename part of the URL should follow the naming convention
32 |     ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
33 |     digits of the SHA256 hash of the contents of the file. The hash is used to
34 |     ensure unique names and to verify the contents of the file.
35 |     The default value of `model_dir` is ``$TORCH_HOME/models`` where
36 |     ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
37 |     overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
38 |     Args:
39 |         url (string): URL of the object to download
40 |         model_dir (string, optional): directory in which to save the object
41 |         progress (bool, optional): whether or not to display a progress bar to stderr
42 |     Example:
43 |         >>> cached_file = wetectron.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
44 |     """
45 |     if model_dir is None:
46 |         torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch"))
47 |         model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models"))
48 |     if not os.path.exists(model_dir):
49 |         os.makedirs(model_dir)
50 |     parts = urlparse(url)
51 |     filename = os.path.basename(parts.path)
52 |     if filename == "model_final.pkl" or filename == "rpn_proposals.pkl":
53 |         # workaround as pre-trained Caffe2 models from Detectron have all the same filename
54 |         # so make the full path the filename by replacing / with _
55 |         filename = parts.path.replace("/", "_")
56 |     cached_file = os.path.join(model_dir, filename)
57 |     if not os.path.exists(cached_file) and is_main_process():
58 |         sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
59 |         hash_prefix = HASH_REGEX.search(filename)
60 |         if hash_prefix is not None:
61 |             hash_prefix = hash_prefix.group(1)
62 |             # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
63 |             # which matches the hash PyTorch uses. So we skip the hash matching
64 |             # if the hash_prefix is less than 6 characters
65 |             if len(hash_prefix) < 6:
66 |                 hash_prefix = None
67 |         _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
68 |     synchronize()
69 |     return cached_file
--------------------------------------------------------------------------------
/wetectron/modeling/balanced_positive_negative_sampler.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V.
Vo and Oriane Simeoni
4 |  # INRIA, Valeo.ai
5 |  #------------------------------------------------------------------------------
6 |  
7 |  # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
8 |  import torch
9 |  
10 | 
11 | class BalancedPositiveNegativeSampler(object):
12 |     """
13 |     This class samples batches, ensuring that they contain a fixed proportion of positives
14 |     """
15 | 
16 |     def __init__(self, batch_size_per_image, positive_fraction):
17 |         """
18 |         Arguments:
19 |             batch_size_per_image (int): number of elements to be selected per image
20 |             positive_fraction (float): percentage of positive elements per batch
21 |         """
22 |         self.batch_size_per_image = batch_size_per_image
23 |         self.positive_fraction = positive_fraction
24 | 
25 |     def __call__(self, matched_idxs, sampling_weights=None):
26 |         """
27 |         Arguments:
28 |             matched_idxs: list of tensors containing -1, 0 or positive values.
29 |                 Each tensor corresponds to a specific image.
30 |                 -1 values are ignored, 0 are considered as negatives and > 0 as
31 |                 positives.
32 |             sampling_weights: list of tensors or None, containing sampling weights of proposals.
33 |                 The weights are only used in negative sampling.
34 | 
35 |         Returns:
36 |             pos_idx (list[tensor])
37 |             neg_idx (list[tensor])
38 | 
39 |         Returns two lists of binary masks for each image.
40 |         The first list contains the positive elements that were selected,
41 |         and the second list the negative examples.
42 |         """
43 |         pos_idx = []
44 |         neg_idx = []
45 |         if sampling_weights is None:
46 |             sampling_weights = [None] * len(matched_idxs)
47 |         for matched_idxs_per_image, sampling_weights_per_image in zip(matched_idxs, sampling_weights):
48 |             positive = torch.nonzero(matched_idxs_per_image >= 1, as_tuple=False).squeeze(1)
49 |             negative = torch.nonzero(matched_idxs_per_image == 0, as_tuple=False).squeeze(1)
50 | 
51 |             num_pos = int(self.batch_size_per_image * self.positive_fraction)
52 |             # protect against not enough positive examples
53 |             num_pos = min(positive.numel(), num_pos)
54 |             num_neg = self.batch_size_per_image - num_pos
55 |             # protect against not enough negative examples
56 |             num_neg = min(negative.numel(), num_neg)
57 | 
58 |             # randomly select positive and negative examples
59 |             perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
60 |             if sampling_weights_per_image is None:
61 |                 perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
62 |             else:
63 |                 perm2 = torch.argsort(sampling_weights_per_image[negative], descending=True)[:num_neg]
64 | 
65 |             pos_idx_per_image = positive[perm1]
66 |             neg_idx_per_image = negative[perm2]
67 | 
68 |             # create binary mask from indices
69 |             pos_idx_per_image_mask = torch.zeros_like(
70 |                 matched_idxs_per_image, dtype=torch.bool
71 |             )
72 |             neg_idx_per_image_mask = torch.zeros_like(
73 |                 matched_idxs_per_image, dtype=torch.bool
74 |             )
75 |             pos_idx_per_image_mask[pos_idx_per_image] = 1
76 |             neg_idx_per_image_mask[neg_idx_per_image] = 1
77 | 
78 |             pos_idx.append(pos_idx_per_image_mask)
79 |             neg_idx.append(neg_idx_per_image_mask)
80 | 
81 |         return pos_idx, neg_idx
--------------------------------------------------------------------------------
/wetectron/modeling/box_coder.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c)
Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import math 7 | 8 | import torch 9 | 10 | 11 | class BoxCoder(object): 12 | """ 13 | This class encodes and decodes a set of bounding boxes into 14 | the representation used for training the regressors. 15 | """ 16 | 17 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 18 | """ 19 | Arguments: 20 | weights (4-element tuple) 21 | bbox_xform_clip (float) 22 | """ 23 | self.weights = weights 24 | self.bbox_xform_clip = bbox_xform_clip 25 | 26 | def encode(self, reference_boxes, proposals): 27 | """ 28 | Encode a set of proposals with respect to some 29 | reference boxes 30 | 31 | Arguments: 32 | reference_boxes (Tensor): reference boxes 33 | proposals (Tensor): boxes to be encoded 34 | """ 35 | 36 | TO_REMOVE = 1 # TODO remove 37 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 38 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 39 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 40 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 41 | 42 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 43 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 44 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 45 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 46 | 47 | wx, wy, ww, wh = self.weights 48 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 49 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 50 | targets_dw = ww * torch.log(gt_widths / ex_widths) 51 | targets_dh = wh * torch.log(gt_heights / ex_heights) 52 | 53 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 54 | return targets 55 | 56 | def decode(self, rel_codes, boxes): 57 | """ 58 | From a set of original boxes and encoded relative box offsets, 59 | get the decoded boxes. 60 | 61 | Arguments: 62 | rel_codes (Tensor): encoded boxes 63 | boxes (Tensor): reference boxes. 
64 | """ 65 | 66 | boxes = boxes.to(rel_codes.dtype) 67 | 68 | TO_REMOVE = 1 # TODO remove 69 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 70 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 71 | ctr_x = boxes[:, 0] + 0.5 * widths 72 | ctr_y = boxes[:, 1] + 0.5 * heights 73 | 74 | wx, wy, ww, wh = self.weights 75 | dx = rel_codes[:, 0::4] / wx 76 | dy = rel_codes[:, 1::4] / wy 77 | dw = rel_codes[:, 2::4] / ww 78 | dh = rel_codes[:, 3::4] / wh 79 | 80 | # Prevent sending too large values into torch.exp() 81 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 82 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 83 | 84 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 85 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 86 | pred_w = torch.exp(dw) * widths[:, None] 87 | pred_h = torch.exp(dh) * heights[:, None] 88 | 89 | pred_boxes = torch.zeros_like(rel_codes) 90 | # x1 91 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 92 | # y1 93 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 94 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 95 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 96 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 97 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 98 | 99 | return pred_boxes 100 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import torch 7 | 8 | from .box_head.box_head import build_roi_box_head 9 | from .mask_head.mask_head import build_roi_mask_head 10 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 11 | from .weak_head.weak_head import build_roi_weak_head 12 | 13 | 14 | class CombinedROIHeads(torch.nn.ModuleDict): 15 | """ 16 | Combines a set of individual heads (for box prediction or masks) into a single 17 | head. 
18 | """ 19 | 20 | def __init__(self, cfg, heads): 21 | super(CombinedROIHeads, self).__init__(heads) 22 | self.cfg = cfg.clone() 23 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 24 | self.mask.feature_extractor = self.box.feature_extractor 25 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 26 | self.keypoint.feature_extractor = self.box.feature_extractor 27 | 28 | def forward(self, features, proposals, targets=None, dummy_model=None): 29 | losses = {} 30 | metrics = {} 31 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 32 | x, detections, loss_box, accuracy_cls = self.box(features, proposals, targets) 33 | losses.update(loss_box) 34 | metrics.update(accuracy_cls) 35 | if self.cfg.MODEL.MASK_ON: 36 | mask_features = features 37 | # optimization: during training, if we share the feature extractor between 38 | # the box and the mask heads, then we can reuse the features already computed 39 | if ( 40 | self.training 41 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 42 | ): 43 | mask_features = x 44 | # During training, self.box() will return the unaltered proposals as "detections" 45 | # this makes the API consistent during training and testing 46 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 47 | losses.update(loss_mask) 48 | 49 | if self.cfg.MODEL.KEYPOINT_ON: 50 | keypoint_features = features 51 | # optimization: during training, if we share the feature extractor between 52 | # the box and the mask heads, then we can reuse the features already computed 53 | if ( 54 | self.training 55 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 56 | ): 57 | keypoint_features = x 58 | # During training, self.box() will return the unaltered proposals as "detections" 59 | # this makes the API consistent during training and testing 60 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 61 | losses.update(loss_keypoint) 62 | return x, detections, losses, metrics 63 | 64 | 65 | def build_roi_heads(cfg, in_channels): 66 | # individually create the heads, that will be combined together 67 | # afterwards 68 | roi_heads = [] 69 | if cfg.MODEL.RETINANET_ON: 70 | return [] 71 | 72 | if cfg.MODEL.WSOD_ON: 73 | return build_roi_weak_head(cfg, in_channels) 74 | 75 | if not cfg.MODEL.RPN_ONLY: 76 | roi_heads.append(("box", build_roi_box_head(cfg, in_channels))) 77 | 78 | if cfg.MODEL.MASK_ON: 79 | roi_heads.append(("mask", build_roi_mask_head(cfg, in_channels))) 80 | 81 | if cfg.MODEL.KEYPOINT_ON: 82 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels))) 83 | 84 | # combine individual heads in a single module 85 | if roi_heads: 86 | roi_heads = CombinedROIHeads(cfg, roi_heads) 87 | 88 | return roi_heads 89 | -------------------------------------------------------------------------------- /wetectron/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 8 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 
9 |  # FIXME remove this once c10d fixes the bug it has
10 | import math
11 | import torch
12 | import torch.distributed as dist
13 | from torch.utils.data.sampler import Sampler
14 | 
15 | 
16 | class DistributedSampler(Sampler):
17 |     """
18 |     Sampler that restricts data loading to a subset of the dataset.
19 |     It is especially useful in conjunction with
20 |     :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
21 |     process can pass a DistributedSampler instance as a DataLoader sampler,
22 |     and load a subset of the original dataset that is exclusive to it.
23 |     .. note::
24 |         Dataset is assumed to be of constant size.
25 |     Arguments:
26 |         dataset: Dataset used for sampling.
27 |         num_replicas (optional): Number of processes participating in
28 |             distributed training.
29 |         rank (optional): Rank of the current process within num_replicas.
30 |     """
31 | 
32 |     def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
33 |         if num_replicas is None:
34 |             if not dist.is_available():
35 |                 raise RuntimeError("Requires distributed package to be available")
36 |             num_replicas = dist.get_world_size()
37 |         if rank is None:
38 |             if not dist.is_available():
39 |                 raise RuntimeError("Requires distributed package to be available")
40 |             rank = dist.get_rank()
41 |         self.dataset = dataset
42 |         self.num_replicas = num_replicas
43 |         self.rank = rank
44 |         self.epoch = 0
45 |         self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
46 |         self.total_size = self.num_samples * self.num_replicas
47 |         self.shuffle = shuffle
48 | 
49 |     def __iter__(self):
50 |         if self.shuffle:
51 |             # deterministically shuffle based on epoch
52 |             g = torch.Generator()
53 |             g.manual_seed(self.epoch)
54 |             indices = torch.randperm(len(self.dataset), generator=g).tolist()
55 |         else:
56 |             indices = torch.arange(len(self.dataset)).tolist()
57 | 
58 |         # add extra samples to make it evenly divisible
59 |         indices += indices[: (self.total_size - len(indices))]
60 |         assert len(indices) == self.total_size
61 | 
62 |         # subsample
63 |         offset = self.num_samples * self.rank
64 |         indices = indices[offset : offset + self.num_samples]
65 |         assert len(indices) == self.num_samples
66 | 
67 |         return iter(indices)
68 | 
69 |     def __len__(self):
70 |         return self.num_samples
71 | 
72 |     def set_epoch(self, epoch):
73 |         self.epoch = epoch
74 | 
75 | class WeightedDistributedSampler(DistributedSampler):
76 |     """
77 |     A modified version of DistributedSampler that takes into account the different
78 |     sampling weights of datapoints.
79 | """ 80 | def __init__(self, dataset, weights, num_replicas=None, rank=None, shuffle=True): 81 | super(WeightedDistributedSampler, self).__init__( 82 | dataset, num_replicas=None, rank=None, shuffle=True 83 | ) 84 | self.weights = torch.as_tensor(weights, dtype=torch.double) 85 | 86 | def __iter__(self): 87 | indices = torch.multinomial(self.weights, len(self.dataset), True).tolist() 88 | 89 | # add extra samples to make it evenly divisible 90 | indices += indices[: (self.total_size - len(indices))] 91 | assert len(indices) == self.total_size 92 | 93 | # subsample 94 | offset = self.num_samples * self.rank 95 | indices = indices[offset : offset + self.num_samples] 96 | assert len(indices) == self.num_samples 97 | 98 | return iter(indices) 99 | -------------------------------------------------------------------------------- /wetectron/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | """ 6 | This file contains specific functions for computing losses on the RetinaNet 7 | file 8 | """ 9 | 10 | import torch 11 | from torch.nn import functional as F 12 | 13 | from ..utils import concat_box_prediction_layers 14 | 15 | from wetectron.layers import smooth_l1_loss 16 | from wetectron.layers import SigmoidFocalLoss 17 | from wetectron.modeling.matcher import Matcher 18 | from wetectron.modeling.utils import cat 19 | from wetectron.structures.boxlist_ops import boxlist_iou 20 | from wetectron.structures.boxlist_ops import cat_boxlist 21 | from wetectron.modeling.rpn.loss import RPNLossComputation 22 | 23 | class RetinaNetLossComputation(RPNLossComputation): 24 | """ 25 | This class computes the RetinaNet loss. 
26 | """ 27 | 28 | def __init__(self, proposal_matcher, box_coder, 29 | generate_labels_func, 30 | sigmoid_focal_loss, 31 | bbox_reg_beta=0.11, 32 | regress_norm=1.0): 33 | """ 34 | Arguments: 35 | proposal_matcher (Matcher) 36 | box_coder (BoxCoder) 37 | """ 38 | self.proposal_matcher = proposal_matcher 39 | self.box_coder = box_coder 40 | self.box_cls_loss_func = sigmoid_focal_loss 41 | self.bbox_reg_beta = bbox_reg_beta 42 | self.copied_fields = ['labels'] 43 | self.generate_labels_func = generate_labels_func 44 | self.discard_cases = ['between_thresholds'] 45 | self.regress_norm = regress_norm 46 | 47 | def __call__(self, anchors, box_cls, box_regression, targets): 48 | """ 49 | Arguments: 50 | anchors (list[BoxList]) 51 | box_cls (list[Tensor]) 52 | box_regression (list[Tensor]) 53 | targets (list[BoxList]) 54 | 55 | Returns: 56 | retinanet_cls_loss (Tensor) 57 | retinanet_regression_loss (Tensor 58 | """ 59 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 60 | labels, regression_targets = self.prepare_targets(anchors, targets) 61 | 62 | N = len(labels) 63 | box_cls, box_regression = \ 64 | concat_box_prediction_layers(box_cls, box_regression) 65 | 66 | labels = torch.cat(labels, dim=0) 67 | regression_targets = torch.cat(regression_targets, dim=0) 68 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 69 | 70 | retinanet_regression_loss = smooth_l1_loss( 71 | box_regression[pos_inds], 72 | regression_targets[pos_inds], 73 | beta=self.bbox_reg_beta, 74 | size_average=False, 75 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 76 | 77 | labels = labels.int() 78 | 79 | retinanet_cls_loss = self.box_cls_loss_func( 80 | box_cls, 81 | labels 82 | ) / (pos_inds.numel() + N) 83 | 84 | return retinanet_cls_loss, retinanet_regression_loss 85 | 86 | 87 | def generate_retinanet_labels(matched_targets): 88 | labels_per_image = matched_targets.get_field("labels") 89 | return labels_per_image 90 | 91 | 92 | def make_retinanet_loss_evaluator(cfg, box_coder): 93 | matcher = Matcher( 94 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 95 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 96 | allow_low_quality_matches=True, 97 | ) 98 | sigmoid_focal_loss = SigmoidFocalLoss( 99 | cfg.MODEL.RETINANET.LOSS_GAMMA, 100 | cfg.MODEL.RETINANET.LOSS_ALPHA 101 | ) 102 | 103 | loss_evaluator = RetinaNetLossComputation( 104 | matcher, 105 | box_coder, 106 | generate_retinanet_labels, 107 | sigmoid_focal_loss, 108 | bbox_reg_beta = cfg.MODEL.RETINANET.BBOX_REG_BETA, 109 | regress_norm = cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 110 | ) 111 | return loss_evaluator 112 | -------------------------------------------------------------------------------- /wetectron/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
10 | from collections import OrderedDict
11 | import logging
12 | import torch
13 | 
14 | from wetectron.utils.imports import import_file
15 | 
16 | 
17 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
18 |     """
19 |     Strategy: suppose that the models that we will create will have prefixes appended
20 |     to each of its keys, for example due to an extra level of nesting that the original
21 |     pre-trained weights from ImageNet won't contain. For example, model.state_dict()
22 |     might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
23 |     res2.conv1.weight. We thus want to match both parameters together.
24 |     For that, we look for each model weight, look among all loaded keys if there is one
25 |     that is a suffix of the current weight name, and use it if that's the case.
26 |     If multiple matches exist, take the one with longest size
27 |     of the corresponding name. For example, for the same model as before, the pretrained
28 |     weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
29 |     we want to match backbone[0].body.conv1.weight to conv1.weight, and
30 |     backbone[0].body.res2.conv1.weight to res2.conv1.weight.
31 |     """
32 |     current_keys = sorted(list(model_state_dict.keys()))
33 |     loaded_keys = sorted(list(loaded_state_dict.keys()))
34 |     # get a matrix of string matches, where each (i, j) entry correspond to the size of the
35 |     # loaded_key string, if it matches
36 |     match_matrix = [
37 |         len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys
38 |     ]
39 |     match_matrix = torch.as_tensor(match_matrix).view(
40 |         len(current_keys), len(loaded_keys)
41 |     )
42 |     max_match_size, idxs = match_matrix.max(1)
43 |     # remove indices that correspond to no-match
44 |     idxs[max_match_size == 0] = -1
45 | 
46 |     # used for logging
47 |     max_size = max([len(key) for key in current_keys]) if current_keys else 1
48 |     max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1
49 |     log_str_template = "{: <{}} loaded from {: <{}} of shape {}"
50 |     logger = logging.getLogger(__name__)
51 |     for idx_new, idx_old in enumerate(idxs.tolist()):
52 |         if idx_old == -1:
53 |             continue
54 |         key = current_keys[idx_new]
55 |         key_old = loaded_keys[idx_old]
56 |         model_state_dict[key] = loaded_state_dict[key_old]
57 |         logger.info(
58 |             log_str_template.format(
59 |                 key,
60 |                 max_size,
61 |                 key_old,
62 |                 max_size_loaded,
63 |                 tuple(loaded_state_dict[key_old].shape),
64 |             )
65 |         )
66 | 
67 | 
68 | def strip_prefix_if_present(state_dict, prefix):
69 |     keys = sorted(state_dict.keys())
70 |     if not all(key.startswith(prefix) for key in keys):
71 |         return state_dict
72 |     stripped_state_dict = OrderedDict()
73 |     for key, value in state_dict.items():
74 |         stripped_state_dict[key[len(prefix):]] = value  # slice rather than str.replace so only the leading prefix is removed
75 |     return stripped_state_dict
76 | 
77 | 
78 | def load_state_dict(model, loaded_state_dict):
79 |     model_state_dict = model.state_dict()
80 |     # if the state_dict comes from a model that was wrapped in a
81 |     # DataParallel or DistributedDataParallel during serialization,
82 |     # remove the "module" prefix before performing the matching
83 |     loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.")
84 |     align_and_update_state_dicts(model_state_dict, loaded_state_dict)
85 | 
86 |     # use strict loading
87 |     model.load_state_dict(model_state_dict)
88 | 
--------------------------------------------------------------------------------
/wetectron/csrc/cuda/deform_pool_cuda.cu:
-------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // modify from 6 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 7 | 8 | // based on 9 | // author: Charles Shang 10 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 11 | 12 | #include <ATen/ATen.h> 13 | #include <ATen/cuda/CUDAContext.h> 14 | 15 | #include <THC/THC.h> 16 | #include <THC/THCDeviceUtils.cuh> 17 | 18 | #include <vector> 19 | #include <iostream> 20 | #include <cmath> 21 | 22 | #ifndef AT_CHECK 23 | #define AT_CHECK TORCH_CHECK 24 | #endif 25 | 26 | void DeformablePSROIPoolForward( 27 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 28 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 29 | const int height, const int width, const int num_bbox, 30 | const int channels_trans, const int no_trans, const float spatial_scale, 31 | const int output_dim, const int group_size, const int pooled_size, 32 | const int part_size, const int sample_per_part, const float trans_std); 33 | 34 | void DeformablePSROIPoolBackwardAcc( 35 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 36 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 37 | at::Tensor trans_grad, const int batch, const int channels, 38 | const int height, const int width, const int num_bbox, 39 | const int channels_trans, const int no_trans, const float spatial_scale, 40 | const int output_dim, const int group_size, const int pooled_size, 41 | const int part_size, const int sample_per_part, const float trans_std); 42 | 43 | void deform_psroi_pooling_cuda_forward( 44 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 45 | at::Tensor top_count, const int no_trans, const float spatial_scale, 46 | const int output_dim, const int group_size, const int pooled_size, 47 | const int part_size, const int sample_per_part, const float trans_std) 48 | { 49 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 50 | 51 | const int batch = input.size(0); 52 | const int channels = input.size(1); 53 | const int height = input.size(2); 54 | const int width = input.size(3); 55 | const int channels_trans = no_trans ?
2 : trans.size(1); 56 | 57 | const int num_bbox = bbox.size(0); 58 | if (num_bbox != out.size(0)) 59 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 60 | out.size(0), num_bbox); 61 | 62 | DeformablePSROIPoolForward( 63 | input, bbox, trans, out, top_count, batch, channels, height, width, 64 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 65 | pooled_size, part_size, sample_per_part, trans_std); 66 | } 67 | 68 | void deform_psroi_pooling_cuda_backward( 69 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 70 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 71 | const int no_trans, const float spatial_scale, const int output_dim, 72 | const int group_size, const int pooled_size, const int part_size, 73 | const int sample_per_part, const float trans_std) 74 | { 75 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 76 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 77 | 78 | const int batch = input.size(0); 79 | const int channels = input.size(1); 80 | const int height = input.size(2); 81 | const int width = input.size(3); 82 | const int channels_trans = no_trans ? 2 : trans.size(1); 83 | 84 | const int num_bbox = bbox.size(0); 85 | if (num_bbox != out_grad.size(0)) 86 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 87 | out_grad.size(0), num_bbox); 88 | 89 | DeformablePSROIPoolBackwardAcc( 90 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 91 | channels, height, width, num_bbox, channels_trans, no_trans, 92 | spatial_scale, output_dim, group_size, pooled_size, part_size, 93 | sample_per_part, trans_std); 94 | } 95 | -------------------------------------------------------------------------------- /wetectron/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | """ 7 | Miscellaneous utility functions 8 | """ 9 | 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from wetectron.config import cfg 14 | from wetectron.layers import Conv2d 15 | from wetectron.modeling.poolers import Pooler 16 | 17 | 18 | def get_group_gn(dim, dim_per_gp, num_groups): 19 | """get number of groups used by GroupNorm, based on number of channels.""" 20 | assert dim_per_gp == -1 or num_groups == -1, \ 21 | "GroupNorm: can only specify G or C/G."
22 | 23 | if dim_per_gp > 0: 24 | assert dim % dim_per_gp == 0, \ 25 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 26 | group_gn = dim // dim_per_gp 27 | else: 28 | assert dim % num_groups == 0, \ 29 | "dim: {}, num_groups: {}".format(dim, num_groups) 30 | group_gn = num_groups 31 | 32 | return group_gn 33 | 34 | 35 | def group_norm(out_channels, affine=True, divisor=1): 36 | out_channels = out_channels // divisor 37 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 38 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 39 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 40 | return torch.nn.GroupNorm( 41 | get_group_gn(out_channels, dim_per_gp, num_groups), 42 | out_channels, 43 | eps, 44 | affine 45 | ) 46 | 47 | 48 | def make_conv3x3( 49 | in_channels, 50 | out_channels, 51 | dilation=1, 52 | stride=1, 53 | use_gn=False, 54 | use_relu=False, 55 | kaiming_init=True 56 | ): 57 | conv = Conv2d( 58 | in_channels, 59 | out_channels, 60 | kernel_size=3, 61 | stride=stride, 62 | padding=dilation, 63 | dilation=dilation, 64 | bias=False if use_gn else True 65 | ) 66 | if kaiming_init: 67 | nn.init.kaiming_normal_( 68 | conv.weight, mode="fan_out", nonlinearity="relu" 69 | ) 70 | else: 71 | torch.nn.init.normal_(conv.weight, std=0.01) 72 | if not use_gn: 73 | nn.init.constant_(conv.bias, 0) 74 | module = [conv,] 75 | if use_gn: 76 | module.append(group_norm(out_channels)) 77 | if use_relu: 78 | module.append(nn.ReLU(inplace=True)) 79 | if len(module) > 1: 80 | return nn.Sequential(*module) 81 | return conv 82 | 83 | 84 | def make_fc(dim_in, hidden_dim, use_gn=False): 85 | ''' 86 | Caffe2 implementation uses XavierFill, which in fact 87 | corresponds to kaiming_uniform_ in PyTorch 88 | ''' 89 | if use_gn: 90 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 91 | nn.init.kaiming_uniform_(fc.weight, a=1) 92 | return nn.Sequential(fc, group_norm(hidden_dim)) 93 | fc = nn.Linear(dim_in, hidden_dim) 94 | nn.init.kaiming_uniform_(fc.weight, a=1) 95 | nn.init.constant_(fc.bias, 0) 96 | return fc 97 | 98 | 99 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 100 | def make_conv( 101 | in_channels, out_channels, kernel_size, stride=1, dilation=1 102 | ): 103 | conv = Conv2d( 104 | in_channels, 105 | out_channels, 106 | kernel_size=kernel_size, 107 | stride=stride, 108 | padding=dilation * (kernel_size - 1) // 2, 109 | dilation=dilation, 110 | bias=False if use_gn else True 111 | ) 112 | # Caffe2 implementation uses XavierFill, which in fact 113 | # corresponds to kaiming_uniform_ in PyTorch 114 | nn.init.kaiming_uniform_(conv.weight, a=1) 115 | if not use_gn: 116 | nn.init.constant_(conv.bias, 0) 117 | module = [conv,] 118 | if use_gn: 119 | module.append(group_norm(out_channels)) 120 | if use_relu: 121 | module.append(nn.ReLU(inplace=True)) 122 | if len(module) > 1: 123 | return nn.Sequential(*module) 124 | return conv 125 | 126 | return make_conv 127 | -------------------------------------------------------------------------------- /wetectron/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 
9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 12 | from collections import defaultdict 13 | from collections import deque 14 | import time 15 | from datetime import datetime 16 | import torch 17 | 18 | from .comm import is_main_process 19 | 20 | 21 | class SmoothedValue(object): 22 | """Track a series of values and provide access to smoothed values over a 23 | window or the global series average. 24 | """ 25 | 26 | def __init__(self, window_size=20): 27 | self.deque = deque(maxlen=window_size) 28 | self.series = [] 29 | self.total = 0.0 30 | self.count = 0 31 | 32 | def update(self, value): 33 | self.deque.append(value) 34 | self.series.append(value) 35 | self.count += 1 36 | self.total += value 37 | 38 | @property 39 | def median(self): 40 | d = torch.tensor(list(self.deque)) 41 | return d.median().item() 42 | 43 | @property 44 | def avg(self): 45 | d = torch.tensor(list(self.deque)) 46 | return d.mean().item() 47 | 48 | @property 49 | def global_avg(self): 50 | return self.total / self.count 51 | 52 | 53 | class MetricLogger(object): 54 | def __init__(self, delimiter="\t"): 55 | self.meters = defaultdict(SmoothedValue) 56 | self.delimiter = delimiter 57 | 58 | def update(self, **kwargs): 59 | for k, v in kwargs.items(): 60 | if isinstance(v, torch.Tensor): 61 | v = v.item() 62 | assert isinstance(v, (float, int)) 63 | self.meters[k].update(v) 64 | 65 | def __getattr__(self, attr): 66 | if attr in self.meters: 67 | return self.meters[attr] 68 | if attr in self.__dict__: 69 | return self.__dict__[attr] 70 | raise AttributeError("'{}' object has no attribute '{}'".format( 71 | type(self).__name__, attr)) 72 | 73 | def __str__(self): 74 | loss_str = [] 75 | for name, meter in self.meters.items(): 76 | loss_str.append( 77 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 78 | ) 79 | return self.delimiter.join(loss_str) 80 | 81 | 82 | class TensorboardLogger(MetricLogger): 83 | def __init__(self, 84 | log_dir, 85 | start_iter=0, 86 | delimiter='\t'): 87 | 88 | super(TensorboardLogger, self).__init__(delimiter) 89 | self.iteration = start_iter 90 | self.writer = self._get_tensorboard_writer(log_dir) 91 | 92 | @staticmethod 93 | def _get_tensorboard_writer(log_dir): 94 | try: 95 | from tensorboardX import SummaryWriter 96 | except ImportError: 97 | raise ImportError( 98 | 'To use tensorboard please install tensorboardX ' 99 | '[ pip install tensorflow tensorboardX ].' 
100 | ) 101 | 102 | if is_main_process(): 103 | timestamp = datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H:%M') 104 | tb_logger = SummaryWriter('{}{}'.format(log_dir, timestamp)) 105 | return tb_logger 106 | else: 107 | return None 108 | 109 | def update(self, ** kwargs): 110 | super(TensorboardLogger, self).update(**kwargs) 111 | if self.writer: 112 | for k, v in kwargs.items(): 113 | if isinstance(v, torch.Tensor): 114 | v = v.item() 115 | assert isinstance(v, (float, int)) 116 | self.writer.add_scalar(k, v, self.iteration) 117 | 118 | def increase_counter(self): 119 | self.iteration += 1 120 | 121 | def decrease_counter(self): 122 | self.iteration -= 1 -------------------------------------------------------------------------------- /wetectron/utils/comm.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | """ 10 | This file contains primitives for multi-gpu communication. 11 | This is useful when doing distributed training. 12 | """ 13 | 14 | import pickle 15 | import time 16 | 17 | import torch 18 | import torch.distributed as dist 19 | 20 | 21 | def get_world_size(): 22 | if not dist.is_available(): 23 | return 1 24 | if not dist.is_initialized(): 25 | return 1 26 | return dist.get_world_size() 27 | 28 | 29 | def get_rank(): 30 | if not dist.is_available(): 31 | return 0 32 | if not dist.is_initialized(): 33 | return 0 34 | return dist.get_rank() 35 | 36 | 37 | def is_main_process(): 38 | return get_rank() == 0 39 | 40 | 41 | def synchronize(): 42 | """ 43 | Helper function to synchronize (barrier) among all processes when 44 | using distributed training 45 | """ 46 | if not dist.is_available(): 47 | return 48 | if not dist.is_initialized(): 49 | return 50 | world_size = dist.get_world_size() 51 | if world_size == 1: 52 | return 53 | dist.barrier() 54 | 55 | 56 | def all_gather(data): 57 | """ 58 | Run all_gather on arbitrary picklable data (not necessarily tensors) 59 | Args: 60 | data: any picklable object 61 | Returns: 62 | list[data]: list of data gathered from each rank 63 | """ 64 | world_size = get_world_size() 65 | if world_size == 1: 66 | return [data] 67 | 68 | # serialized to a Tensor 69 | buffer = pickle.dumps(data) 70 | storage = torch.ByteStorage.from_buffer(buffer) 71 | tensor = torch.ByteTensor(storage).to("cuda") 72 | 73 | # obtain Tensor size of each rank 74 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 75 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 76 | dist.all_gather(size_list, local_size) 77 | size_list = [int(size.item()) for size in size_list] 78 | max_size = max(size_list) 79 | 80 | # receiving Tensor from all ranks 81 | # we pad the tensor because torch all_gather does not support 82 | # gathering tensors of different shapes 83 | tensor_list = [] 84 | for _ in size_list: 85 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 86 | if local_size != max_size: 87 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 88 | tensor = torch.cat((tensor, padding), dim=0) 89 | 
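# at this point every rank's buffer is exactly max_size bytes, which is what
# dist.all_gather requires: it can only gather tensors of identical shape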
dist.all_gather(tensor_list, tensor) 90 | 91 | data_list = [] 92 | for size, tensor in zip(size_list, tensor_list): 93 | buffer = tensor.cpu().numpy().tobytes()[:size] 94 | data_list.append(pickle.loads(buffer)) 95 | 96 | return data_list 97 | 98 | 99 | def reduce_dict(input_dict, average=True): 100 | """ 101 | Args: 102 | input_dict (dict): all the values will be reduced 103 | average (bool): whether to do average or sum 104 | Reduce the values in the dictionary from all processes so that process with rank 105 | 0 has the averaged results. Returns a dict with the same fields as 106 | input_dict, after reduction. 107 | """ 108 | world_size = get_world_size() 109 | if world_size < 2: 110 | return input_dict 111 | with torch.no_grad(): 112 | names = [] 113 | values = [] 114 | # sort the keys so that they are consistent across processes 115 | for k in sorted(input_dict.keys()): 116 | names.append(k) 117 | values.append(input_dict[k]) 118 | values = torch.stack(values, dim=0) 119 | dist.reduce(values, dst=0) 120 | if dist.get_rank() == 0 and average: 121 | # only main process gets accumulated, so only divide by 122 | # world_size in this case 123 | values /= world_size 124 | reduced_dict = {k: v for k, v in zip(names, values)} 125 | return reduced_dict 126 | -------------------------------------------------------------------------------- /wetectron/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import torch 7 | import torch.nn.functional as F 8 | from torch import nn 9 | 10 | 11 | class FPN(nn.Module): 12 | """ 13 | Module that adds FPN on top of a list of feature maps. 14 | The feature maps are currently supposed to be in increasing depth 15 | order, and must be consecutive 16 | """ 17 | 18 | def __init__( 19 | self, in_channels_list, out_channels, conv_block, top_blocks=None 20 | ): 21 | """ 22 | Arguments: 23 | in_channels_list (list[int]): number of channels for each feature map that 24 | will be fed 25 | out_channels (int): number of channels of the FPN representation 26 | top_blocks (nn.Module or None): if provided, an extra operation will 27 | be performed on the output of the last (smallest resolution) 28 | FPN output, and the result will extend the result list 29 | """ 30 | super(FPN, self).__init__() 31 | self.inner_blocks = [] 32 | self.layer_blocks = [] 33 | for idx, in_channels in enumerate(in_channels_list, 1): 34 | inner_block = "fpn_inner{}".format(idx) 35 | layer_block = "fpn_layer{}".format(idx) 36 | 37 | if in_channels == 0: 38 | continue 39 | inner_block_module = conv_block(in_channels, out_channels, 1) 40 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 41 | self.add_module(inner_block, inner_block_module) 42 | self.add_module(layer_block, layer_block_module) 43 | self.inner_blocks.append(inner_block) 44 | self.layer_blocks.append(layer_block) 45 | self.top_blocks = top_blocks 46 | 47 | def forward(self, x): 48 | """ 49 | Arguments: 50 | x (list[Tensor]): feature maps for each feature level. 51 | Returns: 52 | results (tuple[Tensor]): feature maps after FPN layers. 53 | They are ordered from highest resolution first. 
54 | """ 55 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 56 | results = [] 57 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 58 | for feature, inner_block, layer_block in zip( 59 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 60 | ): 61 | if not inner_block: 62 | continue 63 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 64 | inner_lateral = getattr(self, inner_block)(feature) 65 | # TODO use size instead of scale to make it robust to different sizes 66 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 67 | # mode='bilinear', align_corners=False) 68 | last_inner = inner_lateral + inner_top_down 69 | results.insert(0, getattr(self, layer_block)(last_inner)) 70 | 71 | if isinstance(self.top_blocks, LastLevelP6P7): 72 | last_results = self.top_blocks(x[-1], results[-1]) 73 | results.extend(last_results) 74 | elif isinstance(self.top_blocks, LastLevelMaxPool): 75 | last_results = self.top_blocks(results[-1]) 76 | results.extend(last_results) 77 | 78 | return tuple(results) 79 | 80 | 81 | class LastLevelMaxPool(nn.Module): 82 | def forward(self, x): 83 | return [F.max_pool2d(x, 1, 2, 0)] 84 | 85 | 86 | class LastLevelP6P7(nn.Module): 87 | """ 88 | This module is used in RetinaNet to generate extra layers, P6 and P7. 89 | """ 90 | def __init__(self, in_channels, out_channels): 91 | super(LastLevelP6P7, self).__init__() 92 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 93 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 94 | for module in [self.p6, self.p7]: 95 | nn.init.kaiming_uniform_(module.weight, a=1) 96 | nn.init.constant_(module.bias, 0) 97 | self.use_P5 = in_channels == out_channels 98 | 99 | def forward(self, c5, p5): 100 | x = p5 if self.use_P5 else c5 101 | p6 = self.p6(x) 102 | p7 = self.p7(F.relu(p6)) 103 | return [p6, p7] 104 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/keypoint_head/inference.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | import torch 6 | from torch import nn 7 | 8 | 9 | class KeypointPostProcessor(nn.Module): 10 | def __init__(self, keypointer=None): 11 | super(KeypointPostProcessor, self).__init__() 12 | self.keypointer = keypointer 13 | 14 | def forward(self, x, boxes): 15 | mask_prob = x 16 | 17 | scores = None 18 | if self.keypointer: 19 | mask_prob, scores = self.keypointer(x, boxes) 20 | 21 | assert len(boxes) == 1, "Only non-batched inference supported for now" 22 | boxes_per_image = [box.bbox.size(0) for box in boxes] 23 | mask_prob = mask_prob.split(boxes_per_image, dim=0) 24 | scores = scores.split(boxes_per_image, dim=0) 25 | 26 | results = [] 27 | for prob, box, score in zip(mask_prob, boxes, scores): 28 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 29 | for field in box.fields(): 30 | bbox.add_field(field, box.get_field(field)) 31 | prob = PersonKeypoints(prob, box.size) 32 | prob.add_field("logits", score) 33 | bbox.add_field("keypoints", prob) 34 | results.append(bbox) 35 | 36 | return results 37 | 38 | 39 | # TODO remove and use only the Keypointer 40 | import numpy as np 41 | import cv2 42 | 43 | 44 | def heatmaps_to_keypoints(maps, rois): 45 | """Extract predicted keypoint 
locations from heatmaps. Returns a tuple (xy_preds, end_scores): xy_preds has shape 46 | (#rois, #keypoints, 3), holding (x, y, prob) for each keypoint, and end_scores has 47 | shape (#rois, #keypoints), holding the heatmap score at each predicted location. 48 | """ 49 | # This function converts a discrete image coordinate in a HEATMAP_SIZE x 50 | # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain 51 | # consistency with keypoints_to_heatmap_labels by using the conversion from 52 | # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a 53 | # continuous coordinate. 54 | offset_x = rois[:, 0] 55 | offset_y = rois[:, 1] 56 | 57 | widths = rois[:, 2] - rois[:, 0] 58 | heights = rois[:, 3] - rois[:, 1] 59 | widths = np.maximum(widths, 1) 60 | heights = np.maximum(heights, 1) 61 | widths_ceil = np.ceil(widths) 62 | heights_ceil = np.ceil(heights) 63 | 64 | # NCHW to NHWC for use with OpenCV 65 | maps = np.transpose(maps, [0, 2, 3, 1]) 66 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE 67 | num_keypoints = maps.shape[3] 68 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32) 69 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32) 70 | for i in range(len(rois)): 71 | if min_size > 0: 72 | roi_map_width = int(np.maximum(widths_ceil[i], min_size)) 73 | roi_map_height = int(np.maximum(heights_ceil[i], min_size)) 74 | else: 75 | roi_map_width = int(widths_ceil[i])  # cv2.resize expects integer sizes 76 | roi_map_height = int(heights_ceil[i]) 77 | width_correction = widths[i] / roi_map_width 78 | height_correction = heights[i] / roi_map_height 79 | roi_map = cv2.resize( 80 | maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC 81 | ) 82 | # Bring back to CHW 83 | roi_map = np.transpose(roi_map, [2, 0, 1]) 84 | # roi_map_probs = scores_to_probs(roi_map.copy()) 85 | w = roi_map.shape[2] 86 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1) 87 | x_int = pos % w 88 | y_int = (pos - x_int) // w 89 | # assert (roi_map_probs[k, y_int, x_int] == 90 | # roi_map_probs[k, :, :].max()) 91 | x = (x_int + 0.5) * width_correction 92 | y = (y_int + 0.5) * height_correction 93 | xy_preds[i, 0, :] = x + offset_x[i] 94 | xy_preds[i, 1, :] = y + offset_y[i] 95 | xy_preds[i, 2, :] = 1 96 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int] 97 | 98 | return np.transpose(xy_preds, [0, 2, 1]), end_scores 99 | 100 | 101 | from wetectron.structures.bounding_box import BoxList 102 | from wetectron.structures.keypoint import PersonKeypoints 103 | 104 | 105 | class Keypointer(object): 106 | """ 107 | Projects a set of masks in an image on the locations 108 | specified by the bounding boxes 109 | """ 110 | 111 | def __init__(self, padding=0): 112 | self.padding = padding 113 | 114 | def __call__(self, masks, boxes): 115 | # TODO do this properly 116 | if isinstance(boxes, BoxList): 117 | boxes = [boxes] 118 | assert len(boxes) == 1 119 | 120 | result, scores = heatmaps_to_keypoints( 121 | masks.detach().cpu().numpy(), boxes[0].bbox.cpu().numpy() 122 | ) 123 | return torch.from_numpy(result).to(masks.device), torch.as_tensor(scores, device=masks.device) 124 | 125 | 126 | def make_roi_keypoint_post_processor(cfg): 127 | keypointer = Keypointer() 128 | keypoint_post_processor = KeypointPostProcessor(keypointer) 129 | return keypoint_post_processor 130 | -------------------------------------------------------------------------------- /wetectron/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from
https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #pragma once 7 | #include "cpu/vision.h" 8 | 9 | #ifdef WITH_CUDA 10 | #include "cuda/vision.h" 11 | #endif 12 | 13 | 14 | // Interface for Python 15 | int deform_conv_forward( 16 | at::Tensor input, 17 | at::Tensor weight, 18 | at::Tensor offset, 19 | at::Tensor output, 20 | at::Tensor columns, 21 | at::Tensor ones, 22 | int kW, 23 | int kH, 24 | int dW, 25 | int dH, 26 | int padW, 27 | int padH, 28 | int dilationW, 29 | int dilationH, 30 | int group, 31 | int deformable_group, 32 | int im2col_step) 33 | { 34 | if (input.is_cuda()) { 35 | #ifdef WITH_CUDA 36 | return deform_conv_forward_cuda( 37 | input, weight, offset, output, columns, ones, 38 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 39 | group, deformable_group, im2col_step 40 | ); 41 | #else 42 | AT_ERROR("Not compiled with GPU support"); 43 | #endif 44 | } 45 | AT_ERROR("Not implemented on the CPU"); 46 | } 47 | 48 | 49 | int deform_conv_backward_input( 50 | at::Tensor input, 51 | at::Tensor offset, 52 | at::Tensor gradOutput, 53 | at::Tensor gradInput, 54 | at::Tensor gradOffset, 55 | at::Tensor weight, 56 | at::Tensor columns, 57 | int kW, 58 | int kH, 59 | int dW, 60 | int dH, 61 | int padW, 62 | int padH, 63 | int dilationW, 64 | int dilationH, 65 | int group, 66 | int deformable_group, 67 | int im2col_step) 68 | { 69 | if (input.is_cuda()) { 70 | #ifdef WITH_CUDA 71 | return deform_conv_backward_input_cuda( 72 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 73 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 74 | group, deformable_group, im2col_step 75 | ); 76 | #else 77 | AT_ERROR("Not compiled with GPU support"); 78 | #endif 79 | } 80 | AT_ERROR("Not implemented on the CPU"); 81 | } 82 | 83 | 84 | int deform_conv_backward_parameters( 85 | at::Tensor input, 86 | at::Tensor offset, 87 | at::Tensor gradOutput, 88 | at::Tensor gradWeight, // at::Tensor gradBias, 89 | at::Tensor columns, 90 | at::Tensor ones, 91 | int kW, 92 | int kH, 93 | int dW, 94 | int dH, 95 | int padW, 96 | int padH, 97 | int dilationW, 98 | int dilationH, 99 | int group, 100 | int deformable_group, 101 | float scale, 102 | int im2col_step) 103 | { 104 | if (input.is_cuda()) { 105 | #ifdef WITH_CUDA 106 | return deform_conv_backward_parameters_cuda( 107 | input, offset, gradOutput, gradWeight, columns, ones, 108 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 109 | group, deformable_group, scale, im2col_step 110 | ); 111 | #else 112 | AT_ERROR("Not compiled with GPU support"); 113 | #endif 114 | } 115 | AT_ERROR("Not implemented on the CPU"); 116 | } 117 | 118 | 119 | void modulated_deform_conv_forward( 120 | at::Tensor input, 121 | at::Tensor weight, 122 | at::Tensor bias, 123 | at::Tensor ones, 124 | at::Tensor offset, 125 | at::Tensor mask, 126 | at::Tensor output, 127 | at::Tensor columns, 128 | int kernel_h, 129 | int kernel_w, 130 | const int stride_h, 131 | const int stride_w, 132 | const int pad_h, 133 | const int pad_w, 134 | const int dilation_h, 135 | const int dilation_w, 136 | const int group, 137 | const int deformable_group, 138 | const bool with_bias) 139 | { 140 | if (input.is_cuda()) { 141 | #ifdef WITH_CUDA 142 | return modulated_deform_conv_cuda_forward( 143 | input, weight, bias, ones, offset, mask, output, columns, 144 | kernel_h, kernel_w, stride_h, stride_w, 145 | pad_h, pad_w, dilation_h, 
dilation_w, 146 | group, deformable_group, with_bias 147 | ); 148 | #else 149 | AT_ERROR("Not compiled with GPU support"); 150 | #endif 151 | } 152 | AT_ERROR("Not implemented on the CPU"); 153 | } 154 | 155 | 156 | void modulated_deform_conv_backward( 157 | at::Tensor input, 158 | at::Tensor weight, 159 | at::Tensor bias, 160 | at::Tensor ones, 161 | at::Tensor offset, 162 | at::Tensor mask, 163 | at::Tensor columns, 164 | at::Tensor grad_input, 165 | at::Tensor grad_weight, 166 | at::Tensor grad_bias, 167 | at::Tensor grad_offset, 168 | at::Tensor grad_mask, 169 | at::Tensor grad_output, 170 | int kernel_h, 171 | int kernel_w, 172 | int stride_h, 173 | int stride_w, 174 | int pad_h, 175 | int pad_w, 176 | int dilation_h, 177 | int dilation_w, 178 | int group, 179 | int deformable_group, 180 | const bool with_bias) 181 | { 182 | if (input.is_cuda()) { 183 | #ifdef WITH_CUDA 184 | return modulated_deform_conv_cuda_backward( 185 | input, weight, bias, ones, offset, mask, columns, 186 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 187 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 188 | group, deformable_group, with_bias 189 | ); 190 | #else 191 | AT_ERROR("Not compiled with GPU support"); 192 | #endif 193 | } 194 | AT_ERROR("Not implemented on the CPU"); 195 | } -------------------------------------------------------------------------------- /wetectron/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import itertools 7 | 8 | import torch 9 | from torch.utils.data.sampler import BatchSampler 10 | from torch.utils.data.sampler import Sampler 11 | 12 | 13 | class GroupedBatchSampler(BatchSampler): 14 | """ 15 | Wraps another sampler to yield a mini-batch of indices. 16 | It enforces that elements from the same group should appear in groups of batch_size. 17 | It also tries to provide mini-batches which follows an ordering which is 18 | as close as possible to the ordering from the original sampler. 19 | 20 | Arguments: 21 | sampler (Sampler): Base sampler. 22 | batch_size (int): Size of mini-batch. 23 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 24 | size is less than ``batch_size`` 25 | 26 | """ 27 | 28 | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): 29 | if not isinstance(sampler, Sampler): 30 | raise ValueError( 31 | "sampler should be an instance of " 32 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 33 | ) 34 | self.sampler = sampler 35 | self.group_ids = torch.as_tensor(group_ids) 36 | assert self.group_ids.dim() == 1 37 | self.batch_size = batch_size 38 | self.drop_uneven = drop_uneven 39 | 40 | self.groups = torch.unique(self.group_ids).sort(0)[0] 41 | 42 | self._can_reuse_batches = False 43 | 44 | def _prepare_batches(self): 45 | dataset_size = len(self.group_ids) 46 | # get the sampled indices from the sampler 47 | sampled_ids = torch.as_tensor(list(self.sampler)) 48 | # potentially not all elements of the dataset were sampled 49 | # by the sampler (e.g., DistributedSampler). 
50 | # construct a tensor which contains -1 if the element was 51 | # not sampled, and a non-negative number indicating the 52 | # order where the element was sampled. 53 | # for example, if sampled_ids = [3, 1] and dataset_size = 5, 54 | # the order is [-1, 1, -1, 0, -1] 55 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 56 | order[sampled_ids] = torch.arange(len(sampled_ids)) 57 | 58 | # get a mask with the elements that were sampled 59 | mask = order >= 0 60 | 61 | # find the elements that belong to each individual cluster 62 | clusters = [(self.group_ids == i) & mask for i in self.groups] 63 | # get relative order of the elements inside each cluster 64 | # that follows the order from the sampler 65 | relative_order = [order[cluster] for cluster in clusters] 66 | # with the relative order, find the absolute order in the 67 | # sampled space 68 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 69 | # permute each cluster so that they follow the order from 70 | # the sampler 71 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 72 | 73 | # split each cluster into chunks of batch_size, and merge them into a single list of tensors 74 | splits = [c.split(self.batch_size) for c in permuted_clusters] 75 | merged = tuple(itertools.chain.from_iterable(splits)) 76 | 77 | # now each batch internally has the right order, but 78 | # they are grouped by clusters. Find the permutation between 79 | # different batches that brings them as close as possible to 80 | # the order that we have in the sampler. For that, we will consider the 81 | # ordering as coming from the first element of each batch, and sort 82 | # correspondingly 83 | first_element_of_batch = [t[0].item() for t in merged] 84 | # get an inverse mapping from sampled indices to the positions where 85 | # they occur (as returned by the sampler) 86 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 87 | # from the first element in each batch, get a relative ordering 88 | first_index_of_batch = torch.as_tensor( 89 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 90 | ) 91 | 92 | # permute the batches so that they approximately follow the order 93 | # from the sampler 94 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 95 | # finally, permute the batches 96 | batches = [merged[i].tolist() for i in permutation_order] 97 | 98 | if self.drop_uneven: 99 | kept = [] 100 | for batch in batches: 101 | if len(batch) == self.batch_size: 102 | kept.append(batch) 103 | batches = kept 104 | return batches 105 | 106 | def __iter__(self): 107 | if self._can_reuse_batches: 108 | batches = self._batches 109 | self._can_reuse_batches = False 110 | else: 111 | batches = self._prepare_batches() 112 | self._batches = batches 113 | return iter(batches) 114 | 115 | def __len__(self): 116 | if not hasattr(self, "_batches"): 117 | self._batches = self._prepare_batches() 118 | self._can_reuse_batches = True 119 | return len(self._batches) 120 | -------------------------------------------------------------------------------- /wetectron/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
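// Overview of the scheme used below: boxes are first sorted by score on the GPU;
// nms_kernel then tiles the pairwise IoU tests into 64x64 blocks (threadsPerBlock
// boxes per block). Each thread owns one "row" box and emits a 64-bit mask whose
// bit j is set when box j of the current column block overlaps it above
// nms_overlap_thresh. The host finally sweeps the boxes in score order and keeps
// a box only if no previously kept box has suppressed it.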
6 | #include <ATen/ATen.h> 7 | #include <ATen/cuda/CUDAContext.h> 8 | 9 | #include <THC/THC.h> 10 | #include <THC/THCDeviceUtils.cuh> 11 | 12 | #include <vector> 13 | #include <iostream> 14 | 15 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 16 | 17 | __device__ inline float devIoU(float const * const a, float const * const b) { 18 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 19 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 20 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 21 | float interS = width * height; 22 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 23 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 24 | return interS / (Sa + Sb - interS); 25 | } 26 | 27 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 28 | const float *dev_boxes, unsigned long long *dev_mask) { 29 | const int row_start = blockIdx.y; 30 | const int col_start = blockIdx.x; 31 | 32 | // if (row_start > col_start) return; 33 | 34 | const int row_size = 35 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 36 | const int col_size = 37 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 38 | 39 | __shared__ float block_boxes[threadsPerBlock * 5]; 40 | if (threadIdx.x < col_size) { 41 | block_boxes[threadIdx.x * 5 + 0] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 43 | block_boxes[threadIdx.x * 5 + 1] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 45 | block_boxes[threadIdx.x * 5 + 2] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 47 | block_boxes[threadIdx.x * 5 + 3] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 49 | block_boxes[threadIdx.x * 5 + 4] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 51 | } 52 | __syncthreads(); 53 | 54 | if (threadIdx.x < row_size) { 55 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 56 | const float *cur_box = dev_boxes + cur_box_idx * 5; 57 | int i = 0; 58 | unsigned long long t = 0; 59 | int start = 0; 60 | if (row_start == col_start) { 61 | start = threadIdx.x + 1; 62 | } 63 | for (i = start; i < col_size; i++) { 64 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 65 | t |= 1ULL << i; 66 | } 67 | } 68 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 69 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 70 | } 71 | } 72 | 73 | // boxes is a N x 5 tensor 74 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 75 | using scalar_t = float; 76 | AT_ASSERTM(boxes.is_cuda(), "boxes must be a CUDA tensor"); 77 | auto scores = boxes.select(1, 4); 78 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 79 | auto boxes_sorted = boxes.index_select(0, order_t); 80 | 81 | int boxes_num = boxes.size(0); 82 | 83 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 84 | 85 | scalar_t* boxes_dev = boxes_sorted.data_ptr<scalar_t>(); 86 | 87 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 88 | 89 | unsigned long long* mask_dev = NULL; 90 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 91 | // boxes_num * col_blocks * sizeof(unsigned long long))); 92 | 93 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 94 | 95 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 96 | THCCeilDiv(boxes_num, threadsPerBlock)); 97 | dim3 threads(threadsPerBlock); 98 | nms_kernel<<<blocks, threads>>>(boxes_num, 99 | nms_overlap_thresh, 100 | boxes_dev, 101 | mask_dev); 102 |
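// host-side greedy sweep: copy each box's per-block suppression masks back to
// the CPU, then walk the boxes in descending score order, keeping a box only
// when no previously kept box has set its bit in remv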
103 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 104 | THCudaCheck(cudaMemcpy(&mask_host[0], 105 | mask_dev, 106 | sizeof(unsigned long long) * boxes_num * col_blocks, 107 | cudaMemcpyDeviceToHost)); 108 | 109 | std::vector<unsigned long long> remv(col_blocks); 110 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 111 | 112 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 113 | int64_t* keep_out = keep.data_ptr<int64_t>(); 114 | 115 | int num_to_keep = 0; 116 | for (int i = 0; i < boxes_num; i++) { 117 | int nblock = i / threadsPerBlock; 118 | int inblock = i % threadsPerBlock; 119 | 120 | if (!(remv[nblock] & (1ULL << inblock))) { 121 | keep_out[num_to_keep++] = i; 122 | unsigned long long *p = &mask_host[0] + i * col_blocks; 123 | for (int j = nblock; j < col_blocks; j++) { 124 | remv[j] |= p[j]; 125 | } 126 | } 127 | } 128 | 129 | THCudaFree(state, mask_dev); 130 | // TODO improve this part 131 | return std::get<0>(order_t.index({ 132 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 133 | order_t.device(), keep.scalar_type()) 134 | }).sort(0, false)); 135 | } 136 | -------------------------------------------------------------------------------- /wetectron/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
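# Illustrative usage sketch (sizes and the BGR means below are placeholders,
# not values taken from a wetectron config): the transforms are chained with
# Compose and applied jointly to the image, its targets and its proposal boxes:
#
#   transform = Compose([
#       Resize(min_size=600, max_size=1000),
#       RandomHorizontalFlip(prob=0.5),
#       ToTensor(),
#       Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0]),
#   ])
#   image, target, rois = transform(image, target, rois)
#
# every transform below takes and returns the (image, target, rois) triple so
# that boxes stay aligned with the image through resizing and flipping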
10 | import random 11 | import numpy as np 12 | import cv2 13 | from PIL import ImageFilter 14 | 15 | import torch 16 | import torchvision 17 | from torchvision.transforms import functional as F 18 | 19 | 20 | class Compose(object): 21 | def __init__(self, transforms): 22 | self.transforms = transforms 23 | 24 | def __call__(self, image, target=None, rois=None): 25 | for t in self.transforms: 26 | image, target, rois = t(image, target, rois) 27 | return image, target, rois 28 | 29 | def __repr__(self): 30 | format_string = self.__class__.__name__ + "(" 31 | for t in self.transforms: 32 | format_string += "\n" 33 | format_string += " {0}".format(t) 34 | format_string += "\n)" 35 | return format_string 36 | 37 | class Resize(object): 38 | def __init__(self, min_size, max_size): 39 | if not isinstance(min_size, (list, tuple)): 40 | min_size = (min_size,) 41 | self.min_size = min_size 42 | self.max_size = max_size 43 | 44 | # modified from torchvision to add support for max size 45 | def get_size(self, image_size): 46 | w, h = image_size 47 | size = random.choice(self.min_size) 48 | max_size = self.max_size 49 | if max_size is not None: 50 | min_original_size = float(min((w, h))) 51 | max_original_size = float(max((w, h))) 52 | if max_original_size / min_original_size * size > max_size: 53 | size = int(round(max_size * min_original_size / max_original_size)) 54 | 55 | if (w <= h and w == size) or (h <= w and h == size): 56 | return (h, w) 57 | 58 | if w < h: 59 | ow = size 60 | oh = int(size * h / w) 61 | else: 62 | oh = size 63 | ow = int(size * w / h) 64 | 65 | return (oh, ow) 66 | 67 | def __call__(self, image, target=None, rois=None): 68 | size = self.get_size(image.size) 69 | image = F.resize(image, size) 70 | if target is not None: 71 | target = target.resize(image.size) 72 | if rois is not None: 73 | rois = rois.resize(image.size) 74 | 75 | return image, target, rois 76 | 77 | class RandomHorizontalFlip(object): 78 | def __init__(self, prob=0.5): 79 | self.prob = prob 80 | 81 | def __call__(self, image, target=None, rois=None): 82 | if random.random() < self.prob: 83 | image = F.hflip(image) 84 | if target is not None: 85 | target = target.transpose(0) 86 | if rois is not None: 87 | rois = rois.transpose(0) 88 | 89 | return image, target, rois 90 | 91 | class RandomVerticalFlip(object): 92 | def __init__(self, prob=0.5): 93 | self.prob = prob 94 | 95 | def __call__(self, image, target=None, rois=None): 96 | if random.random() < self.prob: 97 | image = F.vflip(image) 98 | if target is not None: 99 | target = target.transpose(1) 100 | if rois is not None: 101 | rois = rois.transpose(1) 102 | 103 | return image, target, rois 104 | 105 | class ColorJitter(object): 106 | def __init__(self, 107 | brightness=None, 108 | contrast=None, 109 | saturation=None, 110 | hue=None, 111 | ): 112 | self.color_jitter = torchvision.transforms.ColorJitter( 113 | brightness=brightness, 114 | contrast=contrast, 115 | saturation=saturation, 116 | hue=hue,) 117 | 118 | def __call__(self, image, target=None, rois=None): 119 | image = self.color_jitter(image) 120 | return image, target, rois 121 | 122 | class ToTensor(object): 123 | def __call__(self, image, target=None, rois=None): 124 | return F.to_tensor(image), target, rois 125 | 126 | class Normalize(object): 127 | def __init__(self, mean, std, to_bgr255=True): 128 | self.mean = mean 129 | self.std = std 130 | self.to_bgr255 = to_bgr255 131 | 132 | def __call__(self, image, target=None, rois=None): 133 | if self.to_bgr255: 134 | image = image[[2, 1, 0]] * 
255 135 | image = F.normalize(image, mean=self.mean, std=self.std) 136 | return image, target, rois 137 | 138 | class Lighting(object): 139 | """Lighting noise (AlexNet-style, PCA-based noise)""" 140 | def __init__(self, alphastd, eigval, eigvec): 141 | self.alphastd = alphastd 142 | self.eigval = eigval 143 | self.eigvec = eigvec 144 | 145 | def __call__(self, img, target=None, rois=None): 146 | if self.alphastd == 0: 147 | return img, target, rois  # keep the (image, target, rois) contract expected by Compose 148 | 149 | alpha = img.new().resize_(3).normal_(0, self.alphastd) 150 | rgb = self.eigvec.type_as(img).clone()\ 151 | .mul(alpha.view(1, 3).expand(3, 3))\ 152 | .mul(self.eigval.view(1, 3).expand(3, 3))\ 153 | .sum(1).squeeze() 154 | 155 | return img.add(rgb.view(3, 1, 1).expand_as(img)), target, rois --------------------------------------------------------------------------------