├── images
├── BiB-vis.PNG
├── BiB-pipeline.png
└── BiB-results.PNG
├── wetectron
├── utils
│   ├── README.md
│   ├── __init__.py
│   ├── collect_env.py
│   ├── cv2_util.py
│   ├── imports.py
│   ├── timer.py
│   ├── env.py
│   ├── registry.py
│   ├── logger.py
│   ├── miscellaneous.py
│   ├── model_zoo.py
│   ├── model_serialization.py
│   ├── metric_logger.py
│   └── comm.py
├── modeling
│   ├── __init__.py
│   ├── roi_heads
│   │   ├── __init__.py
│   │   ├── box_head
│   │   │   ├── __init__.py
│   │   │   ├── roi_box_predictors.py
│   │   │   └── box_head.py
│   │   ├── mask_head
│   │   │   ├── __init__.py
│   │   │   ├── roi_mask_predictors.py
│   │   │   ├── roi_mask_feature_extractors.py
│   │   │   └── mask_head.py
│   │   ├── keypoint_head
│   │   │   ├── __init__.py
│   │   │   ├── roi_keypoint_predictors.py
│   │   │   ├── roi_keypoint_feature_extractors.py
│   │   │   ├── keypoint_head.py
│   │   │   └── inference.py
│   │   ├── weak_head
│   │   │   └── __init__.py
│   │   └── roi_heads.py
│   ├── rpn
│   │   ├── retinanet
│   │   │   ├── __init__.py
│   │   │   └── loss.py
│   │   ├── __init__.py
│   │   └── utils.py
│   ├── detector
│   │   ├── __init__.py
│   │   └── detectors.py
│   ├── backbone
│   │   ├── __init__.py
│   │   ├── backbone.py
│   │   └── fpn.py
│   ├── utils.py
│   ├── registry.py
│   ├── balanced_positive_negative_sampler.py
│   ├── box_coder.py
│   └── make_layers.py
├── structures
│   ├── __init__.py
│   └── image_list.py
├── engine
│   └── __init__.py
├── data
│   ├── __init__.py
│   ├── samplers
│   │   ├── __init__.py
│   │   ├── iteration_based_batch_sampler.py
│   │   ├── distributed.py
│   │   └── grouped_batch_sampler.py
│   ├── transforms
│   │   ├── __init__.py
│   │   ├── build.py
│   │   └── transforms.py
│   ├── datasets
│   │   ├── evaluation
│   │   │   ├── coco
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   └── voc
│   │   │   │   └── __init__.py
│   │   ├── __init__.py
│   │   ├── list_dataset.py
│   │   └── concat_dataset.py
│   ├── collate_batch.py
│   └── README.md
├── layers
│   ├── dcn
│   │   ├── __init__.py
│   │   └── deform_pool_func.py
│   ├── nms.py
│   ├── smooth_l1_loss.py
│   ├── _utils.py
│   ├── batch_norm.py
│   ├── __init__.py
│   ├── roi_pool.py
│   ├── roi_align.py
│   └── sigmoid_focal_loss.py
├── __init__.py
├── config
│   └── __init__.py
├── solver
│   ├── __init__.py
│   ├── lr_scheduler.py
│   └── build.py
└── csrc
│   ├── cpu
│   ├── vision.h
│   └── nms_cpu.cpp
│   ├── nms.h
│   ├── SigmoidFocalLoss.h
│   ├── vision.cpp
│   ├── ROIPool.h
│   ├── ROIAlign.h
│   ├── deform_pool.h
│   ├── cuda
│   ├── deform_pool_cuda.cu
│   └── nms.cu
│   └── deform_conv.h
├── active_strategy
├── utils
│   └── __init__.py
└── __init__.py
├── configs
├── README.md
├── coco
│   ├── V_16_coco14.yaml
│   ├── V_16_coco17.yaml
│   ├── V_16_coco14_point.yaml
│   ├── V_16_coco14_scribble.yaml
│   └── V_16_coco14_active.yaml
└── voc
│   ├── V_16_voc07.yaml
│   ├── V_16_voc12.yaml
│   ├── V_16_voc0712.yaml
│   └── V_16_voc07_active.yaml
├── .gitignore
├── launch_mist_coco14.sh
├── launch_mist_voc07.sh
├── inference_scripts
├── run_inference_coco14_val.sh
├── run_inference_voc07_test.sh
├── run_inference_coco14_train.sh
└── run_inference_voc07_trainval.sh
├── summarize_performance.sh
└── setup.py
/images/BiB-vis.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huyvvo/BiB/HEAD/images/BiB-vis.PNG
--------------------------------------------------------------------------------
/images/BiB-pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huyvvo/BiB/HEAD/images/BiB-pipeline.png
--------------------------------------------------------------------------------
/images/BiB-results.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/huyvvo/BiB/HEAD/images/BiB-results.PNG
--------------------------------------------------------------------------------
/wetectron/utils/README.md:
--------------------------------------------------------------------------------
1 | # Utility functions
2 | 
3 | This folder contains utility functions that are not used in the
4 | core library, but are useful for building models or training
5 | code using the config system.
--------------------------------------------------------------------------------
/wetectron/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/structures/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/active_strategy/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/rpn/retinanet/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/box_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/mask_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/keypoint_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | 
--------------------------------------------------------------------------------
/wetectron/engine/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | 
--------------------------------------------------------------------------------
/active_strategy/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/wetectron/data/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
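# A minimal usage sketch for the loader factory re-exported below. The
# signature make_data_loader(cfg, is_train, is_distributed, start_iter) is
# assumed from the maskrcnn-benchmark lineage of this code, and the 4-tuple
# batch layout follows the BatchCollator in collate_batch.py:
#
#   from wetectron.config import cfg
#   from wetectron.data import make_data_loader
#
#   cfg.merge_from_file("configs/voc/V_16_voc07.yaml")
#   loader = make_data_loader(cfg, is_train=True, is_distributed=False)
#   for images, targets, rois, img_ids in loader:
#       pass  # one training batch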
6 | from .build import make_data_loader
7 | 
--------------------------------------------------------------------------------
/wetectron/layers/dcn/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # 
6 | # Copied From [mmdetection](https://github.com/open-mmlab/mmdetection/tree/master/mmdet/ops/dcn)
7 | # 
8 | 
--------------------------------------------------------------------------------
/wetectron/modeling/rpn/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | # from .rpn import build_rpn
7 | 
--------------------------------------------------------------------------------
/wetectron/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | from .detectors import build_detection_model
7 | 
--------------------------------------------------------------------------------
/wetectron/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | from .backbone import build_backbone
7 | from . import fbnet
8 | from . import vgg16
--------------------------------------------------------------------------------
/wetectron/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/wetectron/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/weak_head/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
--------------------------------------------------------------------------------
/configs/README.md:
--------------------------------------------------------------------------------
1 | # Configuration files
2 | 
3 | All the configs for supervised methods are removed. You can find them at the maskrcnn-benchmark [configs](https://github.com/facebookresearch/maskrcnn-benchmark/tree/master/configs).
4 | 
5 | Since we made changes in many places in this code, the performance of supervised models may also be affected.
6 | To test their performance, we recommend using [Detectron2](https://github.com/facebookresearch/detectron2) or the original [maskrcnn-benchmark](https://github.com/facebookresearch/maskrcnn-benchmark).
7 | 
--------------------------------------------------------------------------------
/wetectron/layers/nms.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | from wetectron import _C
7 | from apex import amp
8 | 
9 | # Only valid with fp32 inputs - give AMP the hint
10 | nms = amp.float_function(_C.nms)
11 | 
12 | # nms.__doc__ = """
13 | # This function performs Non-maximum suppression"""
14 | 
--------------------------------------------------------------------------------
/wetectron/config/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from .defaults import _C as cfg
11 | 
--------------------------------------------------------------------------------
/wetectron/modeling/detector/detectors.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
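# A minimal usage sketch for the factory defined below; the config file is
# one of this repository's own, and any config whose MODEL.META_ARCHITECTURE
# is "GeneralizedRCNN" resolves to the same meta-architecture:
#
#   from wetectron.config import cfg
#   from wetectron.modeling.detector import build_detection_model
#
#   cfg.merge_from_file("configs/coco/V_16_coco14.yaml")
#   model = build_detection_model(cfg)  # -> a GeneralizedRCNN instance
#   model.to("cuda")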
6 | from .generalized_rcnn import GeneralizedRCNN
7 | 
8 | 
9 | _DETECTION_META_ARCHITECTURES = {"GeneralizedRCNN": GeneralizedRCNN}
10 | 
11 | 
12 | def build_detection_model(cfg):
13 |     meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE]
14 |     return meta_arch(cfg)
15 | 
--------------------------------------------------------------------------------
/wetectron/modeling/utils.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | """
7 | Miscellaneous utility functions
8 | """
9 | 
10 | import torch
11 | 
12 | 
13 | def cat(tensors, dim=0):
14 |     """
15 |     Efficient version of torch.cat that avoids a copy if there is only a single element in a list
16 |     """
17 |     assert isinstance(tensors, (list, tuple))
18 |     if len(tensors) == 1:
19 |         return tensors[0]
20 |     return torch.cat(tensors, dim)
21 | 
--------------------------------------------------------------------------------
/wetectron/solver/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from .build import make_optimizer, make_cdb_optimizer
11 | from .build import make_lr_scheduler, make_lr_cdb_scheduler
12 | from .lr_scheduler import WarmupMultiStepLR
13 | 
--------------------------------------------------------------------------------
/wetectron/data/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
8 | from .distributed import DistributedSampler, WeightedDistributedSampler
9 | from .grouped_batch_sampler import GroupedBatchSampler
10 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
11 | 
12 | __all__ = ["WeightedDistributedSampler", "DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
13 | 
--------------------------------------------------------------------------------
/wetectron/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from .transforms import Compose
11 | from .transforms import Resize
12 | from .transforms import RandomHorizontalFlip
13 | from .transforms import ToTensor
14 | from .transforms import Normalize
15 | 
16 | from .build import build_transforms
17 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *__pycache__*
2 | *.so
3 | *.nfs*
4 | 
5 | apex/*
6 | build/*
7 | outputs/*
8 | wetectron.egg-info/*
9 | 
10 | # compilation and distribution
11 | __pycache__
12 | _ext
13 | *.pyc
14 | *.so
15 | maskrcnn_benchmark.egg-info/
16 | wetectron.egg-info/
17 | build/
18 | proposal
19 | datasets
20 | !wetectron/data/datasets
21 | dist/
22 | cache/
23 | apex/
24 | cocoapi/
25 | output/
26 | runs/
27 | configs/weak_old
28 | configs/voc_ignore
29 | outputs
30 | 
31 | 
32 | # pytorch/python/numpy formats
33 | *.pth
34 | *.pkl
35 | *.npy
36 | 
37 | # ipython/jupyter notebooks
38 | *.ipynb
39 | **/.ipynb_checkpoints/
40 | 
41 | # Editor temporaries
42 | *.swn
43 | *.swo
44 | *.swp
45 | *~
46 | 
47 | # Pycharm editor settings
48 | .idea
49 | 
50 | # vscode editor settings
51 | .vscode
52 | 
53 | # MacOS
54 | .DS_Store
55 | 
--------------------------------------------------------------------------------
/wetectron/layers/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | import torch
7 | 
8 | def smooth_l1_loss(input, target, beta=1. / 9, size_average=True, reduction=True):
9 |     """
10 |     very similar to the smooth_l1_loss from pytorch, but with
11 |     the extra beta parameter
12 |     """
13 |     n = torch.abs(input - target)
14 |     cond = n < beta
15 |     loss = torch.where(cond, 0.5 * n ** 2 / beta, n - 0.5 * beta)
16 |     if not reduction:
17 |         return loss
18 |     if size_average:
19 |         return loss.mean()
20 |     return loss.sum()
21 | 
--------------------------------------------------------------------------------
/launch_mist_coco14.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | source ~/.bashrc
8 | conda deactivate
9 | conda activate bib
10 | 
11 | NUM_GPUS="$1"
12 | OUTPUT_DIR="$2"
13 | 
14 | python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS tools/train_active_net.py \
15 |     --config-file "configs/coco/V_16_coco14.yaml" --use-tensorboard \
16 |     SOLVER.CHECKPOINT_PERIOD 20000 OUTPUT_DIR "$OUTPUT_DIR" \
17 |     SOLVER.ITER_SIZE "$((8/NUM_GPUS))" SOLVER.IMS_PER_BATCH "$NUM_GPUS" TEST.IMS_PER_BATCH "$((2*NUM_GPUS))"
18 | 
--------------------------------------------------------------------------------
/launch_mist_voc07.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | source ~/.bashrc
8 | conda deactivate
9 | conda activate bib
10 | 
11 | NUM_GPUS="$1"
12 | OUTPUT_DIR="$2"
13 | 
14 | python -m torch.distributed.launch --nproc_per_node=$NUM_GPUS tools/train_active_net.py \
15 |     --config-file "configs/voc/V_16_voc07.yaml" --use-tensorboard \
16 |     SOLVER.CHECKPOINT_PERIOD 6000 OUTPUT_DIR "$OUTPUT_DIR" \
17 |     SOLVER.ITER_SIZE "$((8/NUM_GPUS))" SOLVER.IMS_PER_BATCH "$NUM_GPUS" TEST.IMS_PER_BATCH "$((2*NUM_GPUS))"
18 | 
--------------------------------------------------------------------------------
/wetectron/data/datasets/evaluation/coco/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | from .coco_eval import do_coco_evaluation
6 | 
7 | 
8 | def coco_evaluation(
9 |     dataset,
10 |     predictions,
11 |     output_folder,
12 |     box_only,
13 |     iou_types,
14 |     expected_results,
15 |     expected_results_sigma_tol,
16 | ):
17 |     return do_coco_evaluation(
18 |         dataset=dataset,
19 |         predictions=predictions,
20 |         box_only=box_only,
21 |         output_folder=output_folder,
22 |         iou_types=iou_types,
23 |         expected_results=expected_results,
24 |         expected_results_sigma_tol=expected_results_sigma_tol,
25 |     )
26 | 
--------------------------------------------------------------------------------
/wetectron/utils/collect_env.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
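# Typical use is a one-off environment dump at start-up, e.g. (sketch):
#
#   import logging
#   logging.getLogger(__name__).info("Environment:\n" + collect_env_info())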
10 | import PIL
11 | 
12 | from torch.utils.collect_env import get_pretty_env_info
13 | 
14 | 
15 | def get_pil_version():
16 |     return "\n Pillow ({})".format(PIL.__version__)
17 | 
18 | 
19 | def collect_env_info():
20 |     env_str = get_pretty_env_info()
21 |     env_str += get_pil_version()
22 |     return env_str
23 | 
--------------------------------------------------------------------------------
/wetectron/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // Code taken from https://github.com/NVlabs/wetectron
3 | //------------------------------------------------------------------------------
4 | 
5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | #pragma once
7 | #include <torch/extension.h>
8 | 
9 | 
10 | at::Tensor ROIAlign_forward_cpu(const at::Tensor& input,
11 |                                 const at::Tensor& rois,
12 |                                 const float spatial_scale,
13 |                                 const int pooled_height,
14 |                                 const int pooled_width,
15 |                                 const int sampling_ratio);
16 | 
17 | 
18 | at::Tensor nms_cpu(const at::Tensor& dets,
19 |                    const at::Tensor& scores,
20 |                    const float threshold);
21 | 
--------------------------------------------------------------------------------
/wetectron/data/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
8 | from .coco import COCODataset
9 | from .voc import PascalVOCDataset
10 | from .concat_dataset import ConcatDataset
11 | 
12 | def _isinstance(dataset, dataset_type):
13 |     if isinstance(dataset, ConcatDataset):
14 |         membership = [isinstance(dataset.datasets[i], dataset_type) for i in range(len(dataset.datasets))]
15 |         assert(membership.count(membership[0]) == len(membership))
16 |         return membership[0]
17 |     else:
18 |         return isinstance(dataset, dataset_type)
19 | 
20 | __all__ = ["COCODataset", "ConcatDataset", "PascalVOCDataset", "_isinstance"]
21 | 
--------------------------------------------------------------------------------
/wetectron/modeling/registry.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
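# Sketch of how these registries are used elsewhere in the code base,
# assuming the dict-like Registry of wetectron.utils.registry whose
# register() can be used as a class decorator ("MISTPredictor" is the
# predictor name referenced by the configs in this repository):
#
#   from torch import nn
#   from wetectron.modeling import registry
#
#   @registry.ROI_WEAK_PREDICTOR.register("MISTPredictor")
#   class MISTPredictor(nn.Module):
#       ...
#
#   predictor_cls = registry.ROI_WEAK_PREDICTOR[cfg.MODEL.ROI_WEAK_HEAD.PREDICTOR]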
10 | 
11 | from wetectron.utils.registry import Registry
12 | 
13 | BACKBONES = Registry()
14 | RPN_HEADS = Registry()
15 | ROI_BOX_FEATURE_EXTRACTORS = Registry()
16 | ROI_BOX_PREDICTOR = Registry()
17 | ROI_KEYPOINT_FEATURE_EXTRACTORS = Registry()
18 | ROI_KEYPOINT_PREDICTOR = Registry()
19 | ROI_MASK_FEATURE_EXTRACTORS = Registry()
20 | ROI_MASK_PREDICTOR = Registry()
21 | 
22 | ROI_WEAK_PREDICTOR = Registry()
23 | ROI_WEAK_LOSS = Registry()
--------------------------------------------------------------------------------
/wetectron/csrc/nms.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // Code taken from https://github.com/NVlabs/wetectron
3 | //------------------------------------------------------------------------------
4 | 
5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | #pragma once
7 | #include "cpu/vision.h"
8 | 
9 | #ifdef WITH_CUDA
10 | #include "cuda/vision.h"
11 | #endif
12 | 
13 | 
14 | at::Tensor nms(const at::Tensor& dets,
15 |                const at::Tensor& scores,
16 |                const float threshold) {
17 | 
18 |   if (dets.is_cuda()) {
19 | #ifdef WITH_CUDA
20 |     // TODO raise error if not compiled with CUDA
21 |     if (dets.numel() == 0)
22 |       return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
23 |     auto b = at::cat({dets, scores.unsqueeze(1)}, 1);
24 |     return nms_cuda(b, threshold);
25 | #else
26 |     AT_ERROR("Not compiled with GPU support");
27 | #endif
28 |   }
29 | 
30 |   at::Tensor result = nms_cpu(dets, scores, threshold);
31 |   return result;
32 | }
33 | 
--------------------------------------------------------------------------------
/wetectron/utils/cv2_util.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 | # Nvidia Source Code License-NC
8 | # --------------------------------------------------------
9 | """
10 | Module for cv2 utility functions and maintaining version compatibility
11 | between 3.x and 4.x
12 | """
13 | import cv2
14 | 
15 | 
16 | def findContours(*args, **kwargs):
17 |     """
18 |     Wraps cv2.findContours to maintain compatibility between versions
19 |     3 and 4
20 | 
21 |     Returns:
22 |         contours, hierarchy
23 |     """
24 |     if cv2.__version__.startswith('4'):
25 |         contours, hierarchy = cv2.findContours(*args, **kwargs)
26 |     elif cv2.__version__.startswith('3'):
27 |         _, contours, hierarchy = cv2.findContours(*args, **kwargs)
28 |     else:
29 |         raise AssertionError(
30 |             'cv2 must be either version 3 or 4 to call this method')
31 | 
32 |     return contours, hierarchy
33 | 
--------------------------------------------------------------------------------
/configs/coco/V_16_coco14.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     SCORE_THRESH: 0.0
10 |     NMS: 0.3
11 |   ROI_BOX_HEAD:
12 |     POOLER_METHOD: "ROIPool"
13 |     POOLER_RESOLUTION: 7
14 |     POOLER_SCALES: (0.125,)
15 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
16 |   ROI_WEAK_HEAD:
17 |     PREDICTOR: "MISTPredictor"
18 |     LOSS: "RoIRegLoss"
19 |     OICR_P: 0.15
20 |     REGRESS_ON: True
21 | DATASETS:
22 |   TRAIN: ("coco_2014_train",)
23 |   TEST: ("coco_2014_val",)
24 | PROPOSAL_FILES:
25 |   TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl',)
26 |   TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',)
27 | DATALOADER:
28 |   SIZE_DIVISIBILITY: 32
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
31 |   MAX_SIZE_TRAIN: 2000
32 |   MIN_SIZE_TEST: 800
33 |   MAX_SIZE_TEST: 2000
34 | SOLVER:
35 |   IMS_PER_BATCH: 8
36 |   BASE_LR: 0.01
37 |   WEIGHT_DECAY: 0.0001
38 |   STEPS: (90000, 120000)
39 |   MAX_ITER: 130000
40 |   CHECKPOINT_PERIOD: 10000
41 | TEST:
42 |   BBOX_AUG:
43 |     ENABLED: True
44 |     HEUR: "AVG"
45 |     H_FLIP: True
46 |     SCALES: (480, 576, 688, 864, 1000, 1200)
47 |     MAX_SIZE: 2000
48 |     SCALE_H_FLIP: True
--------------------------------------------------------------------------------
/wetectron/data/datasets/list_dataset.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
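# Usage sketch for the ListDataset defined below (hypothetical paths);
# every item pairs the image with a dummy whole-image BoxList target:
#
#   dataset = ListDataset(["/path/to/a.jpg", "/path/to/b.jpg"])
#   img, target = dataset[0]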
6 | """ 7 | Simple dataset class that wraps a list of path names 8 | """ 9 | 10 | from PIL import Image 11 | 12 | from wetectron.structures.bounding_box import BoxList 13 | 14 | 15 | class ListDataset(object): 16 | def __init__(self, image_lists, transforms=None): 17 | self.image_lists = image_lists 18 | self.transforms = transforms 19 | 20 | def __getitem__(self, item): 21 | img = Image.open(self.image_lists[item]).convert("RGB") 22 | 23 | # dummy target 24 | w, h = img.size 25 | target = BoxList([[0, 0, w, h]], img.size, mode="xyxy") 26 | 27 | if self.transforms is not None: 28 | img, target = self.transforms(img, target) 29 | 30 | return img, target 31 | 32 | def __len__(self): 33 | return len(self.image_lists) 34 | 35 | def get_img_info(self, item): 36 | """ 37 | Return the image dimensions for the image, without 38 | loading and pre-processing it 39 | """ 40 | pass 41 | -------------------------------------------------------------------------------- /wetectron/utils/imports.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | import torch 11 | 12 | if torch._six.PY3: 13 | import importlib 14 | import importlib.util 15 | import sys 16 | 17 | # from https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path?utm_medium=organic&utm_source=google_rich_qa&utm_campaign=google_rich_qa 18 | def import_file(module_name, file_path, make_importable=False): 19 | spec = importlib.util.spec_from_file_location(module_name, file_path) 20 | module = importlib.util.module_from_spec(spec) 21 | spec.loader.exec_module(module) 22 | if make_importable: 23 | sys.modules[module_name] = module 24 | return module 25 | else: 26 | import imp 27 | 28 | def import_file(module_name, file_path, make_importable=None): 29 | module = imp.load_source(module_name, file_path) 30 | return module 31 | -------------------------------------------------------------------------------- /configs/coco/V_16_coco17.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.0 10 | NMS: 0.3 11 | ROI_BOX_HEAD: 12 | POOLER_METHOD: "ROIPool" 13 | POOLER_RESOLUTION: 7 14 | POOLER_SCALES: (0.125,) 15 | FEATURE_EXTRACTOR: "VGG16.roi_head" 16 | ROI_WEAK_HEAD: 17 | PREDICTOR: "MISTPredictor" 18 | LOSS: "RoIRegLoss" 19 | OICR_P: 0.15 20 | REGRESS_ON: True 21 | DATASETS: 22 | TRAIN: ("coco_2014_train", "coco_2014_valminusminival") 23 | TEST: ("coco_2014_minival",) 24 | PROPOSAL_FILES: 25 | TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl', 'proposal/MCG-coco_2014_valminusminival-boxes.pkl') 26 | TEST: ('proposal/MCG-coco_2014_minival-boxes.pkl',) 27 | DATALOADER: 28 | SIZE_DIVISIBILITY: 32 29 | INPUT: 30 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 31 | MAX_SIZE_TRAIN: 2000 32 | MIN_SIZE_TEST: 800 33 
| MAX_SIZE_TEST: 2000 34 | SOLVER: 35 | IMS_PER_BATCH: 8 36 | BASE_LR: 0.01 37 | WEIGHT_DECAY: 0.0001 38 | STEPS: (90000, 120000) 39 | MAX_ITER: 130000 40 | CHECKPOINT_PERIOD: 10000 41 | TEST: 42 | BBOX_AUG: 43 | ENABLED: True 44 | HEUR: "AVG" 45 | H_FLIP: True 46 | SCALES: (480, 576, 688, 864, 1000, 1200) 47 | MAX_SIZE: 2000 48 | SCALE_H_FLIP: True -------------------------------------------------------------------------------- /wetectron/layers/_utils.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import glob 7 | import os.path 8 | import torch 9 | from torch.utils.cpp_extension import load as load_ext 10 | from torch.utils.cpp_extension import CUDA_HOME 11 | 12 | 13 | def _load_C_extensions(): 14 | this_dir = os.path.dirname(os.path.abspath(__file__)) 15 | this_dir = os.path.dirname(this_dir) 16 | this_dir = os.path.join(this_dir, "csrc") 17 | 18 | main_file = glob.glob(os.path.join(this_dir, "*.cpp")) 19 | source_cpu = glob.glob(os.path.join(this_dir, "cpu", "*.cpp")) 20 | source_cuda = glob.glob(os.path.join(this_dir, "cuda", "*.cu")) 21 | 22 | source = main_file + source_cpu 23 | 24 | extra_cflags = [] 25 | if torch.cuda.is_available() and CUDA_HOME is not None: 26 | source.extend(source_cuda) 27 | extra_cflags = ["-DWITH_CUDA"] 28 | source = [os.path.join(this_dir, s) for s in source] 29 | extra_include_paths = [this_dir] 30 | return load_ext( 31 | "torchvision", 32 | source, 33 | extra_cflags=extra_cflags, 34 | extra_include_paths=extra_include_paths, 35 | ) 36 | 37 | 38 | _C = _load_C_extensions() 39 | -------------------------------------------------------------------------------- /configs/coco/V_16_coco14_point.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | BATCH_SIZE_PER_IMAGE: 1024 10 | SCORE_THRESH: 0.0 11 | NMS: 0.3 12 | ROI_BOX_HEAD: 13 | POOLER_METHOD: "ROIPool" 14 | POOLER_RESOLUTION: 7 15 | POOLER_SCALES: (0.125,) 16 | FEATURE_EXTRACTOR: "VGG16.roi_head" 17 | ROI_WEAK_HEAD: 18 | PARTIAL_LABELS: 'point' 19 | ROI_LOSS_REFINE: True 20 | PREDICTOR: "MISTPredictor" 21 | LOSS: "RoIRegLoss" 22 | OICR_P: 0.15 23 | REGRESS_ON: True 24 | DATASETS: 25 | TRAIN: ("coco_2014_train_partial", ) 26 | TEST: ("coco_2014_val",) 27 | PROPOSAL_FILES: 28 | TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl', ) 29 | TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',) 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | INPUT: 33 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 34 | MAX_SIZE_TRAIN: 2000 35 | MIN_SIZE_TEST: 800 36 | MAX_SIZE_TEST: 2000 37 | SOLVER: 38 | IMS_PER_BATCH: 8 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (90000, 120000) 42 | MAX_ITER: 130000 43 | CHECKPOINT_PERIOD: 10000 44 | TEST: 45 | BBOX_AUG: 46 | ENABLED: True 47 | HEUR: "AVG" 48 | H_FLIP: True 49 | SCALES: (480, 576, 688, 864, 1000, 1200) 50 | MAX_SIZE: 2000 51 | SCALE_H_FLIP: True -------------------------------------------------------------------------------- /configs/coco/V_16_coco14_scribble.yaml: 
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     BATCH_SIZE_PER_IMAGE: 1024
10 |     SCORE_THRESH: 0.0
11 |     NMS: 0.3
12 |   ROI_BOX_HEAD:
13 |     POOLER_METHOD: "ROIPool"
14 |     POOLER_RESOLUTION: 7
15 |     POOLER_SCALES: (0.125,)
16 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
17 |   ROI_WEAK_HEAD:
18 |     PARTIAL_LABELS: 'scribble'
19 |     ROI_LOSS_REFINE: True
20 |     PREDICTOR: "MISTPredictor"
21 |     LOSS: "RoIRegLoss"
22 |     OICR_P: 0.15
23 |     REGRESS_ON: True
24 | DATASETS:
25 |   TRAIN: ("coco_2014_train_partial", )
26 |   TEST: ("coco_2014_val",)
27 | PROPOSAL_FILES:
28 |   TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl', )
29 |   TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',)
30 | DATALOADER:
31 |   SIZE_DIVISIBILITY: 32
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
34 |   MAX_SIZE_TRAIN: 2000
35 |   MIN_SIZE_TEST: 800
36 |   MAX_SIZE_TEST: 2000
37 | SOLVER:
38 |   IMS_PER_BATCH: 8
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   STEPS: (90000, 120000)
42 |   MAX_ITER: 130000
43 |   CHECKPOINT_PERIOD: 10000
44 | TEST:
45 |   BBOX_AUG:
46 |     ENABLED: True
47 |     HEUR: "AVG"
48 |     H_FLIP: True
49 |     SCALES: (480, 576, 688, 864, 1000, 1200)
50 |     MAX_SIZE: 2000
51 |     SCALE_H_FLIP: True
--------------------------------------------------------------------------------
/wetectron/csrc/SigmoidFocalLoss.h:
--------------------------------------------------------------------------------
1 | //------------------------------------------------------------------------------
2 | // Code taken from https://github.com/NVlabs/wetectron
3 | //------------------------------------------------------------------------------
4 | 
5 | #pragma once
6 | 
7 | #include "cpu/vision.h"
8 | 
9 | #ifdef WITH_CUDA
10 | #include "cuda/vision.h"
11 | #endif
12 | 
13 | // Interface for Python
14 | at::Tensor SigmoidFocalLoss_forward(
15 |     const at::Tensor& logits,
16 |     const at::Tensor& targets,
17 |     const int num_classes,
18 |     const float gamma,
19 |     const float alpha) {
20 |   if (logits.is_cuda()) {
21 | #ifdef WITH_CUDA
22 |     return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha);
23 | #else
24 |     AT_ERROR("Not compiled with GPU support");
25 | #endif
26 |   }
27 |   AT_ERROR("Not implemented on the CPU");
28 | }
29 | 
30 | at::Tensor SigmoidFocalLoss_backward(
31 |     const at::Tensor& logits,
32 |     const at::Tensor& targets,
33 |     const at::Tensor& d_losses,
34 |     const int num_classes,
35 |     const float gamma,
36 |     const float alpha) {
37 |   if (logits.is_cuda()) {
38 | #ifdef WITH_CUDA
39 |     return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha);
40 | #else
41 |     AT_ERROR("Not compiled with GPU support");
42 | #endif
43 |   }
44 |   AT_ERROR("Not implemented on the CPU");
45 | }
46 | 
--------------------------------------------------------------------------------
/wetectron/layers/batch_norm.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
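# Note on the forward pass below: with frozen statistics, batch norm is a
# fixed affine map, y = (x - running_mean) / sqrt(running_var) * weight + bias,
# which the code folds into y = x * scale + shift, where
# scale = weight * running_var.rsqrt() and shift = bias - running_mean * scale
# (no epsilon term, matching the implementation as written).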
6 | import torch
7 | from torch import nn
8 | 
9 | 
10 | class FrozenBatchNorm2d(nn.Module):
11 |     """
12 |     BatchNorm2d where the batch statistics and the affine parameters
13 |     are fixed
14 |     """
15 | 
16 |     def __init__(self, n):
17 |         super(FrozenBatchNorm2d, self).__init__()
18 |         self.register_buffer("weight", torch.ones(n))
19 |         self.register_buffer("bias", torch.zeros(n))
20 |         self.register_buffer("running_mean", torch.zeros(n))
21 |         self.register_buffer("running_var", torch.ones(n))
22 | 
23 |     def forward(self, x):
24 |         # Cast all fixed parameters to half() if necessary
25 |         if x.dtype == torch.float16:
26 |             self.weight = self.weight.half()
27 |             self.bias = self.bias.half()
28 |             self.running_mean = self.running_mean.half()
29 |             self.running_var = self.running_var.half()
30 | 
31 |         scale = self.weight * self.running_var.rsqrt()
32 |         bias = self.bias - self.running_mean * scale
33 |         scale = scale.reshape(1, -1, 1, 1)
34 |         bias = bias.reshape(1, -1, 1, 1)
35 |         return x * scale + bias
36 | 
--------------------------------------------------------------------------------
/inference_scripts/run_inference_coco14_val.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
5 | 
6 | source ~/.bashrc
7 | conda deactivate
8 | conda activate bib
9 | 
10 | echo "$(which conda)"
11 | echo "$(which python)"
12 | 
13 | if [ -z "$3" ]
14 | then
15 |     num_gpus=1
16 | else
17 |     num_gpus="$3"
18 | fi
19 | 
20 | if [ -z "$4" ]
21 | then
22 |     enable_aug="False"
23 | else
24 |     enable_aug="$4"
25 | fi
26 | 
27 | if [ -z "$5" ]
28 | then
29 |     test_per_gpu=2
30 | else
31 |     test_per_gpu="$5"
32 | fi
33 | 
34 | if [ -z "$6" ]
35 | then
36 |     PORT=12124 # master port must be below 65536
37 | else
38 |     PORT="$6"
39 | fi
40 | 
41 | exp_name="$1"
42 | model_name="$2"
43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT"
44 | 
45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \
46 |     --config-file "$exp_name"/config.yml \
47 |     MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \
48 |     OUTPUT_DIR "$exp_name" \
49 |     TEST.IMS_PER_BATCH $[2*$num_gpus] \
50 |     TEST.BBOX_AUG.ENABLED "$enable_aug" \
51 |     TEST.CONCAT_DATASETS True \
52 |     DATASETS.TEST '("coco_2014_val", )' \
53 |     PROPOSAL_FILES.TEST '("proposal/MCG-coco_2014_val-boxes.pkl", )'
54 | 
--------------------------------------------------------------------------------
/inference_scripts/run_inference_voc07_test.sh:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code developed by Huy V. Vo and Oriane Simeoni
3 | # INRIA, Valeo.ai
4 | #------------------------------------------------------------------------------
5 | 
6 | source ~/.bashrc
7 | conda deactivate
8 | conda activate bib
9 | 
10 | echo "$(which conda)"
11 | echo "$(which python)"
12 | 
13 | if [ -z "$3" ]
14 | then
15 |     num_gpus=1
16 | else
17 |     num_gpus="$3"
18 | fi
19 | 
20 | if [ -z "$4" ]
21 | then
22 |     enable_aug="False"
23 | else
24 |     enable_aug="$4"
25 | fi
26 | 
27 | if [ -z "$5" ]
28 | then
29 |     test_per_gpu=2
30 | else
31 |     test_per_gpu="$5"
32 | fi
33 | 
34 | if [ -z "$6" ]
35 | then
36 |     PORT=12124 # master port must be below 65536
37 | else
38 |     PORT="$6"
39 | fi
40 | 
41 | exp_name="$1"
42 | model_name="$2"
43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT"
44 | 
45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \
46 |     --config-file "$exp_name"/config.yml \
47 |     MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \
48 |     OUTPUT_DIR "$exp_name" \
49 |     TEST.IMS_PER_BATCH $[$test_per_gpu*$num_gpus] \
50 |     TEST.BBOX_AUG.ENABLED "$enable_aug" \
51 |     TEST.CONCAT_DATASETS "True" \
52 |     DATASETS.TEST '("voc_2007_test",)' \
53 |     PROPOSAL_FILES.TEST '("proposal/SS-voc_2007_test-boxes.pkl",)' \
54 |     ACTIVE.INPUT_FILE None
55 | 
--------------------------------------------------------------------------------
/wetectron/data/datasets/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V. Vo and Oriane Simeoni
4 | # INRIA, Valeo.ai
5 | #------------------------------------------------------------------------------
6 | 
7 | from wetectron.data import datasets
8 | from .coco import coco_evaluation
9 | from .voc import voc_evaluation
10 | 
11 | def evaluate(dataset, predictions, output_folder, task='det', **kwargs):
12 |     """
13 |     Evaluate dataset using different methods based on dataset type.
14 |     Args:
15 |         dataset: Dataset object
16 |         predictions(list[BoxList]): each item in the list represents the
17 |             prediction results for one image.
18 |         output_folder: output folder, to save evaluation files or results.
19 |         **kwargs: other args.
20 |     Returns:
21 |         evaluation result
22 |     """
23 |     args = dict(
24 |         dataset=dataset, predictions=predictions, output_folder=output_folder, **kwargs
25 |     )
26 |     if datasets._isinstance(dataset, datasets.COCODataset) and "voc_2012" not in dataset.ann_file:
27 |         return coco_evaluation(**args)
28 |     elif datasets._isinstance(dataset, datasets.PascalVOCDataset) or "voc_2012" in dataset.ann_file:
29 |         args['task'] = task
30 |         return voc_evaluation(**args)
31 |     else:
32 |         dataset_name = dataset.__class__.__name__
33 |         raise NotImplementedError("Unsupported dataset type {}.".format(dataset_name))
34 | 
--------------------------------------------------------------------------------
/wetectron/data/samplers/iteration_based_batch_sampler.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
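# A minimal usage sketch (hypothetical sizes, standard torch samplers):
#
#   from torch.utils.data.sampler import BatchSampler, RandomSampler
#
#   sampler = RandomSampler(range(1000))
#   batches = BatchSampler(sampler, batch_size=8, drop_last=False)
#   # cycles over `batches` until exactly 30000 batches have been yielded
#   train_batches = IterationBasedBatchSampler(batches, num_iterations=30000)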
6 | from torch.utils.data.sampler import BatchSampler
7 | 
8 | 
9 | class IterationBasedBatchSampler(BatchSampler):
10 |     """
11 |     Wraps a BatchSampler, resampling from it until
12 |     a specified number of iterations have been sampled
13 |     """
14 | 
15 |     def __init__(self, batch_sampler, num_iterations, start_iter=0):
16 |         self.batch_sampler = batch_sampler
17 |         self.num_iterations = num_iterations
18 |         self.start_iter = start_iter
19 | 
20 |     def __iter__(self):
21 |         iteration = self.start_iter
22 |         while iteration <= self.num_iterations:
23 |             # if the underlying sampler has a set_epoch method, like
24 |             # DistributedSampler, used for making each process see
25 |             # a different split of the dataset, then set it
26 |             if hasattr(self.batch_sampler.sampler, "set_epoch"):
27 |                 self.batch_sampler.sampler.set_epoch(iteration)
28 |             for batch in self.batch_sampler:
29 |                 iteration += 1
30 |                 if iteration > self.num_iterations:
31 |                     break
32 |                 yield batch
33 | 
34 |     def __len__(self):
35 |         return self.num_iterations
36 | 
--------------------------------------------------------------------------------
/summarize_performance.sh:
--------------------------------------------------------------------------------
1 | # Read arguments
2 | 
3 | DATASET="$1" # 'voc07' or 'coco14'
4 | OUTPUT_DIR="$2"
5 | NUM_CYCLE="$3"
6 | BUDGET="$4"
7 | 
8 | echo "Average and standard deviation of the performance on "$OUTPUT_DIR":"
9 | if [[ "$DATASET" == "voc07" ]]
10 | then
11 |     for cycle in $(seq 1 $NUM_CYCLE)
12 |     do
13 |         echo -n "Cycle $cycle: "
14 |         find $OUTPUT_DIR -wholename "*ver*_"$((cycle * BUDGET))"_images/inference*test*/result.txt" -not -wholename "*bbox_aug/result*" \
15 |             -exec head -1 {} \; | awk '{m+=$2;n+=(($2)^2)}END{printf "%.1f +/- %.1f\n", 100*m/NR, 100*sqrt(n/(NR) - ((m/NR)^2) ) }'
16 |     done
17 | elif [[ "$DATASET" == "coco14" ]]
18 | then
19 |     echo "Cycle AP AP50"
20 |     for cycle in $(seq 1 $NUM_CYCLE)
21 |     do
22 |         echo -n "$cycle" " "
23 |         for f in $(find $OUTPUT_DIR -wholename "*cycle"$cycle"*val*/coco_results*" -not -wholename "*bbox_aug/coco*")
24 |         do
25 |             python -c "import torch; a = torch.load('"$f"'); print(a.results['bbox']['AP']*100)"
26 |         done | awk '{m+=$1;n+=(($1)^2 )}END{printf "%.1f +/- %.1f %-3s", m/NR, sqrt(n/NR - (m/NR)^2 ), " " }'
27 | 
28 |         for f in $(find $OUTPUT_DIR -wholename "*cycle"$cycle"*val*/coco_results*" -not -wholename "*bbox_aug/coco*")
29 |         do
30 |             python -c "import torch; a = torch.load('"$f"'); print(a.results['bbox']['AP50']*100)"
31 |         done | awk '{m+=$1;n+=(($1)^2 )}END{printf "%.1f +/- %.1f\n", m/NR, sqrt(n/NR - (m/NR)^2 ) }'
32 |     done
33 | else
34 |     echo "DATASET="$DATASET" is not supported!" >&2
35 |     exit 1
36 | fi
--------------------------------------------------------------------------------
/configs/voc/V_16_voc07.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     SCORE_THRESH: 0.0
10 |     NMS: 0.3
11 |   ROI_BOX_HEAD:
12 |     NUM_CLASSES: 21
13 |     POOLER_METHOD: "ROIPool"
14 |     POOLER_RESOLUTION: 7
15 |     POOLER_SCALES: (0.125,)
16 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
17 |   ROI_WEAK_HEAD:
18 |     PREDICTOR: "MISTPredictor"
19 |     LOSS: "RoIRegLoss"
20 |     OICR_P: 0.15
21 |     REGRESS_ON: True
22 | DB:
23 |   METHOD: 'concrete'
24 | DATASETS:
25 |   TRAIN: ("voc_2007_train", "voc_2007_val")
26 |   TEST: ("voc_2007_test",)
27 | PROPOSAL_FILES:
28 |   TRAIN: ('proposal/SS-voc_2007_train-boxes.pkl','proposal/SS-voc_2007_val-boxes.pkl')
29 |   TEST: ('proposal/SS-voc_2007_test-boxes.pkl',)
30 | DATALOADER:
31 |   SIZE_DIVISIBILITY: 32
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
34 |   MAX_SIZE_TRAIN: 2000
35 |   MIN_SIZE_TEST: 800
36 |   MAX_SIZE_TEST: 2000
37 | SOLVER:
38 |   IMS_PER_BATCH: 8
39 |   BASE_LR: 0.01
40 |   WEIGHT_DECAY: 0.0001
41 |   WARMUP_ITERS: 200
42 |   STEPS: (20000, 26700)
43 |   MAX_ITER: 30000
44 |   CHECKPOINT_PERIOD: 1000
45 | SOLVER_CDB:
46 |   BASE_LR: 0.0001
47 |   GAMMA: 3.0
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (20000, 26700)
50 | TEST:
51 |   BBOX_AUG:
52 |     ENABLED: True
53 |     HEUR: "AVG"
54 |     H_FLIP: True
55 |     SCALES: (480, 576, 688, 864, 1000, 1200)
56 |     MAX_SIZE: 2000
57 |     SCALE_H_FLIP: True
58 | SEED: 1234
--------------------------------------------------------------------------------
/configs/voc/V_16_voc12.yaml:
--------------------------------------------------------------------------------
1 | MODEL:
2 |   META_ARCHITECTURE: "GeneralizedRCNN"
3 |   WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16"
4 |   BACKBONE:
5 |     CONV_BODY: "VGG16-OICR"
6 |   WSOD_ON: True
7 |   FASTER_RCNN: False
8 |   ROI_HEADS:
9 |     SCORE_THRESH: 0.0
10 |     NMS: 0.3
11 |   ROI_BOX_HEAD:
12 |     NUM_CLASSES: 21
13 |     POOLER_METHOD: "ROIPool"
14 |     POOLER_RESOLUTION: 7
15 |     POOLER_SCALES: (0.125,)
16 |     FEATURE_EXTRACTOR: "VGG16.roi_head"
17 |   ROI_WEAK_HEAD:
18 |     PREDICTOR: "MISTPredictor"
19 |     LOSS: "RoIRegLoss"
20 |     OICR_P: 0.15
21 |     REGRESS_ON: True
22 | DB:
23 |   METHOD: 'concrete'
24 | DATASETS:
25 |   TRAIN: ("voc_2012_train", "voc_2012_val")
26 |   TEST: ("voc_2007_test",)
27 | PROPOSAL_FILES:
28 |   TRAIN: ('proposal/SS-voc_2012_train-boxes.pkl','proposal/SS-voc_2012_val-boxes.pkl')
29 |   TEST: ('proposal/SS-voc_2007_test-boxes.pkl',)
30 | DATALOADER:
31 |   SIZE_DIVISIBILITY: 32
32 | INPUT:
33 |   MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200)
34 |   MAX_SIZE_TRAIN: 2000
35 |   MIN_SIZE_TEST: 800
36 |   MAX_SIZE_TEST: 2000
37 | SOLVER:
38 |   IMS_PER_BATCH: 16
39 |   BASE_LR: 0.02
40 |   WEIGHT_DECAY: 0.0001
41 |   WARMUP_ITERS: 200
42 |   STEPS: (20000, 26700)
43 |   MAX_ITER: 30000
44 |   CHECKPOINT_PERIOD: 1000
45 | SOLVER_CDB:
46 |   BASE_LR: 0.0001
47 |   GAMMA: 3.0
48 |   WEIGHT_DECAY: 0.0001
49 |   STEPS: (20000, 26700)
50 | TEST:
51 |   BBOX_AUG:
52 |     ENABLED: True
53 |     HEUR: "AVG"
54 |     H_FLIP: True
55 |     SCALES: (480, 576, 688, 864, 1000, 1200)
56 |     MAX_SIZE: 2000
57 |     SCALE_H_FLIP: True
58 | SEED: 1234
--------------------------------------------------------------------------------
/inference_scripts/run_inference_coco14_train.sh:
-------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code developed by Huy V. Vo and Oriane Simeoni 3 | # INRIA, Valeo.ai 4 | #------------------------------------------------------------------------------ 5 | 6 | source ~/.bashrc 7 | conda deactivate 8 | conda activate bib 9 | 10 | echo "$(which conda)" 11 | echo "$(which python)" 12 | 13 | if [ -z "$3" ] 14 | then 15 | num_gpus=1 16 | else 17 | num_gpus="$3" 18 | fi 19 | 20 | if [ -z "$4" ] 21 | then 22 | enable_aug="False" 23 | else 24 | enable_aug="$4" 25 | fi 26 | 27 | if [ -z "$5" ] 28 | then 29 | test_per_gpu=2 30 | else 31 | test_per_gpu="$5" 32 | fi 33 | 34 | if [ -z "$6" ] 35 | then 36 | PORT=82124 37 | else 38 | PORT="$6" 39 | fi 40 | 41 | exp_name="$1" 42 | model_name="$2" 43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT" 44 | 45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \ 46 | --config-file "$exp_name"/config.yml \ 47 | MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \ 48 | OUTPUT_DIR "$exp_name" \ 49 | TEST.IMS_PER_BATCH $[2*$num_gpus] \ 50 | TEST.BBOX_AUG.ENABLED "$enable_aug" \ 51 | TEST.RETURN_LOSS True \ 52 | TEST.CONCAT_DATASETS True \ 53 | DATASETS.TEST '("coco_2014_train", )' \ 54 | TEST.REMOVE_IMAGES_WITHOUT_ANNOTATIONS True \ 55 | TEST.RUN_EVALUATION False \ 56 | PROPOSAL_FILES.TEST '("proposal/MCG-coco_2014_train-boxes.pkl", )' 57 | -------------------------------------------------------------------------------- /inference_scripts/run_inference_voc07_trainval.sh: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code developed by Huy V. 
Vo and Oriane Simeoni 3 | # INRIA, Valeo.ai 4 | #------------------------------------------------------------------------------ 5 | 6 | source ~/.bashrc 7 | conda deactivate 8 | conda activate bib 9 | 10 | echo "$(which conda)" 11 | echo "$(which python)" 12 | 13 | if [ -z "$3" ] 14 | then 15 | num_gpus=1 16 | else 17 | num_gpus="$3" 18 | fi 19 | 20 | if [ -z "$4" ] 21 | then 22 | enable_aug="False" 23 | else 24 | enable_aug="$4" 25 | fi 26 | 27 | if [ -z "$5" ] 28 | then 29 | test_per_gpu=2 30 | else 31 | test_per_gpu="$5" 32 | fi 33 | 34 | if [ -z "$6" ] 35 | then 36 | PORT=29500 # default master port; must be a valid TCP port (<= 65535) 37 | else 38 | PORT="$6" 39 | fi 40 | 41 | exp_name="$1" 42 | model_name="$2" 43 | echo "$exp_name" "$model_name" "$num_gpus" "$enable_aug" "$test_per_gpu" "$PORT" 44 | 45 | python -m torch.distributed.launch --nproc_per_node=$num_gpus --master_port=$PORT tools/test_net.py \ 46 | --config-file "$exp_name"/config.yml \ 47 | MODEL.WEIGHT "$exp_name"/ckpt/"$model_name" \ 48 | OUTPUT_DIR "$exp_name" \ 49 | TEST.IMS_PER_BATCH $((test_per_gpu*num_gpus)) \ 50 | TEST.BBOX_AUG.ENABLED "$enable_aug" \ 51 | TEST.CONCAT_DATASETS "True" \ 52 | TEST.RETURN_LOSS "True" \ 53 | DATASETS.TEST '("voc_2007_train", "voc_2007_val")' \ 54 | TEST.REMOVE_IMAGES_WITHOUT_ANNOTATIONS True \ 55 | PROPOSAL_FILES.TEST '("proposal/SS-voc_2007_train-boxes.pkl", "proposal/SS-voc_2007_val-boxes.pkl")' 56 | -------------------------------------------------------------------------------- /wetectron/data/collate_batch.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | from wetectron.structures.image_list import to_image_list 7 | 8 | 9 | class BatchCollator(object): 10 | """ 11 | From a list of samples from the dataset, 12 | returns the batched images and targets. 13 | This should be passed to the DataLoader 14 | """ 15 | 16 | def __init__(self, size_divisible=0): 17 | self.size_divisible = size_divisible 18 | 19 | def __call__(self, batch): 20 | transposed_batch = list(zip(*batch)) 21 | images = to_image_list(transposed_batch[0], self.size_divisible) 22 | targets = transposed_batch[1] 23 | if len(transposed_batch) == 3: 24 | img_ids = transposed_batch[2] 25 | return images, targets, img_ids 26 | elif len(transposed_batch) == 4: 27 | rois = transposed_batch[2] 28 | img_ids = transposed_batch[3] 29 | return images, targets, rois, img_ids 30 | else: 31 | raise ValueError('batch items must have 3 or 4 elements, got {}'.format(len(transposed_batch))) 32 | 33 | 34 | class BBoxAugCollator(object): 35 | """ 36 | From a list of samples from the dataset, 37 | returns the images and targets. 38 | Images should be converted to batched images in `im_detect_bbox_aug` 39 | """ 40 | 41 | def __call__(self, batch): 42 | return list(zip(*batch)) 43 | 44 | -------------------------------------------------------------------------------- /wetectron/data/datasets/evaluation/voc/__init__.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V.
Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | import logging 8 | 9 | from .voc_eval import do_voc_evaluation 10 | from .voc_eval import calc_detection_voc_prec_rec 11 | from .voc_eval_old import do_loc_evaluation 12 | from .voc_eval_old import do_voc_evaluation as do_voc_evaluation_old 13 | 14 | def voc_evaluation(dataset, predictions, output_folder, box_only, task='det', **_): 15 | logger = logging.getLogger("maskrcnn_benchmark.inference") 16 | if box_only: 17 | logger.warning("voc evaluation does not support box_only; ignoring it.") 18 | logger.info("performing voc evaluation; iou_types is ignored.") 19 | if task == 'det': 20 | return do_voc_evaluation( 21 | dataset=dataset, 22 | predictions=predictions, 23 | output_folder=output_folder, 24 | logger=logger, 25 | ) 26 | if task == 'det_old': 27 | return do_voc_evaluation_old( 28 | dataset=dataset, 29 | predictions=predictions, 30 | output_folder=output_folder, 31 | logger=logger, 32 | ) 33 | elif task == 'corloc': 34 | return do_loc_evaluation( 35 | dataset=dataset, 36 | predictions=predictions, 37 | output_folder=output_folder, 38 | logger=logger, 39 | ) 40 | else: 41 | raise ValueError("unknown task: {}".format(task)) 42 | -------------------------------------------------------------------------------- /configs/coco/V_16_coco14_active.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.0 10 | NMS: 0.3 11 | BG_IOU_THRESHOLD: 0.3 12 | ROI_BOX_HEAD: 13 | POOLER_METHOD: "ROIPool" 14 | POOLER_RESOLUTION: 7 15 | POOLER_SCALES: (0.125,) 16 | FEATURE_EXTRACTOR: "VGG16.roi_head" 17 | ROI_WEAK_HEAD: 18 | PREDICTOR: "MISTPredictor" 19 | LOSS: "RoIRegLoss" 20 | OICR_P: 0.15 21 | REGRESS_ON: True 22 | ACTIVE_LOSS: "RoIRegActiveLoss" 23 | ACTIVE_LOSS_WEIGHT: 1.0 24 | DATASETS: 25 | TRAIN: ("coco_2014_train",) 26 | TEST: ("coco_2014_val",) 27 | PROPOSAL_FILES: 28 | TRAIN: ('proposal/MCG-coco_2014_train-boxes.pkl',) 29 | TEST: ('proposal/MCG-coco_2014_val-boxes.pkl',) 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | INPUT: 33 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 34 | MAX_SIZE_TRAIN: 2000 35 | MIN_SIZE_TEST: 800 36 | MAX_SIZE_TEST: 2000 37 | SOLVER: 38 | IMS_PER_BATCH: 8 39 | BASE_LR: 0.01 40 | WEIGHT_DECAY: 0.0001 41 | STEPS: (90000, 120000) 42 | MAX_ITER: 130000 43 | CHECKPOINT_PERIOD: 10000 44 | TEST: 45 | BBOX_AUG: 46 | ENABLED: True 47 | HEUR: "AVG" 48 | H_FLIP: True 49 | SCALES: (480, 576, 688, 864, 1000, 1200) 50 | MAX_SIZE: 2000 51 | SCALE_H_FLIP: True 52 | SEED: 1234 53 | ACTIVE: 54 | INPUT_FILE: None 55 | PSEUDO_BOXES_FILE: None 56 | LOSS: 57 | STRONG_LOSS_ON_MIL: False 58 | IMG_STRONG_DET_WEIGHT: 0.0 59 | WEIGHTS_BBX_LOSS: 1.0 60 | WEIGHTED_SAMPLING: True -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/keypoint_head/roi_keypoint_predictors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | from torch import nn 6 | 7 | from wetectron import layers 8 | from wetectron.modeling import registry 9 |
10 | 11 | @registry.ROI_KEYPOINT_PREDICTOR.register("KeypointRCNNPredictor") 12 | class KeypointRCNNPredictor(nn.Module): 13 | def __init__(self, cfg, in_channels): 14 | super(KeypointRCNNPredictor, self).__init__() 15 | input_features = in_channels 16 | num_keypoints = cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_CLASSES 17 | deconv_kernel = 4 18 | self.kps_score_lowres = layers.ConvTranspose2d( 19 | input_features, 20 | num_keypoints, 21 | deconv_kernel, 22 | stride=2, 23 | padding=deconv_kernel // 2 - 1, 24 | ) 25 | nn.init.kaiming_normal_( 26 | self.kps_score_lowres.weight, mode="fan_out", nonlinearity="relu" 27 | ) 28 | nn.init.constant_(self.kps_score_lowres.bias, 0) 29 | self.up_scale = 2 30 | self.out_channels = num_keypoints 31 | 32 | def forward(self, x): 33 | x = self.kps_score_lowres(x) 34 | x = layers.interpolate( 35 | x, scale_factor=self.up_scale, mode="bilinear", align_corners=False 36 | ) 37 | return x 38 | 39 | 40 | def make_roi_keypoint_predictor(cfg, in_channels): 41 | func = registry.ROI_KEYPOINT_PREDICTOR[cfg.MODEL.ROI_KEYPOINT_HEAD.PREDICTOR] 42 | return func(cfg, in_channels) 43 | -------------------------------------------------------------------------------- /configs/voc/V_16_voc0712.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.0 10 | NMS: 0.3 11 | ROI_BOX_HEAD: 12 | NUM_CLASSES: 21 13 | POOLER_METHOD: "ROIPool" 14 | POOLER_RESOLUTION: 7 15 | POOLER_SCALES: (0.125,) 16 | FEATURE_EXTRACTOR: "VGG16.roi_head" 17 | ROI_WEAK_HEAD: 18 | PREDICTOR: "MISTPredictor" 19 | LOSS: "RoIRegLoss" 20 | OICR_P: 0.15 21 | REGRESS_ON: True 22 | DB: 23 | METHOD: 'concrete' 24 | DATASETS: 25 | TRAIN: ("voc_2007_train", "voc_2007_val", "voc_2012_train", "voc_2012_val") 26 | TEST: ("voc_2007_test",) 27 | PROPOSAL_FILES: 28 | TRAIN: ('proposal/SS-voc_2007_train-boxes.pkl','proposal/SS-voc_2007_val-boxes.pkl', 'proposal/SS-voc_2012_train-boxes.pkl','proposal/SS-voc_2012_val-boxes.pkl') 29 | TEST: ('proposal/SS-voc_2007_test-boxes.pkl',) 30 | DATALOADER: 31 | SIZE_DIVISIBILITY: 32 32 | INPUT: 33 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 34 | MAX_SIZE_TRAIN: 2000 35 | MIN_SIZE_TEST: 800 36 | MAX_SIZE_TEST: 2000 37 | SOLVER: 38 | IMS_PER_BATCH: 16 39 | BASE_LR: 0.02 40 | WEIGHT_DECAY: 0.0001 41 | WARMUP_ITERS: 200 42 | STEPS: (30000, 35000) 43 | MAX_ITER: 40000 44 | CHECKPOINT_PERIOD: 1000 45 | SOLVER_CDB: 46 | BASE_LR: 0.0001 47 | GAMMA: 3.0 48 | WEIGHT_DECAY: 0.0001 49 | STEPS: (30000, 35000) 50 | TEST: 51 | BBOX_AUG: 52 | ENABLED: True 53 | HEUR: "AVG" 54 | H_FLIP: True 55 | SCALES: (480, 576, 688, 864, 1000, 1200) 56 | MAX_SIZE: 2000 57 | SCALE_H_FLIP: True 58 | SEED: 1234 -------------------------------------------------------------------------------- /wetectron/layers/__init__.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | import torch 7 | 8 | from .batch_norm import FrozenBatchNorm2d 9 | from .misc import Conv2d 10 | from .misc import DFConv2d 11 | from .misc import ConvTranspose2d 12 | from .misc import BatchNorm2d 13 | from .misc import interpolate 14 | from .nms import nms 15 | from .roi_align import ROIAlign 16 | from .roi_align import roi_align 17 | from .roi_pool import ROIPool 18 | from .roi_pool import roi_pool 19 | from .smooth_l1_loss import smooth_l1_loss 20 | from .sigmoid_focal_loss import SigmoidFocalLoss 21 | from .dcn.deform_conv_func import deform_conv, modulated_deform_conv 22 | from .dcn.deform_conv_module import DeformConv, ModulatedDeformConv, ModulatedDeformConvPack 23 | from .dcn.deform_pool_func import deform_roi_pooling 24 | from .dcn.deform_pool_module import DeformRoIPooling, DeformRoIPoolingPack, ModulatedDeformRoIPoolingPack 25 | 26 | 27 | __all__ = [ 28 | "nms", 29 | "roi_align", 30 | "ROIAlign", 31 | "roi_pool", 32 | "ROIPool", 33 | "smooth_l1_loss", 34 | "Conv2d", 35 | "DFConv2d", 36 | "ConvTranspose2d", 37 | "interpolate", 38 | "BatchNorm2d", 39 | "FrozenBatchNorm2d", 40 | "SigmoidFocalLoss", 41 | 'deform_conv', 42 | 'modulated_deform_conv', 43 | 'DeformConv', 44 | 'ModulatedDeformConv', 45 | 'ModulatedDeformConvPack', 46 | 'deform_roi_pooling', 47 | 'DeformRoIPooling', 48 | 'DeformRoIPoolingPack', 49 | 'ModulatedDeformRoIPoolingPack', 50 | ] 51 | 52 | -------------------------------------------------------------------------------- /wetectron/utils/timer.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
10 | import time 11 | import datetime 12 | 13 | 14 | class Timer(object): 15 | def __init__(self): 16 | self.reset() 17 | 18 | @property 19 | def average_time(self): 20 | return self.total_time / self.calls if self.calls > 0 else 0.0 21 | 22 | def tic(self): 23 | # using time.time instead of time.clock because time.clock 24 | # does not normalize for multithreading 25 | self.start_time = time.time() 26 | 27 | def toc(self, average=True): 28 | self.add(time.time() - self.start_time) 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | 34 | def add(self, time_diff): 35 | self.diff = time_diff 36 | self.total_time += self.diff 37 | self.calls += 1 38 | 39 | def reset(self): 40 | self.total_time = 0.0 41 | self.calls = 0 42 | self.start_time = 0.0 43 | self.diff = 0.0 44 | 45 | def avg_time_str(self): 46 | time_str = str(datetime.timedelta(seconds=self.average_time)) 47 | return time_str 48 | 49 | 50 | def get_time_str(time_diff): 51 | time_str = str(datetime.timedelta(seconds=time_diff)) 52 | return time_str 53 | -------------------------------------------------------------------------------- /wetectron/csrc/vision.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #include "nms.h" 7 | #include "ROIAlign.h" 8 | #include "ROIPool.h" 9 | #include "SigmoidFocalLoss.h" 10 | #include "deform_conv.h" 11 | #include "deform_pool.h" 12 | 13 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 14 | m.def("nms", &nms, "non-maximum suppression"); 15 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 16 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 17 | m.def("roi_pool_forward", &ROIPool_forward, "ROIPool_forward"); 18 | m.def("roi_pool_backward", &ROIPool_backward, "ROIPool_backward"); 19 | m.def("sigmoid_focalloss_forward", &SigmoidFocalLoss_forward, "SigmoidFocalLoss_forward"); 20 | m.def("sigmoid_focalloss_backward", &SigmoidFocalLoss_backward, "SigmoidFocalLoss_backward"); 21 | // dcn-v2 22 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 23 | m.def("deform_conv_backward_input", &deform_conv_backward_input, "deform_conv_backward_input"); 24 | m.def("deform_conv_backward_parameters", &deform_conv_backward_parameters, "deform_conv_backward_parameters"); 25 | m.def("modulated_deform_conv_forward", &modulated_deform_conv_forward, "modulated_deform_conv_forward"); 26 | m.def("modulated_deform_conv_backward", &modulated_deform_conv_backward, "modulated_deform_conv_backward"); 27 | m.def("deform_psroi_pooling_forward", &deform_psroi_pooling_forward, "deform_psroi_pooling_forward"); 28 | m.def("deform_psroi_pooling_backward", &deform_psroi_pooling_backward, "deform_psroi_pooling_backward"); 29 | } -------------------------------------------------------------------------------- /wetectron/utils/env.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | #
-------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | import os 11 | from wetectron.utils.imports import import_file 12 | 13 | 14 | def setup_environment(): 15 | """Perform environment setup work. The default setup is a no-op, but this 16 | function allows the user to specify a Python source file that performs 17 | custom setup work that may be necessary to their computing environment. 18 | """ 19 | custom_module_path = os.environ.get("TORCH_DETECTRON_ENV_MODULE") 20 | if custom_module_path: 21 | setup_custom_environment(custom_module_path) 22 | else: 23 | # The default setup is a no-op 24 | pass 25 | 26 | 27 | def setup_custom_environment(custom_module_path): 28 | """Load custom environment setup from a Python source file and run the setup 29 | function. 30 | """ 31 | module = import_file("wetectron.utils.env.custom_module", custom_module_path) 32 | assert hasattr(module, "setup_environment") and callable( 33 | module.setup_environment 34 | ), ( 35 | "Custom environment module defined in {} does not have the " 36 | "required callable attribute 'setup_environment'." 37 | ).format( 38 | custom_module_path 39 | ) 40 | module.setup_environment() 41 | 42 | 43 | # Force environment setup when this module is imported 44 | setup_environment() 45 | -------------------------------------------------------------------------------- /configs/voc/V_16_voc07_active.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | WEIGHT: "catalog://VGGImageNetPretrained/JCJOHNS/VGG-16" 4 | BACKBONE: 5 | CONV_BODY: "VGG16-OICR" 6 | WSOD_ON: True 7 | FASTER_RCNN: False 8 | ROI_HEADS: 9 | SCORE_THRESH: 0.001 10 | NMS: 0.3 11 | BG_IOU_THRESHOLD: 0.3 12 | ROI_BOX_HEAD: 13 | NUM_CLASSES: 21 14 | POOLER_METHOD: "ROIPool" 15 | POOLER_RESOLUTION: 7 16 | POOLER_SCALES: (0.125,) 17 | FEATURE_EXTRACTOR: "VGG16.roi_head" 18 | ROI_WEAK_HEAD: 19 | PREDICTOR: "MISTPredictor" 20 | LOSS: "RoIRegLoss" 21 | OICR_P: 0.15 22 | REGRESS_ON: True 23 | ACTIVE_LOSS: "RoIRegActiveLoss" 24 | ACTIVE_LOSS_WEIGHT: 1.0 25 | DB: 26 | METHOD: 'none' #'concrete' or 'none' 27 | DATASETS: 28 | TRAIN: ("voc_2007_train", "voc_2007_val", ) 29 | TEST: ("voc_2007_test",) 30 | PROPOSAL_FILES: 31 | TRAIN: ('proposal/SS-voc_2007_train-boxes.pkl', 'proposal/SS-voc_2007_val-boxes.pkl', ) 32 | TEST: ('proposal/SS-voc_2007_test-boxes.pkl',) 33 | DATALOADER: 34 | SIZE_DIVISIBILITY: 32 35 | INPUT: 36 | MIN_SIZE_TRAIN: (480, 576, 688, 864, 1000, 1200) 37 | MAX_SIZE_TRAIN: 2000 38 | MIN_SIZE_TEST: 800 39 | MAX_SIZE_TEST: 2000 40 | SOLVER: 41 | IMS_PER_BATCH: 1 42 | BASE_LR: 0.01 43 | WEIGHT_DECAY: 0.0001 44 | WARMUP_ITERS: 200 45 | STEPS: (20000, 26700) 46 | MAX_ITER: 30000 47 | CHECKPOINT_PERIOD: 2000 48 | SOLVER_CDB: 49 | BASE_LR: 0.0001 50 | GAMMA: 3.0 51 | WEIGHT_DECAY: 0.0001 52 | STEPS: (20000, 26700) 53 | TEST: 54 | IMS_PER_BATCH: 1 55 | BBOX_AUG: 56 | ENABLED: True 57 | HEUR: "AVG" 58 | H_FLIP: True 59 | SCALES: (480, 576, 688, 864, 1000, 1200) 60 | MAX_SIZE: 2000 61 | SCALE_H_FLIP: True 62 | SEED: 1234 63 | ACTIVE: 64 | INPUT_FILE: None 65 | PSEUDO_BOXES_FILE: None 66 | LOSS: 67 | STRONG_LOSS_ON_MIL: False 68 | IMG_STRONG_DET_WEIGHT: 0.0 69 | WEIGHTS_BBX_LOSS: 1.0 70 | WEIGHTED_SAMPLING: True 71 | 
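--------------------------------------------------------------------------------
A usage sketch for the YAML configs above: they are consumed through the yacs-based config system that this codebase inherits from maskrcnn-benchmark, with command-line KEY VALUE pairs (as in the launch and inference scripts) merged on top of the file. The import path and the override values below are assumptions for illustration, not taken from the repository:

# Hedged sketch: load a config file and apply script-style overrides (yacs API).
from wetectron.config import cfg  # assumed export, following maskrcnn-benchmark

cfg.merge_from_file("configs/voc/V_16_voc07_active.yaml")
# KEY VALUE overrides, mirroring the pairs passed by the shell scripts;
# the ACTIVE.INPUT_FILE path below is a hypothetical placeholder.
cfg.merge_from_list([
    "ACTIVE.INPUT_FILE", "outputs/bib_cycle1/active_images.pkl",
    "OUTPUT_DIR", "outputs/bib_cycle1",
])
cfg.freeze()
print(cfg.MODEL.ROI_WEAK_HEAD.PREDICTOR)  # expected: "MISTPredictor"
--------------------------------------------------------------------------------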
-------------------------------------------------------------------------------- /wetectron/utils/registry.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | 11 | 12 | def _register_generic(module_dict, module_name, module): 13 | assert module_name not in module_dict 14 | module_dict[module_name] = module 15 | 16 | 17 | class Registry(dict): 18 | ''' 19 | A helper class for managing module registration; it extends a dictionary 20 | and provides a register function. 21 | 22 | Eg. creating a registry: 23 | some_registry = Registry({"default": default_module}) 24 | 25 | There are two ways of registering new modules: 26 | 1): the normal way is just calling the register function: 27 | def foo(): 28 | ... 29 | some_registry.register("foo_module", foo) 30 | 2): used as a decorator when declaring the module: 31 | @some_registry.register("foo_module") 32 | @some_registry.register("foo_module_nickname") 33 | def foo(): 34 | ... 35 | 36 | Accessing a module is just like using a dictionary, eg: 37 | f = some_registry["foo_module"] 38 | ''' 39 | def __init__(self, *args, **kwargs): 40 | super(Registry, self).__init__(*args, **kwargs) 41 | 42 | def register(self, module_name, module=None): 43 | # used as function call 44 | if module is not None: 45 | _register_generic(self, module_name, module) 46 | return 47 | 48 | # used as decorator 49 | def register_fn(fn): 50 | _register_generic(self, module_name, fn) 51 | return fn 52 | 53 | return register_fn 54 | -------------------------------------------------------------------------------- /wetectron/csrc/ROIPool.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | #pragma once 7 | 8 | #include "cpu/vision.h" 9 | 10 | #ifdef WITH_CUDA 11 | #include "cuda/vision.h" 12 | #endif 13 | 14 | 15 | std::tuple<at::Tensor, at::Tensor> ROIPool_forward(const at::Tensor& input, 16 | const at::Tensor& rois, 17 | const float spatial_scale, 18 | const int pooled_height, 19 | const int pooled_width) { 20 | if (input.is_cuda()) { 21 | #ifdef WITH_CUDA 22 | return ROIPool_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width); 23 | #else 24 | AT_ERROR("Not compiled with GPU support"); 25 | #endif 26 | } 27 | AT_ERROR("Not implemented on the CPU"); 28 | } 29 | 30 | at::Tensor ROIPool_backward(const at::Tensor& grad, 31 | const at::Tensor& input, 32 | const at::Tensor& rois, 33 | const at::Tensor& argmax, 34 | const float spatial_scale, 35 | const int pooled_height, 36 | const int pooled_width, 37 | const int batch_size, 38 | const int channels, 39 | const int height, 40 | const int width) { 41 | if (grad.is_cuda()) { 42 | #ifdef WITH_CUDA 43 | return ROIPool_backward_cuda(grad, input, rois, argmax, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width); 44 | #else 45 | AT_ERROR("Not compiled with GPU support"); 46 | #endif 47 | } 48 | AT_ERROR("Not implemented on the CPU"); 49 | } 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /wetectron/csrc/ROIAlign.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #pragma once 7 | 8 | #include "cpu/vision.h" 9 | 10 | #ifdef WITH_CUDA 11 | #include "cuda/vision.h" 12 | #endif 13 | 14 | // Interface for Python 15 | at::Tensor ROIAlign_forward(const at::Tensor& input, 16 | const at::Tensor& rois, 17 | const float spatial_scale, 18 | const int pooled_height, 19 | const int pooled_width, 20 | const int sampling_ratio) { 21 | if (input.is_cuda()) { 22 | #ifdef WITH_CUDA 23 | return ROIAlign_forward_cuda(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 24 | #else 25 | AT_ERROR("Not compiled with GPU support"); 26 | #endif 27 | } 28 | return ROIAlign_forward_cpu(input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 29 | } 30 | 31 | at::Tensor ROIAlign_backward(const at::Tensor& grad, 32 | const at::Tensor& rois, 33 | const float spatial_scale, 34 | const int pooled_height, 35 | const int pooled_width, 36 | const int batch_size, 37 | const int channels, 38 | const int height, 39 | const int width, 40 | const int sampling_ratio) { 41 | if (grad.is_cuda()) { 42 | #ifdef WITH_CUDA 43 | return ROIAlign_backward_cuda(grad, rois, spatial_scale, pooled_height, pooled_width, batch_size, channels, height, width, sampling_ratio); 44 | #else 45 | AT_ERROR("Not compiled with GPU support"); 46 | #endif 47 | } 48 | AT_ERROR("Not implemented on the CPU"); 49 | } 50 | 51 | -------------------------------------------------------------------------------- /wetectron/modeling/rpn/utils.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | #
Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | """ 7 | Utility functions manipulating the prediction layers 8 | """ 9 | 10 | from ..utils import cat 11 | 12 | import torch 13 | 14 | def permute_and_flatten(layer, N, A, C, H, W): 15 | layer = layer.view(N, -1, C, H, W) 16 | layer = layer.permute(0, 3, 4, 1, 2) 17 | layer = layer.reshape(N, -1, C) 18 | return layer 19 | 20 | 21 | def concat_box_prediction_layers(box_cls, box_regression): 22 | box_cls_flattened = [] 23 | box_regression_flattened = [] 24 | # for each feature level, permute the outputs to put them in the 25 | # same format as the labels. Note that the labels are computed for 26 | # all feature levels concatenated, so we keep the same representation 27 | # for the objectness and the box_regression 28 | for box_cls_per_level, box_regression_per_level in zip( 29 | box_cls, box_regression 30 | ): 31 | N, AxC, H, W = box_cls_per_level.shape 32 | Ax4 = box_regression_per_level.shape[1] 33 | A = Ax4 // 4 34 | C = AxC // A 35 | box_cls_per_level = permute_and_flatten( 36 | box_cls_per_level, N, A, C, H, W 37 | ) 38 | box_cls_flattened.append(box_cls_per_level) 39 | 40 | box_regression_per_level = permute_and_flatten( 41 | box_regression_per_level, N, A, 4, H, W 42 | ) 43 | box_regression_flattened.append(box_regression_per_level) 44 | # concatenate on the first dimension (representing the feature levels), to 45 | # take into account the way the labels were generated (with all feature maps 46 | # being concatenated as well) 47 | box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C) 48 | box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4) 49 | return box_cls, box_regression 50 | -------------------------------------------------------------------------------- /wetectron/utils/logger.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 12 | import logging 13 | import os 14 | import sys 15 | 16 | import time 17 | from tqdm import tqdm 18 | import io 19 | 20 | class TqdmToLogger(io.StringIO): 21 | """ 22 | Output stream for TQDM which will output to the logger module instead of 23 | stdout.
24 | """ 25 | logger = None 26 | level = None 27 | buf = '' 28 | def __init__(self,logger,level=None): 29 | super(TqdmToLogger, self).__init__() 30 | self.logger = logger 31 | self.level = level or logging.INFO 32 | def write(self,buf): 33 | self.buf = buf.strip('\r\n\t ') 34 | def flush(self): 35 | self.logger.log(self.level, self.buf) 36 | 37 | 38 | def setup_logger(name, save_dir, distributed_rank, filename="log.txt"): 39 | logging.basicConfig() 40 | logger = logging.getLogger(name) 41 | logger.setLevel(logging.DEBUG) 42 | list(map(logger.removeHandler, logger.handlers[:])) 43 | list(map(logger.root.removeHandler, logger.root.handlers[:])) 44 | # don't log results for the non-master process 45 | if distributed_rank > 0: 46 | return logger 47 | ch = logging.StreamHandler(stream=sys.stdout) 48 | ch.setLevel(logging.DEBUG) 49 | formatter = logging.Formatter("%(levelname)s: %(name)s %(filename)s.%(lineno)4d: %(message)s") 50 | ch.setFormatter(formatter) 51 | logger.addHandler(ch) 52 | 53 | if save_dir: 54 | fh = logging.FileHandler(os.path.join(save_dir, filename)) 55 | fh.setLevel(logging.DEBUG) 56 | fh.setFormatter(formatter) 57 | logger.addHandler(fh) 58 | 59 | return logger 60 | -------------------------------------------------------------------------------- /wetectron/csrc/deform_pool.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #pragma once 7 | #include "cpu/vision.h" 8 | 9 | #ifdef WITH_CUDA 10 | #include "cuda/vision.h" 11 | #endif 12 | 13 | 14 | // Interface for Python 15 | void deform_psroi_pooling_forward( 16 | at::Tensor input, 17 | at::Tensor bbox, 18 | at::Tensor trans, 19 | at::Tensor out, 20 | at::Tensor top_count, 21 | const int no_trans, 22 | const float spatial_scale, 23 | const int output_dim, 24 | const int group_size, 25 | const int pooled_size, 26 | const int part_size, 27 | const int sample_per_part, 28 | const float trans_std) 29 | { 30 | if (input.is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return deform_psroi_pooling_cuda_forward( 33 | input, bbox, trans, out, top_count, 34 | no_trans, spatial_scale, output_dim, group_size, 35 | pooled_size, part_size, sample_per_part, trans_std 36 | ); 37 | #else 38 | AT_ERROR("Not compiled with GPU support"); 39 | #endif 40 | } 41 | AT_ERROR("Not implemented on the CPU"); 42 | } 43 | 44 | 45 | void deform_psroi_pooling_backward( 46 | at::Tensor out_grad, 47 | at::Tensor input, 48 | at::Tensor bbox, 49 | at::Tensor trans, 50 | at::Tensor top_count, 51 | at::Tensor input_grad, 52 | at::Tensor trans_grad, 53 | const int no_trans, 54 | const float spatial_scale, 55 | const int output_dim, 56 | const int group_size, 57 | const int pooled_size, 58 | const int part_size, 59 | const int sample_per_part, 60 | const float trans_std) 61 | { 62 | if (input.is_cuda()) { 63 | #ifdef WITH_CUDA 64 | return deform_psroi_pooling_cuda_backward( 65 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, 66 | no_trans, spatial_scale, output_dim, group_size, pooled_size, 67 | part_size, sample_per_part, trans_std 68 | ); 69 | #else 70 | AT_ERROR("Not compiled with GPU support"); 71 | #endif 72 | } 73 | AT_ERROR("Not implemented on the CPU"); 74 | } 75 | 
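--------------------------------------------------------------------------------
A usage sketch for wetectron/utils/logger.py above: setup_logger builds a DEBUG-level logger that writes to stdout and, when save_dir is given, also to save_dir/log.txt; TqdmToLogger redirects tqdm progress bars into that logger via its write/flush methods. A minimal example, assuming the output directory already exists; the logger name and loop body are illustrative only:

import logging
from tqdm import tqdm
from wetectron.utils.logger import setup_logger, TqdmToLogger

# rank 0 so handlers are attached; non-master ranks get a silent logger
logger = setup_logger("maskrcnn_benchmark.demo", "outputs/demo", 0)
tqdm_out = TqdmToLogger(logger, level=logging.INFO)

# tqdm writes its progress line through tqdm_out, so it lands in log.txt
# rather than raw stdout; mininterval throttles how often it is logged.
for _ in tqdm(range(1000), file=tqdm_out, mininterval=5):
    pass  # per-image work would go here
--------------------------------------------------------------------------------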
-------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/keypoint_head/roi_keypoint_feature_extractors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from wetectron.modeling import registry 9 | from wetectron.modeling.poolers import Pooler 10 | 11 | from wetectron.layers import Conv2d 12 | 13 | 14 | @registry.ROI_KEYPOINT_FEATURE_EXTRACTORS.register("KeypointRCNNFeatureExtractor") 15 | class KeypointRCNNFeatureExtractor(nn.Module): 16 | def __init__(self, cfg, in_channels): 17 | super(KeypointRCNNFeatureExtractor, self).__init__() 18 | 19 | resolution = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_RESOLUTION 20 | scales = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SCALES 21 | sampling_ratio = cfg.MODEL.ROI_KEYPOINT_HEAD.POOLER_SAMPLING_RATIO 22 | pooler = Pooler( 23 | output_size=(resolution, resolution), 24 | scales=scales, 25 | sampling_ratio=sampling_ratio, 26 | ) 27 | self.pooler = pooler 28 | 29 | input_features = in_channels 30 | layers = cfg.MODEL.ROI_KEYPOINT_HEAD.CONV_LAYERS 31 | next_feature = input_features 32 | self.blocks = [] 33 | for layer_idx, layer_features in enumerate(layers, 1): 34 | layer_name = "conv_fcn{}".format(layer_idx) 35 | module = Conv2d(next_feature, layer_features, 3, stride=1, padding=1) 36 | nn.init.kaiming_normal_(module.weight, mode="fan_out", nonlinearity="relu") 37 | nn.init.constant_(module.bias, 0) 38 | self.add_module(layer_name, module) 39 | next_feature = layer_features 40 | self.blocks.append(layer_name) 41 | self.out_channels = layer_features 42 | 43 | def forward(self, x, proposals): 44 | x = self.pooler(x, proposals) 45 | for layer_name in self.blocks: 46 | x = F.relu(getattr(self, layer_name)(x)) 47 | return x 48 | 49 | 50 | def make_roi_keypoint_feature_extractor(cfg, in_channels): 51 | func = registry.ROI_KEYPOINT_FEATURE_EXTRACTORS[ 52 | cfg.MODEL.ROI_KEYPOINT_HEAD.FEATURE_EXTRACTOR 53 | ] 54 | return func(cfg, in_channels) 55 | -------------------------------------------------------------------------------- /wetectron/layers/roi_pool.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | import torch 7 | from torch import nn 8 | from torch.autograd import Function 9 | from torch.autograd.function import once_differentiable 10 | from torch.nn.modules.utils import _pair 11 | 12 | from wetectron import _C 13 | from apex import amp 14 | 15 | class _ROIPool(Function): 16 | @staticmethod 17 | def forward(ctx, input, roi, output_size, spatial_scale): 18 | ctx.output_size = _pair(output_size) 19 | ctx.spatial_scale = spatial_scale 20 | ctx.input_shape = input.size() 21 | output, argmax = _C.roi_pool_forward( 22 | input, roi, spatial_scale, output_size[0], output_size[1] 23 | ) 24 | ctx.save_for_backward(input, roi, argmax) 25 | return output 26 | 27 | @staticmethod 28 | @once_differentiable 29 | def backward(ctx, grad_output): 30 | input, rois, argmax = ctx.saved_tensors 31 | output_size = ctx.output_size 32 | spatial_scale = ctx.spatial_scale 33 | bs, ch, h, w = ctx.input_shape 34 | grad_input = _C.roi_pool_backward( 35 | grad_output, 36 | input, 37 | rois, 38 | argmax, 39 | spatial_scale, 40 | output_size[0], 41 | output_size[1], 42 | bs, 43 | ch, 44 | h, 45 | w, 46 | ) 47 | return grad_input, None, None, None 48 | 49 | 50 | roi_pool = _ROIPool.apply 51 | 52 | 53 | class ROIPool(nn.Module): 54 | def __init__(self, output_size, spatial_scale): 55 | super(ROIPool, self).__init__() 56 | self.output_size = output_size 57 | self.spatial_scale = spatial_scale 58 | 59 | @amp.float_function 60 | def forward(self, input, rois): 61 | return roi_pool(input, rois, self.output_size, self.spatial_scale) 62 | 63 | def __repr__(self): 64 | tmpstr = self.__class__.__name__ + "(" 65 | tmpstr += "output_size=" + str(self.output_size) 66 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 67 | tmpstr += ")" 68 | return tmpstr 69 | -------------------------------------------------------------------------------- /wetectron/solver/lr_scheduler.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | from bisect import bisect_right 11 | 12 | import torch 13 | 14 | 15 | # FIXME ideally this would be achieved with a CombinedLRScheduler, 16 | # separating MultiStepLR with WarmupLR 17 | # but the current LRScheduler design doesn't allow it 18 | class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler): 19 | def __init__( 20 | self, 21 | optimizer, 22 | milestones, 23 | gamma=0.1, 24 | warmup_factor=1.0 / 3, 25 | warmup_iters=500, 26 | warmup_method="linear", 27 | last_epoch=-1, 28 | ): 29 | if not list(milestones) == sorted(milestones): 30 | raise ValueError( 31 | "Milestones should be a list of" " increasing integers. 
Got {}".format(milestones) 32 | 33 | ) 34 | 35 | if warmup_method not in ("constant", "linear"): 36 | raise ValueError( 37 | "Only 'constant' or 'linear' warmup_method accepted, " 38 | "got {}".format(warmup_method) 39 | ) 40 | self.milestones = milestones 41 | self.gamma = gamma 42 | self.warmup_factor = warmup_factor 43 | self.warmup_iters = warmup_iters 44 | self.warmup_method = warmup_method 45 | super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch) 46 | 47 | def get_lr(self): 48 | warmup_factor = 1 49 | if self.last_epoch < self.warmup_iters: 50 | if self.warmup_method == "constant": 51 | warmup_factor = self.warmup_factor 52 | elif self.warmup_method == "linear": 53 | alpha = float(self.last_epoch) / self.warmup_iters 54 | warmup_factor = self.warmup_factor * (1 - alpha) + alpha 55 | return [ 56 | base_lr 57 | * warmup_factor 58 | * self.gamma ** bisect_right(self.milestones, self.last_epoch) 59 | for base_lr in self.base_lrs 60 | ] 61 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 3 | # Nvidia Source Code License-NC 4 | # -------------------------------------------------------- 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #!/usr/bin/env python 7 | 8 | import glob 9 | import os 10 | 11 | import torch 12 | from setuptools import find_packages 13 | from setuptools import setup 14 | from torch.utils.cpp_extension import CUDA_HOME 15 | from torch.utils.cpp_extension import CppExtension 16 | from torch.utils.cpp_extension import CUDAExtension 17 | 18 | requirements = ["torch", "torchvision"] 19 | 20 | 21 | def get_extensions(): 22 | this_dir = os.path.dirname(os.path.abspath(__file__)) 23 | extensions_dir = os.path.join(this_dir, "wetectron", "csrc") 24 | 25 | main_file = glob.glob(os.path.join(extensions_dir, "*.cpp")) 26 | source_cpu = glob.glob(os.path.join(extensions_dir, "cpu", "*.cpp")) 27 | source_cuda = glob.glob(os.path.join(extensions_dir, "cuda", "*.cu")) 28 | 29 | sources = main_file + source_cpu 30 | extension = CppExtension 31 | 32 | extra_compile_args = {"cxx": []} 33 | define_macros = [] 34 | 35 | if (torch.cuda.is_available() and CUDA_HOME is not None) or os.getenv("FORCE_CUDA", "0") == "1": 36 | extension = CUDAExtension 37 | sources += source_cuda 38 | define_macros += [("WITH_CUDA", None)] 39 | extra_compile_args["nvcc"] = [ 40 | "-DCUDA_HAS_FP16=1", 41 | "-D__CUDA_NO_HALF_OPERATORS__", 42 | "-D__CUDA_NO_HALF_CONVERSIONS__", 43 | "-D__CUDA_NO_HALF2_OPERATORS__", 44 | ] 45 | 46 | sources = [os.path.join(extensions_dir, s) for s in sources] 47 | 48 | include_dirs = [extensions_dir] 49 | 50 | ext_modules = [ 51 | extension( 52 | "wetectron._C", 53 | sources, 54 | include_dirs=include_dirs, 55 | define_macros=define_macros, 56 | extra_compile_args=extra_compile_args, 57 | ) 58 | ] 59 | 60 | return ext_modules 61 | 62 | 63 | setup( 64 | name="wetectron", 65 | version="1.0", 66 | author="jason718", 67 | url="https://github.com/nvlabs/wetectron/", 68 | description="weakly-supervised object detection in pytorch", 69 | packages=find_packages(exclude=("configs", "tests",)), 70 | ext_modules=get_extensions(), 71 | cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension}, 72 | ) 73 | --------------------------------------------------------------------------------
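--------------------------------------------------------------------------------
A usage sketch for wetectron/solver/lr_scheduler.py above: WarmupMultiStepLR scales the base learning rate by a warmup factor (ramping linearly to 1 over warmup_iters iterations, or held constant) and then decays it by gamma at each milestone; it is stepped once per iteration, matching the SOLVER.MAX_ITER semantics of the configs. The toy model below is an assumption for illustration:

import torch
from wetectron.solver.lr_scheduler import WarmupMultiStepLR

model = torch.nn.Linear(10, 2)  # stand-in for the detector
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Values mirror SOLVER.STEPS and WARMUP_ITERS in the VOC07 config above.
scheduler = WarmupMultiStepLR(optimizer, milestones=[20000, 26700], gamma=0.1,
                              warmup_factor=1.0 / 3, warmup_iters=200)

for it in range(300):
    optimizer.step()
    scheduler.step()  # per-iteration stepping, not per-epoch
# After iteration 200 the lr is back at the base 0.01 until the first milestone.
--------------------------------------------------------------------------------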
/wetectron/modeling/roi_heads/keypoint_head/keypoint_head.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | import torch 6 | 7 | from .roi_keypoint_feature_extractors import make_roi_keypoint_feature_extractor 8 | from .roi_keypoint_predictors import make_roi_keypoint_predictor 9 | from .inference import make_roi_keypoint_post_processor 10 | from .loss import make_roi_keypoint_loss_evaluator 11 | 12 | 13 | class ROIKeypointHead(torch.nn.Module): 14 | def __init__(self, cfg, in_channels): 15 | super(ROIKeypointHead, self).__init__() 16 | self.cfg = cfg.clone() 17 | self.feature_extractor = make_roi_keypoint_feature_extractor(cfg, in_channels) 18 | self.predictor = make_roi_keypoint_predictor( 19 | cfg, self.feature_extractor.out_channels) 20 | self.post_processor = make_roi_keypoint_post_processor(cfg) 21 | self.loss_evaluator = make_roi_keypoint_loss_evaluator(cfg) 22 | 23 | def forward(self, features, proposals, targets=None): 24 | """ 25 | Arguments: 26 | features (list[Tensor]): feature-maps from possibly several levels 27 | proposals (list[BoxList]): proposal boxes 28 | targets (list[BoxList], optional): the ground-truth targets. 29 | 30 | Returns: 31 | x (Tensor): the result of the feature extractor 32 | proposals (list[BoxList]): during training, the original proposals 33 | are returned. During testing, the predicted boxlists are returned 34 | with the `keypoints` field set 35 | losses (dict[Tensor]): During training, returns the losses for the 36 | head. During testing, returns an empty dict. 37 | """ 38 | if self.training: 39 | with torch.no_grad(): 40 | proposals = self.loss_evaluator.subsample(proposals, targets) 41 | 42 | x = self.feature_extractor(features, proposals) 43 | kp_logits = self.predictor(x) 44 | 45 | if not self.training: 46 | result = self.post_processor(kp_logits, proposals) 47 | return x, result, {} 48 | 49 | loss_kp = self.loss_evaluator(proposals, kp_logits) 50 | 51 | return x, proposals, dict(loss_kp=loss_kp) 52 | 53 | 54 | def build_roi_keypoint_head(cfg, in_channels): 55 | return ROIKeypointHead(cfg, in_channels) 56 | -------------------------------------------------------------------------------- /wetectron/layers/roi_align.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
6 | import torch 7 | from torch import nn 8 | from torch.autograd import Function 9 | from torch.autograd.function import once_differentiable 10 | from torch.nn.modules.utils import _pair 11 | 12 | from wetectron import _C 13 | from apex import amp 14 | 15 | class _ROIAlign(Function): 16 | @staticmethod 17 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 18 | ctx.save_for_backward(roi) 19 | ctx.output_size = _pair(output_size) 20 | ctx.spatial_scale = spatial_scale 21 | ctx.sampling_ratio = sampling_ratio 22 | ctx.input_shape = input.size() 23 | output = _C.roi_align_forward( 24 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 25 | ) 26 | return output 27 | 28 | @staticmethod 29 | @once_differentiable 30 | def backward(ctx, grad_output): 31 | rois, = ctx.saved_tensors 32 | output_size = ctx.output_size 33 | spatial_scale = ctx.spatial_scale 34 | sampling_ratio = ctx.sampling_ratio 35 | bs, ch, h, w = ctx.input_shape 36 | grad_input = _C.roi_align_backward( 37 | grad_output, 38 | rois, 39 | spatial_scale, 40 | output_size[0], 41 | output_size[1], 42 | bs, 43 | ch, 44 | h, 45 | w, 46 | sampling_ratio, 47 | ) 48 | return grad_input, None, None, None, None 49 | 50 | 51 | roi_align = _ROIAlign.apply 52 | 53 | class ROIAlign(nn.Module): 54 | def __init__(self, output_size, spatial_scale, sampling_ratio): 55 | super(ROIAlign, self).__init__() 56 | self.output_size = output_size 57 | self.spatial_scale = spatial_scale 58 | self.sampling_ratio = sampling_ratio 59 | 60 | @amp.float_function 61 | def forward(self, input, rois): 62 | return roi_align( 63 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 64 | ) 65 | 66 | def __repr__(self): 67 | tmpstr = self.__class__.__name__ + "(" 68 | tmpstr += "output_size=" + str(self.output_size) 69 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 70 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 71 | tmpstr += ")" 72 | return tmpstr 73 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/mask_head/roi_mask_predictors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | from torch import nn 7 | from torch.nn import functional as F 8 | 9 | from wetectron.layers import Conv2d 10 | from wetectron.layers import ConvTranspose2d 11 | from wetectron.modeling import registry 12 | 13 | 14 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNC4Predictor") 15 | class MaskRCNNC4Predictor(nn.Module): 16 | def __init__(self, cfg, in_channels): 17 | super(MaskRCNNC4Predictor, self).__init__() 18 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 19 | dim_reduced = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS[-1] 20 | num_inputs = in_channels 21 | 22 | self.conv5_mask = ConvTranspose2d(num_inputs, dim_reduced, 2, 2, 0) 23 | self.mask_fcn_logits = Conv2d(dim_reduced, num_classes, 1, 1, 0) 24 | 25 | for name, param in self.named_parameters(): 26 | if "bias" in name: 27 | nn.init.constant_(param, 0) 28 | elif "weight" in name: 29 | # Caffe2 implementation uses MSRAFill, which in fact 30 | # corresponds to kaiming_normal_ in PyTorch 31 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 32 | 33 | def forward(self, x): 34 | x = F.relu(self.conv5_mask(x)) 35 | return self.mask_fcn_logits(x) 36 | 37 | 38 | @registry.ROI_MASK_PREDICTOR.register("MaskRCNNConv1x1Predictor") 39 | class MaskRCNNConv1x1Predictor(nn.Module): 40 | def __init__(self, cfg, in_channels): 41 | super(MaskRCNNConv1x1Predictor, self).__init__() 42 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 43 | num_inputs = in_channels 44 | 45 | self.mask_fcn_logits = Conv2d(num_inputs, num_classes, 1, 1, 0) 46 | 47 | for name, param in self.named_parameters(): 48 | if "bias" in name: 49 | nn.init.constant_(param, 0) 50 | elif "weight" in name: 51 | # Caffe2 implementation uses MSRAFill, which in fact 52 | # corresponds to kaiming_normal_ in PyTorch 53 | nn.init.kaiming_normal_(param, mode="fan_out", nonlinearity="relu") 54 | 55 | def forward(self, x): 56 | return self.mask_fcn_logits(x) 57 | 58 | 59 | def make_roi_mask_predictor(cfg, in_channels): 60 | func = registry.ROI_MASK_PREDICTOR[cfg.MODEL.ROI_MASK_HEAD.PREDICTOR] 61 | return func(cfg, in_channels) 62 | -------------------------------------------------------------------------------- /wetectron/solver/build.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
12 | import torch 13 | 14 | from .lr_scheduler import WarmupMultiStepLR 15 | 16 | def make_optimizer(cfg, model): 17 | params = [] 18 | for key, value in model.named_parameters(): 19 | if not value.requires_grad: 20 | continue 21 | lr = cfg.SOLVER.BASE_LR 22 | weight_decay = cfg.SOLVER.WEIGHT_DECAY 23 | if "bias" in key: 24 | lr = cfg.SOLVER.BASE_LR * cfg.SOLVER.BIAS_LR_FACTOR 25 | weight_decay = cfg.SOLVER.WEIGHT_DECAY_BIAS 26 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 27 | 28 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER.MOMENTUM) 29 | 30 | return optimizer 31 | 32 | def make_cdb_optimizer(cfg, model): 33 | params = [] 34 | for key, value in model.named_parameters(): 35 | if not value.requires_grad: 36 | continue 37 | lr = cfg.SOLVER_CDB.BASE_LR 38 | weight_decay = cfg.SOLVER_CDB.WEIGHT_DECAY 39 | if "bias" in key: 40 | lr = cfg.SOLVER_CDB.BASE_LR * cfg.SOLVER_CDB.BIAS_LR_FACTOR 41 | weight_decay = cfg.SOLVER_CDB.WEIGHT_DECAY_BIAS 42 | params += [{"params": [value], "lr": lr, "weight_decay": weight_decay}] 43 | 44 | optimizer = torch.optim.SGD(params, lr, momentum=cfg.SOLVER_CDB.MOMENTUM) 45 | 46 | return optimizer 47 | 48 | def make_lr_scheduler(cfg, optimizer): 49 | return WarmupMultiStepLR( 50 | optimizer, 51 | cfg.SOLVER.STEPS, 52 | cfg.SOLVER.GAMMA, 53 | warmup_factor=cfg.SOLVER.WARMUP_FACTOR, 54 | warmup_iters=cfg.SOLVER.WARMUP_ITERS, 55 | warmup_method=cfg.SOLVER.WARMUP_METHOD, 56 | last_epoch=cfg.SOLVER.LAST_EPOCH, 57 | ) 58 | 59 | def make_lr_cdb_scheduler(cfg, optimizer): 60 | return WarmupMultiStepLR( 61 | optimizer, 62 | cfg.SOLVER_CDB.STEPS, 63 | cfg.SOLVER_CDB.GAMMA, 64 | warmup_factor=cfg.SOLVER_CDB.WARMUP_FACTOR, 65 | warmup_iters=cfg.SOLVER_CDB.WARMUP_ITERS, 66 | warmup_method=cfg.SOLVER_CDB.WARMUP_METHOD, 67 | last_epoch=cfg.SOLVER.LAST_EPOCH, 68 | ) 69 | -------------------------------------------------------------------------------- /wetectron/data/datasets/concat_dataset.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
8 | import bisect 9 | 10 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 11 | from wetectron.data import datasets 12 | 13 | class ConcatDataset(_ConcatDataset): 14 | """ 15 | Same as torch.utils.data.dataset.ConcatDataset, but exposes an extra 16 | method for querying the sizes of the image 17 | """ 18 | 19 | def __getitem__(self, idx): 20 | dataset_idx, sample_idx = self.get_idxs(idx) 21 | img, target, rois, _ = self.datasets[dataset_idx][sample_idx] 22 | return img, target, rois, idx 23 | 24 | def get_categories(self): 25 | return self.datasets[0].get_categories() 26 | 27 | def get_idxs(self, idx): 28 | dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx) 29 | if dataset_idx == 0: 30 | sample_idx = idx 31 | else: 32 | sample_idx = idx - self.cumulative_sizes[dataset_idx - 1] 33 | return dataset_idx, sample_idx 34 | 35 | def get_img_info(self, idx): 36 | dataset_idx, sample_idx = self.get_idxs(idx) 37 | return self.datasets[dataset_idx].get_img_info(sample_idx) 38 | 39 | def get_active_images(self): 40 | return self.datasets[0].get_active_images() 41 | 42 | def is_active(self, idx): 43 | dataset_idx, sample_idx = self.get_idxs(idx) 44 | return self.datasets[dataset_idx].is_active(sample_idx) 45 | 46 | def get_active_sampling_weight(self, idx): 47 | dataset_idx, sample_idx = self.get_idxs(idx) 48 | return self.datasets[dataset_idx].get_active_sampling_weight(sample_idx) 49 | 50 | def get_weak_instance_weight(self, idx): 51 | dataset_idx, sample_idx = self.get_idxs(idx) 52 | return self.datasets[dataset_idx].get_weak_instance_weight(sample_idx) 53 | 54 | def has_pseudo_gt(self, idx): 55 | dataset_idx, sample_idx = self.get_idxs(idx) 56 | return self.datasets[dataset_idx].has_pseudo_gt(sample_idx) 57 | 58 | # Methods that only apply on a ConcatDataset of datasets.PascalVOCDataset 59 | def get_groundtruth(self, idx): 60 | dataset_idx, sample_idx = self.get_idxs(idx) 61 | return self.datasets[dataset_idx].get_groundtruth(sample_idx) 62 | 63 | def map_class_id_to_class_name(self, idx): 64 | dataset_idx, sample_idx = self.get_idxs(idx) 65 | return self.datasets[dataset_idx].map_class_id_to_class_name(sample_idx) -------------------------------------------------------------------------------- /wetectron/layers/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | import torch 6 | from torch import nn 7 | from torch.autograd import Function 8 | from torch.autograd.function import once_differentiable 9 | 10 | from wetectron import _C 11 | 12 | # TODO: Use JIT to replace CUDA implementation in the future. 
13 | class _SigmoidFocalLoss(Function): 14 | @staticmethod 15 | def forward(ctx, logits, targets, gamma, alpha): 16 | ctx.save_for_backward(logits, targets) 17 | num_classes = logits.shape[1] 18 | ctx.num_classes = num_classes 19 | ctx.gamma = gamma 20 | ctx.alpha = alpha 21 | 22 | losses = _C.sigmoid_focalloss_forward( 23 | logits, targets, num_classes, gamma, alpha 24 | ) 25 | return losses 26 | 27 | @staticmethod 28 | @once_differentiable 29 | def backward(ctx, d_loss): 30 | logits, targets = ctx.saved_tensors 31 | num_classes = ctx.num_classes 32 | gamma = ctx.gamma 33 | alpha = ctx.alpha 34 | d_loss = d_loss.contiguous() 35 | d_logits = _C.sigmoid_focalloss_backward( 36 | logits, targets, d_loss, num_classes, gamma, alpha 37 | ) 38 | return d_logits, None, None, None, None 39 | 40 | 41 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 42 | 43 | 44 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha): 45 | num_classes = logits.shape[1] 46 | gamma = gamma[0] 47 | alpha = alpha[0] 48 | dtype = targets.dtype 49 | device = targets.device 50 | class_range = torch.arange(1, num_classes+1, dtype=dtype, device=device).unsqueeze(0) 51 | 52 | t = targets.unsqueeze(1) 53 | p = torch.sigmoid(logits) 54 | term1 = (1 - p) ** gamma * torch.log(p) 55 | term2 = p ** gamma * torch.log(1 - p) 56 | return -(t == class_range).float() * term1 * alpha - ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 57 | 58 | 59 | class SigmoidFocalLoss(nn.Module): 60 | def __init__(self, gamma, alpha): 61 | super(SigmoidFocalLoss, self).__init__() 62 | self.gamma = gamma 63 | self.alpha = alpha 64 | 65 | def forward(self, logits, targets): 66 | device = logits.device 67 | if logits.is_cuda: 68 | loss_func = sigmoid_focal_loss_cuda 69 | else: 70 | loss_func = sigmoid_focal_loss_cpu 71 | 72 | loss = loss_func(logits, targets, self.gamma, self.alpha) 73 | return loss.sum() 74 | 75 | def __repr__(self): 76 | tmpstr = self.__class__.__name__ + "(" 77 | tmpstr += "gamma=" + str(self.gamma) 78 | tmpstr += ", alpha=" + str(self.alpha) 79 | tmpstr += ")" 80 | return tmpstr 81 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/box_head/roi_box_predictors.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
6 | from wetectron.modeling import registry 7 | from torch import nn 8 | 9 | 10 | @registry.ROI_BOX_PREDICTOR.register("FastRCNNPredictor") 11 | class FastRCNNPredictor(nn.Module): 12 | def __init__(self, config, in_channels): 13 | super(FastRCNNPredictor, self).__init__() 14 | assert in_channels is not None 15 | 16 | num_inputs = in_channels 17 | 18 | num_classes = config.MODEL.ROI_BOX_HEAD.NUM_CLASSES 19 | self.avgpool = nn.AdaptiveAvgPool2d(1) 20 | self.cls_score = nn.Linear(num_inputs, num_classes) 21 | num_bbox_reg_classes = 2 if config.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 22 | self.bbox_pred = nn.Linear(num_inputs, num_bbox_reg_classes * 4) 23 | 24 | nn.init.normal_(self.cls_score.weight, mean=0, std=0.01) 25 | nn.init.constant_(self.cls_score.bias, 0) 26 | 27 | nn.init.normal_(self.bbox_pred.weight, mean=0, std=0.001) 28 | nn.init.constant_(self.bbox_pred.bias, 0) 29 | 30 | def forward(self, x): 31 | if x.dim() == 4: 32 | x = self.avgpool(x) 33 | x = x.view(x.size(0), -1) 34 | assert x.dim() == 2 35 | # x = self.avgpool(x) 36 | # x = x.view(x.size(0), -1) 37 | cls_logit = self.cls_score(x) 38 | bbox_pred = self.bbox_pred(x) 39 | return cls_logit, bbox_pred 40 | 41 | 42 | @registry.ROI_BOX_PREDICTOR.register("FPNPredictor") 43 | class FPNPredictor(nn.Module): 44 | def __init__(self, cfg, in_channels): 45 | super(FPNPredictor, self).__init__() 46 | num_classes = cfg.MODEL.ROI_BOX_HEAD.NUM_CLASSES 47 | representation_size = in_channels 48 | 49 | self.cls_score = nn.Linear(representation_size, num_classes) 50 | num_bbox_reg_classes = 2 if cfg.MODEL.CLS_AGNOSTIC_BBOX_REG else num_classes 51 | self.bbox_pred = nn.Linear(representation_size, num_bbox_reg_classes * 4) 52 | 53 | nn.init.normal_(self.cls_score.weight, std=0.01) 54 | nn.init.normal_(self.bbox_pred.weight, std=0.001) 55 | for l in [self.cls_score, self.bbox_pred]: 56 | nn.init.constant_(l.bias, 0) 57 | 58 | def forward(self, x): 59 | if x.ndimension() == 4: 60 | assert list(x.shape[2:]) == [1, 1] 61 | x = x.view(x.size(0), -1) 62 | scores = self.cls_score(x) 63 | bbox_deltas = self.bbox_pred(x) 64 | 65 | return scores, bbox_deltas 66 | 67 | 68 | def make_roi_box_predictor(cfg, in_channels): 69 | func = registry.ROI_BOX_PREDICTOR[cfg.MODEL.ROI_BOX_HEAD.PREDICTOR] 70 | return func(cfg, in_channels) 71 | -------------------------------------------------------------------------------- /wetectron/data/transforms/build.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 12 | import torch 13 | from . 
import transforms as T 14 | 15 | _imagenet_pca = { 16 | 'eigval': torch.Tensor([0.2175, 0.0188, 0.0045]), 17 | 'eigvec': torch.Tensor([ 18 | [-0.5675, 0.7192, 0.4009], 19 | [-0.5808, -0.0045, -0.8140], 20 | [-0.5836, -0.6948, 0.4203], 21 | ]) 22 | } 23 | 24 | def build_transforms(cfg, is_train=True): 25 | if is_train: 26 | min_size = cfg.INPUT.MIN_SIZE_TRAIN 27 | max_size = cfg.INPUT.MAX_SIZE_TRAIN 28 | flip_horizontal_prob = 0.5 # cfg.INPUT.FLIP_PROB_TRAIN 29 | flip_vertical_prob = cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN 30 | brightness = cfg.INPUT.BRIGHTNESS 31 | contrast = cfg.INPUT.CONTRAST 32 | saturation = cfg.INPUT.SATURATION 33 | hue = cfg.INPUT.HUE 34 | else: 35 | min_size = cfg.INPUT.MIN_SIZE_TEST 36 | max_size = cfg.INPUT.MAX_SIZE_TEST 37 | flip_horizontal_prob = 0.0 38 | flip_vertical_prob = 0.0 39 | brightness = 0.0 40 | contrast = 0.0 41 | saturation = 0.0 42 | hue = 0.0 43 | 44 | to_bgr255 = cfg.INPUT.TO_BGR255 45 | normalize_transform = T.Normalize( 46 | mean=cfg.INPUT.PIXEL_MEAN, std=cfg.INPUT.PIXEL_STD, to_bgr255=to_bgr255 47 | ) 48 | color_jitter = T.ColorJitter( 49 | brightness=brightness, 50 | contrast=contrast, 51 | saturation=saturation, 52 | hue=hue, 53 | ) 54 | 55 | if cfg.INPUT.PCA and is_train: 56 | transform = T.Compose( 57 | [ 58 | color_jitter, 59 | T.Resize(min_size, max_size), 60 | T.RandomHorizontalFlip(flip_horizontal_prob), 61 | T.RandomVerticalFlip(flip_vertical_prob), 62 | T.ToTensor(), 63 | T.Lighting(0.1, _imagenet_pca['eigval'], _imagenet_pca['eigvec']), 64 | normalize_transform, 65 | ] 66 | ) 67 | else: 68 | transform = T.Compose( 69 | [ 70 | color_jitter, 71 | T.Resize(min_size, max_size), 72 | T.RandomHorizontalFlip(flip_horizontal_prob), 73 | T.RandomVerticalFlip(flip_vertical_prob), 74 | T.ToTensor(), 75 | normalize_transform, 76 | ] 77 | ) 78 | 79 | return transform 80 | -------------------------------------------------------------------------------- /wetectron/csrc/cpu/nms_cpu.cpp: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
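// The kernel below implements greedy non-maximum suppression: boxes are
// visited in descending score order, and any lower-scored box whose overlap
// with a kept box reaches the threshold is suppressed, where
//
//     IoU(a, b) = area(a ∩ b) / (area(a) + area(b) - area(a ∩ b)).
//
// The "+ 1" terms in the width/height computations follow the legacy
// integer-pixel convention in which a box [x1, x2] spans x2 - x1 + 1 pixels.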
6 |  #include "cpu/vision.h"
7 |  
8 |  
9 |  template <typename scalar_t>
10 | at::Tensor nms_cpu_kernel(const at::Tensor& dets,
11 |                           const at::Tensor& scores,
12 |                           const float threshold) {
13 |   AT_ASSERTM(!dets.is_cuda(), "dets must be a CPU tensor");
14 |   AT_ASSERTM(!scores.is_cuda(), "scores must be a CPU tensor");
15 |   AT_ASSERTM(dets.scalar_type() == scores.scalar_type(), "dets should have the same type as scores");
16 | 
17 |   if (dets.numel() == 0) {
18 |     return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU));
19 |   }
20 | 
21 |   auto x1_t = dets.select(1, 0).contiguous();
22 |   auto y1_t = dets.select(1, 1).contiguous();
23 |   auto x2_t = dets.select(1, 2).contiguous();
24 |   auto y2_t = dets.select(1, 3).contiguous();
25 | 
26 |   at::Tensor areas_t = (x2_t - x1_t + 1) * (y2_t - y1_t + 1);
27 | 
28 |   auto order_t = std::get<1>(scores.sort(0, /* descending=*/true));
29 | 
30 |   auto ndets = dets.size(0);
31 |   at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte).device(at::kCPU));
32 | 
33 |   auto suppressed = suppressed_t.data_ptr<uint8_t>();
34 |   auto order = order_t.data_ptr<int64_t>();
35 |   auto x1 = x1_t.data_ptr<scalar_t>();
36 |   auto y1 = y1_t.data_ptr<scalar_t>();
37 |   auto x2 = x2_t.data_ptr<scalar_t>();
38 |   auto y2 = y2_t.data_ptr<scalar_t>();
39 |   auto areas = areas_t.data_ptr<scalar_t>();
40 | 
41 |   for (int64_t _i = 0; _i < ndets; _i++) {
42 |     auto i = order[_i];
43 |     if (suppressed[i] == 1)
44 |       continue;
45 |     auto ix1 = x1[i];
46 |     auto iy1 = y1[i];
47 |     auto ix2 = x2[i];
48 |     auto iy2 = y2[i];
49 |     auto iarea = areas[i];
50 | 
51 |     for (int64_t _j = _i + 1; _j < ndets; _j++) {
52 |       auto j = order[_j];
53 |       if (suppressed[j] == 1)
54 |         continue;
55 |       auto xx1 = std::max(ix1, x1[j]);
56 |       auto yy1 = std::max(iy1, y1[j]);
57 |       auto xx2 = std::min(ix2, x2[j]);
58 |       auto yy2 = std::min(iy2, y2[j]);
59 | 
60 |       auto w = std::max(static_cast<scalar_t>(0), xx2 - xx1 + 1);
61 |       auto h = std::max(static_cast<scalar_t>(0), yy2 - yy1 + 1);
62 |       auto inter = w * h;
63 |       auto ovr = inter / (iarea + areas[j] - inter);
64 |       if (ovr >= threshold)
65 |         suppressed[j] = 1;
66 |     }
67 |   }
68 |   return at::nonzero(suppressed_t == 0).squeeze(1);
69 | }
70 | 
71 | at::Tensor nms_cpu(const at::Tensor& dets,
72 |                    const at::Tensor& scores,
73 |                    const float threshold) {
74 |   at::Tensor result;
75 |   AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
76 |     result = nms_cpu_kernel<scalar_t>(dets, scores, threshold);
77 |   });
78 |   return result;
79 | }
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/mask_head/roi_mask_feature_extractors.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
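# A note on the FPN extractor defined below: it pools each proposal to a fixed
# resolution and then applies a stack of 3x3 convolutions named mask_fcn1,
# mask_fcn2, ..., each followed by ReLU. For example (an illustrative config
# value, not a prescribed one), cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS == (256,
# 256, 256, 256) yields four such convolutions with 256 output channels each.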
6 |  from torch import nn
7 |  from torch.nn import functional as F
8 |  
9 |  from ..box_head.roi_box_feature_extractors import ResNet50Conv5ROIFeatureExtractor
10 | from wetectron.modeling import registry
11 | from wetectron.modeling.poolers import Pooler
12 | from wetectron.modeling.make_layers import make_conv3x3
13 | 
14 | 
15 | registry.ROI_MASK_FEATURE_EXTRACTORS.register(
16 |     "ResNet50Conv5ROIFeatureExtractor", ResNet50Conv5ROIFeatureExtractor
17 | )
18 | 
19 | 
20 | @registry.ROI_MASK_FEATURE_EXTRACTORS.register("MaskRCNNFPNFeatureExtractor")
21 | class MaskRCNNFPNFeatureExtractor(nn.Module):
22 |     """
23 |     Heads for FPN for classification
24 |     """
25 | 
26 |     def __init__(self, cfg, in_channels):
27 |         """
28 |         Arguments:
29 |             num_classes (int): number of output classes
30 |             input_size (int): number of channels of the input once it's flattened
31 |             representation_size (int): size of the intermediate representation
32 |         """
33 |         super(MaskRCNNFPNFeatureExtractor, self).__init__()
34 | 
35 |         resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
36 |         scales = cfg.MODEL.ROI_MASK_HEAD.POOLER_SCALES
37 |         sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
38 |         pooler = Pooler(
39 |             output_size=(resolution, resolution),
40 |             scales=scales,
41 |             sampling_ratio=sampling_ratio,
42 |         )
43 |         input_size = in_channels
44 |         self.pooler = pooler
45 | 
46 |         use_gn = cfg.MODEL.ROI_MASK_HEAD.USE_GN
47 |         layers = cfg.MODEL.ROI_MASK_HEAD.CONV_LAYERS
48 |         dilation = cfg.MODEL.ROI_MASK_HEAD.DILATION
49 | 
50 |         next_feature = input_size
51 |         self.blocks = []
52 |         for layer_idx, layer_features in enumerate(layers, 1):
53 |             layer_name = "mask_fcn{}".format(layer_idx)
54 |             module = make_conv3x3(
55 |                 next_feature, layer_features,
56 |                 dilation=dilation, stride=1, use_gn=use_gn
57 |             )
58 |             self.add_module(layer_name, module)
59 |             next_feature = layer_features
60 |             self.blocks.append(layer_name)
61 |         self.out_channels = layer_features
62 | 
63 |     def forward(self, x, proposals):
64 |         x = self.pooler(x, proposals)
65 | 
66 |         for layer_name in self.blocks:
67 |             x = F.relu(getattr(self, layer_name)(x))
68 | 
69 |         return x
70 | 
71 | 
72 | def make_roi_mask_feature_extractor(cfg, in_channels):
73 |     func = registry.ROI_MASK_FEATURE_EXTRACTORS[
74 |         cfg.MODEL.ROI_MASK_HEAD.FEATURE_EXTRACTOR
75 |     ]
76 |     return func(cfg, in_channels)
77 | 
--------------------------------------------------------------------------------
/wetectron/data/README.md:
--------------------------------------------------------------------------------
1 |  # Setting Up Datasets
2 |  This file describes how to perform training on other datasets.
3 |  
4 |  Currently, only the Pascal VOC dataset can be loaded from its original format and evaluated with Pascal-style results.
5 |  
6 |  We expect the annotations from other datasets to be converted to COCO json format, and
7 |  the output will be in COCO style (i.e. AP, AP50, AP75, APs, APm, APl for bbox and segm).
8 |  
9 |  ## Creating Symlinks for PASCAL VOC
10 | 
11 | We assume that your symlinked `datasets/voc/VOC<year>` directory has the following structure:
12 | 
13 | ```
14 | VOC<year>
15 | |_ JPEGImages
16 | |  |_ <im-1>.jpg
17 | |  |_ ...
18 | |  |_ <im-N>.jpg
19 | |_ Annotations
20 | |  |_ pascal_train<year>.json (optional)
21 | |  |_ pascal_val<year>.json (optional)
22 | |  |_ pascal_test<year>.json (optional)
23 | |  |_ <im-1>.xml
24 | |  |_ ...
25 | |  |_ <im-N>.xml
26 | |_ VOCdevkit<year>
27 | ```
28 | 
29 | Create symlinks for `voc/VOC<year>`:
30 | 
31 | ```
32 | cd ~/github/maskrcnn-benchmark
33 | mkdir -p datasets/voc/VOC<year>
34 | ln -s /path/to/VOC<year> datasets/voc/VOC<year>
35 | ```
36 | Example configuration files for PASCAL VOC can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/pascal_voc/).
37 | 
38 | ### PASCAL VOC Annotations in COCO Format
39 | To output COCO-style evaluation results, PASCAL VOC annotations in COCO json format are required and can be downloaded from [here](https://storage.googleapis.com/coco-dataset/external/PASCAL_VOC.zip)
40 | via http://cocodataset.org/#external.
41 | 
42 | ## Creating Symlinks for Cityscapes
43 | 
44 | We assume that your symlinked `datasets/cityscapes` directory has the following structure:
45 | 
46 | ```
47 | cityscapes
48 | |_ images
49 | |  |_ <im-1>.jpg
50 | |  |_ ...
51 | |  |_ <im-N>.jpg
52 | |_ annotations
53 | |  |_ instanceonly_gtFile_train.json
54 | |  |_ ...
55 | |_ raw
56 |    |_ gtFine
57 |    |_ ...
58 |    |_ README.md
59 | ```
60 | 
61 | Create symlinks for `cityscapes`:
62 | 
63 | ```
64 | cd ~/github/maskrcnn-benchmark
65 | mkdir -p datasets/cityscapes
66 | ln -s /path/to/cityscapes datasets/cityscapes
67 | ```
68 | 
69 | ### Steps to convert Cityscapes Annotations to COCO Format
70 | 1. Download gtFine_trainvaltest.zip from https://www.cityscapes-dataset.com/downloads/ (login required)
71 | 2. Extract it to /path/to/gtFine_trainvaltest
72 | ```
73 | cityscapes
74 | |_ gtFine_trainvaltest.zip
75 | |_ gtFine_trainvaltest
76 |    |_ gtFine
77 | ```
78 | 3. Run the commands below to convert the annotations
79 | 
80 | ```
81 | cd ~/github
82 | git clone https://github.com/mcordts/cityscapesScripts.git
83 | cd cityscapesScripts
84 | cp ~/github/maskrcnn-benchmark/tools/cityscapes/instances2dict_with_polygons.py cityscapesscripts/evaluation
85 | python setup.py install
86 | cd ~/github/maskrcnn-benchmark
87 | python tools/cityscapes/convert_cityscapes_to_coco.py --datadir /path/to/cityscapes --outdir /path/to/cityscapes/annotations
88 | ```
89 | 
90 | Example configuration files for Cityscapes can be found [here](https://github.com/facebookresearch/maskrcnn-benchmark/blob/master/configs/cityscapes/).
--------------------------------------------------------------------------------
/wetectron/structures/image_list.py:
--------------------------------------------------------------------------------
1 |  #------------------------------------------------------------------------------
2 |  # Code taken from https://github.com/NVlabs/wetectron
3 |  #------------------------------------------------------------------------------
4 |  
5 |  # --------------------------------------------------------
6 |  # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 |  # Nvidia Source Code License-NC
8 |  # --------------------------------------------------------
9 |  # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | from __future__ import division
11 | 
12 | import torch
13 | 
14 | 
15 | class ImageList(object):
16 |     """
17 |     Structure that holds a list of images (of possibly
18 |     varying sizes) as a single tensor.
19 |     This works by padding the images to the same size,
20 |     and storing in a field the original sizes of each image
21 |     """
22 | 
23 |     def __init__(self, tensors, image_sizes):
24 |         """
25 |         Arguments:
26 |             tensors (tensor)
27 |             image_sizes (list[tuple[int, int]])
28 |         """
29 |         self.tensors = tensors
30 |         self.image_sizes = image_sizes
31 | 
32 |     def to(self, *args, **kwargs):
33 |         cast_tensor = self.tensors.to(*args, **kwargs)
34 |         return ImageList(cast_tensor, self.image_sizes)
35 | 
36 | 
37 | def to_image_list(tensors, size_divisible=0):
38 |     """
39 |     tensors can be an ImageList, a torch.Tensor or
40 |     an iterable of Tensors. It can't be a numpy array.
41 |     When tensors is an iterable of Tensors, it pads
42 |     the Tensors with zeros so that they have the same
43 |     shape
44 |     """
45 |     if isinstance(tensors, torch.Tensor) and size_divisible > 0:
46 |         tensors = [tensors]
47 | 
48 |     if isinstance(tensors, ImageList):
49 |         return tensors
50 |     elif isinstance(tensors, torch.Tensor):
51 |         # single tensor shape can be inferred
52 |         if tensors.dim() == 3:
53 |             tensors = tensors[None]
54 |         assert tensors.dim() == 4
55 |         image_sizes = [tensor.shape[-2:] for tensor in tensors]
56 |         return ImageList(tensors, image_sizes)
57 |     elif isinstance(tensors, (tuple, list)):
58 |         max_size = tuple(max(s) for s in zip(*[img.shape for img in tensors]))
59 | 
60 |         # TODO Ideally, just remove this and let the model handle arbitrary
61 |         # input sizes
62 |         if size_divisible > 0:
63 |             import math
64 | 
65 |             stride = size_divisible
66 |             max_size = list(max_size)
67 |             max_size[1] = int(math.ceil(max_size[1] / stride) * stride)
68 |             max_size[2] = int(math.ceil(max_size[2] / stride) * stride)
69 |             max_size = tuple(max_size)
70 | 
71 |         batch_shape = (len(tensors),) + max_size
72 |         batched_imgs = tensors[0].new(*batch_shape).zero_()
73 |         for img, pad_img in zip(tensors, batched_imgs):
74 |             pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
75 | 
76 |         image_sizes = [im.shape[-2:] for im in tensors]
77 | 
78 |         return ImageList(batched_imgs, image_sizes)
79 |     else:
80 |         raise TypeError("Unsupported type for to_image_list: {}".format(type(tensors)))
--------------------------------------------------------------------------------
/wetectron/layers/dcn/deform_pool_func.py:
--------------------------------------------------------------------------------
1 |  #------------------------------------------------------------------------------
2 |  # Code taken from https://github.com/NVlabs/wetectron
3 |  #------------------------------------------------------------------------------
4 |  
5 |  import torch
6 |  from torch.autograd import Function
7 |  from torch.autograd.function import once_differentiable
8 |  
9 |  from wetectron import _C
10 | 
11 | 
12 | class DeformRoIPoolingFunction(Function):
13 | 
14 |     @staticmethod
15 |     def forward(
16 |         ctx,
17 |         data,
18 |         rois,
19 |         offset,
20 |         spatial_scale,
21 |         out_size,
22 |         out_channels,
23 |         no_trans,
24 |         group_size=1,
25 |         part_size=None,
26 |         sample_per_part=4,
27 |         trans_std=.0
28 |     ):
29 |         ctx.spatial_scale = spatial_scale
30 |         ctx.out_size = out_size
31 |         ctx.out_channels = out_channels
32 |         ctx.no_trans = no_trans
33 |         ctx.group_size = group_size
34 |         ctx.part_size = out_size if part_size is None else part_size
35 |         ctx.sample_per_part = sample_per_part
36 |         ctx.trans_std = trans_std
37 | 
38 |         assert 0.0 <= ctx.trans_std <= 1.0
39 |         if not data.is_cuda:
40 |             raise NotImplementedError
41 | 
42 |         n = rois.shape[0]
43 |         output = data.new_empty(n, out_channels, out_size, out_size)
44 | output_count = data.new_empty(n, out_channels, out_size, out_size) 45 | _C.deform_psroi_pooling_forward( 46 | data, 47 | rois, 48 | offset, 49 | output, 50 | output_count, 51 | ctx.no_trans, 52 | ctx.spatial_scale, 53 | ctx.out_channels, 54 | ctx.group_size, 55 | ctx.out_size, 56 | ctx.part_size, 57 | ctx.sample_per_part, 58 | ctx.trans_std 59 | ) 60 | 61 | if data.requires_grad or rois.requires_grad or offset.requires_grad: 62 | ctx.save_for_backward(data, rois, offset) 63 | ctx.output_count = output_count 64 | 65 | return output 66 | 67 | @staticmethod 68 | @once_differentiable 69 | def backward(ctx, grad_output): 70 | if not grad_output.is_cuda: 71 | raise NotImplementedError 72 | 73 | data, rois, offset = ctx.saved_tensors 74 | output_count = ctx.output_count 75 | grad_input = torch.zeros_like(data) 76 | grad_rois = None 77 | grad_offset = torch.zeros_like(offset) 78 | 79 | _C.deform_psroi_pooling_backward( 80 | grad_output, 81 | data, 82 | rois, 83 | offset, 84 | output_count, 85 | grad_input, 86 | grad_offset, 87 | ctx.no_trans, 88 | ctx.spatial_scale, 89 | ctx.out_channels, 90 | ctx.group_size, 91 | ctx.out_size, 92 | ctx.part_size, 93 | ctx.sample_per_part, 94 | ctx.trans_std 95 | ) 96 | return (grad_input, grad_rois, grad_offset, None, None, None, None, None, None, None, None) 97 | 98 | 99 | deform_roi_pooling = DeformRoIPoolingFunction.apply 100 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/box_head/box_head.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import torch 7 | from torch import nn 8 | 9 | from .roi_box_feature_extractors import make_roi_box_feature_extractor 10 | from .roi_box_predictors import make_roi_box_predictor 11 | from .inference import make_roi_box_post_processor 12 | from .loss import make_roi_box_loss_evaluator 13 | 14 | 15 | class ROIBoxHead(torch.nn.Module): 16 | """ 17 | Generic Box Head class. 18 | """ 19 | 20 | def __init__(self, cfg, in_channels): 21 | super(ROIBoxHead, self).__init__() 22 | self.feature_extractor = make_roi_box_feature_extractor(cfg, in_channels) 23 | self.predictor = make_roi_box_predictor( 24 | cfg, self.feature_extractor.out_channels) 25 | self.post_processor = make_roi_box_post_processor(cfg) 26 | self.loss_evaluator = make_roi_box_loss_evaluator(cfg) 27 | 28 | def forward(self, features, proposals, targets=None): 29 | """ 30 | Arguments: 31 | features (list[Tensor]): feature-maps from possibly several levels 32 | proposals (list[BoxList]): proposal boxes 33 | targets (list[BoxList], optional): the ground-truth targets. 34 | 35 | Returns: 36 | x (Tensor): the result of the feature extractor 37 | proposals (list[BoxList]): during training, the subsampled proposals 38 | are returned. During testing, the predicted boxlists are returned 39 | losses (dict[Tensor]): During training, returns the losses for the 40 | head. During testing, returns an empty dict. 
41 | """ 42 | if self.training: 43 | # Faster R-CNN subsamples during training the proposals with a fixed 44 | # positive / negative ratio 45 | with torch.no_grad(): 46 | proposals = self.loss_evaluator.subsample(proposals, targets) 47 | 48 | # extract features that will be fed to the final classifier. The 49 | # feature_extractor generally corresponds to the pooler + heads 50 | x = self.feature_extractor(features, proposals) 51 | # final classifier that converts the features into predictions 52 | class_logits, box_regression = self.predictor(x) 53 | 54 | if not self.training: 55 | result = self.post_processor((class_logits, box_regression), proposals) 56 | return x, result, {}, {} 57 | 58 | loss_classifier, loss_box_reg, accuracy_cls = self.loss_evaluator( 59 | [class_logits], [box_regression] 60 | ) 61 | 62 | return ( 63 | x, 64 | proposals, 65 | dict(loss_classifier=loss_classifier, loss_box_reg=loss_box_reg), 66 | dict(accuracy_cls=accuracy_cls) 67 | ) 68 | 69 | 70 | def build_roi_box_head(cfg, in_channels): 71 | """ 72 | Constructs a new box head. 73 | By default, uses ROIBoxHead, but if it turns out not to be enough, just register a new class 74 | and make it a parameter in the config 75 | """ 76 | return ROIBoxHead(cfg, in_channels) 77 | -------------------------------------------------------------------------------- /wetectron/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 10 | from collections import OrderedDict 11 | 12 | from torch import nn 13 | 14 | from wetectron.modeling import registry 15 | from wetectron.modeling.make_layers import conv_with_kaiming_uniform 16 | from . import fpn as fpn_module 17 | from . 
import resnet
18 | 
19 | 
20 | @registry.BACKBONES.register("R-50-C4")
21 | @registry.BACKBONES.register("R-50-C5")
22 | @registry.BACKBONES.register("R-101-C4")
23 | @registry.BACKBONES.register("R-101-C5")
24 | def build_resnet_backbone(cfg):
25 |     body = resnet.ResNet(cfg)
26 |     model = nn.Sequential(OrderedDict([("body", body)]))
27 |     model.out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
28 |     return model
29 | 
30 | 
31 | @registry.BACKBONES.register("R-50-FPN")
32 | @registry.BACKBONES.register("R-101-FPN")
33 | @registry.BACKBONES.register("R-152-FPN")
34 | def build_resnet_fpn_backbone(cfg):
35 |     body = resnet.ResNet(cfg)
36 |     in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
37 |     out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
38 |     fpn = fpn_module.FPN(
39 |         in_channels_list=[
40 |             in_channels_stage2,
41 |             in_channels_stage2 * 2,
42 |             in_channels_stage2 * 4,
43 |             in_channels_stage2 * 8,
44 |         ],
45 |         out_channels=out_channels,
46 |         conv_block=conv_with_kaiming_uniform(
47 |             cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
48 |         ),
49 |         top_blocks=fpn_module.LastLevelMaxPool(),
50 |     )
51 |     model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))
52 |     model.out_channels = out_channels
53 |     return model
54 | 
55 | 
56 | @registry.BACKBONES.register("R-50-FPN-RETINANET")
57 | @registry.BACKBONES.register("R-101-FPN-RETINANET")
58 | def build_resnet_fpn_p3p7_backbone(cfg):
59 |     body = resnet.ResNet(cfg)
60 |     in_channels_stage2 = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS
61 |     out_channels = cfg.MODEL.RESNETS.BACKBONE_OUT_CHANNELS
62 |     in_channels_p6p7 = in_channels_stage2 * 8 if cfg.MODEL.RETINANET.USE_C5 \
63 |         else out_channels
64 |     fpn = fpn_module.FPN(
65 |         in_channels_list=[
66 |             0,
67 |             in_channels_stage2 * 2,
68 |             in_channels_stage2 * 4,
69 |             in_channels_stage2 * 8,
70 |         ],
71 |         out_channels=out_channels,
72 |         conv_block=conv_with_kaiming_uniform(
73 |             cfg.MODEL.FPN.USE_GN, cfg.MODEL.FPN.USE_RELU
74 |         ),
75 |         top_blocks=fpn_module.LastLevelP6P7(in_channels_p6p7, out_channels),
76 |     )
77 |     model = nn.Sequential(OrderedDict([("body", body), ("fpn", fpn)]))
78 |     model.out_channels = out_channels
79 |     return model
80 | 
81 | 
82 | def build_backbone(cfg):
83 |     assert cfg.MODEL.BACKBONE.CONV_BODY in registry.BACKBONES, \
84 |         "cfg.MODEL.BACKBONE.CONV_BODY: {} is not registered in registry".format(
85 |             cfg.MODEL.BACKBONE.CONV_BODY
86 |         )
87 |     return registry.BACKBONES[cfg.MODEL.BACKBONE.CONV_BODY](cfg)
--------------------------------------------------------------------------------
/wetectron/utils/miscellaneous.py:
--------------------------------------------------------------------------------
1 |  #------------------------------------------------------------------------------
2 |  # Code adapted from https://github.com/NVlabs/wetectron
3 |  # by Huy V. Vo and Oriane Simeoni
4 |  # INRIA, Valeo.ai
5 |  #------------------------------------------------------------------------------
6 |  
7 |  # --------------------------------------------------------
8 |  # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
9 |  # Nvidia Source Code License-NC
10 | # --------------------------------------------------------
11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
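# A minimal sketch of how the seeding helper defined below is typically used
# at the start of a run (the call sites are illustrative, not code from this
# repository):
#
#     seed_all_rng(42)    # deterministic: seeds numpy, torch and python's random
#     seed_all_rng(None)  # non-deterministic: derives a strong seed from the
#                         # pid, the current time and os.urandom, then logs it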
12 | import errno
13 | import json
14 | import logging
15 | import os
16 | import shutil
17 | import socket
18 | import random
19 | import numpy as np
20 | import torch
21 | from datetime import datetime
22 | from pathlib import Path
23 | from .comm import is_main_process
24 | 
25 | 
26 | def get_run_name():
27 |     """ A unique name for each run """
28 |     return datetime.now().strftime(
29 |         '%b%d_%H_%M_%S') + '_' + socket.gethostname()
30 | 
31 | def copy_source_code(output_dir):
32 |     os.makedirs(output_dir)
33 |     p = Path(__file__).parents[2]
34 |     checklist = ['apex', 'build', 'configs', 'setup.py',
35 |                  'tools', 'wetectron', 'wetectron.egg-info']
36 |     except_list = ['LICENSE', 'outputs', 'README.md',
37 |                    'datasets', 'docs', 'proposal', 'notebooks']
38 |     Fs = os.listdir(p)
39 |     assert set(checklist).issubset(set(Fs))
40 |     to_copy = [el for el in Fs if el not in except_list]
41 |     # copy everything outside the exclusion list, preserving symlinks
42 |     for f in to_copy:
43 |         if Path(p,f).is_dir():
44 |             shutil.copytree(Path(p,f), Path(output_dir,f), symlinks=True)
45 |         else:
46 |             shutil.copyfile(Path(p,f), Path(output_dir,f), follow_symlinks=False)
47 | 
48 | def seed_all_rng(seed=None):
49 |     """
50 |     Set the random seed for the RNG in torch, numpy and python.
51 |     Args:
52 |         seed (int): if None, will use a strong random seed.
53 |     """
54 |     if seed is None:
55 |         seed = (
56 |             os.getpid()
57 |             + int(datetime.now().strftime("%S%f"))
58 |             + int.from_bytes(os.urandom(2), "big")
59 |         )
60 |         logger = logging.getLogger(__name__)
61 |         logger.info("Using a generated random seed {}".format(seed))
62 |     np.random.seed(seed)
63 |     torch.set_rng_state(torch.manual_seed(seed).get_state())
64 |     random.seed(seed)
65 | 
66 | def mkdir(path):
67 |     try:
68 |         os.makedirs(path)
69 |     except OSError as e:
70 |         if e.errno != errno.EEXIST:
71 |             raise
72 | 
73 | 
74 | def save_labels(dataset_list, output_dir):
75 |     if is_main_process():
76 |         logger = logging.getLogger(__name__)
77 | 
78 |         ids_to_labels = {}
79 |         for dataset in dataset_list:
80 |             if hasattr(dataset, 'categories'):
81 |                 ids_to_labels.update(dataset.categories)
82 |             else:
83 |                 logger.warning("Dataset [{}] has no categories attribute, labels.json file won't be created".format(
84 |                     dataset.__class__.__name__))
85 | 
86 |         if ids_to_labels:
87 |             labels_file = os.path.join(output_dir, 'labels.json')
88 |             logger.info("Saving labels mapping into {}".format(labels_file))
89 |             with open(labels_file, 'w') as f:
90 |                 json.dump(ids_to_labels, f, indent=2)
91 | 
92 | 
93 | def save_config(cfg, path):
94 |     if is_main_process():
95 |         with open(path, 'w') as f:
96 |             f.write(cfg.dump())
97 | 
--------------------------------------------------------------------------------
/wetectron/modeling/roi_heads/mask_head/mask_head.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
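# A small worked example for keep_only_positive_boxes() defined below (the
# labels are illustrative): if a BoxList carries labels [0, 2, 0, 1], the
# function keeps the boxes at positions 1 and 3 and also returns the boolean
# mask [False, True, False, True], which lets shared box features be
# subsampled the same way in ROIMaskHead.forward().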
6 |  import torch
7 |  from torch import nn
8 |  
9 |  from wetectron.structures.bounding_box import BoxList
10 | 
11 | from .roi_mask_feature_extractors import make_roi_mask_feature_extractor
12 | from .roi_mask_predictors import make_roi_mask_predictor
13 | from .inference import make_roi_mask_post_processor
14 | from .loss import make_roi_mask_loss_evaluator
15 | 
16 | 
17 | def keep_only_positive_boxes(boxes):
18 |     """
19 |     Given a set of BoxList containing the `labels` field,
20 |     return a set of BoxList for which `labels > 0`.
21 | 
22 |     Arguments:
23 |         boxes (list of BoxList)
24 |     """
25 |     assert isinstance(boxes, (list, tuple))
26 |     assert isinstance(boxes[0], BoxList)
27 |     assert boxes[0].has_field("labels")
28 |     positive_boxes = []
29 |     positive_inds = []
30 |     # collect, per image, the positive boxes and a boolean mask over all boxes
31 |     for boxes_per_image in boxes:
32 |         labels = boxes_per_image.get_field("labels")
33 |         inds_mask = labels > 0
34 |         inds = inds_mask.nonzero(as_tuple=False).squeeze(1)
35 |         positive_boxes.append(boxes_per_image[inds])
36 |         positive_inds.append(inds_mask)
37 |     return positive_boxes, positive_inds
38 | 
39 | 
40 | class ROIMaskHead(torch.nn.Module):
41 |     def __init__(self, cfg, in_channels):
42 |         super(ROIMaskHead, self).__init__()
43 |         self.cfg = cfg.clone()
44 |         self.feature_extractor = make_roi_mask_feature_extractor(cfg, in_channels)
45 |         self.predictor = make_roi_mask_predictor(
46 |             cfg, self.feature_extractor.out_channels)
47 |         self.post_processor = make_roi_mask_post_processor(cfg)
48 |         self.loss_evaluator = make_roi_mask_loss_evaluator(cfg)
49 | 
50 |     def forward(self, features, proposals, targets=None):
51 |         """
52 |         Arguments:
53 |             features (list[Tensor]): feature-maps from possibly several levels
54 |             proposals (list[BoxList]): proposal boxes
55 |             targets (list[BoxList], optional): the ground-truth targets.
56 | 
57 |         Returns:
58 |             x (Tensor): the result of the feature extractor
59 |             proposals (list[BoxList]): during training, the original proposals
60 |                 are returned. During testing, the predicted boxlists are returned
61 |                 with the `mask` field set
62 |             losses (dict[Tensor]): During training, returns the losses for the
63 |                 head. During testing, returns an empty dict.
64 |         """
65 | 
66 |         if self.training:
67 |             # during training, only focus on positive boxes
68 |             all_proposals = proposals
69 |             proposals, positive_inds = keep_only_positive_boxes(proposals)
70 |         if self.training and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
71 |             x = features
72 |             x = x[torch.cat(positive_inds, dim=0)]
73 |         else:
74 |             x = self.feature_extractor(features, proposals)
75 |         mask_logits = self.predictor(x)
76 | 
77 |         if not self.training:
78 |             result = self.post_processor(mask_logits, proposals)
79 |             return x, result, {}
80 | 
81 |         loss_mask = self.loss_evaluator(proposals, mask_logits, targets)
82 | 
83 |         return x, all_proposals, dict(loss_mask=loss_mask)
84 | 
85 | 
86 | def build_roi_mask_head(cfg, in_channels):
87 |     return ROIMaskHead(cfg, in_channels)
--------------------------------------------------------------------------------
/wetectron/utils/model_zoo.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # --------------------------------------------------------
6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved.
7 |  # Nvidia Source Code License-NC
8 |  # --------------------------------------------------------
9 |  # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
10 | import os
11 | import sys
12 | 
13 | try:
14 |     from torch.hub import _download_url_to_file
15 |     from torch.hub import urlparse
16 |     from torch.hub import HASH_REGEX
17 | except ImportError:
18 |     from torch.utils.model_zoo import _download_url_to_file
19 |     from torch.utils.model_zoo import urlparse
20 |     from torch.utils.model_zoo import HASH_REGEX
21 | 
22 | from wetectron.utils.comm import is_main_process
23 | from wetectron.utils.comm import synchronize
24 | 
25 | 
26 | # very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py
27 | # but with a few improvements and modifications
28 | def cache_url(url, model_dir=None, progress=True):
29 |     r"""Loads the Torch serialized object at the given URL.
30 |     If the object is already present in `model_dir`, it's deserialized and
31 |     returned. The filename part of the URL should follow the naming convention
32 |     ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
33 |     digits of the SHA256 hash of the contents of the file. The hash is used to
34 |     ensure unique names and to verify the contents of the file.
35 |     The default value of `model_dir` is ``$TORCH_HOME/models`` where
36 |     ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
37 |     overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
38 |     Args:
39 |         url (string): URL of the object to download
40 |         model_dir (string, optional): directory in which to save the object
41 |         progress (bool, optional): whether or not to display a progress bar to stderr
42 |     Example:
43 |         >>> cached_file = wetectron.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
44 |     """
45 |     if model_dir is None:
46 |         torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch"))
47 |         model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models"))
48 |     if not os.path.exists(model_dir):
49 |         os.makedirs(model_dir)
50 |     parts = urlparse(url)
51 |     filename = os.path.basename(parts.path)
52 |     if filename == "model_final.pkl" or filename == "rpn_proposals.pkl":
53 |         # workaround as pre-trained Caffe2 models from Detectron have all the same filename
54 |         # so make the full path the filename by replacing / with _
55 |         filename = parts.path.replace("/", "_")
56 |     cached_file = os.path.join(model_dir, filename)
57 |     if not os.path.exists(cached_file) and is_main_process():
58 |         sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
59 |         hash_prefix = HASH_REGEX.search(filename)
60 |         if hash_prefix is not None:
61 |             hash_prefix = hash_prefix.group(1)
62 |             # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
63 |             # which matches the hash PyTorch uses. So we skip the hash matching
64 |             # if the hash_prefix is less than 6 characters
65 |             if len(hash_prefix) < 6:
66 |                 hash_prefix = None
67 |         _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
68 |     synchronize()
69 |     return cached_file
--------------------------------------------------------------------------------
/wetectron/modeling/balanced_positive_negative_sampler.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code adapted from https://github.com/NVlabs/wetectron
3 | # by Huy V.
Vo and Oriane Simeoni
4 |  # INRIA, Valeo.ai
5 |  #------------------------------------------------------------------------------
6 |  
7 |  # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
8 |  import torch
9 |  
10 | 
11 | class BalancedPositiveNegativeSampler(object):
12 |     """
13 |     This class samples batches, ensuring that they contain a fixed proportion of positives
14 |     """
15 | 
16 |     def __init__(self, batch_size_per_image, positive_fraction):
17 |         """
18 |         Arguments:
19 |             batch_size_per_image (int): number of elements to be selected per image
20 |             positive_fraction (float): percentage of positive elements per batch
21 |         """
22 |         self.batch_size_per_image = batch_size_per_image
23 |         self.positive_fraction = positive_fraction
24 | 
25 |     def __call__(self, matched_idxs, sampling_weights=None):
26 |         """
27 |         Arguments:
28 |             matched_idxs: list of tensors containing -1, 0 or positive values.
29 |                 Each tensor corresponds to a specific image.
30 |                 -1 values are ignored, 0 are considered as negatives and > 0 as
31 |                 positives.
32 |             sampling_weights: list of tensors or None, containing sampling weights of proposals.
33 |                 The weights are only used in negative sampling.
34 | 
35 |         Returns:
36 |             pos_idx (list[tensor])
37 |             neg_idx (list[tensor])
38 | 
39 |         Returns two lists of binary masks for each image.
40 |         The first list contains the positive elements that were selected,
41 |         and the second list the negative examples.
42 |         """
43 |         pos_idx = []
44 |         neg_idx = []
45 |         if sampling_weights is None:
46 |             sampling_weights = [None] * len(matched_idxs)
47 |         for matched_idxs_per_image, sampling_weights_per_image in zip(matched_idxs, sampling_weights):
48 |             positive = torch.nonzero(matched_idxs_per_image >= 1, as_tuple=False).squeeze(1)
49 |             negative = torch.nonzero(matched_idxs_per_image == 0, as_tuple=False).squeeze(1)
50 | 
51 |             num_pos = int(self.batch_size_per_image * self.positive_fraction)
52 |             # protect against not enough positive examples
53 |             num_pos = min(positive.numel(), num_pos)
54 |             num_neg = self.batch_size_per_image - num_pos
55 |             # protect against not enough negative examples
56 |             num_neg = min(negative.numel(), num_neg)
57 | 
58 |             # randomly select positive and negative examples
59 |             perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos]
60 |             if sampling_weights_per_image is None:
61 |                 perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg]
62 |             else:
63 |                 perm2 = torch.argsort(sampling_weights_per_image[negative], descending=True)[:num_neg]
64 | 
65 |             pos_idx_per_image = positive[perm1]
66 |             neg_idx_per_image = negative[perm2]
67 | 
68 |             # create binary mask from indices
69 |             pos_idx_per_image_mask = torch.zeros_like(
70 |                 matched_idxs_per_image, dtype=torch.bool
71 |             )
72 |             neg_idx_per_image_mask = torch.zeros_like(
73 |                 matched_idxs_per_image, dtype=torch.bool
74 |             )
75 |             pos_idx_per_image_mask[pos_idx_per_image] = 1
76 |             neg_idx_per_image_mask[neg_idx_per_image] = 1
77 | 
78 |             pos_idx.append(pos_idx_per_image_mask)
79 |             neg_idx.append(neg_idx_per_image_mask)
80 | 
81 |         return pos_idx, neg_idx
--------------------------------------------------------------------------------
/wetectron/modeling/box_coder.py:
--------------------------------------------------------------------------------
1 | #------------------------------------------------------------------------------
2 | # Code taken from https://github.com/NVlabs/wetectron
3 | #------------------------------------------------------------------------------
4 | 
5 | # Copyright (c)
Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import math 7 | 8 | import torch 9 | 10 | 11 | class BoxCoder(object): 12 | """ 13 | This class encodes and decodes a set of bounding boxes into 14 | the representation used for training the regressors. 15 | """ 16 | 17 | def __init__(self, weights, bbox_xform_clip=math.log(1000. / 16)): 18 | """ 19 | Arguments: 20 | weights (4-element tuple) 21 | bbox_xform_clip (float) 22 | """ 23 | self.weights = weights 24 | self.bbox_xform_clip = bbox_xform_clip 25 | 26 | def encode(self, reference_boxes, proposals): 27 | """ 28 | Encode a set of proposals with respect to some 29 | reference boxes 30 | 31 | Arguments: 32 | reference_boxes (Tensor): reference boxes 33 | proposals (Tensor): boxes to be encoded 34 | """ 35 | 36 | TO_REMOVE = 1 # TODO remove 37 | ex_widths = proposals[:, 2] - proposals[:, 0] + TO_REMOVE 38 | ex_heights = proposals[:, 3] - proposals[:, 1] + TO_REMOVE 39 | ex_ctr_x = proposals[:, 0] + 0.5 * ex_widths 40 | ex_ctr_y = proposals[:, 1] + 0.5 * ex_heights 41 | 42 | gt_widths = reference_boxes[:, 2] - reference_boxes[:, 0] + TO_REMOVE 43 | gt_heights = reference_boxes[:, 3] - reference_boxes[:, 1] + TO_REMOVE 44 | gt_ctr_x = reference_boxes[:, 0] + 0.5 * gt_widths 45 | gt_ctr_y = reference_boxes[:, 1] + 0.5 * gt_heights 46 | 47 | wx, wy, ww, wh = self.weights 48 | targets_dx = wx * (gt_ctr_x - ex_ctr_x) / ex_widths 49 | targets_dy = wy * (gt_ctr_y - ex_ctr_y) / ex_heights 50 | targets_dw = ww * torch.log(gt_widths / ex_widths) 51 | targets_dh = wh * torch.log(gt_heights / ex_heights) 52 | 53 | targets = torch.stack((targets_dx, targets_dy, targets_dw, targets_dh), dim=1) 54 | return targets 55 | 56 | def decode(self, rel_codes, boxes): 57 | """ 58 | From a set of original boxes and encoded relative box offsets, 59 | get the decoded boxes. 60 | 61 | Arguments: 62 | rel_codes (Tensor): encoded boxes 63 | boxes (Tensor): reference boxes. 
64 | """ 65 | 66 | boxes = boxes.to(rel_codes.dtype) 67 | 68 | TO_REMOVE = 1 # TODO remove 69 | widths = boxes[:, 2] - boxes[:, 0] + TO_REMOVE 70 | heights = boxes[:, 3] - boxes[:, 1] + TO_REMOVE 71 | ctr_x = boxes[:, 0] + 0.5 * widths 72 | ctr_y = boxes[:, 1] + 0.5 * heights 73 | 74 | wx, wy, ww, wh = self.weights 75 | dx = rel_codes[:, 0::4] / wx 76 | dy = rel_codes[:, 1::4] / wy 77 | dw = rel_codes[:, 2::4] / ww 78 | dh = rel_codes[:, 3::4] / wh 79 | 80 | # Prevent sending too large values into torch.exp() 81 | dw = torch.clamp(dw, max=self.bbox_xform_clip) 82 | dh = torch.clamp(dh, max=self.bbox_xform_clip) 83 | 84 | pred_ctr_x = dx * widths[:, None] + ctr_x[:, None] 85 | pred_ctr_y = dy * heights[:, None] + ctr_y[:, None] 86 | pred_w = torch.exp(dw) * widths[:, None] 87 | pred_h = torch.exp(dh) * heights[:, None] 88 | 89 | pred_boxes = torch.zeros_like(rel_codes) 90 | # x1 91 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 92 | # y1 93 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 94 | # x2 (note: "- 1" is correct; don't be fooled by the asymmetry) 95 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 96 | # y2 (note: "- 1" is correct; don't be fooled by the asymmetry) 97 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 98 | 99 | return pred_boxes 100 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/roi_heads.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import torch 7 | 8 | from .box_head.box_head import build_roi_box_head 9 | from .mask_head.mask_head import build_roi_mask_head 10 | from .keypoint_head.keypoint_head import build_roi_keypoint_head 11 | from .weak_head.weak_head import build_roi_weak_head 12 | 13 | 14 | class CombinedROIHeads(torch.nn.ModuleDict): 15 | """ 16 | Combines a set of individual heads (for box prediction or masks) into a single 17 | head. 
18 | """ 19 | 20 | def __init__(self, cfg, heads): 21 | super(CombinedROIHeads, self).__init__(heads) 22 | self.cfg = cfg.clone() 23 | if cfg.MODEL.MASK_ON and cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 24 | self.mask.feature_extractor = self.box.feature_extractor 25 | if cfg.MODEL.KEYPOINT_ON and cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR: 26 | self.keypoint.feature_extractor = self.box.feature_extractor 27 | 28 | def forward(self, features, proposals, targets=None, dummy_model=None): 29 | losses = {} 30 | metrics = {} 31 | # TODO rename x to roi_box_features, if it doesn't increase memory consumption 32 | x, detections, loss_box, accuracy_cls = self.box(features, proposals, targets) 33 | losses.update(loss_box) 34 | metrics.update(accuracy_cls) 35 | if self.cfg.MODEL.MASK_ON: 36 | mask_features = features 37 | # optimization: during training, if we share the feature extractor between 38 | # the box and the mask heads, then we can reuse the features already computed 39 | if ( 40 | self.training 41 | and self.cfg.MODEL.ROI_MASK_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 42 | ): 43 | mask_features = x 44 | # During training, self.box() will return the unaltered proposals as "detections" 45 | # this makes the API consistent during training and testing 46 | x, detections, loss_mask = self.mask(mask_features, detections, targets) 47 | losses.update(loss_mask) 48 | 49 | if self.cfg.MODEL.KEYPOINT_ON: 50 | keypoint_features = features 51 | # optimization: during training, if we share the feature extractor between 52 | # the box and the mask heads, then we can reuse the features already computed 53 | if ( 54 | self.training 55 | and self.cfg.MODEL.ROI_KEYPOINT_HEAD.SHARE_BOX_FEATURE_EXTRACTOR 56 | ): 57 | keypoint_features = x 58 | # During training, self.box() will return the unaltered proposals as "detections" 59 | # this makes the API consistent during training and testing 60 | x, detections, loss_keypoint = self.keypoint(keypoint_features, detections, targets) 61 | losses.update(loss_keypoint) 62 | return x, detections, losses, metrics 63 | 64 | 65 | def build_roi_heads(cfg, in_channels): 66 | # individually create the heads, that will be combined together 67 | # afterwards 68 | roi_heads = [] 69 | if cfg.MODEL.RETINANET_ON: 70 | return [] 71 | 72 | if cfg.MODEL.WSOD_ON: 73 | return build_roi_weak_head(cfg, in_channels) 74 | 75 | if not cfg.MODEL.RPN_ONLY: 76 | roi_heads.append(("box", build_roi_box_head(cfg, in_channels))) 77 | 78 | if cfg.MODEL.MASK_ON: 79 | roi_heads.append(("mask", build_roi_mask_head(cfg, in_channels))) 80 | 81 | if cfg.MODEL.KEYPOINT_ON: 82 | roi_heads.append(("keypoint", build_roi_keypoint_head(cfg, in_channels))) 83 | 84 | # combine individual heads in a single module 85 | if roi_heads: 86 | roi_heads = CombinedROIHeads(cfg, roi_heads) 87 | 88 | return roi_heads 89 | -------------------------------------------------------------------------------- /wetectron/data/samplers/distributed.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 8 | # Code is copy-pasted exactly as in torch.utils.data.distributed. 
9 |  # FIXME remove this once c10d fixes the bug it has
10 | import math
11 | import torch
12 | import torch.distributed as dist
13 | from torch.utils.data.sampler import Sampler
14 | 
15 | 
16 | class DistributedSampler(Sampler):
17 |     """
18 |     Sampler that restricts data loading to a subset of the dataset.
19 |     It is especially useful in conjunction with
20 |     :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each
21 |     process can pass a DistributedSampler instance as a DataLoader sampler,
22 |     and load a subset of the original dataset that is exclusive to it.
23 |     .. note::
24 |         Dataset is assumed to be of constant size.
25 |     Arguments:
26 |         dataset: Dataset used for sampling.
27 |         num_replicas (optional): Number of processes participating in
28 |             distributed training.
29 |         rank (optional): Rank of the current process within num_replicas.
30 |     """
31 | 
32 |     def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
33 |         if num_replicas is None:
34 |             if not dist.is_available():
35 |                 raise RuntimeError("Requires distributed package to be available")
36 |             num_replicas = dist.get_world_size()
37 |         if rank is None:
38 |             if not dist.is_available():
39 |                 raise RuntimeError("Requires distributed package to be available")
40 |             rank = dist.get_rank()
41 |         self.dataset = dataset
42 |         self.num_replicas = num_replicas
43 |         self.rank = rank
44 |         self.epoch = 0
45 |         self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas))
46 |         self.total_size = self.num_samples * self.num_replicas
47 |         self.shuffle = shuffle
48 | 
49 |     def __iter__(self):
50 |         if self.shuffle:
51 |             # deterministically shuffle based on epoch
52 |             g = torch.Generator()
53 |             g.manual_seed(self.epoch)
54 |             indices = torch.randperm(len(self.dataset), generator=g).tolist()
55 |         else:
56 |             indices = torch.arange(len(self.dataset)).tolist()
57 | 
58 |         # add extra samples to make it evenly divisible
59 |         indices += indices[: (self.total_size - len(indices))]
60 |         assert len(indices) == self.total_size
61 | 
62 |         # subsample
63 |         offset = self.num_samples * self.rank
64 |         indices = indices[offset : offset + self.num_samples]
65 |         assert len(indices) == self.num_samples
66 | 
67 |         return iter(indices)
68 | 
69 |     def __len__(self):
70 |         return self.num_samples
71 | 
72 |     def set_epoch(self, epoch):
73 |         self.epoch = epoch
74 | 
75 | class WeightedDistributedSampler(DistributedSampler):
76 |     """
77 |     A modified version of DistributedSampler that takes into account the different
78 |     sampling weights of datapoints.
79 | """ 80 | def __init__(self, dataset, weights, num_replicas=None, rank=None, shuffle=True): 81 | super(WeightedDistributedSampler, self).__init__( 82 | dataset, num_replicas=None, rank=None, shuffle=True 83 | ) 84 | self.weights = torch.as_tensor(weights, dtype=torch.double) 85 | 86 | def __iter__(self): 87 | indices = torch.multinomial(self.weights, len(self.dataset), True).tolist() 88 | 89 | # add extra samples to make it evenly divisible 90 | indices += indices[: (self.total_size - len(indices))] 91 | assert len(indices) == self.total_size 92 | 93 | # subsample 94 | offset = self.num_samples * self.rank 95 | indices = indices[offset : offset + self.num_samples] 96 | assert len(indices) == self.num_samples 97 | 98 | return iter(indices) 99 | -------------------------------------------------------------------------------- /wetectron/modeling/rpn/retinanet/loss.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | """ 6 | This file contains specific functions for computing losses on the RetinaNet 7 | file 8 | """ 9 | 10 | import torch 11 | from torch.nn import functional as F 12 | 13 | from ..utils import concat_box_prediction_layers 14 | 15 | from wetectron.layers import smooth_l1_loss 16 | from wetectron.layers import SigmoidFocalLoss 17 | from wetectron.modeling.matcher import Matcher 18 | from wetectron.modeling.utils import cat 19 | from wetectron.structures.boxlist_ops import boxlist_iou 20 | from wetectron.structures.boxlist_ops import cat_boxlist 21 | from wetectron.modeling.rpn.loss import RPNLossComputation 22 | 23 | class RetinaNetLossComputation(RPNLossComputation): 24 | """ 25 | This class computes the RetinaNet loss. 
26 | """ 27 | 28 | def __init__(self, proposal_matcher, box_coder, 29 | generate_labels_func, 30 | sigmoid_focal_loss, 31 | bbox_reg_beta=0.11, 32 | regress_norm=1.0): 33 | """ 34 | Arguments: 35 | proposal_matcher (Matcher) 36 | box_coder (BoxCoder) 37 | """ 38 | self.proposal_matcher = proposal_matcher 39 | self.box_coder = box_coder 40 | self.box_cls_loss_func = sigmoid_focal_loss 41 | self.bbox_reg_beta = bbox_reg_beta 42 | self.copied_fields = ['labels'] 43 | self.generate_labels_func = generate_labels_func 44 | self.discard_cases = ['between_thresholds'] 45 | self.regress_norm = regress_norm 46 | 47 | def __call__(self, anchors, box_cls, box_regression, targets): 48 | """ 49 | Arguments: 50 | anchors (list[BoxList]) 51 | box_cls (list[Tensor]) 52 | box_regression (list[Tensor]) 53 | targets (list[BoxList]) 54 | 55 | Returns: 56 | retinanet_cls_loss (Tensor) 57 | retinanet_regression_loss (Tensor 58 | """ 59 | anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors] 60 | labels, regression_targets = self.prepare_targets(anchors, targets) 61 | 62 | N = len(labels) 63 | box_cls, box_regression = \ 64 | concat_box_prediction_layers(box_cls, box_regression) 65 | 66 | labels = torch.cat(labels, dim=0) 67 | regression_targets = torch.cat(regression_targets, dim=0) 68 | pos_inds = torch.nonzero(labels > 0).squeeze(1) 69 | 70 | retinanet_regression_loss = smooth_l1_loss( 71 | box_regression[pos_inds], 72 | regression_targets[pos_inds], 73 | beta=self.bbox_reg_beta, 74 | size_average=False, 75 | ) / (max(1, pos_inds.numel() * self.regress_norm)) 76 | 77 | labels = labels.int() 78 | 79 | retinanet_cls_loss = self.box_cls_loss_func( 80 | box_cls, 81 | labels 82 | ) / (pos_inds.numel() + N) 83 | 84 | return retinanet_cls_loss, retinanet_regression_loss 85 | 86 | 87 | def generate_retinanet_labels(matched_targets): 88 | labels_per_image = matched_targets.get_field("labels") 89 | return labels_per_image 90 | 91 | 92 | def make_retinanet_loss_evaluator(cfg, box_coder): 93 | matcher = Matcher( 94 | cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, 95 | cfg.MODEL.RETINANET.BG_IOU_THRESHOLD, 96 | allow_low_quality_matches=True, 97 | ) 98 | sigmoid_focal_loss = SigmoidFocalLoss( 99 | cfg.MODEL.RETINANET.LOSS_GAMMA, 100 | cfg.MODEL.RETINANET.LOSS_ALPHA 101 | ) 102 | 103 | loss_evaluator = RetinaNetLossComputation( 104 | matcher, 105 | box_coder, 106 | generate_retinanet_labels, 107 | sigmoid_focal_loss, 108 | bbox_reg_beta = cfg.MODEL.RETINANET.BBOX_REG_BETA, 109 | regress_norm = cfg.MODEL.RETINANET.BBOX_REG_WEIGHT, 110 | ) 111 | return loss_evaluator 112 | -------------------------------------------------------------------------------- /wetectron/utils/model_serialization.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
10 | from collections import OrderedDict
11 | import logging
12 | import torch
13 | 
14 | from wetectron.utils.imports import import_file
15 | 
16 | 
17 | def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
18 |     """
19 |     Strategy: suppose that the models that we will create will have prefixes appended
20 |     to each of its keys, for example due to an extra level of nesting that the original
21 |     pre-trained weights from ImageNet won't contain. For example, model.state_dict()
22 |     might return backbone[0].body.res2.conv1.weight, while the pre-trained model contains
23 |     res2.conv1.weight. We thus want to match both parameters together.
24 |     For that, we look for each model weight, look among all loaded keys if there is one
25 |     that is a suffix of the current weight name, and use it if that's the case.
26 |     If multiple matches exist, take the one with longest size
27 |     of the corresponding name. For example, for the same model as before, the pretrained
28 |     weight file can contain both res2.conv1.weight, as well as conv1.weight. In this case,
29 |     we want to match backbone[0].body.conv1.weight to conv1.weight, and
30 |     backbone[0].body.res2.conv1.weight to res2.conv1.weight.
31 |     """
32 |     current_keys = sorted(list(model_state_dict.keys()))
33 |     loaded_keys = sorted(list(loaded_state_dict.keys()))
34 |     # get a matrix of string matches, where each (i, j) entry correspond to the size of the
35 |     # loaded_key string, if it matches
36 |     match_matrix = [
37 |         len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys
38 |     ]
39 |     match_matrix = torch.as_tensor(match_matrix).view(
40 |         len(current_keys), len(loaded_keys)
41 |     )
42 |     max_match_size, idxs = match_matrix.max(1)
43 |     # remove indices that correspond to no-match
44 |     idxs[max_match_size == 0] = -1
45 | 
46 |     # used for logging
47 |     max_size = max([len(key) for key in current_keys]) if current_keys else 1
48 |     max_size_loaded = max([len(key) for key in loaded_keys]) if loaded_keys else 1
49 |     log_str_template = "{: <{}} loaded from {: <{}} of shape {}"
50 |     logger = logging.getLogger(__name__)
51 |     for idx_new, idx_old in enumerate(idxs.tolist()):
52 |         if idx_old == -1:
53 |             continue
54 |         key = current_keys[idx_new]
55 |         key_old = loaded_keys[idx_old]
56 |         model_state_dict[key] = loaded_state_dict[key_old]
57 |         logger.info(
58 |             log_str_template.format(
59 |                 key,
60 |                 max_size,
61 |                 key_old,
62 |                 max_size_loaded,
63 |                 tuple(loaded_state_dict[key_old].shape),
64 |             )
65 |         )
66 | 
67 | 
68 | def strip_prefix_if_present(state_dict, prefix):
69 |     keys = sorted(state_dict.keys())
70 |     if not all(key.startswith(prefix) for key in keys):
71 |         return state_dict
72 |     stripped_state_dict = OrderedDict()
73 |     for key, value in state_dict.items():
74 |         stripped_state_dict[key[len(prefix):]] = value  # slice rather than str.replace so only the leading prefix is removed
75 |     return stripped_state_dict
76 | 
77 | 
78 | def load_state_dict(model, loaded_state_dict):
79 |     model_state_dict = model.state_dict()
80 |     # if the state_dict comes from a model that was wrapped in a
81 |     # DataParallel or DistributedDataParallel during serialization,
82 |     # remove the "module" prefix before performing the matching
83 |     loaded_state_dict = strip_prefix_if_present(loaded_state_dict, prefix="module.")
84 |     align_and_update_state_dicts(model_state_dict, loaded_state_dict)
85 | 
86 |     # use strict loading
87 |     model.load_state_dict(model_state_dict)
88 | 
--------------------------------------------------------------------------------
/wetectron/csrc/cuda/deform_pool_cuda.cu:
-------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // modify from 6 | // https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c 7 | 8 | // based on 9 | // author: Charles Shang 10 | // https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu 11 | 12 | #include <ATen/ATen.h> 13 | #include <ATen/cuda/CUDAContext.h> 14 | 15 | #include <THC/THC.h> 16 | #include <THC/THCDeviceUtils.cuh> 17 | 18 | #include <vector> 19 | #include <iostream> 20 | #include <cmath> 21 | 22 | #ifndef AT_CHECK 23 | #define AT_CHECK TORCH_CHECK 24 | #endif 25 | 26 | void DeformablePSROIPoolForward( 27 | const at::Tensor data, const at::Tensor bbox, const at::Tensor trans, 28 | at::Tensor out, at::Tensor top_count, const int batch, const int channels, 29 | const int height, const int width, const int num_bbox, 30 | const int channels_trans, const int no_trans, const float spatial_scale, 31 | const int output_dim, const int group_size, const int pooled_size, 32 | const int part_size, const int sample_per_part, const float trans_std); 33 | 34 | void DeformablePSROIPoolBackwardAcc( 35 | const at::Tensor out_grad, const at::Tensor data, const at::Tensor bbox, 36 | const at::Tensor trans, const at::Tensor top_count, at::Tensor in_grad, 37 | at::Tensor trans_grad, const int batch, const int channels, 38 | const int height, const int width, const int num_bbox, 39 | const int channels_trans, const int no_trans, const float spatial_scale, 40 | const int output_dim, const int group_size, const int pooled_size, 41 | const int part_size, const int sample_per_part, const float trans_std); 42 | 43 | void deform_psroi_pooling_cuda_forward( 44 | at::Tensor input, at::Tensor bbox, at::Tensor trans, at::Tensor out, 45 | at::Tensor top_count, const int no_trans, const float spatial_scale, 46 | const int output_dim, const int group_size, const int pooled_size, 47 | const int part_size, const int sample_per_part, const float trans_std) 48 | { 49 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 50 | 51 | const int batch = input.size(0); 52 | const int channels = input.size(1); 53 | const int height = input.size(2); 54 | const int width = input.size(3); 55 | const int channels_trans = no_trans ?
2 : trans.size(1); 56 | 57 | const int num_bbox = bbox.size(0); 58 | if (num_bbox != out.size(0)) 59 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 60 | out.size(0), num_bbox); 61 | 62 | DeformablePSROIPoolForward( 63 | input, bbox, trans, out, top_count, batch, channels, height, width, 64 | num_bbox, channels_trans, no_trans, spatial_scale, output_dim, group_size, 65 | pooled_size, part_size, sample_per_part, trans_std); 66 | } 67 | 68 | void deform_psroi_pooling_cuda_backward( 69 | at::Tensor out_grad, at::Tensor input, at::Tensor bbox, at::Tensor trans, 70 | at::Tensor top_count, at::Tensor input_grad, at::Tensor trans_grad, 71 | const int no_trans, const float spatial_scale, const int output_dim, 72 | const int group_size, const int pooled_size, const int part_size, 73 | const int sample_per_part, const float trans_std) 74 | { 75 | AT_CHECK(out_grad.is_contiguous(), "out_grad tensor has to be contiguous"); 76 | AT_CHECK(input.is_contiguous(), "input tensor has to be contiguous"); 77 | 78 | const int batch = input.size(0); 79 | const int channels = input.size(1); 80 | const int height = input.size(2); 81 | const int width = input.size(3); 82 | const int channels_trans = no_trans ? 2 : trans.size(1); 83 | 84 | const int num_bbox = bbox.size(0); 85 | if (num_bbox != out_grad.size(0)) 86 | AT_ERROR("Output shape and bbox number won't match: (%d vs %d).", 87 | out_grad.size(0), num_bbox); 88 | 89 | DeformablePSROIPoolBackwardAcc( 90 | out_grad, input, bbox, trans, top_count, input_grad, trans_grad, batch, 91 | channels, height, width, num_bbox, channels_trans, no_trans, 92 | spatial_scale, output_dim, group_size, pooled_size, part_size, 93 | sample_per_part, trans_std); 94 | } 95 | -------------------------------------------------------------------------------- /wetectron/modeling/make_layers.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | """ 7 | Miscellaneous utility functions 8 | """ 9 | 10 | import torch 11 | from torch import nn 12 | from torch.nn import functional as F 13 | from wetectron.config import cfg 14 | from wetectron.layers import Conv2d 15 | from wetectron.modeling.poolers import Pooler 16 | 17 | 18 | def get_group_gn(dim, dim_per_gp, num_groups): 19 | """get number of groups used by GroupNorm, based on number of channels.""" 20 | assert dim_per_gp == -1 or num_groups == -1, \ 21 | "GroupNorm: can only specify G or C/G."
22 | 23 | if dim_per_gp > 0: 24 | assert dim % dim_per_gp == 0, \ 25 | "dim: {}, dim_per_gp: {}".format(dim, dim_per_gp) 26 | group_gn = dim // dim_per_gp 27 | else: 28 | assert dim % num_groups == 0, \ 29 | "dim: {}, num_groups: {}".format(dim, num_groups) 30 | group_gn = num_groups 31 | 32 | return group_gn 33 | 34 | 35 | def group_norm(out_channels, affine=True, divisor=1): 36 | out_channels = out_channels // divisor 37 | dim_per_gp = cfg.MODEL.GROUP_NORM.DIM_PER_GP // divisor 38 | num_groups = cfg.MODEL.GROUP_NORM.NUM_GROUPS // divisor 39 | eps = cfg.MODEL.GROUP_NORM.EPSILON # default: 1e-5 40 | return torch.nn.GroupNorm( 41 | get_group_gn(out_channels, dim_per_gp, num_groups), 42 | out_channels, 43 | eps, 44 | affine 45 | ) 46 | 47 | 48 | def make_conv3x3( 49 | in_channels, 50 | out_channels, 51 | dilation=1, 52 | stride=1, 53 | use_gn=False, 54 | use_relu=False, 55 | kaiming_init=True 56 | ): 57 | conv = Conv2d( 58 | in_channels, 59 | out_channels, 60 | kernel_size=3, 61 | stride=stride, 62 | padding=dilation, 63 | dilation=dilation, 64 | bias=False if use_gn else True 65 | ) 66 | if kaiming_init: 67 | nn.init.kaiming_normal_( 68 | conv.weight, mode="fan_out", nonlinearity="relu" 69 | ) 70 | else: 71 | torch.nn.init.normal_(conv.weight, std=0.01) 72 | if not use_gn: 73 | nn.init.constant_(conv.bias, 0) 74 | module = [conv,] 75 | if use_gn: 76 | module.append(group_norm(out_channels)) 77 | if use_relu: 78 | module.append(nn.ReLU(inplace=True)) 79 | if len(module) > 1: 80 | return nn.Sequential(*module) 81 | return conv 82 | 83 | 84 | def make_fc(dim_in, hidden_dim, use_gn=False): 85 | ''' 86 | Caffe2 implementation uses XavierFill, which in fact 87 | corresponds to kaiming_uniform_ in PyTorch 88 | ''' 89 | if use_gn: 90 | fc = nn.Linear(dim_in, hidden_dim, bias=False) 91 | nn.init.kaiming_uniform_(fc.weight, a=1) 92 | return nn.Sequential(fc, group_norm(hidden_dim)) 93 | fc = nn.Linear(dim_in, hidden_dim) 94 | nn.init.kaiming_uniform_(fc.weight, a=1) 95 | nn.init.constant_(fc.bias, 0) 96 | return fc 97 | 98 | 99 | def conv_with_kaiming_uniform(use_gn=False, use_relu=False): 100 | def make_conv( 101 | in_channels, out_channels, kernel_size, stride=1, dilation=1 102 | ): 103 | conv = Conv2d( 104 | in_channels, 105 | out_channels, 106 | kernel_size=kernel_size, 107 | stride=stride, 108 | padding=dilation * (kernel_size - 1) // 2, 109 | dilation=dilation, 110 | bias=False if use_gn else True 111 | ) 112 | # Caffe2 implementation uses XavierFill, which in fact 113 | # corresponds to kaiming_uniform_ in PyTorch 114 | nn.init.kaiming_uniform_(conv.weight, a=1) 115 | if not use_gn: 116 | nn.init.constant_(conv.bias, 0) 117 | module = [conv,] 118 | if use_gn: 119 | module.append(group_norm(out_channels)) 120 | if use_relu: 121 | module.append(nn.ReLU(inplace=True)) 122 | if len(module) > 1: 123 | return nn.Sequential(*module) 124 | return conv 125 | 126 | return make_conv 127 | -------------------------------------------------------------------------------- /wetectron/utils/metric_logger.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code adapted from https://github.com/NVlabs/wetectron 3 | # by Huy V. Vo and Oriane Simeoni 4 | # INRIA, Valeo.ai 5 | #------------------------------------------------------------------------------ 6 | 7 | # -------------------------------------------------------- 8 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 
9 | # Nvidia Source Code License-NC 10 | # -------------------------------------------------------- 11 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 12 | from collections import defaultdict 13 | from collections import deque 14 | import time 15 | from datetime import datetime 16 | import torch 17 | 18 | from .comm import is_main_process 19 | 20 | 21 | class SmoothedValue(object): 22 | """Track a series of values and provide access to smoothed values over a 23 | window or the global series average. 24 | """ 25 | 26 | def __init__(self, window_size=20): 27 | self.deque = deque(maxlen=window_size) 28 | self.series = [] 29 | self.total = 0.0 30 | self.count = 0 31 | 32 | def update(self, value): 33 | self.deque.append(value) 34 | self.series.append(value) 35 | self.count += 1 36 | self.total += value 37 | 38 | @property 39 | def median(self): 40 | d = torch.tensor(list(self.deque)) 41 | return d.median().item() 42 | 43 | @property 44 | def avg(self): 45 | d = torch.tensor(list(self.deque)) 46 | return d.mean().item() 47 | 48 | @property 49 | def global_avg(self): 50 | return self.total / self.count 51 | 52 | 53 | class MetricLogger(object): 54 | def __init__(self, delimiter="\t"): 55 | self.meters = defaultdict(SmoothedValue) 56 | self.delimiter = delimiter 57 | 58 | def update(self, **kwargs): 59 | for k, v in kwargs.items(): 60 | if isinstance(v, torch.Tensor): 61 | v = v.item() 62 | assert isinstance(v, (float, int)) 63 | self.meters[k].update(v) 64 | 65 | def __getattr__(self, attr): 66 | if attr in self.meters: 67 | return self.meters[attr] 68 | if attr in self.__dict__: 69 | return self.__dict__[attr] 70 | raise AttributeError("'{}' object has no attribute '{}'".format( 71 | type(self).__name__, attr)) 72 | 73 | def __str__(self): 74 | loss_str = [] 75 | for name, meter in self.meters.items(): 76 | loss_str.append( 77 | "{}: {:.4f} ({:.4f})".format(name, meter.median, meter.global_avg) 78 | ) 79 | return self.delimiter.join(loss_str) 80 | 81 | 82 | class TensorboardLogger(MetricLogger): 83 | def __init__(self, 84 | log_dir, 85 | start_iter=0, 86 | delimiter='\t'): 87 | 88 | super(TensorboardLogger, self).__init__(delimiter) 89 | self.iteration = start_iter 90 | self.writer = self._get_tensorboard_writer(log_dir) 91 | 92 | @staticmethod 93 | def _get_tensorboard_writer(log_dir): 94 | try: 95 | from tensorboardX import SummaryWriter 96 | except ImportError: 97 | raise ImportError( 98 | 'To use tensorboard please install tensorboardX ' 99 | '[ pip install tensorflow tensorboardX ].' 
100 | ) 101 | 102 | if is_main_process(): 103 | timestamp = datetime.fromtimestamp(time.time()).strftime('%Y%m%d-%H:%M') 104 | tb_logger = SummaryWriter('{}{}'.format(log_dir, timestamp)) 105 | return tb_logger 106 | else: 107 | return None 108 | 109 | def update(self, ** kwargs): 110 | super(TensorboardLogger, self).update(**kwargs) 111 | if self.writer: 112 | for k, v in kwargs.items(): 113 | if isinstance(v, torch.Tensor): 114 | v = v.item() 115 | assert isinstance(v, (float, int)) 116 | self.writer.add_scalar(k, v, self.iteration) 117 | 118 | def increase_counter(self): 119 | self.iteration += 1 120 | 121 | def decrease_counter(self): 122 | self.iteration -= 1 -------------------------------------------------------------------------------- /wetectron/utils/comm.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | """ 10 | This file contains primitives for multi-gpu communication. 11 | This is useful when doing distributed training. 12 | """ 13 | 14 | import pickle 15 | import time 16 | 17 | import torch 18 | import torch.distributed as dist 19 | 20 | 21 | def get_world_size(): 22 | if not dist.is_available(): 23 | return 1 24 | if not dist.is_initialized(): 25 | return 1 26 | return dist.get_world_size() 27 | 28 | 29 | def get_rank(): 30 | if not dist.is_available(): 31 | return 0 32 | if not dist.is_initialized(): 33 | return 0 34 | return dist.get_rank() 35 | 36 | 37 | def is_main_process(): 38 | return get_rank() == 0 39 | 40 | 41 | def synchronize(): 42 | """ 43 | Helper function to synchronize (barrier) among all processes when 44 | using distributed training 45 | """ 46 | if not dist.is_available(): 47 | return 48 | if not dist.is_initialized(): 49 | return 50 | world_size = dist.get_world_size() 51 | if world_size == 1: 52 | return 53 | dist.barrier() 54 | 55 | 56 | def all_gather(data): 57 | """ 58 | Run all_gather on arbitrary picklable data (not necessarily tensors) 59 | Args: 60 | data: any picklable object 61 | Returns: 62 | list[data]: list of data gathered from each rank 63 | """ 64 | world_size = get_world_size() 65 | if world_size == 1: 66 | return [data] 67 | 68 | # serialized to a Tensor 69 | buffer = pickle.dumps(data) 70 | storage = torch.ByteStorage.from_buffer(buffer) 71 | tensor = torch.ByteTensor(storage).to("cuda") 72 | 73 | # obtain Tensor size of each rank 74 | local_size = torch.LongTensor([tensor.numel()]).to("cuda") 75 | size_list = [torch.LongTensor([0]).to("cuda") for _ in range(world_size)] 76 | dist.all_gather(size_list, local_size) 77 | size_list = [int(size.item()) for size in size_list] 78 | max_size = max(size_list) 79 | 80 | # receiving Tensor from all ranks 81 | # we pad the tensor because torch all_gather does not support 82 | # gathering tensors of different shapes 83 | tensor_list = [] 84 | for _ in size_list: 85 | tensor_list.append(torch.ByteTensor(size=(max_size,)).to("cuda")) 86 | if local_size != max_size: 87 | padding = torch.ByteTensor(size=(max_size - local_size,)).to("cuda") 88 | tensor = torch.cat((tensor, padding), dim=0) 89 | 
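# at this point every rank's buffer is exactly max_size bytes, which is what
# dist.all_gather requires: it can only gather tensors of identical shape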
dist.all_gather(tensor_list, tensor) 90 | 91 | data_list = [] 92 | for size, tensor in zip(size_list, tensor_list): 93 | buffer = tensor.cpu().numpy().tobytes()[:size] 94 | data_list.append(pickle.loads(buffer)) 95 | 96 | return data_list 97 | 98 | 99 | def reduce_dict(input_dict, average=True): 100 | """ 101 | Args: 102 | input_dict (dict): all the values will be reduced 103 | average (bool): whether to do average or sum 104 | Reduce the values in the dictionary from all processes so that process with rank 105 | 0 has the averaged results. Returns a dict with the same fields as 106 | input_dict, after reduction. 107 | """ 108 | world_size = get_world_size() 109 | if world_size < 2: 110 | return input_dict 111 | with torch.no_grad(): 112 | names = [] 113 | values = [] 114 | # sort the keys so that they are consistent across processes 115 | for k in sorted(input_dict.keys()): 116 | names.append(k) 117 | values.append(input_dict[k]) 118 | values = torch.stack(values, dim=0) 119 | dist.reduce(values, dst=0) 120 | if dist.get_rank() == 0 and average: 121 | # only main process gets accumulated, so only divide by 122 | # world_size in this case 123 | values /= world_size 124 | reduced_dict = {k: v for k, v in zip(names, values)} 125 | return reduced_dict 126 | -------------------------------------------------------------------------------- /wetectron/modeling/backbone/fpn.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import torch 7 | import torch.nn.functional as F 8 | from torch import nn 9 | 10 | 11 | class FPN(nn.Module): 12 | """ 13 | Module that adds FPN on top of a list of feature maps. 14 | The feature maps are currently supposed to be in increasing depth 15 | order, and must be consecutive 16 | """ 17 | 18 | def __init__( 19 | self, in_channels_list, out_channels, conv_block, top_blocks=None 20 | ): 21 | """ 22 | Arguments: 23 | in_channels_list (list[int]): number of channels for each feature map that 24 | will be fed 25 | out_channels (int): number of channels of the FPN representation 26 | top_blocks (nn.Module or None): if provided, an extra operation will 27 | be performed on the output of the last (smallest resolution) 28 | FPN output, and the result will extend the result list 29 | """ 30 | super(FPN, self).__init__() 31 | self.inner_blocks = [] 32 | self.layer_blocks = [] 33 | for idx, in_channels in enumerate(in_channels_list, 1): 34 | inner_block = "fpn_inner{}".format(idx) 35 | layer_block = "fpn_layer{}".format(idx) 36 | 37 | if in_channels == 0: 38 | continue 39 | inner_block_module = conv_block(in_channels, out_channels, 1) 40 | layer_block_module = conv_block(out_channels, out_channels, 3, 1) 41 | self.add_module(inner_block, inner_block_module) 42 | self.add_module(layer_block, layer_block_module) 43 | self.inner_blocks.append(inner_block) 44 | self.layer_blocks.append(layer_block) 45 | self.top_blocks = top_blocks 46 | 47 | def forward(self, x): 48 | """ 49 | Arguments: 50 | x (list[Tensor]): feature maps for each feature level. 51 | Returns: 52 | results (tuple[Tensor]): feature maps after FPN layers. 53 | They are ordered from highest resolution first. 
54 | """ 55 | last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) 56 | results = [] 57 | results.append(getattr(self, self.layer_blocks[-1])(last_inner)) 58 | for feature, inner_block, layer_block in zip( 59 | x[:-1][::-1], self.inner_blocks[:-1][::-1], self.layer_blocks[:-1][::-1] 60 | ): 61 | if not inner_block: 62 | continue 63 | inner_top_down = F.interpolate(last_inner, scale_factor=2, mode="nearest") 64 | inner_lateral = getattr(self, inner_block)(feature) 65 | # TODO use size instead of scale to make it robust to different sizes 66 | # inner_top_down = F.upsample(last_inner, size=inner_lateral.shape[-2:], 67 | # mode='bilinear', align_corners=False) 68 | last_inner = inner_lateral + inner_top_down 69 | results.insert(0, getattr(self, layer_block)(last_inner)) 70 | 71 | if isinstance(self.top_blocks, LastLevelP6P7): 72 | last_results = self.top_blocks(x[-1], results[-1]) 73 | results.extend(last_results) 74 | elif isinstance(self.top_blocks, LastLevelMaxPool): 75 | last_results = self.top_blocks(results[-1]) 76 | results.extend(last_results) 77 | 78 | return tuple(results) 79 | 80 | 81 | class LastLevelMaxPool(nn.Module): 82 | def forward(self, x): 83 | return [F.max_pool2d(x, 1, 2, 0)] 84 | 85 | 86 | class LastLevelP6P7(nn.Module): 87 | """ 88 | This module is used in RetinaNet to generate extra layers, P6 and P7. 89 | """ 90 | def __init__(self, in_channels, out_channels): 91 | super(LastLevelP6P7, self).__init__() 92 | self.p6 = nn.Conv2d(in_channels, out_channels, 3, 2, 1) 93 | self.p7 = nn.Conv2d(out_channels, out_channels, 3, 2, 1) 94 | for module in [self.p6, self.p7]: 95 | nn.init.kaiming_uniform_(module.weight, a=1) 96 | nn.init.constant_(module.bias, 0) 97 | self.use_P5 = in_channels == out_channels 98 | 99 | def forward(self, c5, p5): 100 | x = p5 if self.use_P5 else c5 101 | p6 = self.p6(x) 102 | p7 = self.p7(F.relu(p6)) 103 | return [p6, p7] 104 | -------------------------------------------------------------------------------- /wetectron/modeling/roi_heads/keypoint_head/inference.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | import torch 6 | from torch import nn 7 | 8 | 9 | class KeypointPostProcessor(nn.Module): 10 | def __init__(self, keypointer=None): 11 | super(KeypointPostProcessor, self).__init__() 12 | self.keypointer = keypointer 13 | 14 | def forward(self, x, boxes): 15 | mask_prob = x 16 | 17 | scores = None 18 | if self.keypointer: 19 | mask_prob, scores = self.keypointer(x, boxes) 20 | 21 | assert len(boxes) == 1, "Only non-batched inference supported for now" 22 | boxes_per_image = [box.bbox.size(0) for box in boxes] 23 | mask_prob = mask_prob.split(boxes_per_image, dim=0) 24 | scores = scores.split(boxes_per_image, dim=0) 25 | 26 | results = [] 27 | for prob, box, score in zip(mask_prob, boxes, scores): 28 | bbox = BoxList(box.bbox, box.size, mode="xyxy") 29 | for field in box.fields(): 30 | bbox.add_field(field, box.get_field(field)) 31 | prob = PersonKeypoints(prob, box.size) 32 | prob.add_field("logits", score) 33 | bbox.add_field("keypoints", prob) 34 | results.append(bbox) 35 | 36 | return results 37 | 38 | 39 | # TODO remove and use only the Keypointer 40 | import numpy as np 41 | import cv2 42 | 43 | 44 | def heatmaps_to_keypoints(maps, rois): 45 | """Extract predicted keypoint 
locations from heatmaps. Returns a tuple (xy_preds, end_scores): xy_preds has shape 46 | (#rois, #keypoints, 3), holding (x, y, prob) for each keypoint, and end_scores has 47 | shape (#rois, #keypoints), holding the heatmap score at each predicted location. 48 | """ 49 | # This function converts a discrete image coordinate in a HEATMAP_SIZE x 50 | # HEATMAP_SIZE image to a continuous keypoint coordinate. We maintain 51 | # consistency with keypoints_to_heatmap_labels by using the conversion from 52 | # Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a 53 | # continuous coordinate. 54 | offset_x = rois[:, 0] 55 | offset_y = rois[:, 1] 56 | 57 | widths = rois[:, 2] - rois[:, 0] 58 | heights = rois[:, 3] - rois[:, 1] 59 | widths = np.maximum(widths, 1) 60 | heights = np.maximum(heights, 1) 61 | widths_ceil = np.ceil(widths) 62 | heights_ceil = np.ceil(heights) 63 | 64 | # NCHW to NHWC for use with OpenCV 65 | maps = np.transpose(maps, [0, 2, 3, 1]) 66 | min_size = 0 # cfg.KRCNN.INFERENCE_MIN_SIZE 67 | num_keypoints = maps.shape[3] 68 | xy_preds = np.zeros((len(rois), 3, num_keypoints), dtype=np.float32) 69 | end_scores = np.zeros((len(rois), num_keypoints), dtype=np.float32) 70 | for i in range(len(rois)): 71 | if min_size > 0: 72 | roi_map_width = int(np.maximum(widths_ceil[i], min_size)) 73 | roi_map_height = int(np.maximum(heights_ceil[i], min_size)) 74 | else: 75 | roi_map_width = int(widths_ceil[i])  # cv2.resize expects integer sizes 76 | roi_map_height = int(heights_ceil[i]) 77 | width_correction = widths[i] / roi_map_width 78 | height_correction = heights[i] / roi_map_height 79 | roi_map = cv2.resize( 80 | maps[i], (roi_map_width, roi_map_height), interpolation=cv2.INTER_CUBIC 81 | ) 82 | # Bring back to CHW 83 | roi_map = np.transpose(roi_map, [2, 0, 1]) 84 | # roi_map_probs = scores_to_probs(roi_map.copy()) 85 | w = roi_map.shape[2] 86 | pos = roi_map.reshape(num_keypoints, -1).argmax(axis=1) 87 | x_int = pos % w 88 | y_int = (pos - x_int) // w 89 | # assert (roi_map_probs[k, y_int, x_int] == 90 | # roi_map_probs[k, :, :].max()) 91 | x = (x_int + 0.5) * width_correction 92 | y = (y_int + 0.5) * height_correction 93 | xy_preds[i, 0, :] = x + offset_x[i] 94 | xy_preds[i, 1, :] = y + offset_y[i] 95 | xy_preds[i, 2, :] = 1 96 | end_scores[i, :] = roi_map[np.arange(num_keypoints), y_int, x_int] 97 | 98 | return np.transpose(xy_preds, [0, 2, 1]), end_scores 99 | 100 | 101 | from wetectron.structures.bounding_box import BoxList 102 | from wetectron.structures.keypoint import PersonKeypoints 103 | 104 | 105 | class Keypointer(object): 106 | """ 107 | Projects a set of masks in an image on the locations 108 | specified by the bounding boxes 109 | """ 110 | 111 | def __init__(self, padding=0): 112 | self.padding = padding 113 | 114 | def __call__(self, masks, boxes): 115 | # TODO do this properly 116 | if isinstance(boxes, BoxList): 117 | boxes = [boxes] 118 | assert len(boxes) == 1 119 | 120 | result, scores = heatmaps_to_keypoints( 121 | masks.detach().cpu().numpy(), boxes[0].bbox.cpu().numpy() 122 | ) 123 | return torch.from_numpy(result).to(masks.device), torch.as_tensor(scores, device=masks.device) 124 | 125 | 126 | def make_roi_keypoint_post_processor(cfg): 127 | keypointer = Keypointer() 128 | keypoint_post_processor = KeypointPostProcessor(keypointer) 129 | return keypoint_post_processor 130 | -------------------------------------------------------------------------------- /wetectron/csrc/deform_conv.h: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from
https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | #pragma once 7 | #include "cpu/vision.h" 8 | 9 | #ifdef WITH_CUDA 10 | #include "cuda/vision.h" 11 | #endif 12 | 13 | 14 | // Interface for Python 15 | int deform_conv_forward( 16 | at::Tensor input, 17 | at::Tensor weight, 18 | at::Tensor offset, 19 | at::Tensor output, 20 | at::Tensor columns, 21 | at::Tensor ones, 22 | int kW, 23 | int kH, 24 | int dW, 25 | int dH, 26 | int padW, 27 | int padH, 28 | int dilationW, 29 | int dilationH, 30 | int group, 31 | int deformable_group, 32 | int im2col_step) 33 | { 34 | if (input.is_cuda()) { 35 | #ifdef WITH_CUDA 36 | return deform_conv_forward_cuda( 37 | input, weight, offset, output, columns, ones, 38 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 39 | group, deformable_group, im2col_step 40 | ); 41 | #else 42 | AT_ERROR("Not compiled with GPU support"); 43 | #endif 44 | } 45 | AT_ERROR("Not implemented on the CPU"); 46 | } 47 | 48 | 49 | int deform_conv_backward_input( 50 | at::Tensor input, 51 | at::Tensor offset, 52 | at::Tensor gradOutput, 53 | at::Tensor gradInput, 54 | at::Tensor gradOffset, 55 | at::Tensor weight, 56 | at::Tensor columns, 57 | int kW, 58 | int kH, 59 | int dW, 60 | int dH, 61 | int padW, 62 | int padH, 63 | int dilationW, 64 | int dilationH, 65 | int group, 66 | int deformable_group, 67 | int im2col_step) 68 | { 69 | if (input.is_cuda()) { 70 | #ifdef WITH_CUDA 71 | return deform_conv_backward_input_cuda( 72 | input, offset, gradOutput, gradInput, gradOffset, weight, columns, 73 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 74 | group, deformable_group, im2col_step 75 | ); 76 | #else 77 | AT_ERROR("Not compiled with GPU support"); 78 | #endif 79 | } 80 | AT_ERROR("Not implemented on the CPU"); 81 | } 82 | 83 | 84 | int deform_conv_backward_parameters( 85 | at::Tensor input, 86 | at::Tensor offset, 87 | at::Tensor gradOutput, 88 | at::Tensor gradWeight, // at::Tensor gradBias, 89 | at::Tensor columns, 90 | at::Tensor ones, 91 | int kW, 92 | int kH, 93 | int dW, 94 | int dH, 95 | int padW, 96 | int padH, 97 | int dilationW, 98 | int dilationH, 99 | int group, 100 | int deformable_group, 101 | float scale, 102 | int im2col_step) 103 | { 104 | if (input.is_cuda()) { 105 | #ifdef WITH_CUDA 106 | return deform_conv_backward_parameters_cuda( 107 | input, offset, gradOutput, gradWeight, columns, ones, 108 | kW, kH, dW, dH, padW, padH, dilationW, dilationH, 109 | group, deformable_group, scale, im2col_step 110 | ); 111 | #else 112 | AT_ERROR("Not compiled with GPU support"); 113 | #endif 114 | } 115 | AT_ERROR("Not implemented on the CPU"); 116 | } 117 | 118 | 119 | void modulated_deform_conv_forward( 120 | at::Tensor input, 121 | at::Tensor weight, 122 | at::Tensor bias, 123 | at::Tensor ones, 124 | at::Tensor offset, 125 | at::Tensor mask, 126 | at::Tensor output, 127 | at::Tensor columns, 128 | int kernel_h, 129 | int kernel_w, 130 | const int stride_h, 131 | const int stride_w, 132 | const int pad_h, 133 | const int pad_w, 134 | const int dilation_h, 135 | const int dilation_w, 136 | const int group, 137 | const int deformable_group, 138 | const bool with_bias) 139 | { 140 | if (input.is_cuda()) { 141 | #ifdef WITH_CUDA 142 | return modulated_deform_conv_cuda_forward( 143 | input, weight, bias, ones, offset, mask, output, columns, 144 | kernel_h, kernel_w, stride_h, stride_w, 145 | pad_h, pad_w, dilation_h, 
dilation_w, 146 | group, deformable_group, with_bias 147 | ); 148 | #else 149 | AT_ERROR("Not compiled with GPU support"); 150 | #endif 151 | } 152 | AT_ERROR("Not implemented on the CPU"); 153 | } 154 | 155 | 156 | void modulated_deform_conv_backward( 157 | at::Tensor input, 158 | at::Tensor weight, 159 | at::Tensor bias, 160 | at::Tensor ones, 161 | at::Tensor offset, 162 | at::Tensor mask, 163 | at::Tensor columns, 164 | at::Tensor grad_input, 165 | at::Tensor grad_weight, 166 | at::Tensor grad_bias, 167 | at::Tensor grad_offset, 168 | at::Tensor grad_mask, 169 | at::Tensor grad_output, 170 | int kernel_h, 171 | int kernel_w, 172 | int stride_h, 173 | int stride_w, 174 | int pad_h, 175 | int pad_w, 176 | int dilation_h, 177 | int dilation_w, 178 | int group, 179 | int deformable_group, 180 | const bool with_bias) 181 | { 182 | if (input.is_cuda()) { 183 | #ifdef WITH_CUDA 184 | return modulated_deform_conv_cuda_backward( 185 | input, weight, bias, ones, offset, mask, columns, 186 | grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output, 187 | kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, 188 | group, deformable_group, with_bias 189 | ); 190 | #else 191 | AT_ERROR("Not compiled with GPU support"); 192 | #endif 193 | } 194 | AT_ERROR("Not implemented on the CPU"); 195 | } -------------------------------------------------------------------------------- /wetectron/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 6 | import itertools 7 | 8 | import torch 9 | from torch.utils.data.sampler import BatchSampler 10 | from torch.utils.data.sampler import Sampler 11 | 12 | 13 | class GroupedBatchSampler(BatchSampler): 14 | """ 15 | Wraps another sampler to yield a mini-batch of indices. 16 | It enforces that elements from the same group should appear in groups of batch_size. 17 | It also tries to provide mini-batches which follows an ordering which is 18 | as close as possible to the ordering from the original sampler. 19 | 20 | Arguments: 21 | sampler (Sampler): Base sampler. 22 | batch_size (int): Size of mini-batch. 23 | drop_uneven (bool): If ``True``, the sampler will drop the batches whose 24 | size is less than ``batch_size`` 25 | 26 | """ 27 | 28 | def __init__(self, sampler, group_ids, batch_size, drop_uneven=False): 29 | if not isinstance(sampler, Sampler): 30 | raise ValueError( 31 | "sampler should be an instance of " 32 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 33 | ) 34 | self.sampler = sampler 35 | self.group_ids = torch.as_tensor(group_ids) 36 | assert self.group_ids.dim() == 1 37 | self.batch_size = batch_size 38 | self.drop_uneven = drop_uneven 39 | 40 | self.groups = torch.unique(self.group_ids).sort(0)[0] 41 | 42 | self._can_reuse_batches = False 43 | 44 | def _prepare_batches(self): 45 | dataset_size = len(self.group_ids) 46 | # get the sampled indices from the sampler 47 | sampled_ids = torch.as_tensor(list(self.sampler)) 48 | # potentially not all elements of the dataset were sampled 49 | # by the sampler (e.g., DistributedSampler). 
50 | # construct a tensor which contains -1 if the element was 51 | # not sampled, and a non-negative number indicating the 52 | # order where the element was sampled. 53 | # for example, if sampled_ids = [3, 1] and dataset_size = 5, 54 | # the order is [-1, 1, -1, 0, -1] 55 | order = torch.full((dataset_size,), -1, dtype=torch.int64) 56 | order[sampled_ids] = torch.arange(len(sampled_ids)) 57 | 58 | # get a mask with the elements that were sampled 59 | mask = order >= 0 60 | 61 | # find the elements that belong to each individual cluster 62 | clusters = [(self.group_ids == i) & mask for i in self.groups] 63 | # get relative order of the elements inside each cluster 64 | # that follows the order from the sampler 65 | relative_order = [order[cluster] for cluster in clusters] 66 | # with the relative order, find the absolute order in the 67 | # sampled space 68 | permutation_ids = [s[s.sort()[1]] for s in relative_order] 69 | # permute each cluster so that they follow the order from 70 | # the sampler 71 | permuted_clusters = [sampled_ids[idx] for idx in permutation_ids] 72 | 73 | # split each cluster into chunks of batch_size, and merge them into a single list of tensors 74 | splits = [c.split(self.batch_size) for c in permuted_clusters] 75 | merged = tuple(itertools.chain.from_iterable(splits)) 76 | 77 | # now each batch internally has the right order, but 78 | # they are grouped by clusters. Find the permutation between 79 | # different batches that brings them as close as possible to 80 | # the order that we have in the sampler. For that, we will consider the 81 | # ordering as coming from the first element of each batch, and sort 82 | # correspondingly 83 | first_element_of_batch = [t[0].item() for t in merged] 84 | # get an inverse mapping from sampled indices to the positions where 85 | # they occur (as returned by the sampler) 86 | inv_sampled_ids_map = {v: k for k, v in enumerate(sampled_ids.tolist())} 87 | # from the first element in each batch, get a relative ordering 88 | first_index_of_batch = torch.as_tensor( 89 | [inv_sampled_ids_map[s] for s in first_element_of_batch] 90 | ) 91 | 92 | # permute the batches so that they approximately follow the order 93 | # from the sampler 94 | permutation_order = first_index_of_batch.sort(0)[1].tolist() 95 | # finally, permute the batches 96 | batches = [merged[i].tolist() for i in permutation_order] 97 | 98 | if self.drop_uneven: 99 | kept = [] 100 | for batch in batches: 101 | if len(batch) == self.batch_size: 102 | kept.append(batch) 103 | batches = kept 104 | return batches 105 | 106 | def __iter__(self): 107 | if self._can_reuse_batches: 108 | batches = self._batches 109 | self._can_reuse_batches = False 110 | else: 111 | batches = self._prepare_batches() 112 | self._batches = batches 113 | return iter(batches) 114 | 115 | def __len__(self): 116 | if not hasattr(self, "_batches"): 117 | self._batches = self._prepare_batches() 118 | self._can_reuse_batches = True 119 | return len(self._batches) 120 | -------------------------------------------------------------------------------- /wetectron/csrc/cuda/nms.cu: -------------------------------------------------------------------------------- 1 | //------------------------------------------------------------------------------ 2 | // Code taken from https://github.com/NVlabs/wetectron 3 | //------------------------------------------------------------------------------ 4 | 5 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
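// Overview of the scheme used below: boxes are first sorted by score on the GPU;
// nms_kernel then tiles the pairwise IoU tests into 64x64 blocks (threadsPerBlock
// boxes per block). Each thread owns one "row" box and emits a 64-bit mask whose
// bit j is set when box j of the current column block overlaps it above
// nms_overlap_thresh. The host finally sweeps the boxes in score order and keeps
// a box only if no previously kept box has suppressed it.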
6 | #include <ATen/ATen.h> 7 | #include <ATen/cuda/CUDAContext.h> 8 | 9 | #include <THC/THC.h> 10 | #include <THC/THCDeviceUtils.cuh> 11 | 12 | #include <vector> 13 | #include <iostream> 14 | 15 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 16 | 17 | __device__ inline float devIoU(float const * const a, float const * const b) { 18 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 19 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 20 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 21 | float interS = width * height; 22 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 23 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 24 | return interS / (Sa + Sb - interS); 25 | } 26 | 27 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 28 | const float *dev_boxes, unsigned long long *dev_mask) { 29 | const int row_start = blockIdx.y; 30 | const int col_start = blockIdx.x; 31 | 32 | // if (row_start > col_start) return; 33 | 34 | const int row_size = 35 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 36 | const int col_size = 37 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 38 | 39 | __shared__ float block_boxes[threadsPerBlock * 5]; 40 | if (threadIdx.x < col_size) { 41 | block_boxes[threadIdx.x * 5 + 0] = 42 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 43 | block_boxes[threadIdx.x * 5 + 1] = 44 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 45 | block_boxes[threadIdx.x * 5 + 2] = 46 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 47 | block_boxes[threadIdx.x * 5 + 3] = 48 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 49 | block_boxes[threadIdx.x * 5 + 4] = 50 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 51 | } 52 | __syncthreads(); 53 | 54 | if (threadIdx.x < row_size) { 55 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 56 | const float *cur_box = dev_boxes + cur_box_idx * 5; 57 | int i = 0; 58 | unsigned long long t = 0; 59 | int start = 0; 60 | if (row_start == col_start) { 61 | start = threadIdx.x + 1; 62 | } 63 | for (i = start; i < col_size; i++) { 64 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 65 | t |= 1ULL << i; 66 | } 67 | } 68 | const int col_blocks = THCCeilDiv(n_boxes, threadsPerBlock); 69 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 70 | } 71 | } 72 | 73 | // boxes is a N x 5 tensor 74 | at::Tensor nms_cuda(const at::Tensor boxes, float nms_overlap_thresh) { 75 | using scalar_t = float; 76 | AT_ASSERTM(boxes.is_cuda(), "boxes must be a CUDA tensor"); 77 | auto scores = boxes.select(1, 4); 78 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 79 | auto boxes_sorted = boxes.index_select(0, order_t); 80 | 81 | int boxes_num = boxes.size(0); 82 | 83 | const int col_blocks = THCCeilDiv(boxes_num, threadsPerBlock); 84 | 85 | scalar_t* boxes_dev = boxes_sorted.data_ptr<scalar_t>(); 86 | 87 | THCState *state = at::globalContext().lazyInitCUDA(); // TODO replace with getTHCState 88 | 89 | unsigned long long* mask_dev = NULL; 90 | //THCudaCheck(THCudaMalloc(state, (void**) &mask_dev, 91 | // boxes_num * col_blocks * sizeof(unsigned long long))); 92 | 93 | mask_dev = (unsigned long long*) THCudaMalloc(state, boxes_num * col_blocks * sizeof(unsigned long long)); 94 | 95 | dim3 blocks(THCCeilDiv(boxes_num, threadsPerBlock), 96 | THCCeilDiv(boxes_num, threadsPerBlock)); 97 | dim3 threads(threadsPerBlock); 98 | nms_kernel<<<blocks, threads>>>(boxes_num, 99 | nms_overlap_thresh, 100 | boxes_dev, 101 | mask_dev); 102 |
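// host-side greedy sweep: copy each box's per-block suppression masks back to
// the CPU, then walk the boxes in descending score order, keeping a box only
// when no previously kept box has set its bit in remv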
103 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 104 | THCudaCheck(cudaMemcpy(&mask_host[0], 105 | mask_dev, 106 | sizeof(unsigned long long) * boxes_num * col_blocks, 107 | cudaMemcpyDeviceToHost)); 108 | 109 | std::vector<unsigned long long> remv(col_blocks); 110 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 111 | 112 | at::Tensor keep = at::empty({boxes_num}, boxes.options().dtype(at::kLong).device(at::kCPU)); 113 | int64_t* keep_out = keep.data_ptr<int64_t>(); 114 | 115 | int num_to_keep = 0; 116 | for (int i = 0; i < boxes_num; i++) { 117 | int nblock = i / threadsPerBlock; 118 | int inblock = i % threadsPerBlock; 119 | 120 | if (!(remv[nblock] & (1ULL << inblock))) { 121 | keep_out[num_to_keep++] = i; 122 | unsigned long long *p = &mask_host[0] + i * col_blocks; 123 | for (int j = nblock; j < col_blocks; j++) { 124 | remv[j] |= p[j]; 125 | } 126 | } 127 | } 128 | 129 | THCudaFree(state, mask_dev); 130 | // TODO improve this part 131 | return std::get<0>(order_t.index({ 132 | keep.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep).to( 133 | order_t.device(), keep.scalar_type()) 134 | }).sort(0, false)); 135 | } 136 | -------------------------------------------------------------------------------- /wetectron/data/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------ 2 | # Code taken from https://github.com/NVlabs/wetectron 3 | #------------------------------------------------------------------------------ 4 | 5 | # -------------------------------------------------------- 6 | # Copyright (C) 2020 NVIDIA Corporation. All rights reserved. 7 | # Nvidia Source Code License-NC 8 | # -------------------------------------------------------- 9 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
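# Illustrative usage sketch (sizes and the BGR means below are placeholders,
# not values taken from a wetectron config): the transforms are chained with
# Compose and applied jointly to the image, its targets and its proposal boxes:
#
#   transform = Compose([
#       Resize(min_size=600, max_size=1000),
#       RandomHorizontalFlip(prob=0.5),
#       ToTensor(),
#       Normalize(mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0]),
#   ])
#   image, target, rois = transform(image, target, rois)
#
# every transform below takes and returns the (image, target, rois) triple so
# that boxes stay aligned with the image through resizing and flipping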
10 | import random 11 | import numpy as np 12 | import cv2 13 | from PIL import ImageFilter 14 | 15 | import torch 16 | import torchvision 17 | from torchvision.transforms import functional as F 18 | 19 | 20 | class Compose(object): 21 | def __init__(self, transforms): 22 | self.transforms = transforms 23 | 24 | def __call__(self, image, target=None, rois=None): 25 | for t in self.transforms: 26 | image, target, rois = t(image, target, rois) 27 | return image, target, rois 28 | 29 | def __repr__(self): 30 | format_string = self.__class__.__name__ + "(" 31 | for t in self.transforms: 32 | format_string += "\n" 33 | format_string += " {0}".format(t) 34 | format_string += "\n)" 35 | return format_string 36 | 37 | class Resize(object): 38 | def __init__(self, min_size, max_size): 39 | if not isinstance(min_size, (list, tuple)): 40 | min_size = (min_size,) 41 | self.min_size = min_size 42 | self.max_size = max_size 43 | 44 | # modified from torchvision to add support for max size 45 | def get_size(self, image_size): 46 | w, h = image_size 47 | size = random.choice(self.min_size) 48 | max_size = self.max_size 49 | if max_size is not None: 50 | min_original_size = float(min((w, h))) 51 | max_original_size = float(max((w, h))) 52 | if max_original_size / min_original_size * size > max_size: 53 | size = int(round(max_size * min_original_size / max_original_size)) 54 | 55 | if (w <= h and w == size) or (h <= w and h == size): 56 | return (h, w) 57 | 58 | if w < h: 59 | ow = size 60 | oh = int(size * h / w) 61 | else: 62 | oh = size 63 | ow = int(size * w / h) 64 | 65 | return (oh, ow) 66 | 67 | def __call__(self, image, target=None, rois=None): 68 | size = self.get_size(image.size) 69 | image = F.resize(image, size) 70 | if target is not None: 71 | target = target.resize(image.size) 72 | if rois is not None: 73 | rois = rois.resize(image.size) 74 | 75 | return image, target, rois 76 | 77 | class RandomHorizontalFlip(object): 78 | def __init__(self, prob=0.5): 79 | self.prob = prob 80 | 81 | def __call__(self, image, target=None, rois=None): 82 | if random.random() < self.prob: 83 | image = F.hflip(image) 84 | if target is not None: 85 | target = target.transpose(0) 86 | if rois is not None: 87 | rois = rois.transpose(0) 88 | 89 | return image, target, rois 90 | 91 | class RandomVerticalFlip(object): 92 | def __init__(self, prob=0.5): 93 | self.prob = prob 94 | 95 | def __call__(self, image, target=None, rois=None): 96 | if random.random() < self.prob: 97 | image = F.vflip(image) 98 | if target is not None: 99 | target = target.transpose(1) 100 | if rois is not None: 101 | rois = rois.transpose(1) 102 | 103 | return image, target, rois 104 | 105 | class ColorJitter(object): 106 | def __init__(self, 107 | brightness=None, 108 | contrast=None, 109 | saturation=None, 110 | hue=None, 111 | ): 112 | self.color_jitter = torchvision.transforms.ColorJitter( 113 | brightness=brightness, 114 | contrast=contrast, 115 | saturation=saturation, 116 | hue=hue,) 117 | 118 | def __call__(self, image, target=None, rois=None): 119 | image = self.color_jitter(image) 120 | return image, target, rois 121 | 122 | class ToTensor(object): 123 | def __call__(self, image, target=None, rois=None): 124 | return F.to_tensor(image), target, rois 125 | 126 | class Normalize(object): 127 | def __init__(self, mean, std, to_bgr255=True): 128 | self.mean = mean 129 | self.std = std 130 | self.to_bgr255 = to_bgr255 131 | 132 | def __call__(self, image, target=None, rois=None): 133 | if self.to_bgr255: 134 | image = image[[2, 1, 0]] * 
255 135 | image = F.normalize(image, mean=self.mean, std=self.std) 136 | return image, target, rois 137 | 138 | class Lighting(object): 139 | """Lighting noise (AlexNet-style, PCA-based noise)""" 140 | def __init__(self, alphastd, eigval, eigvec): 141 | self.alphastd = alphastd 142 | self.eigval = eigval 143 | self.eigvec = eigvec 144 | 145 | def __call__(self, img, target=None, rois=None): 146 | if self.alphastd == 0: 147 | return img, target, rois  # keep the (image, target, rois) contract expected by Compose 148 | 149 | alpha = img.new().resize_(3).normal_(0, self.alphastd) 150 | rgb = self.eigvec.type_as(img).clone()\ 151 | .mul(alpha.view(1, 3).expand(3, 3))\ 152 | .mul(self.eigval.view(1, 3).expand(3, 3))\ 153 | .sum(1).squeeze() 154 | 155 | return img.add(rgb.view(3, 1, 1).expand_as(img)), target, rois --------------------------------------------------------------------------------