├── cvpods ├── modeling │ ├── nn_utils │ │ ├── __init__.py │ │ ├── scale_grad.py │ │ ├── feature_utils.py │ │ ├── activation_count.py │ │ ├── flop_count.py │ │ └── parameter_count.py │ ├── basenet │ │ └── __init__.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── proposal_utils.py │ │ └── rrpn.py │ ├── backbone │ │ ├── dynamic_arch │ │ │ ├── __init__.py │ │ │ └── cal_op_flops.py │ │ ├── __init__.py │ │ └── backbone.py │ ├── roi_heads │ │ ├── __init__.py │ │ └── box_head.py │ ├── losses │ │ ├── __init__.py │ │ ├── reg_l1_loss.py │ │ ├── dice_loss.py │ │ ├── label_smooth_ce_loss.py │ │ ├── circle_loss.py │ │ ├── sigmoid_focal_loss.py │ │ └── smooth_l1_loss.py │ ├── meta_arch │ │ ├── __init__.py │ │ └── imagenet.py │ ├── __init__.py │ ├── sampling.py │ └── postprocessing.py ├── utils │ ├── metrics │ │ ├── __init__.py │ │ └── accuracy.py │ ├── distributed │ │ └── __init__.py │ ├── env │ │ └── __init__.py │ ├── benchmark │ │ ├── __init__.py │ │ └── timer.py │ ├── file │ │ ├── __init__.py │ │ ├── serialize.py │ │ └── download.py │ ├── dump │ │ ├── __init__.py │ │ └── history_buffer.py │ ├── visualizer │ │ ├── __init__.py │ │ └── show.py │ ├── README.md │ ├── __init__.py │ ├── imports.py │ ├── registry.py │ └── memory.py ├── analyser │ └── tide │ │ ├── __init__.py │ │ └── errors │ │ └── qualifiers.py ├── export │ ├── __init__.py │ └── README.md ├── evaluation │ ├── registry.py │ ├── __init__.py │ └── testing.py ├── layers │ ├── csrc │ │ ├── README.md │ │ ├── tree_filter │ │ │ ├── mst.hpp │ │ │ ├── rst.hpp │ │ │ ├── bfs.hpp │ │ │ ├── boruvka.hpp │ │ │ ├── boruvka_rst.hpp │ │ │ ├── refine.hpp │ │ │ └── rst.cu │ │ ├── cuda_version.cu │ │ ├── masked_conv2d │ │ │ └── masked_conv2d.h │ │ ├── PSROIPool │ │ │ └── psroi_pool_cuda.h │ │ ├── ml_nms │ │ │ └── ml_nms.h │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ └── box_iou_rotated_cpu.cpp │ │ ├── border_align │ │ │ └── border_align.h │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ └── nms_rotated_cpu.cpp │ │ ├── SwapAlign2Nat │ │ │ └── SwapAlign2Nat.h │ │ ├── sigmoid_focal_loss │ │ │ └── SigmoidFocalLoss.h │ │ ├── lars │ │ │ ├── adaptive_lr_cuda.cu │ │ │ └── adaptive_lr.h │ │ ├── vision_detectron.cpp │ │ └── ROIAlignRotated │ │ │ └── ROIAlignRotated.h │ ├── shape_spec.py │ ├── rotated_boxes.py │ ├── activation_funcs.py │ ├── border_align.py │ ├── __init__.py │ ├── deform_conv_with_off.py │ ├── swap_align2nat.py │ ├── psroi_pool.py │ ├── position_encoding.py │ └── roi_align_rotated.py ├── data │ ├── registry.py │ ├── transforms │ │ └── __init__.py │ ├── samplers │ │ ├── __init__.py │ │ └── grouped_batch_sampler.py │ ├── __init__.py │ ├── datasets │ │ └── __init__.py │ └── wrapped_dataset.py ├── engine │ ├── __init__.py │ └── predictor.py ├── __init__.py ├── checkpoint │ └── __init__.py ├── structures │ └── __init__.py ├── solver │ ├── __init__.py │ └── scheduler_builder.py └── configs │ ├── yolo_config.py │ ├── panoptic_seg_config.py │ ├── ssd_config.py │ ├── segm_config.py │ ├── rcnn_fpn_config.py │ ├── base_classification_config.py │ ├── dynamic_routing_config.py │ ├── fcos_config.py │ ├── keypoint_config.py │ ├── pointrend_config.py │ ├── solo_config.py │ ├── retinanet_config.py │ └── efficientdet_config.py ├── demo └── introduction.png ├── .pre-commit-config.yaml ├── tools ├── dev │ ├── README.md │ ├── run_instant_tests.sh │ ├── linter.sh │ ├── run_inference_tests.sh │ └── parse_results.sh ├── rm_files.py └── caffe2_converter.py ├── datasets ├── prepare_for_tests.sh └── README.md ├── .gitignore ├── cvpods_playground ├── fcos.res50.1x 
│ ├── net.py │ └── config.py ├── fcos.res50.1x.fix.d4 │ ├── net.py │ └── config.py ├── fcos.res50.1x.fix.d8 │ ├── net.py │ └── config.py ├── fcos.res50.1x.dynamic.d4.lambda-0_1 │ ├── net.py │ └── config.py ├── fcos.res50.1x.dynamic.d8.lambda-0 │ ├── net.py │ └── config.py ├── fcos.res50.1x.dynamic.d8.lambda-0_1 │ ├── net.py │ └── config.py └── fcos.res50.1x.dynamic.d8.lambda-0_8 │ ├── net.py │ └── config.py ├── setup.cfg └── .clang-format /cvpods/modeling/nn_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cvpods/utils/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy -------------------------------------------------------------------------------- /cvpods/modeling/basenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .basenet import basenet 2 | -------------------------------------------------------------------------------- /demo/introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGrove/DynamicHead/HEAD/demo/introduction.png -------------------------------------------------------------------------------- /cvpods/utils/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .comm import * 5 | -------------------------------------------------------------------------------- /cvpods/utils/env/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .collect_env import * 5 | from .env import * 6 | -------------------------------------------------------------------------------- /cvpods/utils/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .benchmark import * 5 | from .timer import * 6 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .rpn import RPN 3 | -------------------------------------------------------------------------------- /cvpods/analyser/tide/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | 3 | from . 
import datasets 4 | from .errors.qualifiers import * 5 | from .quantify import * 6 | -------------------------------------------------------------------------------- /cvpods/export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cvpods/evaluation/registry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from cvpods.utils import Registry 5 | 6 | EVALUATOR = Registry("evaluator") 7 | -------------------------------------------------------------------------------- /cvpods/utils/file/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .download import * 5 | from .file_io import * 6 | from .serialize import * 7 | -------------------------------------------------------------------------------- /cvpods/utils/dump/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .events import * 5 | from .history_buffer import * 6 | from .logger import * 7 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /cvpods/utils/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .colormap import * 5 | from .video_visualizer import * 6 | from .visualizer import * 7 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/dynamic_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # build for dynamic networks 3 | # @Author: yanwei.li 4 | 5 | from .dynamic_backbone import DynamicNetwork, build_dynamic_backbone 6 | -------------------------------------------------------------------------------- /cvpods/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /cvpods/data/registry.py: -------------------------------------------------------------------------------- 1 | from cvpods.utils import Registry 2 | 3 | DATASETS = Registry("datasets") 4 | TRANSFORMS = Registry("transforms") 5 | SAMPLERS = Registry("samplers") 6 | PATH_ROUTES = Registry("path_routes") 7 | -------------------------------------------------------------------------------- /cvpods/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .transform import * 3 | from .transform_gen import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/mst.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | extern at::Tensor mst_forward( 5 | const at::Tensor & edge_index_tensor, 6 | const at::Tensor & edge_weight_tensor, 7 | int vertex_count); 8 | 9 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/rst.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | extern at::Tensor rst_forward( 5 | const at::Tensor & edge_index_tensor, 6 | const at::Tensor & edge_weight_tensor, 7 | int vertex_count); 8 | 9 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | #include <cuda_runtime_api.h> 4 | 5 | namespace cvpods { 6 | int get_cudart_version() { 7 | return CUDART_VERSION; 8 | } 9 | } // namespace cvpods 10 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/bfs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | extern std::tuple<at::Tensor, at::Tensor, at::Tensor> 5 | bfs_forward( 6 | const at::Tensor & edge_index_tensor, 7 | int max_adj_per_node 8 | ); 9 | 10 | -------------------------------------------------------------------------------- /cvpods/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .roi_heads import Res5ROIHeads, ROIHeads, StandardROIHeads, select_foreground_proposals 3 | from .rotated_fast_rcnn import RROIHeads 4 | 5 | from . import cascade_rcnn # isort:skip 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | # - repo: https://github.com/ambv/black 3 | # rev: stable 4 | # hooks: 5 | # - id: black 6 | # language_version: python3.6 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v1.2.3 9 | hooks: 10 | - id: flake8 11 | -------------------------------------------------------------------------------- /cvpods/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .hooks import * 4 | from .launch import * 5 | from .predictor import * 6 | from .setup import * 7 | from .trainer import * 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /tools/dev/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Some scripts for developers to use, including: 3 | 4 | - `linter.sh`: lint the codebase before commit 5 | - `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 6 | Note that these tests require 2 GPUs.
7 | - `parse_results.sh`: parse results from a log file. 8 | -------------------------------------------------------------------------------- /cvpods/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils import setup_environment 4 | 5 | setup_environment() 6 | 7 | # This line will be programmatically read/written by setup.py. 8 | # Leave it at the bottom of this file and don't touch it. 9 | __version__ = "0.1" 10 | -------------------------------------------------------------------------------- /cvpods/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED  # register the handler 7 | from .checkpoint import Checkpointer, PeriodicCheckpointer 8 | from .detection_checkpoint import DetectionCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /cvpods/export/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains code to prepare a detectron2 model for deployment. 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. 4 | 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. 6 | 7 | 8 | ### Acknowledgements 9 | 10 | Thanks to the Mobile Vision team at Facebook for developing the conversion tools. 11 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/scale_grad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from torch.autograd.function import Function 4 | 5 | 6 | class _ScaleGradient(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input, scale): 10 | ctx.scale = scale 11 | return input 12 | 13 | @staticmethod 14 | def backward(ctx, grad_output): 15 | return grad_output * ctx.scale, None 16 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .dice_loss import dice_loss 2 | from .focal_loss import ( 3 | sigmoid_focal_loss, 4 | sigmoid_focal_loss_jit, 5 | sigmoid_focal_loss_star, 6 | sigmoid_focal_loss_star_jit 7 | ) 8 | from .iou_loss import IOULoss, iou_loss 9 | from .label_smooth_ce_loss import LabelSmoothCELoss, label_smooth_ce_loss 10 | from .reg_l1_loss import reg_l1_loss 11 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss_cuda 12 | from .smooth_l1_loss import smooth_l1_loss 13 | -------------------------------------------------------------------------------- /cvpods/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_ioa, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, polygons_to_bitmask, rasterize_polygons_within_box 7 | from .rotated_boxes import RotatedBoxes 8 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /cvpods/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler 5 | 6 | __all__ = [ 7 | "GroupedBatchSampler", 8 | "TrainingSampler", 9 | "InferenceSampler", 10 | "RepeatFactorTrainingSampler", 11 | "DistributedSampler", 12 | "GroupSampler", 13 | "DistributedGroupSampler", 14 | ] 15 | -------------------------------------------------------------------------------- /cvpods/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import ( 3 | build_dataset, 4 | build_detection_test_loader, 5 | build_detection_train_loader, 6 | build_transform_gen 7 | ) 8 | from .registry import DATASETS, SAMPLERS, TRANSFORMS 9 | from .wrapped_dataset import ConcatDataset, RepeatDataset 10 | 11 | from . import transforms # isort:skip 12 | # ensure the builtin datasets are registered 13 | from . import datasets, samplers # isort:skip 14 | 15 | 16 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 17 | -------------------------------------------------------------------------------- /datasets/prepare_for_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Download some files needed for running tests. 5 | 6 | cd "${0%/*}" 7 | 8 | BASE=https://dl.fbaipublicfiles.com/detectron2 9 | mkdir -p coco/annotations 10 | 11 | for anno in instances_val2017_100 \ 12 | person_keypoints_val2017_100 \ 13 | instances_minival2014_100 \ 14 | person_keypoints_minival2014_100; do 15 | 16 | dest=coco/annotations/$anno.json 17 | [[ -s $dest ]] && { 18 | echo "$dest exists. Skipping ..." 
19 | } || { 20 | wget $BASE/annotations/coco/$anno.json -O $dest 21 | } 22 | done 23 | -------------------------------------------------------------------------------- /cvpods/utils/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | @torch.no_grad() 5 | def accuracy(output, target, topk=(1,)): 6 | """Computes the precision@k for the specified values of k""" 7 | if target.numel() == 0: 8 | return [torch.zeros([], device=output.device)] 9 | maxk = max(topk) 10 | batch_size = target.size(0) 11 | 12 | _, pred = output.topk(maxk, 1, True, True) 13 | pred = pred.t() 14 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 15 | 16 | res = [] 17 | for k in topk: 18 | correct_k = correct[:k].view(-1).float().sum(0) 19 | res.append(correct_k.mul_(100.0 / batch_size)) 20 | return res 21 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/masked_conv2d/masked_conv2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <ATen/ATen.h> 3 | 4 | namespace cvpods { 5 | 6 | void masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx, 7 | const at::Tensor mask_w_idx, at::Tensor col, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w); 10 | 11 | void masked_col2im_forward(const at::Tensor col, const at::Tensor mask_h_idx, 12 | const at::Tensor mask_w_idx, at::Tensor im, int height, 13 | int width, int channels); 14 | 15 | } // namespace cvpods 16 | -------------------------------------------------------------------------------- /cvpods/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lars import LARS 4 | from .optimizer_builder import ( 5 | OPTIMIZER_BUILDER, 6 | AdamBuilder, 7 | AdamWBuilder, 8 | OptimizerBuilder, 9 | SGDBuilder, 10 | SGDGateLRBuilder 11 | ) 12 | from .scheduler_builder import ( 13 | SCHEDULER_BUILDER, 14 | BaseSchedulerBuilder, 15 | LambdaLRBuilder, 16 | OneCycleLRBuilder, 17 | PolyLRBuilder, 18 | WarmupCosineLR, 19 | WarmupCosineLRBuilder, 20 | WarmupMultiStepLR, 21 | WarmupMultiStepLRBuilder 22 | ) 23 | 24 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | *.jpg 7 | *.png 8 | *.txt 9 | 10 | # compilation and distribution 11 | __pycache__ 12 | _ext 13 | *.pyc 14 | *.so 15 | cvpods.egg-info/ 16 | build/ 17 | dist/ 18 | wheels/ 19 | 20 | tools/cvpods_test 21 | tools/cvpods_train 22 | 23 | # pytorch/python/numpy formats 24 | *.pth 25 | *.pkl 26 | *.npy 27 | 28 | # ipython/jupyter notebooks 29 | *.ipynb 30 | **/.ipynb_checkpoints/ 31 | 32 | # Editor temporaries 33 | *.swn 34 | *.swo 35 | *.swp 36 | *~ 37 | 38 | # temporary files 39 | *.DS_Store 40 | 41 | # IDE editor settings 42 | .idea 43 | .vscode/ 44 | 45 | # project dirs 46 | /cvpods/model_zoo/configs 47 | /datasets 48 | /models 49 | /playground 50 | -------------------------------------------------------------------------------- /cvpods/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Megvii, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | from .citypersons import CityPersonsDataset 4 | from .cityscapes import CityScapesDataset 5 | from .coco import COCODataset 6 | from .crowdhuman import CrowdHumanDataset 7 | from .imagenet import ImageNetDataset 8 | from .lvis import LVISDataset 9 | from .objects365 import Objects365Dataset 10 | from .voc import VOCDataset 11 | from .widerface import WiderFaceDataset 12 | 13 | __all__ = [ 14 | "COCODataset", 15 | "VOCDataset", 16 | "CityScapesDataset", 17 | "ImageNetDataset", 18 | "WiderFaceDataset", 19 | "LVISDataset", 20 | "CityPersonsDataset", 21 | "Objects365Dataset", 22 | "CrowdHumanDataset", 23 | ] 24 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/reg_l1_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from cvpods.modeling.nn_utils.feature_utils import gather_feature 7 | 8 | 9 | class reg_l1_loss(nn.Module): 10 | 11 | def __init__(self): 12 | super(reg_l1_loss, self).__init__() 13 | 14 | def forward(self, output, mask, index, target): 15 | pred = gather_feature(output, index, use_transform=True) 16 | mask = mask.unsqueeze(dim=2).expand_as(pred).float() 17 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 18 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 19 | loss = loss / (mask.sum() + 1e-4) 20 | return loss 21 | -------------------------------------------------------------------------------- /cvpods/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/boruvka.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // a structure to represent a weighted edge in graph 4 | struct Edge 5 | { 6 | int src, dest; 7 | float weight; 8 | }; 9 | 10 | // a structure to represent a connected, undirected 11 | // and weighted graph as a collection of edges. 12 | struct Graph 13 | { 14 | // V-> Number of vertices, E-> Number of edges 15 | int V, E; 16 | 17 | // graph is represented as an array of edges. 18 | // Since the graph is undirected, the edge 19 | // from src to dest is also an edge from dest 20 | // to src. Both are counted as 1 edge here.
21 | Edge* edge; 22 | }; 23 | 24 | extern struct Graph* createGraph(int V, int E); 25 | extern void boruvkaMST(struct Graph* graph, int * edge_out); 26 | 27 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/boruvka_rst.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // a structure to represent a weighted edge in graph 4 | struct Edge 5 | { 6 | int src, dest; 7 | float weight; 8 | }; 9 | 10 | // a structure to represent a connected, undirected 11 | // and weighted graph as a collection of edges. 12 | struct Graph 13 | { 14 | // V-> Number of vertices, E-> Number of edges 15 | int V, E; 16 | 17 | // graph is represented as an array of edges. 18 | // Since the graph is undirected, the edge 19 | // from src to dest is also an edge from dest 20 | // to src. Both are counted as 1 edge here. 21 | Edge* edge; 22 | }; 23 | 24 | extern struct Graph* create_graph(int V, int E); 25 | extern void boruvka_rst(struct Graph* graph, int * edge_out); 26 | 27 | -------------------------------------------------------------------------------- /cvpods/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | # import torch 5 | from cvpods import _C 6 | 7 | 8 | def pairwise_iou_rotated(boxes1, boxes2): 9 | """ 10 | Return intersection-over-union (Jaccard index) of boxes. 11 | 12 | Both sets of boxes are expected to be in 13 | (x_center, y_center, width, height, angle) format. 14 | 15 | Arguments: 16 | boxes1 (Tensor[N, 5]) 17 | boxes2 (Tensor[M, 5]) 18 | 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | return _C.box_iou_rotated(boxes1, boxes2) 24 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/feature_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | 5 | def gather_feature(fmap, index, mask=None, use_transform=False): 6 | """ 7 | Used for CenterNet 8 | """ 9 | if use_transform: 10 | # change a (N, C, H, W) tensor to (N, HxW, C) shape 11 | batch, channel = fmap.shape[:2] 12 | fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous() 13 | 14 | dim = fmap.size(-1) 15 | index = index.unsqueeze(len(index.shape)).expand(*index.shape, dim) 16 | fmap = fmap.gather(dim=1, index=index) 17 | if mask is not None: 18 | # this part is not called in Res18 dcn COCO 19 | mask = mask.unsqueeze(2).expand_as(fmap) 20 | fmap = fmap[mask] 21 | fmap = fmap.reshape(-1, dim) 22 | return fmap 23 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/PSROIPool/psroi_pool_cuda.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cvpods { 9 | at::Tensor psroi_pooling_forward_cuda( 10 | at::Tensor& features, 11 | at::Tensor& rois, 12 | at::Tensor& mappingchannel, 13 | const int pooled_height, 14 | const int pooled_width, 15 | const float spatial_scale, 16 | const int group_size, 17 | const int output_dim); 18 | 19 | at::Tensor psroi_pooling_backward_cuda( 20 |
at::Tensor& top_grad, 21 | at::Tensor& rois, 22 | at::Tensor& mappingchannel, 23 | const int batch_size, 24 | const int bottom_dim, 25 | const int bottom_height, 26 | const int bottom_width, 27 | const float spatial_scale); 28 | } 29 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # import all the meta_arch, so they will be registered 5 | 6 | from .borderdet import BorderDet 7 | from .centernet import CenterNet 8 | from .dynamic4seg import DynamicNet4Seg 9 | from .efficientdet import EfficientDet 10 | from .fcn import FCNHead 11 | from .fcos import FCOS, FCOSHead, FCOSDynamicHead, FCOSFixHead 12 | from .free_anchor import FreeAnchor 13 | from .panoptic_fpn import PanopticFPN 14 | from .pointrend import CoarseMaskHead, PointRendROIHeads, PointRendSemSegHead, StandardPointHead 15 | from .rcnn import GeneralizedRCNN, ProposalNetwork 16 | from .reppoints import RepPoints 17 | from .retinanet import RetinaNet 18 | from .semantic_seg import SemanticSegmentor, SemSegFPNHead 19 | from .ssd import SSD 20 | from .tensormask import TensorMask 21 | from .yolov3 import YOLOv3 22 | -------------------------------------------------------------------------------- /cvpods/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | from cvpods.layers import ShapeSpec 5 | 6 | # from .anchor_generator import build_anchor_generator 7 | from .backbone import FPN, Backbone, ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 8 | from .meta_arch import GeneralizedRCNN, PanopticFPN, ProposalNetwork, RetinaNet, SemanticSegmentor 9 | from .postprocessing import detector_postprocess 10 | from .roi_heads import ROIHeads, StandardROIHeads 11 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA, TTAWarper 12 | 13 | _EXCLUDE = {"torch", "ShapeSpec"} 14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 15 | 16 | assert ( 17 | torch.Tensor([1]) == torch.Tensor([2]) 18 | ).dtype == torch.bool, ("Your Pytorch is too old. " 19 | "Please update to contain https://github.com/pytorch/pytorch/pull/21113") 20 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace cvpods { 6 | #ifdef WITH_CUDA 7 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 8 | #endif 9 | 10 | 11 | inline at::Tensor ml_nms(const at::Tensor& dets, 12 | const at::Tensor& scores, 13 | const at::Tensor& labels, 14 | const float threshold) { 15 | 16 | if (dets.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | // TODO raise error if not compiled with CUDA 19 | if (dets.numel() == 0) 20 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 21 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 22 | return ml_nms_cuda(b, threshold); 23 | #else 24 | AT_ERROR("Not compiled with GPU support"); 25 | #endif 26 | } 27 | AT_ERROR("CPU version not implemented"); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from .backbone import Backbone 4 | from .bifpn import BiFPN, build_efficientnet_bifpn_backbone 5 | from .darknet import Darknet, build_darknet_backbone 6 | from .dynamic_arch import DynamicNetwork, build_dynamic_backbone 7 | from .efficientnet import EfficientNet, build_efficientnet_backbone 8 | from .fpn import ( 9 | FPN, 10 | _assert_strides_are_log2_contiguous, 11 | build_retinanet_mobilenetv2_fpn_p5_backbone, 12 | build_retinanet_resnet_fpn_p5_backbone 13 | ) 14 | from .mobilenet import InvertedResBlock, MobileNetV2, MobileStem, build_mobilenetv2_backbone 15 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 16 | # TODO can expose more resnet blocks after careful consideration 17 | from .shufflenet import ShuffleNetV2, ShuffleV2Block, build_shufflenetv2_backbone 18 | from .snet import SNet, build_snet_backbone 19 | from .transformer import Transformer -------------------------------------------------------------------------------- /tools/dev/run_instant_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="instant_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 10 | CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) 11 | fi 12 | 13 | echo "========================================================================" 14 | echo "Configs to run:" 15 | echo "${CFG_LIST[@]}" 16 | echo "========================================================================" 17 | 18 | for cfg in "${CFG_LIST[@]}"; do 19 | echo "========================================================================" 20 | echo "Running $cfg ..." 21 | echo "========================================================================" 22 | $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ 23 | SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ 24 | OUTPUT_DIR "$OUTPUT" 25 | rm -rf "$OUTPUT" 26 | done 27 | 28 | -------------------------------------------------------------------------------- /tools/dev/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 3 | 4 | # Run this script at project root by "./dev/linter.sh" before you commit 5 | 6 | { 7 | black --version | grep "19.3b0" > /dev/null 8 | } || { 9 | echo "Linter requires black==19.3b0 !" 10 | exit 1 11 | } 12 | 13 | set -v 14 | 15 | echo "Running isort ..." 16 | isort -y --multi-line 3 --trailing-comma -sp . --skip datasets --skip docs --skip-glob '*/__init__.py' --atomic 17 | 18 | echo "Running black ..." 19 | black -l 100 . 20 | 21 | echo "Running flake8 ..." 22 | if [ -x "$(command -v flake8-3)" ]; then 23 | flake8-3 . 24 | else 25 | python3 -m flake8 . 26 | fi 27 | 28 | # echo "Running mypy ..." 29 | # Pytorch does not have enough type annotations 30 | # mypy cvpods/solver cvpods/structures cvpods/config 31 | 32 | echo "Running clang-format ..." 33 | find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i 34 | 35 | command -v arc > /dev/null && arc lint 36 | -------------------------------------------------------------------------------- /cvpods/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_evaluator 3 | from .citypersons_evaluation import CityPersonsEvaluator 4 | from .cityscapes_evaluation import CityscapesEvaluator 5 | from .classification_evaluation import ClassificationEvaluator 6 | from .coco_evaluation import COCOEvaluator 7 | from .crowdhuman_evaluation import CrowdHumanEvaluator 8 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 9 | from .lvis_evaluation import LVISEvaluator 10 | from .panoptic_evaluation import COCOPanopticEvaluator 11 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 12 | from .registry import EVALUATOR 13 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 14 | from .sem_seg_evaluation import SemSegEvaluator 15 | from .testing import print_csv_format, verify_results 16 | from .widerface_evaluation import WiderFaceEvaluator 17 | 18 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 19 | -------------------------------------------------------------------------------- /cvpods/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .benchmark import Timer, benchmark, timeit 5 | from .distributed import comm 6 | from .dump import ( 7 | CommonMetricPrinter, 8 | EventStorage, 9 | EventWriter, 10 | HistoryBuffer, 11 | JSONWriter, 12 | TensorboardXWriter, 13 | create_small_table, 14 | create_table_with_header, 15 | get_event_storage, 16 | log_every_n, 17 | log_every_n_seconds, 18 | log_first_n, 19 | setup_logger 20 | ) 21 | from .env import collect_env_info, seed_all_rng, setup_custom_environment, setup_environment 22 | from .file import PathHandler, PathManager, PicklableWrapper, download, file_lock, get_cache_dir 23 | from .imports import dynamic_import 24 | from .memory import retry_if_cuda_oom 25 | from .metrics import accuracy 26 | from .registry import Registry 27 | from .visualizer import ColorMode, VideoVisualizer, VisImage, Visualizer, colormap, random_color 28 | 29 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 30 | -------------------------------------------------------------------------------- /cvpods/utils/imports.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : imports.py 5 | @Time : 2020/05/07 23:59:19 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:59:19 10 | ''' 11 | 12 | import imp 13 | 14 | 15 | def dynamic_import(config_name, config_path): 16 | """ 17 | Dynamically import a project. 18 | 19 | Args: 20 | config_name (str): module name 21 | config_path (str): the dir that contains the .py with this module. 22 | 23 | Examples:: 24 | >>> root = "/path/to/retinanet/" 25 | >>> project = root + "retinanet.res50.fpn.coco.800size.1x.mrcnn_sigmoid" 26 | >>> cfg = dynamic_import("config", project).config 27 | >>> net = dynamic_import("net", project) 28 | """ 29 | fp, pth, desc = imp.find_module(config_name, [config_path]) 30 | 31 | return imp.load_module(config_name, fp, pth, desc) 32 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1, boxes2); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1, boxes2); 33 | } 34 | 35 | } // namespace cvpods 36 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def dice_loss(input, target): 5 | r""" 6 | Dice loss defined in the V-Net paper as: 7 | 8 | Loss_dice = 1 - D 9 | 10 | 2 * sum(p_i * g_i) 11 | D = ------------------------------ 12 | sum(p_i ^ 2) + sum(g_i ^ 2) 13 | 14 | where the sums run over the N mask pixels (i = 1 ... N), of the predicted binary segmentation 15 | pixel p_i ∈ P and the ground truth binary pixel g_i ∈ G. 16 | 17 | Args: 18 | input (Tensor): predicted binary mask, each pixel value should be in range [0, 1]. 19 | target (Tensor): ground truth binary mask. 20 | 21 | Returns: 22 | Tensor: dice loss. 23 | """ 24 | assert input.shape[-2:] == target.shape[-2:] 25 | input = input.view(input.size(0), -1).float() 26 | target = target.view(target.size(0), -1).float() 27 | 28 | d = ( 29 | 2 * torch.sum(input * target, dim=1) 30 | ) / ( 31 | torch.sum(input * input, dim=1) + torch.sum(target * target, dim=1) + 1e-4 32 | ) 33 | 34 | return 1 - d 35 | -------------------------------------------------------------------------------- /cvpods/utils/file/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable; note that it uses 8 | heavyweight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/border_align/border_align.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | namespace cvpods { 7 | 8 | at::Tensor border_align_cuda_forward( 9 | const at::Tensor& feature, 10 | const at::Tensor& boxes, 11 | const at::Tensor& wh, 12 | const int pool_size); 13 | 14 | 15 | at::Tensor border_align_cuda_backward( 16 | const at::Tensor& gradOutput, 17 | const at::Tensor& feature, 18 | const at::Tensor& boxes, 19 | const at::Tensor& wh, 20 | const int pool_size); 21 | 22 | 23 | at::Tensor BorderAlign_Forward( 24 | const at::Tensor& feature, 25 | const at::Tensor& boxes, 26 | const at::Tensor& wh, 27 | const int pool_size) { 28 | return border_align_cuda_forward(feature, boxes, wh, pool_size); 29 | } 30 | 31 | 32 | at::Tensor BorderAlign_Backward( 33 | const at::Tensor& gradOutput, 34 | const at::Tensor& feature, 35 | const at::Tensor& boxes, 36 | const at::Tensor& wh, 37 | const int pool_size) { 38 | return border_align_cuda_backward(gradOutput, feature, boxes, wh, pool_size); 39 | } 40 | 41 | } // namespace cvpods -------------------------------------------------------------------------------- /cvpods/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda(dets, scores, iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_rotated_cpu(dets, scores, iou_threshold); 36 | } 37 | 38 | } // namespace cvpods 39 | -------------------------------------------------------------------------------- /cvpods/layers/activation_funcs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Ref: 6 | # https://medium.com/the-artificial-impostor/more-memory-efficient-swish-activation-function-e07c22c12a76 7 | class SwishImplementation(torch.autograd.Function): 8 | """ 9 | Swish activation function memory-efficient implementation. 10 | 11 | This implementation explicitly processes the gradient: it keeps a copy of the input tensor, 12 | and uses it to calculate the gradient during the back-propagation phase. 13 | """ 14 | @staticmethod 15 | def forward(ctx, i): 16 | result = i * torch.sigmoid(i) 17 | ctx.save_for_backward(i) 18 | return result 19 | 20 | @staticmethod 21 | def backward(ctx, grad_output): 22 | i = ctx.saved_tensors[0] 23 | sigmoid_i = torch.sigmoid(i) 24 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 25 | 26 | 27 | class MemoryEfficientSwish(nn.Module): 28 | def forward(self, x): 29 | return SwishImplementation.apply(x) 30 | 31 | 32 | class Swish(nn.Module): 33 | """ 34 | Implement the Swish activation function. 35 | See: https://arxiv.org/abs/1710.05941 for more details. 36 | """ 37 | def forward(self, x): 38 | return x * torch.sigmoid(x) 39 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d4/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSFixHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSFixHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d8/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSFixHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSFixHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d4.lambda-0_1/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_1/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_8/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 3 4 | balanced_wrapping = True 5 | skip = tools/test_net.py, tools/train_net.py 6 | known_standard_library = setuptools,mock 7 | known_myself = cvpods 8 | known_third_party = appdirs,colorama,easydict,portalocker,yacs,termcolor,tabulate,tqdm,psutil,pkg_resources 9 | known_data_processing = cv2,numpy,scipy,PIL,matplotlib 10 | known_datasets = pycocotools,cityscapesscripts,lvis 11 | known_deeplearning = torch,torchvision,caffe2,onnx 12 | sections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER 13 | no_lines_before=STDLIB,THIRDPARTY,datasets 14 | default_section = FIRSTPARTY 15 | 16 | [flake8] 17 | ignore = W503, E221 18 | max-line-length = 100 19 | max-complexity = 18 20 | select = B,C,E,F,W,T4,B9 21 | exclude = build,__init__.py 22 | 23 | [pep8] 24 | ignore = W503, E203, E221, E402, E741, C901, W504, E731, F541, E722 25 | max-line-length = 100 26 | 27 | [yapf] 28 | based_on_style = pep8 29 | spaces_before_comment = 4 30 | split_before_logical_operator = true 31 | 32 | [mypy] 33 | python_version=3.6 34 | ignore_missing_imports = True 35 | warn_unused_configs = True 36 | disallow_untyped_defs = True 37 | check_untyped_defs = True 38 | warn_unused_ignores = True 39 | warn_redundant_casts = True 40 | show_column_numbers = True 41 | follow_imports = silent 42 | allow_redefinition = True 43 | ; Require all functions to be annotated 44 | disallow_incomplete_defs = True 45 | -------------------------------------------------------------------------------- /cvpods/configs/yolo_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : yolo_config.py 5 | @Time : 2020/05/07 23:55:49 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:55:49 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | PIXEL_MEAN=(0.485, 0.456, 0.406), 17 | PIXEL_STD=(0.229, 0.224, 0.225), 18 | DARKNET=dict( 19 | DEPTH=53, 20 | STEM_OUT_CHANNELS=32, 21 | WEIGHTS="s3://generalDetection/cvpods/ImageNetPretrained/custom/darknet53.mix.pth", 22 | OUT_FEATURES=["dark3", "dark4", "dark5"] 23 | ), 24 | YOLO=dict( 25 | CLASSES=80, 26 | IN_FEATURES=["dark3", "dark4", "dark5"], 27 | ANCHORS=[ 28 | [[116, 90], [156, 198], [373, 326]], 29 | [[30, 61], [62, 45], [42, 119]], 30 | [[10, 13], [16, 30], [33, 23]], 31 | ], 32 
| CONF_THRESHOLD=0.01, # TEST 33 | NMS_THRESHOLD=0.5, 34 | IGNORE_THRESHOLD=0.7, 35 | ), 36 | ), 37 | ) 38 | 39 | 40 | class YOLO3Config(BaseDetectionConfig): 41 | def __init__(self): 42 | super(YOLO3Config, self).__init__() 43 | self._register_configuration(_config_dict) 44 | 45 | 46 | config = YOLO3Config() 47 | -------------------------------------------------------------------------------- /tools/dev/run_inference_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="inference_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | 10 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 11 | CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) 12 | fi 13 | 14 | echo "========================================================================" 15 | echo "Configs to run:" 16 | echo "${CFG_LIST[@]}" 17 | echo "========================================================================" 18 | 19 | 20 | for cfg in "${CFG_LIST[@]}"; do 21 | echo "========================================================================" 22 | echo "Running $cfg ..." 23 | echo "========================================================================" 24 | $BIN \ 25 | --eval-only \ 26 | --num-gpus $NUM_GPUS \ 27 | --config-file "$cfg" \ 28 | OUTPUT_DIR $OUTPUT 29 | rm -rf $OUTPUT 30 | done 31 | 32 | 33 | echo "========================================================================" 34 | echo "Running demo.py ..." 35 | echo "========================================================================" 36 | DEMO_BIN="python demo/demo.py" 37 | COCO_DIR=datasets/coco/val2014 38 | mkdir -pv $OUTPUT 39 | 40 | set -v 41 | 42 | $DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ 43 | --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT 44 | rm -rf $OUTPUT 45 | -------------------------------------------------------------------------------- /cvpods/analyser/tide/errors/qualifiers.py: -------------------------------------------------------------------------------- 1 | # Defines qualifiers like "Extra small box" 2 | 3 | 4 | def _area(x): 5 | return x["bbox"][2] * x["bbox"][3] 6 | 7 | 8 | def _ar(x): 9 | return x["bbox"][2] / x["bbox"][3] 10 | 11 | 12 | class Qualifier: 13 | """ 14 | Creates a qualifier with the given name. 15 | 16 | test_func should be a callable object (e.g., lambda) that 17 | takes in as input an annotation object (either a ground truth or prediction) 18 | and returns whether or not that object qualifies (i.e., a bool). 
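    Example (illustrative sketch; ``x`` is any annotation dict carrying a
    COCO-style "bbox" entry of [x, y, w, h]):

        >>> small = Qualifier("Small", lambda x: _area(x) <= 32 ** 2)
        >>> small.test({"bbox": [0, 0, 10, 10]})  # area 100 <= 1024
        True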
19 | """ 20 | 21 | def __init__(self, name: str, test_func: object): 22 | self.test = test_func 23 | self.name = name 24 | 25 | # This is horrible, but I like it 26 | def _make_error_func(self, error_type): 27 | return ( 28 | ( 29 | lambda err: isinstance(err, error_type) 30 | and (self.test(err.gt) if hasattr(err, "gt") else self.test(err.pred)) 31 | ) 32 | if self.test is not None 33 | else (lambda err: isinstance(err, error_type)) 34 | ) 35 | 36 | 37 | AREA = [ 38 | Qualifier("Small", lambda x: _area(x) <= 32 ** 2), 39 | Qualifier("Medium", lambda x: 32 ** 2 < _area(x) <= 96 ** 2), 40 | Qualifier("Large", lambda x: 96 ** 2 < _area(x)), 41 | ] 42 | 43 | ASPECT_RATIO = [ 44 | Qualifier("Tall", lambda x: _ar(x) <= 0.75), 45 | Qualifier("Square", lambda x: 0.75 < _ar(x) <= 1.33), 46 | Qualifier("Wide", lambda x: 1.33 < _ar(x)), 47 | ] 48 | -------------------------------------------------------------------------------- /cvpods/layers/border_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | class BorderAlignFunc(Function): 10 | @staticmethod 11 | def forward(ctx, input, boxes, wh, pool_size): 12 | output = _C.border_align_forward(input, boxes, wh, pool_size) 13 | ctx.pool_size = pool_size 14 | ctx.save_for_backward(input, boxes, wh) 15 | return output 16 | 17 | @staticmethod 18 | @once_differentiable 19 | def backward(ctx, grad_output): 20 | pool_size = ctx.pool_size 21 | input, boxes, wh = ctx.saved_tensors 22 | grad_input = _C.border_align_backward( 23 | grad_output, input, boxes, wh, pool_size) 24 | return grad_input, None, None, None 25 | 26 | 27 | border_align = BorderAlignFunc.apply 28 | 29 | 30 | class BorderAlign(nn.Module): 31 | def __init__(self, pool_size): 32 | super(BorderAlign, self).__init__() 33 | self.pool_size = pool_size 34 | 35 | def forward(self, feature, boxes): 36 | feature = feature.contiguous() 37 | boxes = boxes.contiguous() 38 | wh = (boxes[:, :, 2:] - boxes[:, :, :2]).contiguous() 39 | output = border_align(feature, boxes, wh, self.pool_size) 40 | return output 41 | 42 | def __repr__(self): 43 | tmpstr = self.__class__.__name__ 44 | return tmpstr 45 | -------------------------------------------------------------------------------- /cvpods/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from .activation_funcs import MemoryEfficientSwish, Swish 3 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm, get_activation, get_norm 4 | from .deform_conv import DeformConv, ModulatedDeformConv 5 | from .deform_conv_with_off import DeformConvWithOff, ModulatedDeformConvWithOff 6 | from .mask_ops import paste_masks_in_image 7 | from .nms import ( 8 | batched_nms, 9 | batched_nms_rotated, 10 | batched_softnms, 11 | batched_softnms_rotated, 12 | cluster_nms, 13 | generalized_batched_nms, 14 | matrix_nms, 15 | ml_nms, 16 | nms, 17 | nms_rotated, 18 | softnms, 19 | softnms_rotated 20 | ) 21 | from .position_encoding import position_encoding_dict 22 | from .roi_align import ROIAlign, roi_align 23 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 24 | from .shape_spec import ShapeSpec 25 | from .swap_align2nat import SwapAlign2Nat, swap_align2nat 26 | from .wrappers import ( 27 | BatchNorm2d, 28 | Conv2d, 29 | Conv2dSamePadding, 30 | ConvTranspose2d, 31 | MaxPool2dSamePadding, 32 | SeparableConvBlock, 33 | cat, 34 | interpolate 35 | ) 36 | from .masked_conv import MaskedConv2d, masked_conv2d 37 | from .tree_filter_v2 import TreeFilterV2 38 | from .dynamic_conv import DynamicConv2D, DynamicScale, DynamicBottleneck 39 | from .fix_conv import Bottleneck, ScaleConv2d 40 | 41 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 42 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace cvpods { 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor SwapAlign2Nat_forward_cuda( 9 | const at::Tensor& X, 10 | const int lambda_val, 11 | const float pad_val); 12 | 13 | at::Tensor SwapAlign2Nat_backward_cuda( 14 | const at::Tensor& gY, 15 | const int lambda_val, 16 | const int batch_size, 17 | const int channel, 18 | const int height, 19 | const int width); 20 | #endif 21 | 22 | inline at::Tensor SwapAlign2Nat_forward( 23 | const at::Tensor& X, 24 | const int lambda_val, 25 | const float pad_val) { 26 | if (X.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val); 29 | #else 30 | AT_ERROR("Not compiled with GPU support"); 31 | #endif 32 | } 33 | AT_ERROR("Not implemented on the CPU"); 34 | } 35 | 36 | inline at::Tensor SwapAlign2Nat_backward( 37 | const at::Tensor& gY, 38 | const int lambda_val, 39 | const int batch_size, 40 | const int channel, 41 | const int height, 42 | const int width) { 43 | if (gY.type().is_cuda()) { 44 | #ifdef WITH_CUDA 45 | return SwapAlign2Nat_backward_cuda( 46 | gY, lambda_val, batch_size, channel, height, width); 47 | #else 48 | AT_ERROR("Not compiled with GPU support"); 49 | #endif 50 | } 51 | AT_ERROR("Not implemented on the CPU"); 52 | } 53 | 54 | } // namespace cvpods 55 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace cvpods { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto widths1 = boxes1.select(1, 2).contiguous(); 13 | auto heights1 = boxes1.select(1, 3).contiguous(); 14 | auto widths2 = boxes2.select(1, 2).contiguous(); 15 | auto heights2 = boxes2.select(1, 3).contiguous(); 16 | 17 | at::Tensor areas1 = widths1 * heights1; 18 | at::Tensor areas2 = widths2 * heights2; 19 | 20 | auto num_boxes1 = boxes1.size(0); 21 | auto num_boxes2 = boxes2.size(0); 22 | 23 | for (int i = 0; i < num_boxes1; i++) { 24 | for (int j = 0; j < num_boxes2; j++) { 25 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 26 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 27 | } 28 | } 29 | } 30 | 31 | at::Tensor box_iou_rotated_cpu( 32 | const at::Tensor& boxes1, 33 | const at::Tensor& boxes2) { 34 | auto num_boxes1 = boxes1.size(0); 35 | auto num_boxes2 = boxes2.size(0); 36 | at::Tensor ious = 37 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 38 | 39 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 40 | 41 | // reshape from 1d array to 2d array 42 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 43 | return ious.reshape(shape); 44 | } 45 | 46 | } // namespace cvpods 47 | -------------------------------------------------------------------------------- /cvpods/configs/panoptic_seg_config.py: -------------------------------------------------------------------------------- 1 | from .rcnn_fpn_config import RCNNFPNConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | SEM_SEG_HEAD=dict( 6 | # NAME="SemSegFPNHead", 7 | IN_FEATURES=["p2", "p3", "p4", "p5"], 8 | # Label in the semantic segmentation ground truth that is ignored, 9 | # i.e., no loss is calculated for the corresponding pixel. 10 | IGNORE_VALUE=255, 11 | # Number of classes in the semantic segmentation head 12 | NUM_CLASSES=54, 13 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 14 | CONVS_DIM=128, 15 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 16 | COMMON_STRIDE=4, 17 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 18 | NORM="GN", 19 | LOSS_WEIGHT=0.5, 20 | ), 21 | PANOPTIC_FPN=dict( 22 | # Scaling of all losses from the instance detection / segmentation head. 23 | INSTANCE_LOSS_WEIGHT=1.0, 24 | # options when combining instance & semantic segmentation outputs 25 | COMBINE=dict( 26 | ENABLED=True, 27 | OVERLAP_THRESH=0.5, 28 | STUFF_AREA_LIMIT=4096, 29 | INSTANCES_CONFIDENCE_THRESH=0.5, 30 | ), 31 | ), 32 | ) 33 | ) 34 | 35 | 36 | class PanopticSegmentationConfig(RCNNFPNConfig): 37 | def __init__(self): 38 | super(PanopticSegmentationConfig, self).__init__() 39 | self._register_configuration(_config_dict) 40 | 41 | 42 | config = PanopticSegmentationConfig() 43 |
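# Usage sketch (illustrative, not part of the repository): leaf values in
# _config_dict surface as attribute-style fields on the registered config
# object, the same access pattern the net.py files above use for
# cfg.MODEL.PIXEL_MEAN:
#
#     from cvpods.configs.panoptic_seg_config import config as cfg
#     assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == 54
#     assert cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH == 0.5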
-------------------------------------------------------------------------------- /cvpods/configs/ssd_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | PIXEL_MEAN=[123.675, 116.28, 103.53], # RGB FORMAT 6 | PIXEL_STD=[1.0, 1.0, 1.0], 7 | VGG=dict( 8 | ARCH='D', 9 | NORM="", 10 | NUM_CLASSES=None, 11 | OUT_FEATURES=["Conv4_3", "Conv7"], 12 | POOL_ARGS=dict( 13 | pool3=(2, 2, 0, True), # k, s, p, ceil_mode 14 | pool5=(3, 1, 1, False) # k, s, p, ceil_mode 15 | ), 16 | FC_TO_CONV=True, 17 | ), 18 | SSD=dict( 19 | NUM_CLASSES=80, 20 | IN_FEATURES=["Conv4_3", "Conv7"], 21 | EXTRA_LAYER_ARCH={ 22 | # "S" marks a conv layer with stride=2; the number after it gives that layer's output channels 23 | "300": [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 24 | "512": [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128, 256], 25 | }, 26 | IOU_THRESHOLDS=[0.5, 0.5], 27 | IOU_LABELS=[0, -1, 1], 28 | BBOX_REG_WEIGHTS=(10.0, 10.0, 5.0, 5.0), 29 | L2NORM_SCALE=20.0, 30 | # Loss parameters: 31 | LOSS_ALPHA=1.0, 32 | SMOOTH_L1_LOSS_BETA=1.0, 33 | NEGATIVE_POSITIVE_RATIO=3.0, 34 | # Inference parameters: 35 | SCORE_THRESH_TEST=0.02, 36 | NMS_THRESH_TEST=0.45, 37 | ), 38 | ) 39 | ) 40 | 41 | 42 | class SSDConfig(BaseDetectionConfig): 43 | def __init__(self): 44 | super(SSDConfig, self).__init__() 45 | self._register_configuration(_config_dict) 46 | 47 | 48 | config = SSDConfig() 49 | -------------------------------------------------------------------------------- /cvpods/configs/segm_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | SEM_SEG_HEAD=dict( 6 | # NAME="SemSegFPNHead", 7 | IN_FEATURES=["p2", "p3", "p4", "p5"], 8 | # Label in the semantic segmentation ground truth that is ignored, 9 | # i.e., no loss is calculated for the corresponding pixel. 10 | IGNORE_VALUE=255, 11 | # Number of classes in the semantic segmentation head 12 | NUM_CLASSES=54, 13 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 14 | CONVS_DIM=128, 15 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 16 | COMMON_STRIDE=4, 17 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 18 | NORM="GN", 19 | LOSS_WEIGHT=1.0, 20 | ), 21 | PANOPTIC_FPN=dict( 22 | # Scaling of all losses from the instance detection / segmentation head.
23 | INSTANCE_LOSS_WEIGHT=1.0, 24 | # options when combining instance & semantic segmentation outputs 25 | COMBINE=dict( 26 | ENABLED=True, 27 | OVERLAP_THRESH=0.5, 28 | STUFF_AREA_LIMIT=4096, 29 | INSTANCES_CONFIDENCE_THRESH=0.5, 30 | ), 31 | ), 32 | ), 33 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 34 | ) 35 | 36 | 37 | class SegmentationConfig(BaseDetectionConfig): 38 | def __init__(self): 39 | super(SegmentationConfig, self).__init__() 40 | self._register_configuration(_config_dict) 41 | 42 | 43 | config = SegmentationConfig() 44 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/sigmoid_focal_loss/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace cvpods { 5 | #ifdef WITH_CUDA 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | #endif 21 | 22 | // 23 | // Interface for Python 24 | inline at::Tensor SigmoidFocalLoss_forward( 25 | const at::Tensor& logits, 26 | const at::Tensor& targets, 27 | const int num_classes, 28 | const float gamma, 29 | const float alpha) { 30 | if (logits.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | inline at::Tensor SigmoidFocalLoss_backward( 41 | const at::Tensor& logits, 42 | const at::Tensor& targets, 43 | const at::Tensor& d_losses, 44 | const int num_classes, 45 | const float gamma, 46 | const float alpha) { 47 | if (logits.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 50 | #else 51 | AT_ERROR("Not compiled with GPU support"); 52 | #endif 53 | } 54 | AT_ERROR("Not implemented on the CPU"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /cvpods/configs/rcnn_fpn_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : rcnn_fpn_config.py 5 | @Time : 2020/05/07 23:55:41 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:55:41 10 | ''' 11 | 12 | from .rcnn_config import RCNNConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # BACKBONE=dict(NAME='build_resnet_backbone',), 17 | RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],), 18 | FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]), 19 | ANCHOR_GENERATOR=dict( 20 | SIZES=[[32], [64], [128], [256], [512]], ASPECT_RATIOS=[[0.5, 1.0, 2.0]], 21 | ), 22 | RPN=dict( 23 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 24 | PRE_NMS_TOPK_TRAIN=2000, 25 | PRE_NMS_TOPK_TEST=1000, 26 | POST_NMS_TOPK_TRAIN=1000, 27 | POST_NMS_TOPK_TEST=1000, 28 | ), 29 | ROI_HEADS=dict( 30 | # NAME: "StandardROIHeads" 31 | IN_FEATURES=["p2", "p3", "p4", "p5"], 32 | ), 33 | ROI_BOX_HEAD=dict( 34 | # NAME: "FastRCNNConvFCHead" 35 | NUM_FC=2, 36 | 
POOLER_RESOLUTION=7, 37 | ), 38 | ROI_MASK_HEAD=dict( 39 | # NAME: "MaskRCNNConvUpsampleHead" 40 | NUM_CONV=4, 41 | POOLER_RESOLUTION=14, 42 | ), 43 | ), 44 | ) 45 | 46 | 47 | class RCNNFPNConfig(RCNNConfig): 48 | def __init__(self): 49 | super(RCNNFPNConfig, self).__init__() 50 | self._register_configuration(_config_dict) 51 | 52 | 53 | config = RCNNFPNConfig() 54 | -------------------------------------------------------------------------------- /tools/rm_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | import re 7 | from colorama import Fore, Style 8 | 9 | 10 | def remove_parser(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--start-iter", "-s", type=int, default=0, help="start iter to remove") 13 | parser.add_argument("--end-iter", "-e", type=int, default=0, help="end iter to remove") 14 | parser.add_argument("--prefix", "-p", type=str, default="model_", 15 | help="prefix of model to remove") 16 | parser.add_argument("--dir", "-d", type=str, default="/data/Outputs", 17 | help="dir to remove pth model") 18 | parser.add_argument("--real", "-r", action="store_true", 19 | help="really delete or just show what you will delete") 20 | return parser 21 | 22 | 23 | def remove_files(args): 24 | start = args.start_iter 25 | end = args.end_iter 26 | prefix = args.prefix 27 | for folder, _, files in os.walk(args.dir): 28 | # l = [x for x in f if x.endswith(".pth")] 29 | models = [f for f in files if re.search(prefix + r"[0123456789]*\.pth", f)] 30 | delete = [os.path.join(folder, model) for model in models 31 | if start <= int(model[len(prefix):-len(".pth")]) <= end] 32 | if delete: 33 | for f in delete: 34 | if args.real: 35 | print(f"remove {f}") 36 | os.remove(f) 37 | else: 38 | print(f"you may remove {f}") 39 | if not args.real: 40 | print(Fore.RED + "use --real parameter to really delete models" + Style.RESET_ALL) 41 | 42 | 43 | def main(): 44 | args = remove_parser().parse_args() 45 | remove_files(args) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /cvpods/utils/benchmark/timer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # -*- coding: utf-8 -*- 4 | 5 | from time import perf_counter 6 | from typing import Optional 7 | 8 | 9 | class Timer: 10 | """ 11 | A timer which computes the time elapsed since the start/reset of the timer. 12 | """ 13 | 14 | def __init__(self): 15 | self.reset() 16 | 17 | def reset(self): 18 | """ 19 | Reset the timer. 20 | """ 21 | self._start = perf_counter() 22 | self._paused: Optional[float] = None 23 | self._total_paused = 0 24 | 25 | def pause(self): 26 | """ 27 | Pause the timer. 28 | """ 29 | if self._paused is not None: 30 | raise ValueError("Trying to pause a Timer that is already paused!") 31 | self._paused = perf_counter() 32 | 33 | def is_paused(self) -> bool: 34 | """ 35 | Returns: 36 | bool: whether the timer is currently paused 37 | """ 38 | return self._paused is not None 39 | 40 | def resume(self): 41 | """ 42 | Resume the timer. 
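        Example (illustrative sketch of the pause/resume bookkeeping):

            >>> t = Timer()
            >>> t.pause()        # freeze the clock
            >>> t.is_paused()
            True
            >>> t.resume()       # the paused interval is excluded from seconds()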
43 | """ 44 | if self._paused is None: 45 | raise ValueError("Trying to resume a Timer that is not paused!") 46 | self._total_paused += perf_counter() - self._paused 47 | self._paused = None 48 | 49 | def seconds(self) -> float: 50 | """ 51 | Returns: 52 | (float): the total number of seconds since the start/reset of the 53 | timer, excluding the time when the timer is paused. 54 | """ 55 | if self._paused is not None: 56 | end_time: float = self._paused # type: ignore 57 | else: 58 | end_time = perf_counter() 59 | return end_time - self._start - self._total_paused 60 | -------------------------------------------------------------------------------- /cvpods/configs/base_classification_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : base_classification_config.py 5 | @Time : 2020/05/07 23:56:17 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:17 10 | ''' 11 | 12 | from cvpods.configs.base_config import BaseConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | WEIGHTS="", 17 | PIXEL_MEAN=[0.406, 0.456, 0.485], # BGR 18 | PIXEL_STD=[0.225, 0.224, 0.229], 19 | BACKBONE=dict(FREEZE_AT=-1, ), # do not freeze 20 | RESNETS=dict( 21 | NUM_CLASSES=None, 22 | DEPTH=None, 23 | OUT_FEATURES=["linear"], 24 | NUM_GROUPS=1, 25 | # Options: FrozenBN, GN, "SyncBN", "BN" 26 | NORM="BN", 27 | ACTIVATION=dict( 28 | NAME="ReLU", 29 | INPLACE=True, 30 | ), 31 | # Whether init last bn weight of each BasicBlock or BottleneckBlock to 0 32 | ZERO_INIT_RESIDUAL=True, 33 | WIDTH_PER_GROUP=64, 34 | # Use True only for the original MSRA ResNet; use False for C2 and Torch models 35 | STRIDE_IN_1X1=False, 36 | RES5_DILATION=1, 37 | RES2_OUT_CHANNELS=256, 38 | STEM_OUT_CHANNELS=64, 39 | 40 | # Deep Stem 41 | DEEP_STEM=False, 42 | ), 43 | ), 44 | SOLVER=dict( 45 | IMS_PER_DEVICE=32, # defalut: 8 gpus x 32 = 256 46 | ), 47 | ) 48 | 49 | 50 | class BaseClassificationConfig(BaseConfig): 51 | def __init__(self): 52 | super(BaseClassificationConfig, self).__init__() 53 | self._register_configuration(_config_dict) 54 | 55 | 56 | config = BaseClassificationConfig() 57 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/dynamic_arch/cal_op_flops.py: -------------------------------------------------------------------------------- 1 | # Count Operation MFLOPs when fix batch to 1 2 | # @author: yanwei.li 3 | 4 | 5 | def count_Conv_flop( 6 | in_h, in_w, in_channel, out_channel, 7 | kernel_size, is_bias=False, stride=1, groups=1 8 | ): 9 | out_h = in_h // stride 10 | out_w = in_w // stride 11 | bias_ops = 1 if is_bias else 0 12 | kernel_ops = kernel_size[0] * kernel_size[1] * (in_channel // groups) 13 | delta_ops = (kernel_ops + bias_ops) * out_channel * out_h * out_w 14 | return delta_ops / 1e6 15 | 16 | 17 | def count_Linear_flop(in_num, out_num, is_bias): 18 | weight_ops = in_num * out_num 19 | bias_ops = out_num if is_bias else 0 20 | delta_ops = weight_ops + bias_ops 21 | return delta_ops / 1e6 22 | 23 | 24 | def count_BN_flop(in_h, in_w, in_channel, is_affine): 25 | multi_affine = 2 if is_affine else 1 26 | delta_ops = multi_affine * in_h * in_w * in_channel 27 | return delta_ops / 1e6 28 | 29 | 30 | def count_ReLU_flop(in_h, in_w, in_channel): 31 | delta_ops = in_h * in_w * in_channel 32 | return delta_ops / 1e6 33 | 34 | 35 | def count_Pool2d_flop(in_h, in_w, 
35 | def count_Pool2d_flop(in_h, in_w, out_channel, kernel_size, stride): 36 | out_h = in_h // stride 37 | out_w = in_w // stride 38 | kernel_ops = kernel_size[0] * kernel_size[1] 39 | delta_ops = kernel_ops * out_w * out_h * out_channel 40 | return delta_ops / 1e6 41 | 42 | 43 | def count_ConvBNReLU_flop( 44 | in_h, in_w, in_channel, out_channel, 45 | kernel_size, is_bias=False, stride=1, 46 | groups=1, is_affine=True 47 | ): 48 | flops = 0.0 49 | flops += count_Conv_flop( 50 | in_h, in_w, in_channel, out_channel, 51 | kernel_size, is_bias, stride, groups 52 | ) 53 | in_h = in_h // stride 54 | in_w = in_w // stride 55 | flops += count_BN_flop(in_h, in_w, out_channel, is_affine) 56 | flops += count_ReLU_flop(in_h, in_w, out_channel) 57 | return flops 58 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/label_smooth_ce_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LabelSmoothCELoss(nn.Module): 6 | """ 7 | Cross-entropy loss with label smoothing. 8 | 9 | Args: 10 | epsilon: Smoothing level. Uses the one-hot label when set to 0 and a uniform label when set to 1. 11 | """ 12 | def __init__(self, epsilon): 13 | super(LabelSmoothCELoss, self).__init__() 14 | self.epsilon = epsilon 15 | self.logsoftmax = nn.LogSoftmax(dim=1) 16 | 17 | def forward(self, logits, targets): 18 | """ 19 | Args: 20 | logits: A float tensor of shape: (minibatch, C). 21 | targets: A long (int64) tensor of shape: (minibatch,). Stores the class indices 22 | in range `[0, C - 1]`. 23 | 24 | Returns: 25 | A scalar tensor. 26 | """ 27 | log_probs = self.logsoftmax(logits) 28 | targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1) 29 | targets = (1 - self.epsilon) * targets + self.epsilon / logits.shape[1] 30 | loss = (-targets * log_probs).mean(0).sum() 31 | return loss 32 | 33 | 34 | def label_smooth_ce_loss(logits, targets, epsilon): 35 | """ 36 | Cross-entropy loss with label smoothing. 37 | 38 | Args: 39 | logits: A float tensor of shape: (minibatch, C). 40 | targets: A long (int64) tensor of shape: (minibatch,). Stores the class indices 41 | in range `[0, C - 1]`. 42 | epsilon: Smoothing level. Uses the one-hot label when set to 0 and a uniform label when set to 1. 43 | 44 | Returns: 45 | A scalar tensor.
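    Example (illustrative sketch, added for clarity):

        >>> logits = torch.randn(4, 10)
        >>> targets = torch.randint(0, 10, (4,))
        >>> label_smooth_ce_loss(logits, targets, epsilon=0.1).dim()
        0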
46 | """ 47 | log_probs = nn.functional.log_softmax(logits, dim=1) 48 | targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1) 49 | targets = (1 - epsilon) * targets + epsilon / logits.shape[1] 50 | loss = (-targets * log_probs).mean(0).sum() 51 | return loss 52 | -------------------------------------------------------------------------------- /cvpods/configs/dynamic_routing_config.py: -------------------------------------------------------------------------------- 1 | from .base_config import BaseConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | LOAD_PROPOSALS=False, 6 | MASK_ON=False, 7 | KEYPOINT_ON=False, 8 | BACKBONE=dict(FREEZE_AT=0,), 9 | RESNETS=dict( 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | NORM="nnSyncBN", 12 | NUM_GROUPS=1, 13 | WIDTH_PER_GROUP=64, 14 | STRIDE_IN_1X1=True, 15 | RES5_DILATION=1, 16 | RES2_OUT_CHANNELS=256, 17 | STEM_OUT_CHANNELS=64, 18 | DEFORM_ON_PER_STAGE=[False, False, False, False], 19 | DEFORM_MODULATED=False, 20 | DEFORM_NUM_GROUPS=1, 21 | ), 22 | FPN=dict( 23 | IN_FEATURES=[], 24 | OUT_CHANNELS=256, 25 | NORM="", 26 | FUSE_TYPE="sum", 27 | ), 28 | SEM_SEG_HEAD=dict( 29 | # NAME="SemSegFPNHead", 30 | IN_FEATURES=[], 31 | IGNORE_VALUE=255, 32 | NUM_CLASSES=(), 33 | CONVS_DIM=256, 34 | COMMON_STRIDE=(), 35 | NORM="GN", 36 | LOSS_WEIGHT=1.0, 37 | ), 38 | ), 39 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 40 | SOLVER=dict( 41 | LR_SCHEDULER=dict( 42 | NAME="PolyLR", 43 | POLY_POWER=0.9, 44 | MAX_ITER=40000, 45 | WARMUP_ITERS=1000, 46 | WARMUP_FACTOR=0.001, 47 | WARMUP_METHOD="linear", 48 | ), 49 | OPTIMIZER=dict(BASE_LR=0.01, ), 50 | IMS_PER_BATCH=16, 51 | CHECKPOINT_PERIOD=5000, 52 | ), 53 | TEST=dict(PRECISE_BN=dict(ENABLED=True), ), 54 | ) 55 | 56 | 57 | class SemanticSegmentationConfig(BaseConfig): 58 | def __init__(self): 59 | super(SemanticSegmentationConfig, self).__init__() 60 | self._register_configuration(_config_dict) 61 | 62 | 63 | config = SemanticSegmentationConfig() 64 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : backbone.py 5 | @Time : 2020/05/07 23:58:08 6 | @Author : Facebook, Inc. and its affiliates. 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:58:08 10 | ''' 11 | 12 | from abc import ABCMeta, abstractmethod 13 | 14 | import torch.nn as nn 15 | 16 | from cvpods.layers import ShapeSpec 17 | 18 | __all__ = ["Backbone"] 19 | 20 | 21 | class Backbone(nn.Module, metaclass=ABCMeta): 22 | """ 23 | Abstract base class for network backbones. 24 | """ 25 | def __init__(self): 26 | """ 27 | The `__init__` method of any subclass can specify its own set of arguments. 28 | """ 29 | super().__init__() 30 | 31 | @abstractmethod 32 | def forward(self): 33 | """ 34 | Subclasses must override this method, but adhere to the same return type. 35 | 36 | Returns: 37 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 38 | """ 39 | pass 40 | 41 | @property 42 | def size_divisibility(self): 43 | """ 44 | Some backbones require the input height and width to be divisible by a 45 | specific integer. This is typically true for encoder / decoder type networks 46 | with lateral connection (e.g., FPN) for which feature maps need to match 47 | dimension in the "bottom up" and "top down" paths. 
Set to 0 if no specific 48 | input size divisibility is required. 49 | """ 50 | return 0 51 | 52 | def output_shape(self): 53 | """ 54 | Returns: 55 | dict[str->ShapeSpec] 56 | """ 57 | # this is a backward-compatible default 58 | return { 59 | name: ShapeSpec(channels=self._out_feature_channels[name], 60 | stride=self._out_feature_strides[name]) 61 | for name in self._out_features 62 | } 63 | -------------------------------------------------------------------------------- /cvpods/data/wrapped_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | from types import SimpleNamespace 4 | 5 | import numpy as np 6 | 7 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 8 | 9 | from .registry import DATASETS 10 | 11 | 12 | @DATASETS.register() 13 | class ConcatDataset(_ConcatDataset): 14 | """A wrapper of concatenated dataset. 15 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 16 | concat the group flag for image aspect ratio. 17 | Args: 18 | datasets (list[:obj:`Dataset`]): A list of datasets. 19 | """ 20 | 21 | def __init__(self, datasets): 22 | super(ConcatDataset, self).__init__(datasets) 23 | if hasattr(self.datasets[0], 'aspect_ratios'): 24 | aspect_ratios = [d.aspect_ratios for d in self.datasets] 25 | self.aspect_ratios = np.concatenate(aspect_ratios) 26 | if hasattr(self.datasets[0], 'meta'): 27 | self.meta = {} 28 | for d in self.datasets: 29 | self.meta.update(d.meta) 30 | self.meta = SimpleNamespace(**self.meta) 31 | 32 | 33 | @DATASETS.register() 34 | class RepeatDataset(object): 35 | """A wrapper of repeated dataset. 36 | The length of repeated dataset will be `times` larger than the original 37 | dataset. This is useful when the data loading time is long but the dataset 38 | is small. Using RepeatDataset can reduce the data loading time between 39 | epochs. 40 | Args: 41 | dataset (:obj:`Dataset`): The dataset to be repeated. 42 | times (int): Repeat times. 
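    Example (illustrative sketch; ``base`` stands in for any map-style dataset):

        >>> repeated = RepeatDataset(base, times=3)
        >>> len(repeated) == 3 * len(base)
        True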
43 | """ 44 | 45 | def __init__(self, dataset, times): 46 | self.dataset = dataset 47 | self.times = times 48 | if hasattr(self.dataset, 'aspect_ratios'): 49 | self.aspect_ratios = np.tile(self.dataset.aspect_ratios, times) 50 | 51 | self._ori_len = len(self.dataset) 52 | 53 | def __getitem__(self, idx): 54 | return self.dataset[idx % self._ori_len] 55 | 56 | def __len__(self): 57 | return self.times * self._ori_len 58 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=4, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | ), 28 | ), 29 | DATASETS=dict( 30 | TRAIN=("coco_2017_train",), 31 | TEST=("coco_2017_val",), 32 | ), 33 | SOLVER=dict( 34 | IMS_PER_BATCH=16, 35 | BASE_LR=0.01, 36 | STEPS=(60000, 80000), 37 | MAX_ITER=90000, 38 | ), 39 | INPUT=dict( 40 | AUG=dict( 41 | TRAIN_PIPELINES=[ 42 | ("ResizeShortestEdge", 43 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 44 | ("RandomFlip", dict()), 45 | ], 46 | TEST_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 49 | ], 50 | ) 51 | ), 52 | OUTPUT_DIR=osp.join( 53 | '/data/Outputs/model_logs/cvpods_playground', 54 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 55 | ), 56 | ) 57 | 58 | 59 | class CustomFCOSConfig(FCOSConfig): 60 | def __init__(self): 61 | super(CustomFCOSConfig, self).__init__() 62 | self._register_configuration(_config_dict) 63 | 64 | 65 | config = CustomFCOSConfig() 66 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/refine.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | extern std::tuple 5 | tree_filter_refine_forward( 6 | const at::Tensor & feature_in_tensor, 7 | const at::Tensor & edge_weight_tensor, 8 | const at::Tensor & self_weight_tensor, 9 | const at::Tensor & sorted_index_tensor, 10 | const at::Tensor & sorted_parent_index_tensor, 11 | const at::Tensor & sorted_child_index_tensor 12 | ); 13 | 14 | extern at::Tensor tree_filter_refine_backward_feature( 15 | const at::Tensor & feature_in_tensor, 16 | const at::Tensor & edge_weight_tensor, 17 | const at::Tensor & self_weight_tensor, 18 | const at::Tensor & sorted_index_tensor, 19 | const at::Tensor & sorted_parent_tensor, 20 | const at::Tensor & sorted_child_tensor, 21 | const at::Tensor & feature_aggr_tensor, 22 | const at::Tensor & feature_aggr_up_tensor, 23 | const at::Tensor & grad_out_tensor 24 | ); 25 | 26 | extern at::Tensor tree_filter_refine_backward_edge_weight( 27 | const at::Tensor & feature_in_tensor, 28 | const at::Tensor & edge_weight_tensor, 29 | const at::Tensor & self_weight_tensor, 30 | const at::Tensor & sorted_index_tensor, 31 | const at::Tensor & 
sorted_parent_tensor, 32 | const at::Tensor & sorted_child_tensor, 33 | const at::Tensor & feature_aggr_tensor, 34 | const at::Tensor & feature_aggr_up_tensor, 35 | const at::Tensor & grad_out_tensor 36 | ); 37 | 38 | extern at::Tensor tree_filter_refine_backward_self_weight( 39 | const at::Tensor & feature_in_tensor, 40 | const at::Tensor & edge_weight_tensor, 41 | const at::Tensor & self_weight_tensor, 42 | const at::Tensor & sorted_index_tensor, 43 | const at::Tensor & sorted_parent_tensor, 44 | const at::Tensor & sorted_child_tensor, 45 | const at::Tensor & feature_aggr_tensor, 46 | const at::Tensor & feature_aggr_up_tensor, 47 | const at::Tensor & grad_out_tensor 48 | ); 49 | 50 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d4/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=8, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | RESIZE_METHOD="bilinear", 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | IMS_PER_BATCH=16, 36 | BASE_LR=0.01, 37 | STEPS=(60000, 80000), 38 | MAX_ITER=90000, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("ResizeShortestEdge", 44 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 45 | ("RandomFlip", dict()), 46 | ], 47 | TEST_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 50 | ], 51 | ) 52 | ), 53 | OUTPUT_DIR=osp.join( 54 | '/data/Outputs/model_logs/cvpods_playground', 55 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 56 | ), 57 | ) 58 | 59 | 60 | class CustomFCOSConfig(FCOSConfig): 61 | def __init__(self): 62 | super(CustomFCOSConfig, self).__init__() 63 | self._register_configuration(_config_dict) 64 | 65 | 66 | config = CustomFCOSConfig() 67 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d8/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | RESIZE_METHOD="bilinear", 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | 
IMS_PER_BATCH=16, 36 | BASE_LR=0.01, 37 | STEPS=(60000, 80000), 38 | MAX_ITER=90000, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("ResizeShortestEdge", 44 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 45 | ("RandomFlip", dict()), 46 | ], 47 | TEST_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 50 | ], 51 | ) 52 | ), 53 | OUTPUT_DIR=osp.join( 54 | '/data/Outputs/model_logs/cvpods_playground', 55 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 56 | ), 57 | ) 58 | 59 | 60 | class CustomFCOSConfig(FCOSConfig): 61 | def __init__(self): 62 | super(CustomFCOSConfig, self).__init__() 63 | self._register_configuration(_config_dict) 64 | 65 | 66 | config = CustomFCOSConfig() 67 | -------------------------------------------------------------------------------- /cvpods/layers/deform_conv_with_off.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .deform_conv import DeformConv, ModulatedDeformConv 7 | 8 | 9 | class DeformConvWithOff(nn.Module): 10 | 11 | def __init__(self, in_channels, out_channels, 12 | kernel_size=3, stride=1, padding=1, 13 | dilation=1, deformable_groups=1): 14 | super(DeformConvWithOff, self).__init__() 15 | self.offset_conv = nn.Conv2d( 16 | in_channels, 17 | deformable_groups * 2 * kernel_size * kernel_size, 18 | kernel_size=kernel_size, 19 | stride=stride, 20 | padding=padding, 21 | ) 22 | self.dcn = DeformConv( 23 | in_channels, out_channels, kernel_size=kernel_size, 24 | stride=stride, padding=padding, dilation=dilation, 25 | deformable_groups=deformable_groups, 26 | ) 27 | 28 | def forward(self, input): 29 | offset = self.offset_conv(input) 30 | output = self.dcn(input, offset) 31 | return output 32 | 33 | 34 | class ModulatedDeformConvWithOff(nn.Module): 35 | 36 | def __init__(self, in_channels, out_channels, 37 | kernel_size=3, stride=1, padding=1, 38 | dilation=1, deformable_groups=1): 39 | super(ModulatedDeformConvWithOff, self).__init__() 40 | self.offset_mask_conv = nn.Conv2d( 41 | in_channels, 42 | deformable_groups * 3 * kernel_size * kernel_size, 43 | kernel_size=kernel_size, 44 | stride=stride, 45 | padding=padding, 46 | ) 47 | self.dcnv2 = ModulatedDeformConv( 48 | in_channels, out_channels, kernel_size=kernel_size, 49 | stride=stride, padding=padding, dilation=dilation, 50 | deformable_groups=deformable_groups, 51 | ) 52 | 53 | def forward(self, input): 54 | x = self.offset_mask_conv(input) 55 | o1, o2, mask = torch.chunk(x, 3, dim=1) 56 | offset = torch.cat((o1, o2), dim=1) 57 | mask = torch.sigmoid(mask) 58 | output = self.dcnv2(input, offset, mask) 59 | return output 60 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/proposal_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | 4 | import torch 5 | 6 | from cvpods.structures import Instances 7 | 8 | 9 | def add_ground_truth_to_proposals(gt_boxes, proposals): 10 | """ 11 | Call `add_ground_truth_to_proposals_single_image` for all images. 12 | 13 | Args: 14 | gt_boxes (list[Boxes]): list of N elements. Element i is a Boxes 15 | representing the ground-truth for image i. 16 | proposals (list[Instances]): list of N elements. Element i is an Instances 17 | representing the proposals for image i. 18 | 19 | Returns: 20 | list[Instances]: list of N Instances. Each is the proposals for the image, 21 | with fields "proposal_boxes" and "objectness_logits". 22 | """ 23 | assert gt_boxes is not None 24 | 25 | assert len(proposals) == len(gt_boxes) 26 | if len(proposals) == 0: 27 | return proposals 28 | 29 | return [ 30 | add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) 31 | for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) 32 | ] 33 | 34 |
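# Note (added for clarity): in `add_ground_truth_to_proposals_single_image`
# below, gt_logit_value = log((1 - 1e-10) / 1e-10) ~= 23.03, the inverse
# sigmoid of (1 - 1e-10), so appended ground-truth boxes carry an objectness
# probability of effectively 1.0.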
35 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): 36 | """ 37 | Augment `proposals` with ground-truth boxes from `gt_boxes`. 38 | 39 | Args: 40 | Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals 41 | per image. 42 | 43 | Returns: 44 | Same as `add_ground_truth_to_proposals`, but for only one image. 45 | """ 46 | device = proposals.objectness_logits.device 47 | # Concatenating gt_boxes with proposals requires them to have the same fields 48 | # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1. 49 | gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) 50 | 51 | gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) 52 | gt_proposal = Instances(proposals.image_size) 53 | 54 | gt_proposal.proposal_boxes = gt_boxes 55 | gt_proposal.objectness_logits = gt_logits 56 | new_proposals = Instances.cat([proposals, gt_proposal]) 57 | 58 | return new_proposals 59 | -------------------------------------------------------------------------------- /cvpods/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | 4 | from torch.utils.data.sampler import BatchSampler, Sampler 5 | 6 | from ..registry import SAMPLERS 7 | 8 | 9 | @SAMPLERS.register() 10 | class GroupedBatchSampler(BatchSampler): 11 | """ 12 | Wraps another sampler to yield a mini-batch of indices. 13 | It enforces that the batch only contain elements from the same group. 14 | It also tries to provide mini-batches that follow an ordering as close 15 | as possible to the ordering from the original sampler. 16 | """ 17 | 18 | def __init__(self, sampler, group_ids, batch_size): 19 | """ 20 | Args: 21 | sampler (Sampler): Base sampler. 22 | group_ids (list[int]): If the sampler produces indices in range [0, N), 23 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 24 | The group ids must be a set of integers in the range [0, num_groups). 25 | batch_size (int): Size of mini-batch.
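        Example (illustrative sketch; two groups, e.g. portrait vs. landscape
        aspect ratios, with batch_size=2):

            >>> from torch.utils.data.sampler import SequentialSampler
            >>> batches = GroupedBatchSampler(SequentialSampler(range(4)), [0, 1, 0, 1], 2)
            >>> for batch in batches:
            ...     print(batch)
            [0, 2]
            [1, 3]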
26 | """ 27 | if not isinstance(sampler, Sampler): 28 | raise ValueError( 29 | "sampler should be an instance of " 30 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 31 | ) 32 | self.sampler = sampler 33 | self.group_ids = np.asarray(group_ids) 34 | assert self.group_ids.ndim == 1 35 | self.batch_size = batch_size 36 | groups = np.unique(self.group_ids).tolist() 37 | 38 | # buffer the indices of each group until batch size is reached 39 | self.buffer_per_group = {k: [] for k in groups} 40 | 41 | def __iter__(self): 42 | for idx in self.sampler: 43 | group_id = self.group_ids[idx] 44 | group_buffer = self.buffer_per_group[group_id] 45 | group_buffer.append(idx) 46 | if len(group_buffer) == self.batch_size: 47 | yield group_buffer[:] # yield a copy of the list 48 | del group_buffer[:] 49 | 50 | def __len__(self): 51 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 52 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/rst.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "boruvka_rst.hpp" 14 | 15 | static void forward_kernel(int * edge_index, float * edge_weight, int * edge_out, int vertex_count, int edge_count){ 16 | struct Graph * g = create_graph(vertex_count, edge_count); 17 | for (int i = 0; i < edge_count; ++i){ 18 | g->edge[i].src = edge_index[i * 2]; 19 | g->edge[i].dest = edge_index[i * 2 + 1]; 20 | g->edge[i].weight = edge_weight[i]; 21 | } 22 | 23 | boruvka_rst(g, edge_out); 24 | 25 | delete[] g->edge; 26 | delete[] g; 27 | } 28 | 29 | at::Tensor rst_forward( 30 | const at::Tensor & edge_index_tensor, 31 | const at::Tensor & edge_weight_tensor, 32 | int vertex_count){ 33 | unsigned batch_size = edge_index_tensor.size(0); 34 | unsigned edge_count = edge_index_tensor.size(1); 35 | 36 | auto edge_index_cpu = edge_index_tensor.cpu(); 37 | auto edge_weight_cpu = edge_weight_tensor.cpu(); 38 | auto edge_out_cpu = at::empty({batch_size, vertex_count - 1, 2}, edge_index_cpu.options()); 39 | 40 | int * edge_out = edge_out_cpu.contiguous().data(); 41 | int * edge_index = edge_index_cpu.contiguous().data(); 42 | float * edge_weight = edge_weight_cpu.contiguous().data(); 43 | 44 | // Loop for batch 45 | std::thread pids[batch_size]; 46 | for (unsigned i = 0; i < batch_size; i++){ 47 | auto edge_index_iter = edge_index + i * edge_count * 2; 48 | auto edge_weight_iter = edge_weight + i * edge_count; 49 | auto edge_out_iter = edge_out + i * (vertex_count - 1) * 2; 50 | pids[i] = std::thread(forward_kernel, edge_index_iter, edge_weight_iter, edge_out_iter, vertex_count, edge_count); 51 | } 52 | 53 | for (unsigned i = 0; i < batch_size; i++){ 54 | pids[i].join(); 55 | } 56 | 57 | auto edge_out_tensor = edge_out_cpu.to(edge_index_tensor.device()); 58 | 59 | return edge_out_tensor; 60 | } 61 | 62 | -------------------------------------------------------------------------------- /cvpods/configs/fcos_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : fcos_config.py 5 | @Time : 2020/05/07 23:56:09 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:09 10 | ''' 11 | 12 | from .base_detection_config import 
BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # META_ARCHITECTURE="RetinaNet", 17 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 18 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 19 | SHIFT_GENERATOR=dict( 20 | NUM_SHIFTS=1, 21 | # Relative offset between the center of the first shift and the top-left corner of the image 22 | # Units: fraction of feature map stride (e.g., 0.5 means half stride) 23 | # Allowed values are floats in the [0, 1) range. 24 | # Recommended value is 0.5, although it is not expected to affect model accuracy. 25 | OFFSET=0.0, 26 | ), 27 | FCOS=dict( 28 | NUM_CLASSES=80, 29 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 30 | NUM_CONVS=4, 31 | FPN_STRIDES=[8, 16, 32, 64, 128], 32 | PRIOR_PROB=0.01, 33 | CENTERNESS_ON_REG=False, 34 | NORM_REG_TARGETS=False, 35 | SCORE_THRESH_TEST=0.05, 36 | TOPK_CANDIDATES_TEST=1000, 37 | NMS_THRESH_TEST=0.6, 38 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 39 | FOCAL_LOSS_GAMMA=2.0, 40 | FOCAL_LOSS_ALPHA=0.25, 41 | IOU_LOSS_TYPE="iou", 42 | CENTER_SAMPLING_RADIUS=0.0, 43 | OBJECT_SIZES_OF_INTEREST=[ 44 | [-1, 64], 45 | [64, 128], 46 | [128, 256], 47 | [256, 512], 48 | [512, float("inf")], 49 | ], 50 | NORM_SYNC=True, 51 | BUDGET_LOSS_LAMBDA=0, 52 | ), 53 | ), 54 | ) 55 | 56 | 57 | class FCOSConfig(BaseDetectionConfig): 58 | def __init__(self): 59 | super(FCOSConfig, self).__init__() 60 | self._register_configuration(_config_dict) 61 | 62 | 63 | config = FCOSConfig() 64 | -------------------------------------------------------------------------------- /cvpods/layers/swap_align2nat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | class _SwapAlign2Nat(Function): 10 | @staticmethod 11 | def forward(ctx, X, lambda_val, pad_val): 12 | ctx.lambda_val = lambda_val 13 | ctx.input_shape = X.size() 14 | 15 | Y = _C.swap_align2nat_forward(X, lambda_val, pad_val) 16 | return Y 17 | 18 | @staticmethod 19 | @once_differentiable 20 | def backward(ctx, gY): 21 | lambda_val = ctx.lambda_val 22 | bs, ch, h, w = ctx.input_shape 23 | 24 | gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w) 25 | 26 | return gX, None, None 27 | 28 | 29 | swap_align2nat = _SwapAlign2Nat.apply 30 | 31 | 32 | class SwapAlign2Nat(nn.Module): 33 | """ 34 | The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174. 35 | Given an input tensor that predicts masks of shape (N, C=VxU, H, W), 36 | applying the op returns masks of shape (N, V'xU', H', W') where 37 | the unit lengths of (V, U) and (H, W) are swapped, and the mask representation 38 | is transformed from aligned to natural. 39 | Args: 40 | lambda_val (int): the relative unit length ratio between (V, U) and (H, W); 41 | since we always have larger unit lengths for (V, U) than (H, W), 42 | lambda_val is always >= 1. 43 | pad_val (float): padding value for the values falling outside of the input 44 | tensor, defaults to -6, as sigmoid(-6) is ~0, indicating 45 | that there are no masks outside of the tensor.
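    Example (illustrative shape sketch, added here; the underlying kernel is
    CUDA-only, so this is not a runnable doctest):

        op = SwapAlign2Nat(lambda_val=2)
        # X: (N, V*U, H, W) aligned masks; plausibly Y = op(X) has shape
        # (N, (V//2)*(U//2), 2*H, 2*W), i.e. the (V, U) and (H, W) unit
        # lengths are swapped by the ratio lambda_val.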
46 | """ 47 | 48 | def __init__(self, lambda_val, pad_val=-6.0): 49 | super(SwapAlign2Nat, self).__init__() 50 | self.lambda_val = lambda_val 51 | self.pad_val = pad_val 52 | 53 | def forward(self, X): 54 | return swap_align2nat(X, self.lambda_val, self.pad_val) 55 | 56 | def __repr__(self): 57 | tmpstr = self.__class__.__name__ + "(" 58 | tmpstr += "lambda_val=" + str(self.lambda_val) 59 | tmpstr += ", pad_val=" + str(self.pad_val) 60 | tmpstr += ")" 61 | return tmpstr 62 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d4.lambda-0_1/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=8, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.1, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.0, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | 
TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_1/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.1, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_8/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | 
CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.8, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /tools/dev/parse_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # A shell script that parses metrics from the log file. 5 | # Make it easier for developers to track performance of models. 6 | 7 | LOG="$1" 8 | 9 | if [[ -z "$LOG" ]]; then 10 | echo "Usage: $0 /path/to/log/file" 11 | exit 1 12 | fi 13 | 14 | # [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) 15 | # [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) 16 | # [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
17 | 18 | # training time 19 | trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') 20 | echo "Training speed: $trainspeed s/it" 21 | 22 | # inference time: there could be multiple inference during training 23 | inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) 24 | echo "Inference speed: $inferencespeed s/it" 25 | 26 | # [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 27 | memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') 28 | echo "Training memory: $memory MB" 29 | 30 | echo "Easy to copypaste:" 31 | echo "$trainspeed","$inferencespeed","$memory" 32 | 33 | echo "------------------------------" 34 | 35 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox 36 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 37 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 38 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm 39 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 40 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 41 | 42 | echo "COCO Results:" 43 | num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) 44 | # each task has 3 lines 45 | grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) 46 | -------------------------------------------------------------------------------- /cvpods/configs/keypoint_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | KEYPOINT_ON=True, 6 | ROI_KEYPOINT_HEAD=dict( 7 | NAME="KRCNNConvDeconvUpsampleHead", 8 | POOLER_RESOLUTION=14, 9 | POOLER_SAMPLING_RATIO=0, 10 | CONV_DIMS=tuple(512 for _ in range(8)), 11 | NUM_KEYPOINTS=17, # 17 is the number of keypoints in COCO 12 | # Images with too few (or no) keypoints are excluded from training. 13 | MIN_KEYPOINTS_PER_IMAGE=1, 14 | # Normalize by the total number of visible keypoints in the minibatch if True. 15 | # Otherwise, normalize by the total number of keypoints that could ever exist 16 | # in the minibatch. 17 | # The keypoint softmax loss is only calculated on visible keypoints. 18 | # Since the number of visible keypoints can vary significantly between 19 | # minibatches, this has the effect of up-weighting the importance of 20 | # minibatches with few visible keypoints. (Imagine the extreme case of 21 | # only one visible keypoint versus N: in the case of N, each one 22 | # contributes 1/N to the gradient compared to the single keypoint 23 | # determining the gradient direction). Instead, we can normalize the 24 | # loss by the total number of keypoints, if it were the case that all 25 | # keypoints were visible in a full minibatch. (Returning to the example, 26 | # this means that the one visible keypoint contributes as much as each 27 | # of the N keypoints.) 
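            # Illustrative arithmetic (numbers assumed for this sketch): with 17
            # COCO keypoints per instance and 10 instances in a minibatch, at most
            # 170 keypoints exist; if only 20 are visible, normalizing by visible
            # keypoints divides the loss by 20 instead of 170, up-weighting this
            # sparse minibatch by 8.5x.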
28 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS=True, 29 | # Multi-task loss weight to use for keypoints 30 | # Recommended values: 31 | # - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True 32 | # - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False 33 | LOSS_WEIGHT=1.0, 34 | # Type of pooling operation applied to the incoming feature map for each RoI 35 | POOLER_TYPE="ROIAlignV2", 36 | ), 37 | ) 38 | ) 39 | 40 | 41 | class KeypointConfig(BaseDetectionConfig): 42 | def __init__(self): 43 | super(KeypointConfig, self).__init__() 44 | self._register_configuration(_config_dict) 45 | 46 | 47 | config = KeypointConfig() 48 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/activation_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | 7 | import torch.nn as nn 8 | 9 | from .jit_handles import generic_activation_jit, get_jit_model_analysis 10 | 11 | # A dictionary that maps supported operations to their activation count handles. 12 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 13 | "aten::_convolution": generic_activation_jit("conv"), 14 | "aten::addmm": generic_activation_jit("addmm"), 15 | } 16 | 17 | 18 | def activation_count( 19 | model: nn.Module, 20 | inputs: typing.Tuple[object, ...], 21 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None, 22 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 23 | """ 24 | Given a model and an input to the model, compute the total number of 25 | activations of the model. Note the input should have a batch size of 1. 26 | 27 | Args: 28 | model (nn.Module): The model to compute activation counts. 29 | inputs (tuple): Inputs that are passed to `model` to count activations. 30 | Inputs need to be in a tuple. 31 | supported_ops (dict(str,Callable) or None) : By default, we count 32 | activations for convolution and fully connected layers. Users can 33 | provide customized supported_ops if desired. 34 | 35 | Returns: 36 | tuple[defaultdict, Counter]: A dictionary that records the number of 37 | activations (in millions) for each operation and a Counter that records the 38 | number of skipped operations. 39 | """ 40 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 41 | if not supported_ops: 42 | supported_ops = _SUPPORTED_OPS.copy() 43 | 44 | # Run activation count. 45 | total_activation_count, skipped_ops = get_jit_model_analysis( 46 | model, inputs, supported_ops 47 | ) 48 | 49 | # Log for skipped operations. 50 | if len(skipped_ops) > 0: 51 | for op, freq in skipped_ops.items(): 52 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 53 | 54 | # Convert activation count to mega count. 55 | final_count = defaultdict(float) 56 | for op in total_activation_count: 57 | final_count[op] = total_activation_count[op] / 1e6 58 | 59 | return final_count, skipped_ops 60 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | For a few datasets that detectron2 natively supports, 3 | the datasets are assumed to exist in a directory called 4 | "datasets/", under the directory where you launch the program.
5 | They need to have the following directory structure: 6 | 7 | ## Expected dataset structure for COCO instance/keypoint detection: 8 | 9 | ``` 10 | coco/ 11 | annotations/ 12 | instances_{train,val}2017.json 13 | person_keypoints_{train,val}2017.json 14 | {train,val}2017/ 15 | # image files that are mentioned in the corresponding json 16 | ``` 17 | 18 | You can use the 2014 version of the dataset as well. 19 | 20 | Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset, 21 | which you can download with `./prepare_for_tests.sh`. 22 | 23 | ## Expected dataset structure for PanopticFPN: 24 | 25 | ``` 26 | coco/ 27 | annotations/ 28 | panoptic_{train,val}2017.json 29 | panoptic_{train,val}2017/ 30 | # png annotations 31 | panoptic_stuff_{train,val}2017/ # generated by the script mentioned below 32 | ``` 33 | 34 | Install panopticapi by: 35 | ``` 36 | pip install git+https://github.com/cocodataset/panopticapi.git 37 | ``` 38 | Then run `python prepare_panoptic_fpn.py` to extract semantic annotations from panoptic annotations. 39 | 40 | ## Expected dataset structure for LVIS instance segmentation: 41 | ``` 42 | coco/ 43 | {train,val,test}2017/ 44 | lvis/ 45 | lvis_v0.5_{train,val}.json 46 | lvis_v0.5_image_info_test.json 47 | ``` 48 | 49 | Install lvis-api by: 50 | ``` 51 | pip install git+https://github.com/lvis-dataset/lvis-api.git 52 | ``` 53 | 54 | ## Expected dataset structure for cityscapes: 55 | ``` 56 | cityscapes/ 57 | gtFine/ 58 | train/ 59 | aachen/ 60 | color.png, instanceIds.png, labelIds.png, polygons.json, 61 | labelTrainIds.png 62 | ... 63 | val/ 64 | test/ 65 | leftImg8bit/ 66 | train/ 67 | val/ 68 | test/ 69 | ``` 70 | Install cityscapes scripts by: 71 | ``` 72 | pip install git+https://github.com/mcordts/cityscapesScripts.git 73 | ``` 74 | 75 | Note: 76 | labelTrainIds.png are created by `cityscapesscripts/preparation/createTrainIdLabelImgs.py`. 77 | They are not needed for instance segmentation. 78 | 79 | ## Expected dataset structure for Pascal VOC: 80 | ``` 81 | VOC20{07,12}/ 82 | Annotations/ 83 | ImageSets/ 84 | Main/ 85 | trainval.txt 86 | test.txt 87 | # train.txt or val.txt, if you use these splits 88 | JPEGImages/ 89 | ``` 90 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/lars/adaptive_lr_cuda.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "cuda_runtime.h" 3 | #include "torch/extension.h" 4 | 5 | namespace cvpods { 6 | template <typename scalar_t> 7 | __global__ void ComputeAdaptiveLrOnDeviceAfterTypeCheck( 8 | const scalar_t &param_norm, 9 | const scalar_t &grad_norm, 10 | const scalar_t weight_decay, 11 | const scalar_t eps, 12 | const scalar_t trust_coef, 13 | scalar_t *out) { 14 | // 1. The case that `param_norm` is `zero` means all elements of the parameter 15 | // are `zero` (in general, it occurs right after the parameter is initialized 16 | // as `zero`). In this case, `adaptive_lr` will be calculated as `zero`, which 17 | // may be the reason for breaking parameter updates. In this context, we construct 18 | // LARS to use only the wrapped optimizer's algorithm when this situation occurs by 19 | // converting `adaptive_lr` to `one`. 20 | // 21 | // 2. The case that `grad_norm` is `zero` means all elements of the gradient are 22 | // `zero` (in general, it occurs when backward propagation doesn't work correctly).
23 | // In this case, it can be interpreted as there exists an exceptional situation, 24 | // which may result in inappropriate parameter updates. In this context, we 25 | // construct LARS to pass the responsibility of handling the exceptional case 26 | // to the wrapped optimizer when this exception occurs by converting `adaptive_lr` 27 | // to `one`. 28 | if (param_norm > 0 && grad_norm > 0) { 29 | scalar_t divisor = grad_norm + weight_decay * param_norm + eps; 30 | *out = param_norm / divisor * trust_coef; 31 | } else { 32 | *out = 1.0; 33 | } 34 | } 35 | 36 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 37 | 38 | void ComputeAdaptiveLrOnDevice( 39 | torch::Tensor param_norm, 40 | torch::Tensor grad_norm, 41 | double weight_decay, 42 | double eps, 43 | double trust_coef, 44 | torch::Tensor out) { 45 | CHECK_CUDA(param_norm); 46 | CHECK_CUDA(grad_norm); 47 | CHECK_CUDA(out); 48 | 49 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 50 | param_norm.type(), 51 | "compute_adaptive_lr_cuda", 52 | ([&] { 53 | ComputeAdaptiveLrOnDeviceAfterTypeCheck<scalar_t><<<1, 1>>>( 54 | *param_norm.data<scalar_t>(), 55 | *grad_norm.data<scalar_t>(), 56 | weight_decay, 57 | eps, 58 | trust_coef, 59 | out.data<scalar_t>()); 60 | })); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /cvpods/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | __all__ = ["subsample_labels"] 5 | 6 | 7 | def subsample_labels(labels, num_samples, positive_fraction, bg_label): 8 | """ 9 | Return `num_samples` (or fewer, if not enough found) 10 | random samples from `labels` which is a mixture of positives & negatives. 11 | It will try to return as many positives as possible without 12 | exceeding `positive_fraction * num_samples`, and then try to 13 | fill the remaining slots with negatives. 14 | 15 | Args: 16 | labels (Tensor): (N, ) label vector with values: 17 | * -1: ignore 18 | * bg_label: background ("negative") class 19 | * otherwise: one or more foreground ("positive") classes 20 | num_samples (int): The total number of labels with value >= 0 to return. 21 | Values that are not sampled will be filled with -1 (ignore). 22 | positive_fraction (float): The number of subsampled labels with values > 0 23 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 24 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 25 | In other words, if there are not enough positives, the sample is filled with 26 | negatives. If there are also not enough negatives, then as many elements are 27 | sampled as is possible. 28 | bg_label (int): label index of background ("negative") class. 29 | 30 | Returns: 31 | pos_idx, neg_idx (Tensor): 32 | 1D vector of indices. The total length of both is `num_samples` or fewer.
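    Example (illustrative)::

        >>> labels = torch.tensor([-1, 0, 0, 0, 1, 2, 0])
        >>> pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.5, bg_label=0)
        >>> # pos_idx is a permutation of {4, 5}; neg_idx holds 2 random indices from {1, 2, 3, 6}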
33 | """ 34 | positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=False).squeeze(1) 35 | negative = torch.nonzero(labels == bg_label, as_tuple=False).squeeze(1) 36 | 37 | num_pos = int(num_samples * positive_fraction) 38 | # protect against not enough positive examples 39 | num_pos = min(positive.numel(), num_pos) 40 | num_neg = num_samples - num_pos 41 | # protect against not enough negative examples 42 | num_neg = min(negative.numel(), num_neg) 43 | 44 | # randomly select positive and negative examples 45 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 46 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 47 | 48 | pos_idx = positive[perm1] 49 | neg_idx = negative[perm2] 50 | return pos_idx, neg_idx 51 | -------------------------------------------------------------------------------- /cvpods/configs/pointrend_config.py: -------------------------------------------------------------------------------- 1 | from .rcnn_fpn_config import RCNNFPNConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | ROI_HEADS=dict( 6 | # NAME="PointRendROIHeads", 7 | IN_FEATURES=["p2", "p3", "p4", "p5"], 8 | ), 9 | ROI_BOX_HEAD=dict( 10 | TRAIN_ON_PRED_BOXES=True, 11 | ), 12 | ROI_MASK_HEAD=dict( 13 | # NAME="CoarseMaskHead", 14 | # Names of the input feature maps to be used by a coarse mask head. 15 | IN_FEATURES=["p2"], 16 | FC_DIM=1024, 17 | NUM_FC=2, 18 | # The side size of a coarse mask head prediction. 19 | OUTPUT_SIDE_RESOLUTION=7, 20 | # True if point head is used. 21 | POINT_HEAD_ON=True, 22 | ), 23 | POINT_HEAD=dict( 24 | # Names of the input feature maps to be used by a mask point head. 25 | IN_FEATURES=["p2"], 26 | NUM_CLASSES=80, 27 | FC_DIM=256, 28 | NUM_FC=3, 29 | # Number of points sampled during training for a mask point head. 30 | TRAIN_NUM_POINTS=14 * 14, 31 | # Oversampling parameter for PointRend point sampling during training. 32 | # Parameter `k` in the original paper. 33 | OVERSAMPLE_RATIO=3, 34 | # Importance sampling parameter for PointRend point sampling during training. 35 | # Parameter `beta` in the original paper. 36 | IMPORTANCE_SAMPLE_RATIO=0.75, 37 | # Number of subdivision steps during inference. 38 | SUBDIVISION_STEPS=5, 39 | # Maximum number of points selected at each subdivision step (N). 40 | SUBDIVISION_NUM_POINTS=28 * 28, 41 | CLS_AGNOSTIC_MASK=False, 42 | # If True, then coarse prediction features are used as input for each layer 43 | # in PointRend's MLP.
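            # Illustrative arithmetic for the sampling knobs above (scheme as
            # assumed from the PointRend paper): with TRAIN_NUM_POINTS N = 196,
            # OVERSAMPLE_RATIO k = 3 and IMPORTANCE_SAMPLE_RATIO beta = 0.75,
            # k*N = 588 candidate points are sampled, the beta*N = 147 most
            # uncertain are kept, and the remaining (1 - beta)*N = 49 are drawn
            # uniformly at random.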
44 | COARSE_PRED_EACH_LAYER=True, 45 | # COARSE_SEM_SEG_HEAD_NAME="SemSegFPNHead" 46 | ), 47 | ), 48 | INPUT=dict( 49 | # PointRend for instance segmentation does not work with "polygon" mask_format 50 | MASK_FORMAT="bitmask", 51 | ), 52 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 53 | ) 54 | 55 | 56 | class PointRendRCNNFPNConfig(RCNNFPNConfig): 57 | def __init__(self): 58 | super(PointRendRCNNFPNConfig, self).__init__() 59 | self._register_configuration(_config_dict) 60 | 61 | 62 | config = PointRendRCNNFPNConfig() 63 | -------------------------------------------------------------------------------- /cvpods/configs/solo_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | MASK_ON=True, 6 | PIXEL_MEAN=[123.675, 116.28, 103.53], # RGB FORMAT 7 | PIXEL_STD=[1.0, 1.0, 1.0], 8 | RESNETS=dict( 9 | DEPTH=50, 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | ), 12 | FPN=dict( 13 | IN_FEATURES=["res2", "res3", "res4", "res5"], 14 | OUT_CHANNELS=256, 15 | ), 16 | SOLO=dict( 17 | NUM_CLASSES=80, 18 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 19 | NUM_GRIDS=[40, 36, 24, 16, 12], # per level 20 | SCALE_RANGES=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), 21 | FEATURE_STRIDES=[8, 8, 16, 32, 32], 22 | # Given a gt: (cx, cy, w, h), the center region is controlled by 23 | # constant scale factors sigma: (cx, cy, sigma*w, sigma*h) 24 | SIGMA=0.2, 25 | HEAD=dict( 26 | TYPE="SOLOHead", # "SOLOHead", "DecoupledSOLOHead" 27 | SEG_FEAT_CHANNELS=256, 28 | STACKED_CONVS=7, 29 | PRIOR_PROB=0.01, 30 | NORM="GN", 31 | # The following two items are useful in the "DecoupledSOLOLightHead" 32 | USE_DCN_IN_TOWER=False, 33 | DCN_TYPE=None, 34 | ), 35 | # Loss parameters: 36 | LOSS_INS=dict( 37 | TYPE='DiceLoss', 38 | LOSS_WEIGHT=3.0 39 | ), 40 | LOSS_CAT=dict( 41 | TYPE='FocalLoss', 42 | GAMMA=2.0, 43 | ALPHA=0.25, 44 | LOSS_WEIGHT=1.0, 45 | ), 46 | # Inference parameters: 47 | SCORE_THRESH_TEST=0.1, 48 | MASK_THRESH_TEST=0.5, 49 | # NMS parameters: 50 | NMS_PER_IMAGE=500, 51 | NMS_KERNEL='gaussian', # gaussian/linear 52 | NMS_SIGMA=2.0, 53 | UPDATE_THRESH=0.05, 54 | DETECTIONS_PER_IMAGE=100, 55 | ), 56 | ), 57 | INPUT=dict( 58 | # SOLO for instance segmentation does not work with "polygon" mask_format 59 | MASK_FORMAT="bitmask", 60 | ) 61 | ) 62 | 63 | 64 | class SOLOConfig(BaseDetectionConfig): 65 | def __init__(self): 66 | super(SOLOConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = SOLOConfig() 71 | -------------------------------------------------------------------------------- /cvpods/utils/dump/history_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from typing import List, Tuple 5 | 6 | import numpy as np 7 | 8 | 9 | class HistoryBuffer: 10 | """ 11 | Track a series of scalar values and provide access to smoothed values over a 12 | window or the global average of the series. 13 | """ 14 | 15 | def __init__(self, max_length: int = 1000000): 16 | """ 17 | Args: 18 | max_length: maximal number of values that can be stored in the 19 | buffer. When the capacity of the buffer is exhausted, old 20 | values will be removed.
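        Example (illustrative)::

            >>> buf = HistoryBuffer(max_length=3)
            >>> for v in [1.0, 2.0, 3.0, 4.0]:
            ...     buf.update(v)          # 1.0 is evicted once 4.0 arrives
            >>> buf.latest(), buf.avg(2), buf.global_avg()
            (4.0, 3.5, 2.5)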
21 | """ 22 | self._max_length: int = max_length 23 | self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs 24 | self._count: int = 0 25 | self._global_avg: float = 0 26 | 27 | def update(self, value: float, iteration: float = None): 28 | """ 29 | Add a new scalar value produced at a certain iteration. If the length 30 | of the buffer exceeds self._max_length, the oldest element will be 31 | removed from the buffer. 32 | """ 33 | if iteration is None: 34 | iteration = self._count 35 | if len(self._data) == self._max_length: 36 | self._data.pop(0) 37 | self._data.append((value, iteration)) 38 | 39 | self._count += 1 40 | self._global_avg += (value - self._global_avg) / self._count 41 | 42 | def latest(self): 43 | """ 44 | Return the latest scalar value added to the buffer. 45 | """ 46 | return self._data[-1][0] 47 | 48 | def median(self, window_size: int): 49 | """ 50 | Return the median of the latest `window_size` values in the buffer. 51 | """ 52 | return np.median([x[0] for x in self._data[-window_size:]]) 53 | 54 | def avg(self, window_size: int): 55 | """ 56 | Return the mean of the latest `window_size` values in the buffer. 57 | """ 58 | return np.mean([x[0] for x in self._data[-window_size:]]) 59 | 60 | def global_avg(self): 61 | """ 62 | Return the mean of all the elements in the buffer. Note that this 63 | includes those getting removed due to limited buffer storage. 64 | """ 65 | return self._global_avg 66 | 67 | def values(self): 68 | """ 69 | Returns: 70 | list[(number, iteration)]: content of the current buffer. 71 | """ 72 | return self._data 73 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace cvpods { 6 | 7 | template <typename scalar_t> 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.type() == scores.type(), "dets should have the same type as scores"); 20 | 21 | if (dets.numel() == 0) { 22 | return at::empty({0}, dets.options().dtype(at::kLong)); 23 | } 24 | 25 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 26 | 27 | auto ndets = dets.size(0); 28 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 29 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 30 | 31 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 32 | auto keep = keep_t.data_ptr<int64_t>(); 33 | auto order = order_t.data_ptr<int64_t>(); 34 | 35 | int64_t num_to_keep = 0; 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) { 40 | continue; 41 | } 42 | 43 | keep[num_to_keep++] = i; 44 | 45 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 46 | auto j = order[_j]; 47 | if (suppressed[j] == 1) { 48 | continue; 49 | } 50 | 51 | auto ovr = single_box_iou_rotated<scalar_t>( 52 | dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>()); 53 | if (ovr >= iou_threshold) { 54 | suppressed[j] = 1; 55 | } 56 | } 57 | } 58 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 59 | } 60 | 61 | at::Tensor nms_rotated_cpu( 62 | const at::Tensor& dets, 63 | const at::Tensor& scores, 64 | const float iou_threshold) { 65 | auto result = at::empty({0}, dets.options()); 66 | 67 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { 68 | result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold); 69 | }); 70 | return result; 71 | } 72 | 73 | } // namespace cvpods 74 | -------------------------------------------------------------------------------- /cvpods/utils/visualizer/show.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : show.py 5 | @Time : 2020/05/07 23:58:35 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:58:35 10 | ''' 11 | 12 | import copy 13 | import pylab as plt 14 | 15 | import numpy as np 16 | 17 | 18 | def draw_box(ax, vertices, color='black'): 19 | """ 20 | Draw box with color. 21 | 22 | Args: 23 | ax (matplotlib.axes.Axes): axes to draw the box on 24 | vertices (ndarray): vertex coordinates of shape (N x 2) 25 | color (str): plotted color 26 | """ 27 | connections = [ 28 | [0, 1], 29 | [1, 2], 30 | [2, 3], 31 | [3, 0], 32 | ] 33 | for connection in connections: 34 | ax.plot(*vertices[:, connection], c=color, lw=5) 35 | 36 | 37 | def visualize_feature_maps( 38 | fm, 39 | boxes=[], 40 | keypoints=[], 41 | stride=1, 42 | save_filename=None 43 | ): 44 | """ 45 | Visualize feature map with boxes or key points.
46 | 47 | Args: 48 | fm (torch.Tensor): feature map of shape H x W x c, where c is the number of channels 49 | boxes (ndarray): boxes to be visualized. 50 | keypoints (ndarray): key points to be visualized 51 | stride (int): used to normalize boxes or keypoints 52 | save_filename (str, optional): if set, path to save the figure to; otherwise the figure is shown 53 | """ 54 | nc = np.ceil(np.sqrt(fm.shape[2])) # column 55 | nr = np.ceil(fm.shape[2] / nc) # row 56 | nc = int(nc) 57 | nr = int(nr) 58 | plt.figure(figsize=(64, 64)) 59 | for i in range(fm.shape[2]): 60 | ax = plt.subplot(nr, nc, i + 1) 61 | ax.imshow(fm[:, :, i], cmap='jet') 62 | 63 | for obj in boxes: 64 | box = copy.deepcopy(obj) / stride 65 | draw_box(ax, box, color='g') 66 | 67 | for pts_score in keypoints: 68 | pts = pts_score[:8] 69 | pts = pts / stride 70 | for i in range(4): 71 | ax.plot(pts[2 * i + 1], pts[2 * i + 0], 'r*') 72 | ax.plot([pts[1], pts[3]], [pts[0], pts[2]], c='y', lw=5) 73 | ax.plot([pts[3], pts[5]], [pts[2], pts[4]], c='g', lw=5) 74 | ax.plot([pts[5], pts[7]], [pts[4], pts[6]], c='b', lw=5) 75 | ax.plot([pts[7], pts[1]], [pts[6], pts[0]], c='r', lw=5) 76 | 77 | # plt.colorbar() 78 | ax.axis('off') 79 | if save_filename: 80 | plt.savefig(save_filename) 81 | else: 82 | plt.show() 83 | plt.close() 84 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/flop_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | 7 | import torch.nn as nn 8 | 9 | from .jit_handles import ( 10 | addmm_flop_jit, 11 | conv_flop_jit, 12 | einsum_flop_jit, 13 | get_jit_model_analysis, 14 | matmul_flop_jit 15 | ) 16 | 17 | # A dictionary that maps supported operations to their flop count jit handles. 18 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 19 | "aten::addmm": addmm_flop_jit, 20 | "aten::_convolution": conv_flop_jit, 21 | "aten::einsum": einsum_flop_jit, 22 | "aten::matmul": matmul_flop_jit, 23 | } 24 | 25 | 26 | def flop_count( 27 | model: nn.Module, 28 | inputs: typing.Tuple[object, ...], 29 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None, 30 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 31 | """ 32 | Given a model and an input to the model, compute the Gflops of the given 33 | model. Note the input should have a batch size of 1. 34 | 35 | Args: 36 | model (nn.Module): The model to compute flop counts. 37 | inputs (tuple): Inputs that are passed to `model` to count flops. 38 | Inputs need to be in a tuple. 39 | supported_ops (dict(str,Callable) or None) : By default, we count flops 40 | for convolution layers, fully connected layers, torch.matmul and 41 | torch.einsum operations. We define a FLOP as a single atomic 42 | Multiply-Add. Users can provide customized supported_ops for 43 | counting flops if desired. 44 | 45 | Returns: 46 | tuple[defaultdict, Counter]: A dictionary that records the number of 47 | gflops for each operation and a Counter that records the number of 48 | skipped operations. 49 | """ 50 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 51 | if not supported_ops: 52 | supported_ops = _SUPPORTED_OPS.copy() 53 | 54 | # Run flop count. 55 | total_flop_counter, skipped_ops = get_jit_model_analysis( 56 | model, inputs, supported_ops 57 | ) 58 | 59 | # Log for skipped operations.
60 | if len(skipped_ops) > 0: 61 | for op, freq in skipped_ops.items(): 62 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 63 | 64 | # Convert flop count to gigaflops. 65 | final_count = defaultdict(float) 66 | for op in total_flop_counter: 67 | final_count[op] = total_flop_counter[op] / 1e9 68 | 69 | return final_count, skipped_ops 70 | -------------------------------------------------------------------------------- /tools/caffe2_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import os 4 | 5 | from cvpods.checkpoint import DetectionCheckpointer 6 | from cvpods.config import get_cfg 7 | from cvpods.data import build_detection_test_loader 8 | from cvpods.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format 9 | from cvpods.export import add_export_config, export_caffe2_model 10 | from cvpods.modeling import build_model 11 | from cvpods.utils import setup_logger 12 | 13 | 14 | def setup_cfg(args): 15 | cfg = get_cfg() 16 | # cuda context is initialized before creating dataloader, so we don't fork anymore 17 | cfg.DATALOADER.NUM_WORKERS = 0 18 | cfg = add_export_config(cfg) 19 | cfg.merge_from_file(args.config_file) 20 | cfg.merge_from_list(args.opts) 21 | cfg.freeze() 22 | return cfg 23 | 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser(description="Convert a model to Caffe2") 27 | parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") 28 | parser.add_argument("--run-eval", action="store_true") 29 | parser.add_argument("--output", help="output directory for the converted caffe2 model") 30 | parser.add_argument( 31 | "opts", 32 | help="Modify config options using the command-line", 33 | default=None, 34 | nargs=argparse.REMAINDER, 35 | ) 36 | args = parser.parse_args() 37 | logger = setup_logger() 38 | logger.info("Command line arguments: " + str(args)) 39 | 40 | cfg = setup_cfg(args) 41 | 42 | # create a torch model 43 | torch_model = build_model(cfg) 44 | DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) 45 | 46 | # get a sample data 47 | data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) 48 | first_batch = next(iter(data_loader)) 49 | 50 | # convert and save caffe2 model 51 | caffe2_model = export_caffe2_model(cfg, torch_model, first_batch) 52 | caffe2_model.save_protobuf(args.output) 53 | # draw the caffe2 graph 54 | caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch) 55 | 56 | # run evaluation with the converted model 57 | if args.run_eval: 58 | dataset = cfg.DATASETS.TEST[0] 59 | data_loader = build_detection_test_loader(cfg, dataset) 60 | # NOTE: hard-coded evaluator. 
change to the evaluator for your dataset 61 | evaluator = COCOEvaluator(dataset, cfg, True, args.output) 62 | metrics = inference_on_dataset(caffe2_model, data_loader, evaluator) 63 | print_csv_format(metrics) 64 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from cvpods.layers import ShapeSpec 5 | from cvpods.structures import ImageList 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | """ 27 | ImageNet classification module. 28 | Weights of this model can be used as pretrained weights of any models in cvpods. 29 | """ 30 | def __init__(self, cfg): 31 | super(Classification, self).__init__() 32 | 33 | self.device = torch.device(cfg.MODEL.DEVICE) 34 | 35 | self.network = cfg.build_backbone( 36 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 37 | 38 | self.loss_evaluator = nn.CrossEntropyLoss() 39 | 40 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 41 | 3, 1, 1) 42 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 43 | 3, 1, 1) 44 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 45 | 46 | self.to(self.device) 47 | 48 | def forward(self, batched_inputs): 49 | images = self.preprocess_image(batched_inputs) 50 | 51 | preds = self.network(images.tensor)["linear"] 52 | 53 | if self.training: 54 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 55 | losses = self.loss_evaluator(preds, labels) 56 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 57 | 58 | return { 59 | "loss_cls": losses, 60 | "Acc@1": acc1, 61 | "Acc@5": acc5, 62 | } 63 | else: 64 | return preds 65 | 66 | def preprocess_image(self, batched_inputs): 67 | """ 68 | Normalize, pad and batch the input images. 
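        Example (illustrative; assumes a constructed `model` of this class)::

            >>> batched_inputs = [{"image": torch.rand(3, 224, 224) * 255}]
            >>> images = model.preprocess_image(batched_inputs)
            >>> # `images` is an ImageList, padded to the backbone's size_divisibility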
69 | """ 70 | images = [x["image"].float().to(self.device) for x in batched_inputs] 71 | images = [self.normalizer(x.div(255)) for x in images] 72 | images = ImageList.from_tensors(images, self.network.size_divisibility) 73 | return images 74 | -------------------------------------------------------------------------------- /cvpods/configs/retinanet_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : retinanet_config.py 5 | @Time : 2020/05/07 23:56:02 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:02 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # Backbone NAME: "build_retinanet_resnet_fpn_backbone" 17 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 18 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 19 | ANCHOR_GENERATOR=dict( 20 | SIZES=[ 21 | [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] 22 | for x in [32, 64, 128, 256, 512] 23 | ] 24 | ), 25 | RETINANET=dict( 26 | # This is the number of foreground classes. 27 | NUM_CLASSES=80, 28 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 29 | # Convolutions to use in the cls and bbox tower 30 | # NOTE: this doesn't include the last conv for logits 31 | NUM_CONVS=4, 32 | # IoU overlap ratio [bg, fg] for labeling anchors. 33 | # Anchors with < bg are labeled negative (0) 34 | # Anchors with >= bg and < fg are ignored (-1) 35 | # Anchors with >= fg are labeled positive (1) 36 | IOU_THRESHOLDS=[0.4, 0.5], 37 | IOU_LABELS=[0, -1, 1], 38 | # Prior prob for rare case (i.e. foreground) at the beginning of training. 39 | # This is used to set the bias for the logits layer of the classifier subnet. 40 | # This improves training stability in the case of heavy class imbalance. 41 | PRIOR_PROB=0.01, 42 | # Inference cls score threshold, only anchors with score > INFERENCE_TH are 43 | # considered for inference (to improve speed) 44 | SCORE_THRESH_TEST=0.05, 45 | TOPK_CANDIDATES_TEST=1000, 46 | NMS_THRESH_TEST=0.5, 47 | # Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets 48 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 49 | # Loss parameters 50 | FOCAL_LOSS_GAMMA=2.0, 51 | FOCAL_LOSS_ALPHA=0.25, 52 | SMOOTH_L1_LOSS_BETA=0.1, 53 | ), 54 | ), 55 | ) 56 | 57 | 58 | class RetinaNetConfig(BaseDetectionConfig): 59 | def __init__(self): 60 | super(RetinaNetConfig, self).__init__() 61 | self._register_configuration(_config_dict) 62 | 63 | 64 | config = RetinaNetConfig() 65 | -------------------------------------------------------------------------------- /cvpods/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import pprint 4 | import sys 5 | from collections import OrderedDict 6 | from collections.abc import Mapping 7 | import numpy as np 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet.
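    Example of the emitted log lines (values are made up)::

        copypaste: Task: bbox
        copypaste: AP,AP50,AP75,APs,APm,APl
        copypaste: 39.2000,58.1000,42.6000,22.4000,42.9000,51.3000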
14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task][metric] 43 | if not np.isfinite(actual): 44 | ok = False 45 | diff = abs(actual - expected) 46 | if diff > tolerance: 47 | ok = False 48 | 49 | logger = logging.getLogger(__name__) 50 | if not ok: 51 | logger.error("Result verification failed!") 52 | logger.error("Expected Results: " + str(expected_results)) 53 | logger.error("Actual Results: " + pprint.pformat(results)) 54 | 55 | sys.exit(1) 56 | else: 57 | logger.info("Results verification passed.") 58 | return ok 59 | 60 | 61 | def flatten_results_dict(results): 62 | """ 63 | Expand a hierarchical dict of scalars into a flat dict of scalars. 64 | If results[k1][k2][k3] = v, the returned dict will have the entry 65 | {"k1/k2/k3": v}. 66 | 67 | Args: 68 | results (dict): 69 | """ 70 | r = {} 71 | for k, v in results.items(): 72 | if isinstance(v, Mapping): 73 | v = flatten_results_dict(v) 74 | for kk, vv in v.items(): 75 | r[k + "/" + kk] = vv 76 | else: 77 | r[k] = v 78 | return r 79 | -------------------------------------------------------------------------------- /cvpods/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from typing import Dict, Optional 4 | from tabulate import tabulate 5 | 6 | 7 | class Registry(object): 8 | """ 9 | The registry that provides name -> object mapping, to support third-party 10 | users' custom modules. 11 | To create a registry (e.g. a backbone registry): 12 | .. code-block:: python 13 | BACKBONE_REGISTRY = Registry('BACKBONE') 14 | To register an object: 15 | .. code-block:: python 16 | @BACKBONE_REGISTRY.register() 17 | class MyBackbone(): 18 | ... 19 | Or: 20 | .. code-block:: python 21 | BACKBONE_REGISTRY.register(MyBackbone) 22 | """ 23 | 24 | def __init__(self, name: str) -> None: 25 | """ 26 | Args: 27 | name (str): the name of this registry 28 | """ 29 | self._name: str = name 30 | self._obj_map: Dict[str, object] = {} 31 | 32 | def _do_register(self, name: str, obj: object) -> None: 33 | assert ( 34 | name not in self._obj_map 35 | ), "An object named '{}' was already registered in '{}' registry!".format( 36 | name, self._name 37 | ) 38 | self._obj_map[name] = obj 39 | 40 | def register(self, obj: object = None, name: str = None) -> Optional[object]: 41 | """ 42 | Register the given object under the name `obj.__name__` (or the given `name`). 43 | Can be used as either a decorator or not. See docstring of this class for usage.
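        Example (illustrative)::

            >>> @BACKBONE_REGISTRY.register(name="my_backbone")
            ... class MyBackbone:
            ...     pass
            >>> BACKBONE_REGISTRY.get("my_backbone") is MyBackbone
            True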
44 | """ 45 | if obj is None: 46 | # used as a decorator 47 | def deco(func_or_class: object) -> object: 48 | nonlocal name 49 | if name is None: 50 | name = func_or_class.__name__ # pyre-ignore 51 | self._do_register(name, func_or_class) 52 | return func_or_class 53 | 54 | return deco 55 | 56 | # used as a function call 57 | if name is None: 58 | name = obj.__name__ # pyre-ignore 59 | self._do_register(name, obj) 60 | 61 | def get(self, name: str) -> object: 62 | ret = self._obj_map.get(name) 63 | if ret is None: 64 | raise KeyError( 65 | "No object named '{}' found in '{}' registry!".format(name, self._name) 66 | ) 67 | return ret 68 | 69 | def __contains__(self, name: str) -> bool: 70 | return name in self._obj_map 71 | 72 | def __repr__(self) -> str: 73 | table_headers = ["Names", "Objects"] 74 | table = tabulate(self._obj_map.items(), headers=table_headers, tablefmt="fancy_grid") 75 | return "Registry of {}:\n".format(self._name) + table 76 | -------------------------------------------------------------------------------- /cvpods/configs/efficientdet_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | PIXEL_MEAN=[0.485, 0.456, 0.406], # mean value from ImageNet 6 | PIXEL_STD=[0.229, 0.224, 0.225], 7 | EFFICIENTNET=dict( 8 | MODEL_NAME="efficientnet-b0", # default setting for EfficientDet-D0 9 | NORM="BN", 10 | BN_MOMENTUM=1 - 0.99, 11 | BN_EPS=1e-3, 12 | DROP_CONNECT_RATE=1 - 0.8, # survival_prob = 0.8 13 | DEPTH_DIVISOR=8, 14 | MIN_DEPTH=None, 15 | NUM_CLASSES=None, 16 | FIX_HEAD_STEAM=False, 17 | MEMORY_EFFICIENT_SWISH=True, 18 | OUT_FEATURES=["stage4", "stage6", "stage8"], 19 | ), 20 | BIFPN=dict( 21 | IN_FEATURES=["stage4", "stage6", "stage8"], 22 | NORM="BN", 23 | BN_MOMENTUM=0.01, # 1 - 0.99 24 | BN_EPS=1e-3, 25 | MEMORY_EFFICIENT_SWISH=True, 26 | INPUT_SIZE=512, # default setting for EfficientDet-D0 27 | NUM_LAYERS=3, # default setting for EfficientDet-D0 28 | OUT_CHANNELS=60, # default setting for EfficientDet-D0 29 | FUSE_TYPE="fast", # select in ["softmax", "fast", "sum"] 30 | ), 31 | EFFICIENTDET=dict( 32 | IN_FEATURES=[f"p{i}" for i in range(3, 8)], # p3-p7 33 | NUM_CLASSES=80, 34 | FREEZE_BACKBONE=False, 35 | FREEZE_BN=False, 36 | HEAD=dict( 37 | NUM_CONV=3, # default setting for EfficientDet-D0 38 | NORM="BN", 39 | BN_MOMENTUM=1 - 0.99, 40 | BN_EPS=1e-3, 41 | PRIOR_PROB=0.01, 42 | MEMORY_EFFICIENT_SWISH=True, 43 | ), 44 | IOU_THRESHOLDS=[0.5, 0.5], 45 | IOU_LABELS=[0, -1, 1], 46 | SCORE_THRESH_TEST=0.05, 47 | TOPK_CANDIDATES_TEST=1000, 48 | NMS_THRESH_TEST=0.5, 49 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 50 | FOCAL_LOSS_GAMMA=1.5, 51 | FOCAL_LOSS_ALPHA=0.25, 52 | SMOOTH_L1_LOSS_BETA=0.1, 53 | REG_NORM=4.0, 54 | BOX_LOSS_WEIGHT=50.0, 55 | ), 56 | ANCHOR_GENERATOR=dict( 57 | SIZES=[ 58 | [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] 59 | for x in [4 * 2**i for i in range(3, 8)] 60 | ] 61 | ), 62 | ), 63 | ) 64 | 65 | 66 | class EfficientDetConfig(BaseDetectionConfig): 67 | def __init__(self): 68 | super(EfficientDetConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = EfficientDetConfig() 73 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/parameter_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | import typing 4 | from collections import defaultdict 5 | import tabulate 6 | 7 | from torch import nn 8 | 9 | 10 | def parameter_count(model: nn.Module) -> typing.DefaultDict[str, int]: 11 | """ 12 | Count parameters of a model and its submodules. 13 | 14 | Args: 15 | model: a torch module 16 | 17 | Returns: 18 | dict (str-> int): the key is either a parameter name or a module name. 19 | The value is the number of elements in the parameter, or in all 20 | parameters of the module. The key "" corresponds to the total 21 | number of parameters of the model. 22 | """ 23 | r = defaultdict(int) 24 | for name, prm in model.named_parameters(): 25 | size = prm.numel() 26 | name = name.split(".") 27 | for k in range(0, len(name) + 1): 28 | prefix = ".".join(name[:k]) 29 | r[prefix] += size 30 | return r 31 | 32 | 33 | def parameter_count_table(model: nn.Module, max_depth: int = 3) -> str: 34 | """ 35 | Format the parameter count of the model (and its submodules or parameters) 36 | in a nice table. 37 | 38 | Args: 39 | model: a torch module 40 | max_depth (int): maximum depth to recursively print submodules or 41 | parameters 42 | 43 | Returns: 44 | str: the table to be printed 45 | """ 46 | count: typing.DefaultDict[str, int] = parameter_count(model) 47 | param_shape: typing.Dict[str, typing.Tuple] = { 48 | k: tuple(v.shape) for k, v in model.named_parameters() 49 | } 50 | 51 | table: typing.List[typing.Tuple] = [] 52 | 53 | def format_size(x: int) -> str: 54 | if x > 1e5: 55 | return "{:.1f}M".format(x / 1e6) 56 | if x > 1e2: 57 | return "{:.1f}K".format(x / 1e3) 58 | return str(x) 59 | 60 | def fill(lvl: int, prefix: str) -> None: 61 | if lvl >= max_depth: 62 | return 63 | for name, v in count.items(): 64 | if name.count(".") == lvl and name.startswith(prefix): 65 | indent = " " * (lvl + 1) 66 | if name in param_shape: 67 | table.append((indent + name, indent + str(param_shape[name]))) 68 | else: 69 | table.append((indent + name, indent + format_size(v))) 70 | fill(lvl + 1, name + ".") 71 | 72 | table.append(("model", format_size(count.pop("")))) 73 | fill(0, "") 74 | 75 | old_ws = tabulate.PRESERVE_WHITESPACE 76 | tabulate.PRESERVE_WHITESPACE = True 77 | tab = tabulate.tabulate( 78 | table, headers=["name", "#elements or shape"], tablefmt="pipe" 79 | ) 80 | tabulate.PRESERVE_WHITESPACE = old_ws 81 | return tab 82 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/circle_loss.py: -------------------------------------------------------------------------------- 1 | # Authors: YiFan Sun, Changmao Cheng 2 | from typing import Tuple 3 | 4 | import torch 5 | from torch import nn 6 | from torch.nn.functional import cross_entropy 7 | 8 | 9 | class ClassificationCircleLoss(nn.Module): 10 | """Circle loss for class-level labels as described in the paper 11 | `"Circle Loss: A Unified Perspective of Pair Similarity Optimization" <https://arxiv.org/abs/2002.10857>`_ 12 | 13 | Args: 14 | scale (float): the scale factor. Default: 256.0 15 | margin (float): the relax margin value. Default: 0.25 16 | circle_center (tuple[float]): the center of the circle (logit_ap, logit_an). Default: (1, 0) 17 | reduction (string, optional): Specifies the reduction to apply to the output: 18 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 19 | ``'mean'``: the sum of the output will be divided by the number of 20 | elements in the output, ``'sum'``: the output will be summed.
Default: ``'mean'`` 21 | """ 22 | 23 | def __init__( 24 | self, 25 | scale: float = 256.0, 26 | margin: float = 0.25, 27 | circle_center: Tuple[float, float] = (1, 0), 28 | reduction: str = "mean", 29 | ) -> None: 30 | super(ClassificationCircleLoss, self).__init__() 31 | self.scale = scale 32 | self.margin = margin 33 | self.circle_center = circle_center 34 | self.reduction = reduction 35 | 36 | def forward(self, logits: torch.Tensor, targets: torch.LongTensor) -> torch.Tensor: 37 | r""" 38 | 39 | Args: 40 | logits (torch.Tensor): The predicted logits before softmax, 41 | namely :math:`\cos \theta` in the above equation, with shape of :math:`(N, C)` 42 | targets (torch.LongTensor): The ground-truth label long vector, 43 | namely :math:`y` in the above equation, with shape of :math:`(N,)` 44 | 45 | Returns: 46 | torch.Tensor: loss 47 | the computed loss 48 | """ 49 | 50 | mask = torch.zeros(logits.shape, dtype=torch.bool, device=logits.device).scatter_( 51 | dim=1, index=targets.unsqueeze(1), value=1 52 | ) 53 | positive_weighting = torch.clamp( 54 | self.circle_center[0] + self.margin - logits.detach(), min=0) 55 | negative_weighting = torch.clamp( 56 | logits.detach() - self.circle_center[1] + self.margin, min=0) 57 | logits = torch.where( 58 | mask, 59 | self.scale * positive_weighting * (logits - (self.circle_center[0] - self.margin)), 60 | self.scale * negative_weighting * (logits - self.circle_center[1] - self.margin), 61 | ) 62 | loss = cross_entropy(input=logits, target=targets, reduction=self.reduction) 63 | 64 | return loss 65 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | AccessModifierOffset: -1 2 | AlignAfterOpenBracket: AlwaysBreak 3 | AlignConsecutiveAssignments: false 4 | AlignConsecutiveDeclarations: false 5 | AlignEscapedNewlinesLeft: true 6 | AlignOperands: false 7 | AlignTrailingComments: false 8 | AllowAllParametersOfDeclarationOnNextLine: false 9 | AllowShortBlocksOnASingleLine: false 10 | AllowShortCaseLabelsOnASingleLine: false 11 | AllowShortFunctionsOnASingleLine: Empty 12 | AllowShortIfStatementsOnASingleLine: false 13 | AllowShortLoopsOnASingleLine: false 14 | AlwaysBreakAfterReturnType: None 15 | AlwaysBreakBeforeMultilineStrings: true 16 | AlwaysBreakTemplateDeclarations: true 17 | BinPackArguments: false 18 | BinPackParameters: false 19 | BraceWrapping: 20 | AfterClass: false 21 | AfterControlStatement: false 22 | AfterEnum: false 23 | AfterFunction: false 24 | AfterNamespace: false 25 | AfterObjCDeclaration: false 26 | AfterStruct: false 27 | AfterUnion: false 28 | BeforeCatch: false 29 | BeforeElse: false 30 | IndentBraces: false 31 | BreakBeforeBinaryOperators: None 32 | BreakBeforeBraces: Attach 33 | BreakBeforeTernaryOperators: true 34 | BreakConstructorInitializersBeforeComma: false 35 | BreakAfterJavaFieldAnnotations: false 36 | BreakStringLiterals: false 37 | ColumnLimit: 80 38 | CommentPragmas: '^ IWYU pragma:' 39 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 40 | ConstructorInitializerIndentWidth: 4 41 | ContinuationIndentWidth: 4 42 | Cpp11BracedListStyle: true 43 | DerivePointerAlignment: false 44 | DisableFormat: false 45 | ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ] 46 | IncludeCategories: 47 | - Regex: '^<.*\.h(pp)?>' 48 | Priority: 1 49 | - Regex: '^<.*' 50 | Priority: 2 51 | - Regex: '.*' 52 | Priority: 3 53 | IndentCaseLabels: true 54 | IndentWidth: 2 
55 | IndentWrappedFunctionNames: false 56 | KeepEmptyLinesAtTheStartOfBlocks: false 57 | MacroBlockBegin: '' 58 | MacroBlockEnd: '' 59 | MaxEmptyLinesToKeep: 1 60 | NamespaceIndentation: None 61 | ObjCBlockIndentWidth: 2 62 | ObjCSpaceAfterProperty: false 63 | ObjCSpaceBeforeProtocolList: false 64 | PenaltyBreakBeforeFirstCallParameter: 1 65 | PenaltyBreakComment: 300 66 | PenaltyBreakFirstLessLess: 120 67 | PenaltyBreakString: 1000 68 | PenaltyExcessCharacter: 1000000 69 | PenaltyReturnTypeOnItsOwnLine: 200 70 | PointerAlignment: Left 71 | ReflowComments: true 72 | SortIncludes: true 73 | SpaceAfterCStyleCast: false 74 | SpaceBeforeAssignmentOperators: true 75 | SpaceBeforeParens: ControlStatements 76 | SpaceInEmptyParentheses: false 77 | SpacesBeforeTrailingComments: 1 78 | SpacesInAngles: false 79 | SpacesInContainerLiterals: true 80 | SpacesInCStyleCastParentheses: false 81 | SpacesInParentheses: false 82 | SpacesInSquareBrackets: false 83 | Standard: Cpp11 84 | TabWidth: 8 85 | UseTab: Never 86 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/rrpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | from typing import Dict 4 | 5 | import torch 6 | 7 | from cvpods.layers import ShapeSpec 8 | 9 | from ..box_regression import Box2BoxTransformRotated 10 | from .build import PROPOSAL_GENERATOR_REGISTRY 11 | from .rpn import RPN 12 | from .rrpn_outputs import RRPNOutputs, find_top_rrpn_proposals 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | @PROPOSAL_GENERATOR_REGISTRY.register() 18 | class RRPN(RPN): 19 | """ 20 | Rotated RPN subnetwork. 21 | Please refer to https://arxiv.org/pdf/1703.01086.pdf for the original RRPN paper: 22 | Ma, J., Shao, W., Ye, H., Wang, L., Wang, H., Zheng, Y., & Xue, X. (2018). 23 | Arbitrary-oriented scene text detection via rotation proposals. 24 | IEEE Transactions on Multimedia, 20(11), 3111-3122. 25 | """ 26 | 27 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 28 | super().__init__(cfg, input_shape) 29 | self.box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) 30 | 31 | def forward(self, images, features, gt_instances=None): 32 | # same signature as RPN.forward 33 | gt_boxes = [x.gt_boxes for x in gt_instances] if gt_instances is not None else None 34 | del gt_instances 35 | features = [features[f] for f in self.in_features] 36 | pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) 37 | anchors = self.anchor_generator(features) 38 | 39 | outputs = RRPNOutputs( 40 | self.box2box_transform, 41 | self.anchor_matcher, 42 | self.batch_size_per_image, 43 | self.positive_fraction, 44 | images, 45 | pred_objectness_logits, 46 | pred_anchor_deltas, 47 | anchors, 48 | self.boundary_threshold, 49 | gt_boxes, 50 | self.smooth_l1_beta, 51 | ) 52 | 53 | if self.training: 54 | losses = outputs.losses() 55 | else: 56 | losses = {} 57 | 58 | with torch.no_grad(): 59 | # Find the top proposals by applying NMS and removing boxes that 60 | # are too small. The proposals are treated as fixed for approximate 61 | # joint training with roi heads. This approach ignores the derivative 62 | # w.r.t. the proposal boxes’ coordinates that are also network 63 | # responses, so is approximate. 
64 | proposals = find_top_rrpn_proposals( 65 | outputs.predict_proposals(), 66 | outputs.predict_objectness_logits(), 67 | images, 68 | self.nms_thresh, 69 | self.pre_nms_topk[self.training], 70 | self.post_nms_topk[self.training], 71 | self.min_box_side_len, 72 | self.training, 73 | ) 74 | 75 | return proposals, losses 76 | -------------------------------------------------------------------------------- /cvpods/utils/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import logging 5 | from contextlib import contextmanager 6 | from functools import wraps 7 | 8 | import torch 9 | 10 | __all__ = ["retry_if_cuda_oom"] 11 | 12 | 13 | @contextmanager 14 | def _ignore_torch_cuda_oom(): 15 | """ 16 | A context which ignores CUDA OOM exceptions from pytorch. 17 | """ 18 | try: 19 | yield 20 | except RuntimeError as e: 21 | # NOTE: the string may change? 22 | if "CUDA out of memory. " in str(e): 23 | pass 24 | else: 25 | raise 26 | 27 | 28 | def retry_if_cuda_oom(func): 29 | r""" 30 | Makes a function retry itself after encountering 31 | pytorch's CUDA OOM error. 32 | It will first retry after calling `torch.cuda.empty_cache()`. 33 | 34 | If that still fails, it will then retry by trying to convert inputs to CPUs. 35 | In this case, it expects the function to dispatch to a CPU implementation. 36 | The return values may become CPU tensors as well, and it is the user's 37 | responsibility to convert them back to CUDA tensors if needed. 38 | 39 | Args: 40 | func: a stateless callable that takes tensor-like objects as arguments 41 | 42 | Returns: 43 | a callable which retries `func` if OOM is encountered. 44 | 45 | Examples: 46 | 47 | .. code-block:: python 48 | 49 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 50 | # output may be on CPU even if inputs are on GPU 51 | 52 | Note: 53 | 1. When converting inputs to CPU, it will only look at each argument and check 54 | if it has `.device` and `.to` for conversion. Nested structures of tensors 55 | are not supported. 56 | 57 | 2. Since the function might be called more than once, it has to be 58 | stateless. 59 | """ 60 | 61 | def maybe_to_cpu(x): 62 | try: 63 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 64 | except AttributeError: 65 | like_gpu_tensor = False 66 | if like_gpu_tensor: 67 | return x.to(device="cpu") 68 | else: 69 | return x 70 | 71 | @wraps(func) 72 | def wrapped(*args, **kwargs): 73 | with _ignore_torch_cuda_oom(): 74 | return func(*args, **kwargs) 75 | 76 | # Clear cache and retry 77 | torch.cuda.empty_cache() 78 | with _ignore_torch_cuda_oom(): 79 | return func(*args, **kwargs) 80 | 81 | # Try on CPU. This slows down the code significantly, therefore print a notice. 82 | logger = logging.getLogger(__name__) 83 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 84 | new_args = (maybe_to_cpu(x) for x in args) 85 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 86 | return func(*new_args, **new_kwargs) 87 | 88 | return wrapped 89 | -------------------------------------------------------------------------------- /cvpods/modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from cvpods.layers import Conv2d, ShapeSpec, get_norm 9 | from cvpods.modeling.nn_utils import weight_init 10 | 11 | 12 | """ 13 | Registry for box heads, which make box predictions from per-region features. 14 | 15 | The registered object will be called with `obj(cfg, input_shape)`. 16 | """ 17 | 18 | 19 | class FastRCNNConvFCHead(nn.Module): 20 | """ 21 | A head with several 3x3 conv layers (each followed by norm & relu) and 22 | several fc layers (each followed by relu). 23 | """ 24 | 25 | def __init__(self, cfg, input_shape: ShapeSpec): 26 | """ 27 | The following attributes are parsed from config: 28 | num_conv, num_fc: the number of conv/fc layers 29 | conv_dim/fc_dim: the dimension of the conv/fc layers 30 | norm: normalization for the conv layers 31 | """ 32 | super().__init__() 33 | 34 | # fmt: off 35 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 36 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 37 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 38 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 39 | norm = cfg.MODEL.ROI_BOX_HEAD.NORM 40 | # fmt: on 41 | assert num_conv + num_fc > 0 42 | 43 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 44 | 45 | self.conv_norm_relus = [] 46 | for k in range(num_conv): 47 | conv = Conv2d( 48 | self._output_size[0], 49 | conv_dim, 50 | kernel_size=3, 51 | padding=1, 52 | bias=not norm, 53 | norm=get_norm(norm, conv_dim), 54 | activation=F.relu, 55 | ) 56 | self.add_module("conv{}".format(k + 1), conv) 57 | self.conv_norm_relus.append(conv) 58 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 59 | 60 | self.fcs = [] 61 | for k in range(num_fc): 62 | fc = nn.Linear(np.prod(self._output_size), fc_dim) 63 | self.add_module("fc{}".format(k + 1), fc) 64 | self.fcs.append(fc) 65 | self._output_size = fc_dim 66 | 67 | for layer in self.conv_norm_relus: 68 | weight_init.c2_msra_fill(layer) 69 | for layer in self.fcs: 70 | weight_init.c2_xavier_fill(layer) 71 | 72 | def forward(self, x): 73 | for layer in self.conv_norm_relus: 74 | x = layer(x) 75 | if len(self.fcs): 76 | if x.dim() > 2: 77 | x = torch.flatten(x, start_dim=1) 78 | for layer in self.fcs: 79 | x = F.relu(layer(x)) 80 | return x 81 | 82 | @property 83 | def output_size(self): 84 | return self._output_size 85 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/vision_detectron.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "ROIAlign/ROIAlign.h" 3 | #include "ROIAlignRotated/ROIAlignRotated.h" 4 | #include "box_iou_rotated/box_iou_rotated.h" 5 | #include "deformable/deform_conv.h" 6 | #include "nms_rotated/nms_rotated.h" 7 | #include "tree_filter/refine.hpp" 8 | #include "tree_filter/mst.hpp" 9 | #include "tree_filter/rst.hpp" 10 | #include "tree_filter/bfs.hpp" 11 | 12 | namespace detectron2 { 13 | 14 | // similar to 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 16 | std::string get_compiler_version() { 17 | std::ostringstream ss; 18 | #if defined(__GNUC__) 19 | #ifndef __clang__ 20 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 21 | #endif 22 | #endif 23 | 24 | #if defined(__clang_major__) 25 | { 26 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
27 | << __clang_patchlevel__; 28 | } 29 | #endif 30 | 31 | #if defined(_MSC_VER) 32 | { ss << "MSVC " << _MSC_FULL_VER; } 33 | #endif 34 | return ss.str(); 35 | } 36 | 37 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 38 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 39 | 40 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 41 | 42 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 43 | m.def( 44 | "deform_conv_backward_input", 45 | &deform_conv_backward_input, 46 | "deform_conv_backward_input"); 47 | m.def( 48 | "deform_conv_backward_filter", 49 | &deform_conv_backward_filter, 50 | "deform_conv_backward_filter"); 51 | m.def( 52 | "modulated_deform_conv_forward", 53 | &modulated_deform_conv_forward, 54 | "modulated_deform_conv_forward"); 55 | m.def( 56 | "modulated_deform_conv_backward", 57 | &modulated_deform_conv_backward, 58 | "modulated_deform_conv_backward"); 59 | 60 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 61 | 62 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 63 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 64 | 65 | m.def( 66 | "roi_align_rotated_forward", 67 | &ROIAlignRotated_forward, 68 | "Forward pass for Rotated ROI-Align Operator"); 69 | m.def( 70 | "roi_align_rotated_backward", 71 | &ROIAlignRotated_backward, 72 | "Backward pass for Rotated ROI-Align Operator"); 73 | 74 | m.def("rst_forward", &rst_forward, "rst forward"); 75 | m.def("mst_forward", &mst_forward, "mst forward"); 76 | m.def("bfs_forward", &bfs_forward, "bfs forward"); 77 | m.def("refine_forward", &refine_forward, "refine forward"); 78 | m.def("refine_backward_feature", &refine_backward_feature, "refine backward wrt feature"); 79 | m.def("refine_backward_edge_weight", &refine_backward_edge_weight, "refine backward wrt edge weight"); 80 | m.def("refine_backward_self_weight", &refine_backward_self_weight, "refine backward wrt self weight"); 81 | } 82 | 83 | } // namespace detectron2 84 | -------------------------------------------------------------------------------- /cvpods/layers/psroi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | 5 | from cvpods import _C 6 | 7 | 8 | class _PSROIPool(Function): 9 | @staticmethod 10 | def forward(ctx, features, rois, output_size, spatial_scale, group_size, output_dim): 11 | ctx.pooled_width = int(output_size[0]) 12 | ctx.pooled_height = int(output_size[1]) 13 | ctx.spatial_scale = float(spatial_scale) 14 | ctx.group_size = int(group_size) 15 | ctx.output_dim = int(output_dim) 16 | 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | mapping_channel = torch.zeros(num_rois, ctx.output_dim, 20 | ctx.pooled_height, ctx.pooled_width).int() 21 | mapping_channel = mapping_channel.to(features.device) 22 | output = _C.psroi_pooling_forward_cuda( 23 | features, rois, mapping_channel, 24 | ctx.pooled_height, ctx.pooled_width, 25 | ctx.spatial_scale, ctx.group_size, ctx.output_dim 26 | ) 27 | ctx.output = output 28 | ctx.mapping_channel = mapping_channel 29 | ctx.rois = rois 30 | ctx.feature_size = features.size() 31 | 32 | return output 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | assert(ctx.feature_size is not None and grad_output.is_cuda) 37 | 38 | batch_size, num_channels, data_height, data_width = ctx.feature_size 39 | 40 | grad_input = 
_C.psroi_pooling_backward_cuda( 41 | grad_output, ctx.rois, ctx.mapping_channel, 42 | batch_size, num_channels, data_height, data_width, 43 | ctx.spatial_scale 44 | # ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, ctx.output_dim 45 | ) 46 | return grad_input, None, None, None, None, None 47 | 48 | 49 | psroi_pool = _PSROIPool.apply 50 | 51 | 52 | class PSROIPool(nn.Module): 53 | def __init__(self, output_size, spatial_scale, group_size, output_dim): 54 | super(PSROIPool, self).__init__() 55 | self.output_size = output_size 56 | self.spatial_scale = spatial_scale 57 | self.group_size = group_size 58 | self.output_dim = output_dim 59 | 60 | def forward(self, input, rois): 61 | """ 62 | Args: 63 | input: NCHW images 64 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 65 | """ 66 | assert rois.dim() == 2 and rois.size(1) == 5 67 | return psroi_pool( 68 | input, rois, self.output_size, self.spatial_scale, self.group_size, self.output_dim 69 | ) 70 | 71 | def __repr__(self): 72 | tmpstr = self.__class__.__name__ + "(" 73 | tmpstr += "output_size=" + str(self.output_size) 74 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 75 | tmpstr += ", group_size=" + str(self.group_size) 76 | tmpstr += ", output_dim=" + str(self.output_dim) 77 | tmpstr += ")" 78 | return tmpstr 79 | -------------------------------------------------------------------------------- /cvpods/utils/file/download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import logging 5 | import os 6 | import shutil 7 | from typing import Callable, Optional 8 | from urllib import request 9 | 10 | 11 | def download( 12 | url: str, dir: str, *, filename: Optional[str] = None, progress: bool = True 13 | ) -> str: 14 | """ 15 | Download a file from a given URL to a directory. If file exists, will not 16 | overwrite the existing file. 17 | 18 | Args: 19 | url (str): 20 | dir (str): the directory to download the file 21 | filename (str or None): the basename to save the file. 22 | Will use the name in the URL if not given. 23 | progress (bool): whether to use tqdm to draw a progress bar. 24 | 25 | Returns: 26 | str: the path to the downloaded file or the existing one. 27 | """ 28 | os.makedirs(dir, exist_ok=True) 29 | if filename is None: 30 | filename = url.split("/")[-1] 31 | assert len(filename), "Cannot obtain filename from url {}".format(url) 32 | fpath = os.path.join(dir, filename) 33 | logger = logging.getLogger(__name__) 34 | 35 | if os.path.isfile(fpath): 36 | logger.info("File {} exists! Skipping download.".format(filename)) 37 | return fpath 38 | 39 | tmp = fpath + ".tmp" # download to a tmp file first, to be more atomic. 
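    # urllib.request.urlretrieve calls its reporthook as
    # hook(block_count, block_size, total_size); the tqdm adapter defined below
    # converts those block counts into incremental byte updates on the bar.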
40 | try: 41 | logger.info("Downloading from {} ...".format(url)) 42 | if progress: 43 | import tqdm 44 | 45 | def hook(t: tqdm.tqdm) -> Callable[[int, int, Optional[int]], None]: 46 | last_b = [0] 47 | 48 | def inner( 49 | b: int, bsize: int, tsize: Optional[int] = None 50 | ) -> None: 51 | if tsize is not None: 52 | t.total = tsize 53 | t.update((b - last_b[0]) * bsize) # type: ignore 54 | last_b[0] = b 55 | 56 | return inner 57 | 58 | with tqdm.tqdm( # type: ignore 59 | unit="B", unit_scale=True, miniters=1, desc=filename, leave=True 60 | ) as t: 61 | tmp, _ = request.urlretrieve( 62 | url, filename=tmp, reporthook=hook(t) 63 | ) 64 | 65 | else: 66 | tmp, _ = request.urlretrieve(url, filename=tmp) 67 | statinfo = os.stat(tmp) 68 | size = statinfo.st_size 69 | if size == 0: 70 | raise IOError("Downloaded an empty file from {}!".format(url)) 71 | # download to tmp first and move to fpath, to make this function more 72 | # atomic. 73 | shutil.move(tmp, fpath) 74 | except IOError: 75 | logger.error("Failed to download {}".format(url)) 76 | raise 77 | finally: 78 | try: 79 | os.unlink(tmp) 80 | except IOError: 81 | pass 82 | 83 | logger.info( 84 | "Successfully downloaded " + fpath + ". " + str(size) + " bytes." 85 | ) 86 | return fpath 87 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | # TODO: Use JIT to replace CUDA implementation in the future. 10 | class _SigmoidFocalLoss(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, logits, targets, gamma, alpha): 14 | """ 15 | Sigmoid Focal Loss forward function 16 | 17 | Args: 18 | ctx: 19 | logits (torch.Tensor): predicted logits 20 | targets (torch.Tensor): ground-truth class labels 21 | gamma (float): focal loss gamma 22 | alpha (float): focal loss alpha 23 | """ 24 | ctx.save_for_backward(logits, targets) 25 | num_classes = logits.shape[1] 26 | ctx.num_classes = num_classes 27 | ctx.gamma = gamma 28 | ctx.alpha = alpha 29 | 30 | losses = _C.sigmoid_focalloss_forward( 31 | logits, targets, num_classes, gamma, alpha 32 | ) 33 | return losses 34 | 35 | @staticmethod 36 | @once_differentiable 37 | def backward(ctx, d_loss): 38 | logits, targets = ctx.saved_tensors 39 | num_classes = ctx.num_classes 40 | gamma = ctx.gamma 41 | alpha = ctx.alpha 42 | d_loss = d_loss.contiguous() 43 | d_logits = _C.sigmoid_focalloss_backward( 44 | logits, targets, d_loss, num_classes, gamma, alpha 45 | ) 46 | return d_logits, None, None, None, None 47 | 48 | 49 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 50 | 51 | 52 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
 53 | """ 54 | CPU version of Sigmoid Focal Loss, equivalent to :class:`_SigmoidFocalLoss`.
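    For a label tensor ``t`` (1-based class indices, with 0 for background and
    negative values ignored) and per-class probabilities ``p_c = sigmoid(logit_c)``,
    the per-entry loss is ``-alpha * (1 - p_c) ** gamma * log(p_c)`` for the
    target class and ``-(1 - alpha) * p_c ** gamma * log(1 - p_c)`` for all
    other classes.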
55 | 56 | """ 57 | num_classes = logits.shape[1] 58 | gamma = gamma[0] if isinstance(gamma, (list, tuple)) else gamma # accept plain floats or singleton sequences 59 | alpha = alpha[0] if isinstance(alpha, (list, tuple)) else alpha 60 | dtype = targets.dtype 61 | device = targets.device 62 | class_range = torch.arange(1, num_classes + 1, dtype=dtype, device=device).unsqueeze(0) 63 | 64 | t = targets.unsqueeze(1) 65 | p = torch.sigmoid(logits) 66 | term1 = (1 - p) ** gamma * torch.log(p) 67 | term2 = p ** gamma * torch.log(1 - p) 68 | return -(t == class_range).float() * term1 * alpha - \ 69 | ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 70 | 71 | 72 | class SigmoidFocalLoss(nn.Module): 73 | 74 | def __init__(self, gamma, alpha): 75 | super(SigmoidFocalLoss, self).__init__() 76 | self.gamma = gamma 77 | self.alpha = alpha 78 | 79 | def forward(self, logits, targets): 80 | # device = logits.device 81 | if logits.is_cuda: 82 | loss_func = sigmoid_focal_loss_cuda 83 | else: 84 | loss_func = sigmoid_focal_loss_cpu 85 | 86 | loss = loss_func(logits, targets, self.gamma, self.alpha) 87 | return loss.sum() 88 | 89 | def __repr__(self): 90 | tmpstr = self.__class__.__name__ + "(" 91 | tmpstr += "gamma=" + str(self.gamma) 92 | tmpstr += ", alpha=" + str(self.alpha) 93 | tmpstr += ")" 94 | return tmpstr 95 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/lars/adaptive_lr.h: -------------------------------------------------------------------------------- 1 | #include "torch/extension.h" 2 | 3 | namespace cvpods { 4 | template <typename scalar_t> 5 | void ComputeAdaptiveLrAfterTypeCheck( 6 | const scalar_t &param_norm, 7 | const scalar_t &grad_norm, 8 | const scalar_t weight_decay, 9 | const scalar_t eps, 10 | const scalar_t trust_coef, 11 | scalar_t *out) { 12 | // 1. The case that `param_norm` is `zero` means all elements of the parameter 13 | // are `zero` (in general, right after the parameter is initialized 14 | // as `zero`). In this case, `adaptive_lr` will be calculated as `zero`, which 15 | // may break parameter updates. In this context, we construct 16 | // LARS to use only the wrapped optimizer's algorithm when this situation occurs, by 17 | // converting `adaptive_lr` to `one`. 18 | // 19 | // 2. The case that `grad_norm` is `zero` means all elements of the gradient are 20 | // `zero` (in general, it occurs when backward propagation doesn't work correctly). 21 | // In this case, it can be interpreted as an exceptional situation, 22 | // which may result in inappropriate parameter updates. In this context, we 23 | // construct LARS to pass the responsibility of handling the exceptional case 24 | // to the wrapped optimizer when this exception occurs, by converting `adaptive_lr` 25 | // to `one`.
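  // In the regular case below, the LARS local learning rate is
  //   adaptive_lr = trust_coef * param_norm / (grad_norm + weight_decay * param_norm + eps)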
26 | if (param_norm > 0 && grad_norm > 0) { 27 | scalar_t divisor = grad_norm + weight_decay * param_norm + eps; 28 | *out = param_norm / divisor * trust_coef; 29 | } else { 30 | *out = 1.0; 31 | } 32 | } 33 | 34 | // CUDA function interface 35 | void ComputeAdaptiveLrOnDevice( 36 | torch::Tensor param_norm, 37 | torch::Tensor grad_norm, 38 | double weight_decay, 39 | double eps, 40 | double trust_coef, 41 | torch::Tensor out); 42 | 43 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 44 | #define CHECK_CPU(x) AT_ASSERTM(!x.type().is_cuda(), #x " must be a CPU tensor") 45 | 46 | torch::Tensor ComputeAdaptiveLr( 47 | torch::Tensor param_norm, 48 | torch::Tensor grad_norm, 49 | double weight_decay, 50 | double eps, 51 | double trust_coef, 52 | torch::Tensor out) { 53 | CHECK_CONTIGUOUS(param_norm); 54 | CHECK_CONTIGUOUS(grad_norm); 55 | CHECK_CONTIGUOUS(out); 56 | 57 | if (param_norm.type().is_cuda() && grad_norm.type().is_cuda()) { 58 | ComputeAdaptiveLrOnDevice( 59 | param_norm, 60 | grad_norm, 61 | weight_decay, 62 | eps, 63 | trust_coef, 64 | out); 65 | } else { 66 | CHECK_CPU(param_norm); 67 | CHECK_CPU(grad_norm); 68 | CHECK_CPU(out); 69 | 70 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 71 | param_norm.type(), 72 | "compute_adaptive_lr_cpu", 73 | ([&] { 74 | ComputeAdaptiveLrAfterTypeCheck<scalar_t>( 75 | *param_norm.data<scalar_t>(), 76 | *grad_norm.data<scalar_t>(), 77 | weight_decay, 78 | eps, 79 | trust_coef, 80 | out.data<scalar_t>()); 81 | })); 82 | } 83 | 84 | return out; 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /cvpods/engine/predictor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from copy import deepcopy 4 | 5 | import torch 6 | 7 | from cvpods.checkpoint import DetectionCheckpointer 8 | from cvpods.data import build_transform_gen 9 | 10 | __all__ = ["DefaultPredictor"] 11 | 12 | 13 | class DefaultPredictor: 14 | """ 15 | Create a simple end-to-end predictor with the given config that runs on a 16 | single device for a single input image. 17 | Compared to using the model directly, this class makes the following additions: 18 | 19 | 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. 20 | 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. 21 | 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. 22 | 4. Take one input image and produce a single output, instead of a batch. 23 | 24 | If you'd like to do anything more fancy, please refer to its source code 25 | as examples to build and use the model manually. 26 | 27 | Attributes: 28 | metadata (Metadata): the metadata of the underlying dataset, obtained from 29 | cfg.DATASETS.TEST. 30 | 31 | Examples: 32 | ..
code-block:: python 33 | 34 | pred = DefaultPredictor(cfg) 35 | inputs = cv2.imread("input.jpg") 36 | outputs = pred(inputs) 37 | """ 38 | def __init__(self, cfg, meta): 39 | self.cfg = deepcopy(cfg) 40 | if self.cfg.MODEL.DEVICE.startswith("cuda:"): 41 | torch.cuda.set_device(self.cfg.MODEL.DEVICE) 42 | self.cfg.MODEL.DEVICE = "cuda" 43 | self.model = cfg.build_model(self.cfg) 44 | self.model.eval() 45 | self.metadata = meta 46 | 47 | checkpointer = DetectionCheckpointer(self.model) 48 | checkpointer.load(cfg.MODEL.WEIGHTS) 49 | 50 | self.transform_gen = build_transform_gen(cfg.INPUT.AUG.TEST_PIPELINES) 51 | 52 | self.input_format = cfg.INPUT.FORMAT 53 | assert self.input_format in ["RGB", "BGR"], self.input_format 54 | 55 | def __call__(self, original_image): 56 | """ 57 | Args: 58 | original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). 59 | 60 | Returns: 61 | predictions (dict): 62 | the output of the model for one image only. 63 | See :doc:`/tutorials/models` for details about the format. 64 | """ 65 | with torch.no_grad( 66 | ): # https://github.com/sphinx-doc/sphinx/issues/4258 67 | # Apply pre-processing to image. 68 | if self.input_format == "RGB": 69 | # whether the model expects BGR inputs or RGB 70 | original_image = original_image[:, :, ::-1] 71 | height, width = original_image.shape[:2] 72 | 73 | image = original_image 74 | for tfm_gen in self.transform_gen: 75 | image = tfm_gen.get_transform(image).apply_image(image) 76 | 77 | image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) 78 | 79 | inputs = {"image": image, "height": height, "width": width} 80 | predictions = self.model([inputs])[0] 81 | return predictions 82 | -------------------------------------------------------------------------------- /cvpods/layers/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Various positional encodings for the transformer. 4 | """ 5 | import math 6 | 7 | import torch 8 | from torch import nn 9 | 10 | 11 | class PositionEmbeddingSine(nn.Module): 12 | """ 13 | This is a more standard version of the position embedding, very similar to the one 14 | used by the Attention is all you need paper, generalized to work on images. 
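    Each spatial axis is encoded independently: the cumulative sum of the
    unmasked positions along an axis gives each location's coordinate, which is
    then expanded into ``num_pos_feats`` sine/cosine channels; the y and x
    encodings are concatenated along the channel dimension.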
15 | """ 16 | 17 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 18 | super().__init__() 19 | self.num_pos_feats = num_pos_feats 20 | self.temperature = temperature 21 | self.normalize = normalize 22 | if scale is not None and normalize is False: 23 | raise ValueError("normalize should be True if scale is passed") 24 | if scale is None: 25 | scale = 2 * math.pi 26 | self.scale = scale 27 | 28 | def forward(self, x, mask): 29 | not_mask = ~mask 30 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 31 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 32 | 33 | if self.normalize: 34 | eps = 1e-6 35 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 36 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 37 | 38 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 39 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 40 | 41 | pos_x = x_embed[:, :, :, None] / dim_t 42 | pos_y = y_embed[:, :, :, None] / dim_t 43 | pos_x = torch.stack( 44 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 45 | ).flatten(3) 46 | pos_y = torch.stack( 47 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 48 | ).flatten(3) 49 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 50 | return pos 51 | 52 | 53 | class PositionEmbeddingLearned(nn.Module): 54 | """ 55 | Absolute pos embedding, learned. 56 | """ 57 | 58 | def __init__(self, num_pos_feats=256): 59 | super().__init__() 60 | self.row_embed = nn.Embedding(50, num_pos_feats) 61 | self.col_embed = nn.Embedding(50, num_pos_feats) 62 | self.reset_parameters() 63 | 64 | def reset_parameters(self): 65 | nn.init.uniform_(self.row_embed.weight) 66 | nn.init.uniform_(self.col_embed.weight) 67 | 68 | def forward(self, tensor_list): 69 | x = tensor_list.tensors 70 | h, w = x.shape[-2:] 71 | i = torch.arange(w, device=x.device) 72 | j = torch.arange(h, device=x.device) 73 | x_emb = self.col_embed(i) 74 | y_emb = self.row_embed(j) 75 | pos = ( 76 | torch.cat( 77 | [x_emb.unsqueeze(0).repeat(h, 1, 1), y_emb.unsqueeze(1).repeat(1, w, 1)], dim=-1 78 | ) 79 | .permute(2, 0, 1) 80 | .unsqueeze(0) 81 | .repeat(x.shape[0], 1, 1, 1) 82 | ) 83 | return pos 84 | 85 | 86 | position_encoding_dict = {"sine": PositionEmbeddingSine, "learned": PositionEmbeddingLearned} 87 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | def smooth_l1_loss(input, 6 | target, 7 | beta: float, 8 | reduction: str = "none", 9 | size_average=False): 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 
32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 57 | """ 58 | if beta < 1e-5: 59 | # if beta == 0, then torch.where will result in nan gradients when 60 | # the chain rule is applied due to pytorch implementation details 61 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 62 | # zeros, rather than "no gradient"). To avoid this issue, we define 63 | # small values of beta to be exactly l1 loss. 64 | loss = torch.abs(input - target) 65 | else: 66 | n = torch.abs(input - target) 67 | cond = n < beta 68 | loss = torch.where(cond, 0.5 * n**2 / beta, n - 0.5 * beta) 69 | 70 | if reduction == "mean" or size_average: 71 | loss = loss.mean() 72 | elif reduction == "sum": 73 | loss = loss.sum() 74 | 75 | return loss 76 | -------------------------------------------------------------------------------- /cvpods/solver/scheduler_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from torch.optim import lr_scheduler 5 | 6 | from cvpods.utils.registry import Registry 7 | 8 | from .lr_scheduler import PolyLR, WarmupCosineLR, WarmupMultiStepLR 9 | 10 | SCHEDULER_BUILDER = Registry("LRScheduler builder") 11 | 12 | 13 | @SCHEDULER_BUILDER.register() 14 | class BaseSchedulerBuilder: 15 | 16 | @staticmethod 17 | def build(optimizer, cfg): 18 | raise NotImplementedError 19 | 20 | 21 | @SCHEDULER_BUILDER.register() 22 | class WarmupMultiStepLRBuilder(BaseSchedulerBuilder): 23 | 24 | @staticmethod 25 | def build(optimizer, cfg): 26 | scheduler = WarmupMultiStepLR( 27 | optimizer, 28 | cfg.SOLVER.LR_SCHEDULER.STEPS, 29 | cfg.SOLVER.LR_SCHEDULER.GAMMA, 30 | warmup_factor=cfg.SOLVER.LR_SCHEDULER.WARMUP_FACTOR, 31 | warmup_iters=cfg.SOLVER.LR_SCHEDULER.WARMUP_ITERS, 32 | warmup_method=cfg.SOLVER.LR_SCHEDULER.WARMUP_METHOD, 33 | ) 34 | return scheduler 35 | 36 | 37 | @SCHEDULER_BUILDER.register() 38 | class WarmupCosineLRBuilder(BaseSchedulerBuilder): 39 | 40 | @staticmethod 41 | def build(optimizer, cfg): 42 | max_iter = cfg.SOLVER.LR_SCHEDULER.MAX_ITER 43 | max_epoch = cfg.SOLVER.LR_SCHEDULER.MAX_EPOCH 44 | iters_per_epoch = None if max_epoch is None else max_iter // max_epoch 45 | 46 | scheduler = WarmupCosineLR( 47 | optimizer, 48 | cfg.SOLVER.LR_SCHEDULER.MAX_ITER, 49 | warmup_factor=cfg.SOLVER.LR_SCHEDULER.WARMUP_FACTOR, 50 | warmup_iters=cfg.SOLVER.LR_SCHEDULER.WARMUP_ITERS, 51 | 
warmup_method=cfg.SOLVER.LR_SCHEDULER.WARMUP_METHOD, 52 | iters_per_epoch=iters_per_epoch, 53 | ) 54 | return scheduler 55 | 56 | 57 | @SCHEDULER_BUILDER.register() 58 | class PolyLRBuilder(BaseSchedulerBuilder): 59 | 60 | @staticmethod 61 | def build(optimizer, cfg): 62 | return PolyLR( 63 | optimizer, 64 | cfg.SOLVER.LR_SCHEDULER.MAX_ITER, 65 | cfg.SOLVER.LR_SCHEDULER.POLY_POWER, 66 | warmup_factor=cfg.SOLVER.LR_SCHEDULER.WARMUP_FACTOR, 67 | warmup_iters=cfg.SOLVER.LR_SCHEDULER.WARMUP_ITERS, 68 | warmup_method=cfg.SOLVER.LR_SCHEDULER.WARMUP_METHOD, 69 | ) 70 | 71 | 72 | @SCHEDULER_BUILDER.register() 73 | class LambdaLRBuilder(BaseSchedulerBuilder): 74 | 75 | @staticmethod 76 | def build(optimizer, cfg): 77 | return lr_scheduler.LambdaLR( 78 | optimizer, 79 | cfg.SOLVER.LR_SCHEDULER.LAMBDA_SCHEDULE 80 | ) 81 | 82 | 83 | @SCHEDULER_BUILDER.register() 84 | class OneCycleLRBuilder(BaseSchedulerBuilder): 85 | 86 | @staticmethod 87 | def build(optimizer, cfg): 88 | return lr_scheduler.OneCycleLR( 89 | optimizer, 90 | cfg.SOLVER.LR_SCHEDULER.MAX_LR, 91 | total_steps=cfg.SOLVER.LR_SCHEDULER.MAX_ITER, 92 | pct_start=cfg.SOLVER.LR_SCHEDULER.PCT_START, 93 | base_momentum=cfg.SOLVER.LR_SCHEDULER.BASE_MOM, 94 | max_momentum=cfg.SOLVER.LR_SCHEDULER.MAX_MOM, 95 | div_factor=cfg.SOLVER.LR_SCHEDULER.DIV_FACTOR 96 | ) 97 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #ifdef WITH_CUDA 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor
ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.type().is_cuda()) { 86 | #ifdef WITH_CUDA 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace cvpods 116 | -------------------------------------------------------------------------------- /cvpods/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | from cvpods.layers import paste_masks_in_image 5 | from cvpods.structures import Instances 6 | 7 | 8 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 9 | """ 10 | Resize the output instances. 11 | The input images are often resized when entering an object detector. 12 | As a result, we often need the outputs of the detector in a different 13 | resolution from its inputs. 14 | 15 | This function will resize the raw outputs of an R-CNN detector 16 | to produce outputs according to the desired output resolution. 17 | 18 | Args: 19 | results (Instances): the raw outputs from the detector. 20 | `results.image_size` contains the input image resolution the detector sees. 21 | This object might be modified in-place. 22 | output_height, output_width: the desired output resolution. 23 | 24 | Returns: 25 | Instances: the resized output from the model, based on the output resolution 26 | """ 27 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 28 | results = Instances((output_height, output_width), **results.get_fields()) 29 | 30 | if results.has("pred_boxes"): 31 | output_boxes = results.pred_boxes 32 | elif results.has("proposal_boxes"): 33 | output_boxes = results.proposal_boxes 34 | 35 | output_boxes.scale(scale_x, scale_y) 36 | output_boxes.clip(results.image_size) 37 | 38 | results = results[output_boxes.nonempty()] 39 | 40 | if results.has("pred_masks"): 41 | results.pred_masks = paste_masks_in_image( 42 | results.pred_masks[:, 0, :, :], # N, 1, M, M 43 | results.pred_boxes, 44 | results.image_size, 45 | threshold=mask_threshold, 46 | ) 47 | 48 | if results.has("pred_keypoints"): 49 | results.pred_keypoints[:, :, 0] *= scale_x 50 | results.pred_keypoints[:, :, 1] *= scale_y 51 | 52 | return results 53 | 54 | 55 | def sem_seg_postprocess(result, img_size, output_height, output_width): 56 | """ 57 | Return semantic segmentation predictions in the original resolution. 58 | 59 | The input images are often resized when entering a semantic segmentor. Moreover, in some 60 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride. 61 | As a result, we often need the predictions of the segmentor in a different 62 | resolution from its inputs.
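    Concretely, this function first crops the prediction back to the unpadded
    ``img_size`` region and then bilinearly resizes it to
    ``(output_height, output_width)``.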
63 | 64 | Args: 65 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 66 | where C is the number of classes, and H, W are the height and width of the prediction. 67 | img_size (tuple): the image size that the segmentor takes as input. 68 | output_height, output_width: the desired output resolution. 69 | 70 | Returns: 71 | semantic segmentation prediction (Tensor): A tensor of the shape 72 | (C, output_height, output_width) that contains per-pixel soft predictions. 73 | """ 74 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 75 | result = F.interpolate( 76 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 77 | )[0] 78 | return result 79 | -------------------------------------------------------------------------------- /cvpods/layers/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from cvpods import _C 8 | 9 | 10 | class _ROIAlignRotated(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | output = _C.roi_align_rotated_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 20 | ) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_rotated_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None, None 44 | 45 | 46 | roi_align_rotated = _ROIAlignRotated.apply 47 | 48 | 49 | class ROIAlignRotated(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | """ 52 | Args: 53 | output_size (tuple): h, w 54 | spatial_scale (float): scale the input boxes by this number 55 | sampling_ratio (int): number of input samples to take for each output 56 | sample. 0 to take samples densely. 57 | 58 | Note: 59 | ROIAlignRotated supports continuous coordinates by default: 60 | Given a continuous coordinate c, its two neighboring pixel indices (in our 61 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 62 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 63 | from the underlying signal at continuous coordinates 0.5 and 1.5). 64 | """ 65 | super(ROIAlignRotated, self).__init__() 66 | self.output_size = output_size 67 | self.spatial_scale = spatial_scale 68 | self.sampling_ratio = sampling_ratio 69 | 70 | def forward(self, input, rois): 71 | """ 72 | Args: 73 | input: NCHW images 74 | rois: Bx6 boxes. First column is the index into N. 75 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees).
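                Box coordinates are given in the input image coordinate system
                and are multiplied by the ``spatial_scale`` given at construction
                to map them onto the feature map.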
76 | """ 77 | assert rois.dim() == 2 and rois.size(1) == 6 78 | return roi_align_rotated( 79 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 80 | ) 81 | 82 | def __repr__(self): 83 | tmpstr = self.__class__.__name__ + "(" 84 | tmpstr += "output_size=" + str(self.output_size) 85 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 86 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 87 | tmpstr += ")" 88 | return tmpstr 89 | --------------------------------------------------------------------------------