├── cvpods ├── modeling │ ├── nn_utils │ │ ├── __init__.py │ │ ├── scale_grad.py │ │ ├── feature_utils.py │ │ ├── activation_count.py │ │ ├── flop_count.py │ │ └── parameter_count.py │ ├── basenet │ │ └── __init__.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── proposal_utils.py │ │ └── rrpn.py │ ├── backbone │ │ ├── dynamic_arch │ │ │ ├── __init__.py │ │ │ └── cal_op_flops.py │ │ ├── __init__.py │ │ └── backbone.py │ ├── roi_heads │ │ ├── __init__.py │ │ └── box_head.py │ ├── losses │ │ ├── __init__.py │ │ ├── reg_l1_loss.py │ │ ├── dice_loss.py │ │ ├── label_smooth_ce_loss.py │ │ ├── circle_loss.py │ │ ├── sigmoid_focal_loss.py │ │ └── smooth_l1_loss.py │ ├── meta_arch │ │ ├── __init__.py │ │ └── imagenet.py │ ├── __init__.py │ ├── sampling.py │ └── postprocessing.py ├── utils │ ├── metrics │ │ ├── __init__.py │ │ └── accuracy.py │ ├── distributed │ │ └── __init__.py │ ├── env │ │ └── __init__.py │ ├── benchmark │ │ ├── __init__.py │ │ └── timer.py │ ├── file │ │ ├── __init__.py │ │ ├── serialize.py │ │ └── download.py │ ├── dump │ │ ├── __init__.py │ │ └── history_buffer.py │ ├── visualizer │ │ ├── __init__.py │ │ └── show.py │ ├── README.md │ ├── __init__.py │ ├── imports.py │ ├── registry.py │ └── memory.py ├── analyser │ └── tide │ │ ├── __init__.py │ │ └── errors │ │ └── qualifiers.py ├── export │ ├── __init__.py │ └── README.md ├── evaluation │ ├── registry.py │ ├── __init__.py │ └── testing.py ├── layers │ ├── csrc │ │ ├── README.md │ │ ├── tree_filter │ │ │ ├── mst.hpp │ │ │ ├── rst.hpp │ │ │ ├── bfs.hpp │ │ │ ├── boruvka.hpp │ │ │ ├── boruvka_rst.hpp │ │ │ ├── refine.hpp │ │ │ └── rst.cu │ │ ├── cuda_version.cu │ │ ├── masked_conv2d │ │ │ └── masked_conv2d.h │ │ ├── PSROIPool │ │ │ └── psroi_pool_cuda.h │ │ ├── ml_nms │ │ │ └── ml_nms.h │ │ ├── box_iou_rotated │ │ │ ├── box_iou_rotated.h │ │ │ └── box_iou_rotated_cpu.cpp │ │ ├── border_align │ │ │ └── border_align.h │ │ ├── nms_rotated │ │ │ ├── nms_rotated.h │ │ │ └── nms_rotated_cpu.cpp │ │ ├── SwapAlign2Nat │ │ │ └── SwapAlign2Nat.h │ │ ├── sigmoid_focal_loss │ │ │ └── SigmoidFocalLoss.h │ │ ├── lars │ │ │ ├── adaptive_lr_cuda.cu │ │ │ └── adaptive_lr.h │ │ ├── vision_detectron.cpp │ │ └── ROIAlignRotated │ │ │ └── ROIAlignRotated.h │ ├── shape_spec.py │ ├── rotated_boxes.py │ ├── activation_funcs.py │ ├── border_align.py │ ├── __init__.py │ ├── deform_conv_with_off.py │ ├── swap_align2nat.py │ ├── psroi_pool.py │ ├── position_encoding.py │ └── roi_align_rotated.py ├── data │ ├── registry.py │ ├── transforms │ │ └── __init__.py │ ├── samplers │ │ ├── __init__.py │ │ └── grouped_batch_sampler.py │ ├── __init__.py │ ├── datasets │ │ └── __init__.py │ └── wrapped_dataset.py ├── engine │ ├── __init__.py │ └── predictor.py ├── __init__.py ├── checkpoint │ └── __init__.py ├── structures │ └── __init__.py ├── solver │ ├── __init__.py │ └── scheduler_builder.py └── configs │ ├── yolo_config.py │ ├── panoptic_seg_config.py │ ├── ssd_config.py │ ├── segm_config.py │ ├── rcnn_fpn_config.py │ ├── base_classification_config.py │ ├── dynamic_routing_config.py │ ├── fcos_config.py │ ├── keypoint_config.py │ ├── pointrend_config.py │ ├── solo_config.py │ ├── retinanet_config.py │ └── efficientdet_config.py ├── demo └── introduction.png ├── .pre-commit-config.yaml ├── tools ├── dev │ ├── README.md │ ├── run_instant_tests.sh │ ├── linter.sh │ ├── run_inference_tests.sh │ └── parse_results.sh ├── rm_files.py └── caffe2_converter.py ├── datasets ├── prepare_for_tests.sh └── README.md ├── .gitignore ├── cvpods_playground ├── fcos.res50.1x 
│ ├── net.py │ └── config.py ├── fcos.res50.1x.fix.d4 │ ├── net.py │ └── config.py ├── fcos.res50.1x.fix.d8 │ ├── net.py │ └── config.py ├── fcos.res50.1x.dynamic.d4.lambda-0_1 │ ├── net.py │ └── config.py ├── fcos.res50.1x.dynamic.d8.lambda-0 │ ├── net.py │ └── config.py ├── fcos.res50.1x.dynamic.d8.lambda-0_1 │ ├── net.py │ └── config.py └── fcos.res50.1x.dynamic.d8.lambda-0_8 │ ├── net.py │ └── config.py ├── setup.cfg └── .clang-format /cvpods/modeling/nn_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /cvpods/utils/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .accuracy import accuracy -------------------------------------------------------------------------------- /cvpods/modeling/basenet/__init__.py: -------------------------------------------------------------------------------- 1 | from .basenet import basenet 2 | -------------------------------------------------------------------------------- /demo/introduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/StevenGrove/DynamicHead/HEAD/demo/introduction.png -------------------------------------------------------------------------------- /cvpods/utils/distributed/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .comm import * 5 | -------------------------------------------------------------------------------- /cvpods/utils/env/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .collect_env import * 5 | from .env import * 6 | -------------------------------------------------------------------------------- /cvpods/utils/benchmark/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .benchmark import * 5 | from .timer import * 6 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .rpn import RPN 3 | -------------------------------------------------------------------------------- /cvpods/analyser/tide/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0.0' 2 | 3 | from . 
import datasets 4 | from .errors.qualifiers import * 5 | from .quantify import * 6 | -------------------------------------------------------------------------------- /cvpods/export/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from .api import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cvpods/evaluation/registry.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from cvpods.utils import Registry 5 | 6 | EVALUATOR = Registry("evaluator") 7 | -------------------------------------------------------------------------------- /cvpods/utils/file/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .download import * 5 | from .file_io import * 6 | from .serialize import * 7 | -------------------------------------------------------------------------------- /cvpods/utils/dump/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .events import * 5 | from .history_buffer import * 6 | from .logger import * 7 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | To add a new Op: 4 | 5 | 1. Create a new directory 6 | 2. Implement new ops there 7 | 3. Declare its Python interface in `vision.cpp`. 8 | -------------------------------------------------------------------------------- /cvpods/utils/visualizer/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .colormap import * 5 | from .video_visualizer import * 6 | from .visualizer import * 7 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/dynamic_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # build for dynamic networks 3 | # @Author: yanwei.li 4 | 5 | from .dynamic_backbone import DynamicNetwork, build_dynamic_backbone 6 | -------------------------------------------------------------------------------- /cvpods/utils/README.md: -------------------------------------------------------------------------------- 1 | # Utility functions 2 | 3 | This folder contains utility functions that are not used in the 4 | core library, but are useful for building models or training 5 | code using the config system. 6 | -------------------------------------------------------------------------------- /cvpods/data/registry.py: -------------------------------------------------------------------------------- 1 | from cvpods.utils import Registry 2 | 3 | DATASETS = Registry("datasets") 4 | TRANSFORMS = Registry("transforms") 5 | SAMPLERS = Registry("samplers") 6 | PATH_ROUTES = Registry("path_routes") 7 | -------------------------------------------------------------------------------- /cvpods/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .transform import * 3 | from .transform_gen import * 4 | 5 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 6 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/mst.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | extern at::Tensor mst_forward( 5 | const at::Tensor & edge_index_tensor, 6 | const at::Tensor & edge_weight_tensor, 7 | int vertex_count); 8 | 9 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/rst.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | extern at::Tensor rst_forward( 5 | const at::Tensor & edge_index_tensor, 6 | const at::Tensor & edge_weight_tensor, 7 | int vertex_count); 8 | 9 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | #include <cuda_runtime_api.h> 4 | 5 | namespace cvpods { 6 | int get_cudart_version() { 7 | return CUDART_VERSION; 8 | } 9 | } // namespace cvpods 10 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/bfs.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | 4 | extern std::tuple<at::Tensor, at::Tensor, at::Tensor> 5 | bfs_forward( 6 | const at::Tensor & edge_index_tensor, 7 | int max_adj_per_node 8 | ); 9 | 10 | -------------------------------------------------------------------------------- /cvpods/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .roi_heads import Res5ROIHeads, ROIHeads, StandardROIHeads, select_foreground_proposals 3 | from .rotated_fast_rcnn import RROIHeads 4 | 5 | from . import cascade_rcnn # isort:skip 6 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | # - repo: https://github.com/ambv/black 3 | # rev: stable 4 | # hooks: 5 | # - id: black 6 | # language_version: python3.6 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v1.2.3 9 | hooks: 10 | - id: flake8 11 | -------------------------------------------------------------------------------- /cvpods/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .hooks import * 4 | from .launch import * 5 | from .predictor import * 6 | from .setup import * 7 | from .trainer import * 8 | 9 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 10 | -------------------------------------------------------------------------------- /tools/dev/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Some scripts for developers to use, including: 3 | 4 | - `linter.sh`: lint the codebase before commit 5 | - `run_{inference,instant}_tests.sh`: run inference/training for a few iterations. 6 | Note that these tests require 2 GPUs.
7 | - `parse_results.sh`: parse results from a log file. 8 | -------------------------------------------------------------------------------- /cvpods/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | from .utils import setup_environment 4 | 5 | setup_environment() 6 | 7 | # This line will be programmatically read/written by setup.py. 8 | # Leave it at the bottom of this file and don't touch it. 9 | __version__ = "0.1" 10 | -------------------------------------------------------------------------------- /cvpods/checkpoint/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | # File: 4 | 5 | 6 | from . import catalog as _UNUSED  # register the handler 7 | from .checkpoint import Checkpointer, PeriodicCheckpointer 8 | from .detection_checkpoint import DetectionCheckpointer 9 | 10 | __all__ = ["Checkpointer", "PeriodicCheckpointer", "DetectionCheckpointer"] 11 | -------------------------------------------------------------------------------- /cvpods/export/README.md: -------------------------------------------------------------------------------- 1 | 2 | This directory contains code to prepare a detectron2 model for deployment. 3 | Currently it supports exporting a detectron2 model to Caffe2 format through ONNX. 4 | 5 | Please see [documentation](https://detectron2.readthedocs.io/tutorials/deployment.html) for its usage. 6 | 7 | 8 | ### Acknowledgements 9 | 10 | Thanks to the Mobile Vision team at Facebook for developing the conversion tools. 11 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/scale_grad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from torch.autograd.function import Function 4 | 5 | 6 | class _ScaleGradient(Function): 7 | 8 | @staticmethod 9 | def forward(ctx, input, scale): 10 | ctx.scale = scale 11 | return input 12 | 13 | @staticmethod 14 | def backward(ctx, grad_output): 15 | return grad_output * ctx.scale, None 16 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .dice_loss import dice_loss 2 | from .focal_loss import ( 3 | sigmoid_focal_loss, 4 | sigmoid_focal_loss_jit, 5 | sigmoid_focal_loss_star, 6 | sigmoid_focal_loss_star_jit 7 | ) 8 | from .iou_loss import IOULoss, iou_loss 9 | from .label_smooth_ce_loss import LabelSmoothCELoss, label_smooth_ce_loss 10 | from .reg_l1_loss import reg_l1_loss 11 | from .sigmoid_focal_loss import SigmoidFocalLoss, sigmoid_focal_loss_cuda 12 | from .smooth_l1_loss import smooth_l1_loss 13 | -------------------------------------------------------------------------------- /cvpods/structures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .boxes import Boxes, BoxMode, pairwise_ioa, pairwise_iou 3 | from .image_list import ImageList 4 | from .instances import Instances 5 | from .keypoints import Keypoints, heatmaps_to_keypoints 6 | from .masks import BitMasks, PolygonMasks, polygons_to_bitmask, rasterize_polygons_within_box 7 | from .rotated_boxes import RotatedBoxes 8 | from .rotated_boxes import pairwise_iou as pairwise_iou_rotated 9 | 10 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 11 | -------------------------------------------------------------------------------- /cvpods/data/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .distributed_sampler import InferenceSampler, RepeatFactorTrainingSampler, TrainingSampler 3 | from .grouped_batch_sampler import GroupedBatchSampler 4 | from .sampler import DistributedGroupSampler, DistributedSampler, GroupSampler 5 | 6 | __all__ = [ 7 | "GroupedBatchSampler", 8 | "TrainingSampler", 9 | "InferenceSampler", 10 | "RepeatFactorTrainingSampler", 11 | "DistributedSampler", 12 | "GroupSampler", 13 | "DistributedGroupSampler", 14 | ] 15 | -------------------------------------------------------------------------------- /cvpods/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import ( 3 | build_dataset, 4 | build_detection_test_loader, 5 | build_detection_train_loader, 6 | build_transform_gen 7 | ) 8 | from .registry import DATASETS, SAMPLERS, TRANSFORMS 9 | from .wrapped_dataset import ConcatDataset, RepeatDataset 10 | 11 | from . import transforms # isort:skip 12 | # ensure the builtin datasets are registered 13 | from . import datasets, samplers # isort:skip 14 | 15 | 16 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 17 | -------------------------------------------------------------------------------- /datasets/prepare_for_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # Download some files needed for running tests. 5 | 6 | cd "${0%/*}" 7 | 8 | BASE=https://dl.fbaipublicfiles.com/detectron2 9 | mkdir -p coco/annotations 10 | 11 | for anno in instances_val2017_100 \ 12 | person_keypoints_val2017_100 \ 13 | instances_minival2014_100 \ 14 | person_keypoints_minival2014_100; do 15 | 16 | dest=coco/annotations/$anno.json 17 | [[ -s $dest ]] && { 18 | echo "$dest exists. Skipping ..." 
19 | } || { 20 | wget $BASE/annotations/coco/$anno.json -O $dest 21 | } 22 | done 23 | -------------------------------------------------------------------------------- /cvpods/utils/metrics/accuracy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | @torch.no_grad() 5 | def accuracy(output, target, topk=(1,)): 6 | """Computes the precision@k for the specified values of k""" 7 | if target.numel() == 0: 8 | return [torch.zeros([], device=output.device)] 9 | maxk = max(topk) 10 | batch_size = target.size(0) 11 | 12 | _, pred = output.topk(maxk, 1, True, True) 13 | pred = pred.t() 14 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 15 | 16 | res = [] 17 | for k in topk: 18 | correct_k = correct[:k].view(-1).float().sum(0) 19 | res.append(correct_k.mul_(100.0 / batch_size)) 20 | return res 21 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/masked_conv2d/masked_conv2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <ATen/ATen.h> 3 | 4 | namespace cvpods { 5 | 6 | void masked_im2col_forward(const at::Tensor im, const at::Tensor mask_h_idx, 7 | const at::Tensor mask_w_idx, at::Tensor col, 8 | const int kernel_h, const int kernel_w, 9 | const int pad_h, const int pad_w); 10 | 11 | void masked_col2im_forward(const at::Tensor col, const at::Tensor mask_h_idx, 12 | const at::Tensor mask_w_idx, at::Tensor im, int height, 13 | int width, int channels); 14 | 15 | } // namespace cvpods 16 | -------------------------------------------------------------------------------- /cvpods/solver/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | from .build import build_lr_scheduler, build_optimizer 3 | from .lars import LARS 4 | from .optimizer_builder import ( 5 | OPTIMIZER_BUILDER, 6 | AdamBuilder, 7 | AdamWBuilder, 8 | OptimizerBuilder, 9 | SGDBuilder, 10 | SGDGateLRBuilder 11 | ) 12 | from .scheduler_builder import ( 13 | SCHEDULER_BUILDER, 14 | BaseSchedulerBuilder, 15 | LambdaLRBuilder, 16 | OneCycleLRBuilder, 17 | PolyLRBuilder, 18 | WarmupCosineLR, 19 | WarmupCosineLRBuilder, 20 | WarmupMultiStepLR, 21 | WarmupMultiStepLRBuilder 22 | ) 23 | 24 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # output dir 2 | output 3 | instant_test_output 4 | inference_test_output 5 | 6 | *.jpg 7 | *.png 8 | *.txt 9 | 10 | # compilation and distribution 11 | __pycache__ 12 | _ext 13 | *.pyc 14 | *.so 15 | cvpods.egg-info/ 16 | build/ 17 | dist/ 18 | wheels/ 19 | 20 | tools/cvpods_test 21 | tools/cvpods_train 22 | 23 | # pytorch/python/numpy formats 24 | *.pth 25 | *.pkl 26 | *.npy 27 | 28 | # ipython/jupyter notebooks 29 | *.ipynb 30 | **/.ipynb_checkpoints/ 31 | 32 | # Editor temporaries 33 | *.swn 34 | *.swo 35 | *.swp 36 | *~ 37 | 38 | # temporary files 39 | *.DS_Store 40 | 41 | # IDE editor settings 42 | .idea 43 | .vscode/ 44 | 45 | # project dirs 46 | /cvpods/model_zoo/configs 47 | /datasets 48 | /models 49 | /playground 50 | -------------------------------------------------------------------------------- /cvpods/data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Megvii, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | from .citypersons import CityPersonsDataset 4 | from .cityscapes import CityScapesDataset 5 | from .coco import COCODataset 6 | from .crowdhuman import CrowdHumanDataset 7 | from .imagenet import ImageNetDataset 8 | from .lvis import LVISDataset 9 | from .objects365 import Objects365Dataset 10 | from .voc import VOCDataset 11 | from .widerface import WiderFaceDataset 12 | 13 | __all__ = [ 14 | "COCODataset", 15 | "VOCDataset", 16 | "CityScapesDataset", 17 | "ImageNetDataset", 18 | "WiderFaceDataset", 19 | "LVISDataset", 20 | "CityPersonsDataset", 21 | "Objects365Dataset", 22 | "CrowdHumanDataset", 23 | ] 24 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/reg_l1_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from cvpods.modeling.nn_utils.feature_utils import gather_feature 7 | 8 | 9 | class reg_l1_loss(nn.Module): 10 | 11 | def __init__(self): 12 | super(reg_l1_loss, self).__init__() 13 | 14 | def forward(self, output, mask, index, target): 15 | pred = gather_feature(output, index, use_transform=True) 16 | mask = mask.unsqueeze(dim=2).expand_as(pred).float() 17 | # loss = F.l1_loss(pred * mask, target * mask, reduction='elementwise_mean') 18 | loss = F.l1_loss(pred * mask, target * mask, reduction='sum') 19 | loss = loss / (mask.sum() + 1e-4) 20 | return loss 21 | -------------------------------------------------------------------------------- /cvpods/layers/shape_spec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | from collections import namedtuple 4 | 5 | 6 | class ShapeSpec(namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 7 | """ 8 | A simple structure that contains basic shape specification about a tensor. 9 | It is often used as the auxiliary inputs/outputs of models, 10 | to complement the lack of shape inference ability among pytorch modules. 11 | 12 | Attributes: 13 | channels: 14 | height: 15 | width: 16 | stride: 17 | """ 18 | 19 | def __new__(cls, *, channels=None, height=None, width=None, stride=None): 20 | return super().__new__(cls, channels, height, width, stride) 21 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/boruvka.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // a structure to represent a weighted edge in graph 4 | struct Edge 5 | { 6 | int src, dest; 7 | float weight; 8 | }; 9 | 10 | // a structure to represent a connected, undirected 11 | // and weighted graph as a collection of edges. 12 | struct Graph 13 | { 14 | // V-> Number of vertices, E-> Number of edges 15 | int V, E; 16 | 17 | // graph is represented as an array of edges. 18 | // Since the graph is undirected, the edge 19 | // from src to dest is also an edge from dest 20 | // to src. Both are counted as 1 edge here.
21 | Edge* edge; 22 | }; 23 | 24 | extern struct Graph* createGraph(int V, int E); 25 | extern void boruvkaMST(struct Graph* graph, int * edge_out); 26 | 27 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/boruvka_rst.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // a structure to represent a weighted edge in graph 4 | struct Edge 5 | { 6 | int src, dest; 7 | float weight; 8 | }; 9 | 10 | // a structure to represent a connected, undirected 11 | // and weighted graph as a collection of edges. 12 | struct Graph 13 | { 14 | // V-> Number of vertices, E-> Number of edges 15 | int V, E; 16 | 17 | // graph is represented as an array of edges. 18 | // Since the graph is undirected, the edge 19 | // from src to dest is also an edge from dest 20 | // to src. Both are counted as 1 edge here. 21 | Edge* edge; 22 | }; 23 | 24 | extern struct Graph* create_graph(int V, int E); 25 | extern void boruvka_rst(struct Graph* graph, int * edge_out); 26 | 27 | -------------------------------------------------------------------------------- /cvpods/layers/rotated_boxes.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from __future__ import absolute_import, division, print_function, unicode_literals 3 | 4 | # import torch 5 | from cvpods import _C 6 | 7 | 8 | def pairwise_iou_rotated(boxes1, boxes2): 9 | """ 10 | Return intersection-over-union (Jaccard index) of boxes. 11 | 12 | Both sets of boxes are expected to be in 13 | (x_center, y_center, width, height, angle) format. 14 | 15 | Arguments: 16 | boxes1 (Tensor[N, 5]) 17 | boxes2 (Tensor[M, 5]) 18 | 19 | Returns: 20 | iou (Tensor[N, M]): the NxM matrix containing the pairwise 21 | IoU values for every element in boxes1 and boxes2 22 | """ 23 | return _C.box_iou_rotated(boxes1, boxes2) 24 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/feature_utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | 5 | def gather_feature(fmap, index, mask=None, use_transform=False): 6 | """ 7 | Used for CenterNet 8 | """ 9 | if use_transform: 10 | # change a (N, C, H, W) tensor to (N, HxW, C) shape 11 | batch, channel = fmap.shape[:2] 12 | fmap = fmap.view(batch, channel, -1).permute((0, 2, 1)).contiguous() 13 | 14 | dim = fmap.size(-1) 15 | index = index.unsqueeze(len(index.shape)).expand(*index.shape, dim) 16 | fmap = fmap.gather(dim=1, index=index) 17 | if mask is not None: 18 | # this part is not called in Res18 dcn COCO 19 | mask = mask.unsqueeze(2).expand_as(fmap) 20 | fmap = fmap[mask] 21 | fmap = fmap.reshape(-1, dim) 22 | return fmap 23 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/PSROIPool/psroi_pool_cuda.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | namespace cvpods { 9 | at::Tensor psroi_pooling_forward_cuda( 10 | at::Tensor& features, 11 | at::Tensor& rois, 12 | at::Tensor& mappingchannel, 13 | const int pooled_height, 14 | const int pooled_width, 15 | const float spatial_scale, 16 | const int group_size, 17 | const int output_dim); 18 | 19 | at::Tensor psroi_pooling_backward_cuda( 20 |
at::Tensor& top_grad, 21 | at::Tensor& rois, 22 | at::Tensor& mappingchannel, 23 | const int batch_size, 24 | const int bottom_dim, 25 | const int bottom_height, 26 | const int bottom_width, 27 | const float spatial_scale); 28 | } 29 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # import all the meta_arch, so they will be registered 5 | 6 | from .borderdet import BorderDet 7 | from .centernet import CenterNet 8 | from .dynamic4seg import DynamicNet4Seg 9 | from .efficientdet import EfficientDet 10 | from .fcn import FCNHead 11 | from .fcos import FCOS, FCOSHead, FCOSDynamicHead, FCOSFixHead 12 | from .free_anchor import FreeAnchor 13 | from .panoptic_fpn import PanopticFPN 14 | from .pointrend import CoarseMaskHead, PointRendROIHeads, PointRendSemSegHead, StandardPointHead 15 | from .rcnn import GeneralizedRCNN, ProposalNetwork 16 | from .reppoints import RepPoints 17 | from .retinanet import RetinaNet 18 | from .semantic_seg import SemanticSegmentor, SemSegFPNHead 19 | from .ssd import SSD 20 | from .tensormask import TensorMask 21 | from .yolov3 import YOLOv3 22 | -------------------------------------------------------------------------------- /cvpods/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | from cvpods.layers import ShapeSpec 5 | 6 | # from .anchor_generator import build_anchor_generator 7 | from .backbone import FPN, Backbone, ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 8 | from .meta_arch import GeneralizedRCNN, PanopticFPN, ProposalNetwork, RetinaNet, SemanticSegmentor 9 | from .postprocessing import detector_postprocess 10 | from .roi_heads import ROIHeads, StandardROIHeads 11 | from .test_time_augmentation import DatasetMapperTTA, GeneralizedRCNNWithTTA, TTAWarper 12 | 13 | _EXCLUDE = {"torch", "ShapeSpec"} 14 | __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")] 15 | 16 | assert ( 17 | torch.Tensor([1]) == torch.Tensor([2]) 18 | ).dtype == torch.bool, ("Your Pytorch is too old. " 19 | "Please update to contain https://github.com/pytorch/pytorch/pull/21113") 20 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/ml_nms/ml_nms.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace cvpods { 6 | #ifdef WITH_CUDA 7 | at::Tensor ml_nms_cuda(const at::Tensor boxes, float nms_overlap_thresh); 8 | #endif 9 | 10 | 11 | inline at::Tensor ml_nms(const at::Tensor& dets, 12 | const at::Tensor& scores, 13 | const at::Tensor& labels, 14 | const float threshold) { 15 | 16 | if (dets.type().is_cuda()) { 17 | #ifdef WITH_CUDA 18 | // TODO raise error if not compiled with CUDA 19 | if (dets.numel() == 0) 20 | return at::empty({0}, dets.options().dtype(at::kLong).device(at::kCPU)); 21 | auto b = at::cat({dets, scores.unsqueeze(1), labels.unsqueeze(1)}, 1); 22 | return ml_nms_cuda(b, threshold); 23 | #else 24 | AT_ERROR("Not compiled with GPU support"); 25 | #endif 26 | } 27 | AT_ERROR("CPU version not implemented"); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from .backbone import Backbone 4 | from .bifpn import BiFPN, build_efficientnet_bifpn_backbone 5 | from .darknet import Darknet, build_darknet_backbone 6 | from .dynamic_arch import DynamicNetwork, build_dynamic_backbone 7 | from .efficientnet import EfficientNet, build_efficientnet_backbone 8 | from .fpn import ( 9 | FPN, 10 | _assert_strides_are_log2_contiguous, 11 | build_retinanet_mobilenetv2_fpn_p5_backbone, 12 | build_retinanet_resnet_fpn_p5_backbone 13 | ) 14 | from .mobilenet import InvertedResBlock, MobileNetV2, MobileStem, build_mobilenetv2_backbone 15 | from .resnet import ResNet, ResNetBlockBase, build_resnet_backbone, make_stage 16 | # TODO can expose more resnet blocks after careful consideration 17 | from .shufflenet import ShuffleNetV2, ShuffleV2Block, build_shufflenetv2_backbone 18 | from .snet import SNet, build_snet_backbone 19 | from .transformer import Transformer -------------------------------------------------------------------------------- /tools/dev/run_instant_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="instant_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 10 | CFG_LIST=( ./configs/quick_schedules/*instant_test.yaml ) 11 | fi 12 | 13 | echo "========================================================================" 14 | echo "Configs to run:" 15 | echo "${CFG_LIST[@]}" 16 | echo "========================================================================" 17 | 18 | for cfg in "${CFG_LIST[@]}"; do 19 | echo "========================================================================" 20 | echo "Running $cfg ..." 21 | echo "========================================================================" 22 | $BIN --num-gpus $NUM_GPUS --config-file "$cfg" \ 23 | SOLVER.IMS_PER_BATCH $(($NUM_GPUS * 2)) \ 24 | OUTPUT_DIR "$OUTPUT" 25 | rm -rf "$OUTPUT" 26 | done 27 | 28 | -------------------------------------------------------------------------------- /tools/dev/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 3 | 4 | # Run this script at project root by "./dev/linter.sh" before you commit 5 | 6 | { 7 | black --version | grep "19.3b0" > /dev/null 8 | } || { 9 | echo "Linter requires black==19.3b0 !" 10 | exit 1 11 | } 12 | 13 | set -v 14 | 15 | echo "Running isort ..." 16 | isort -y --multi-line 3 --trailing-comma -sp . --skip datasets --skip docs --skip-glob '*/__init__.py' --atomic 17 | 18 | echo "Running black ..." 19 | black -l 100 . 20 | 21 | echo "Running flake8 ..." 22 | if [ -x "$(command -v flake8-3)" ]; then 23 | flake8-3 . 24 | else 25 | python3 -m flake8 . 26 | fi 27 | 28 | # echo "Running mypy ..." 29 | # Pytorch does not have enough type annotations 30 | # mypy cvpods/solver cvpods/structures cvpods/config 31 | 32 | echo "Running clang-format ..." 33 | find . -regex ".*\.\(cpp\|c\|cc\|cu\|cxx\|h\|hh\|hpp\|hxx\|tcc\|mm\|m\)" -print0 | xargs -0 clang-format -i 34 | 35 | command -v arc > /dev/null && arc lint 36 | -------------------------------------------------------------------------------- /cvpods/evaluation/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from .build import build_evaluator 3 | from .citypersons_evaluation import CityPersonsEvaluator 4 | from .cityscapes_evaluation import CityscapesEvaluator 5 | from .classification_evaluation import ClassificationEvaluator 6 | from .coco_evaluation import COCOEvaluator 7 | from .crowdhuman_evaluation import CrowdHumanEvaluator 8 | from .evaluator import DatasetEvaluator, DatasetEvaluators, inference_context, inference_on_dataset 9 | from .lvis_evaluation import LVISEvaluator 10 | from .panoptic_evaluation import COCOPanopticEvaluator 11 | from .pascal_voc_evaluation import PascalVOCDetectionEvaluator 12 | from .registry import EVALUATOR 13 | from .rotated_coco_evaluation import RotatedCOCOEvaluator 14 | from .sem_seg_evaluation import SemSegEvaluator 15 | from .testing import print_csv_format, verify_results 16 | from .widerface_evaluation import WiderFaceEvaluator 17 | 18 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 19 | -------------------------------------------------------------------------------- /cvpods/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from .benchmark import Timer, benchmark, timeit 5 | from .distributed import comm 6 | from .dump import ( 7 | CommonMetricPrinter, 8 | EventStorage, 9 | EventWriter, 10 | HistoryBuffer, 11 | JSONWriter, 12 | TensorboardXWriter, 13 | create_small_table, 14 | create_table_with_header, 15 | get_event_storage, 16 | log_every_n, 17 | log_every_n_seconds, 18 | log_first_n, 19 | setup_logger 20 | ) 21 | from .env import collect_env_info, seed_all_rng, setup_custom_environment, setup_environment 22 | from .file import PathHandler, PathManager, PicklableWrapper, download, file_lock, get_cache_dir 23 | from .imports import dynamic_import 24 | from .memory import retry_if_cuda_oom 25 | from .metrics import accuracy 26 | from .registry import Registry 27 | from .visualizer import ColorMode, VideoVisualizer, VisImage, Visualizer, colormap, random_color 28 | 29 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 30 | -------------------------------------------------------------------------------- /cvpods/utils/imports.py: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : imports.py 5 | @Time : 2020/05/07 23:59:19 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:59:19 10 | ''' 11 | 12 | import imp 13 | 14 | 15 | def dynamic_import(config_name, config_path): 16 | """ 17 | Dynamically import a project. 18 | 19 | Args: 20 | config_name (str): module name 21 | config_path (str): the dir that contains the .py with this module. 22 | 23 | Examples:: 24 | >>> root = "/path/to/retinanet/" 25 | >>> project = root + "retinanet.res50.fpn.coco.800size.1x.mrcnn_sigmoid" 26 | >>> cfg = dynamic_import("config", project).config 27 | >>> net = dynamic_import("net", project) 28 | """ 29 | fp, pth, desc = imp.find_module(config_name, [config_path]) 30 | 31 | return imp.load_module(config_name, fp, pth, desc) 32 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1, boxes2); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1, boxes2); 33 | } 34 | 35 | } // namespace cvpods 36 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/dice_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | def dice_loss(input, target): 5 | r""" 6 | Dice loss defined in the V-Net paper as: 7 | 8 | Loss_dice = 1 - D 9 | 10 | 2 * sum(p_i * g_i) 11 | D = ------------------------------ 12 | sum(p_i ^ 2) + sum(g_i ^ 2) 13 | 14 | where the sums run over the N mask pixels (i = 1 ... N), of the predicted binary segmentation 15 | pixel p_i ∈ P and the ground truth binary pixel g_i ∈ G. 16 | 17 | Args: 18 | input (Tensor): predicted binary mask, each pixel value should be in range [0, 1]. 19 | target (Tensor): ground truth binary mask. 20 | 21 | Returns: 22 | Tensor: dice loss. 23 | """ 24 | assert input.shape[-2:] == target.shape[-2:] 25 | input = input.view(input.size(0), -1).float() 26 | target = target.view(target.size(0), -1).float() 27 | 28 | d = ( 29 | 2 * torch.sum(input * target, dim=1) 30 | ) / ( 31 | torch.sum(input * input, dim=1) + torch.sum(target * target, dim=1) + 1e-4 32 | ) 33 | 34 | return 1 - d 35 | -------------------------------------------------------------------------------- /cvpods/utils/file/serialize.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import cloudpickle 3 | 4 | 5 | class PicklableWrapper(object): 6 | """ 7 | Wrap an object to make it more picklable; note that it uses 8 | heavyweight serialization libraries that are slower than pickle. 9 | It's best to use it only on closures (which are usually not picklable). 10 | 11 | This is a simplified version of 12 | https://github.com/joblib/joblib/blob/master/joblib/externals/loky/cloudpickle_wrapper.py 13 | """ 14 | 15 | def __init__(self, obj): 16 | self._obj = obj 17 | 18 | def __reduce__(self): 19 | s = cloudpickle.dumps(self._obj) 20 | return cloudpickle.loads, (s,) 21 | 22 | def __call__(self, *args, **kwargs): 23 | return self._obj(*args, **kwargs) 24 | 25 | def __getattr__(self, attr): 26 | # Ensure that the wrapped object can be used seamlessly as the previous object. 27 | if attr not in ["_obj"]: 28 | return getattr(self._obj, attr) 29 | return getattr(self, attr) 30 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/border_align/border_align.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | 6 | namespace cvpods { 7 | 8 | at::Tensor border_align_cuda_forward( 9 | const at::Tensor& feature, 10 | const at::Tensor& boxes, 11 | const at::Tensor& wh, 12 | const int pool_size); 13 | 14 | 15 | at::Tensor border_align_cuda_backward( 16 | const at::Tensor& gradOutput, 17 | const at::Tensor& feature, 18 | const at::Tensor& boxes, 19 | const at::Tensor& wh, 20 | const int pool_size); 21 | 22 | 23 | at::Tensor BorderAlign_Forward( 24 | const at::Tensor& feature, 25 | const at::Tensor& boxes, 26 | const at::Tensor& wh, 27 | const int pool_size) { 28 | return border_align_cuda_forward(feature, boxes, wh, pool_size); 29 | } 30 | 31 | 32 | at::Tensor BorderAlign_Backward( 33 | const at::Tensor& gradOutput, 34 | const at::Tensor& feature, 35 | const at::Tensor& boxes, 36 | const at::Tensor& wh, 37 | const int pool_size) { 38 | return border_align_cuda_backward(gradOutput, feature, boxes, wh, pool_size); 39 | } 40 | 41 | } // namespace cvpods -------------------------------------------------------------------------------- /cvpods/layers/csrc/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #pragma once 3 | #include <ATen/ATen.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda(dets, scores, iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_rotated_cpu(dets, scores, iou_threshold); 36 | } 37 | 38 | } // namespace cvpods 39 | -------------------------------------------------------------------------------- /cvpods/layers/activation_funcs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | # Ref: 6 | # https://medium.com/the-artificial-impostor/more-memory-efficient-swish-activation-function-e07c22c12a76 7 | class SwishImplementation(torch.autograd.Function): 8 | """ 9 | Swish activation function memory-efficient implementation. 10 | 11 | This implementation explicitly processes the gradient: it keeps a copy of the input tensor, 12 | and uses it to calculate the gradient during the back-propagation phase. 13 | """ 14 | @staticmethod 15 | def forward(ctx, i): 16 | result = i * torch.sigmoid(i) 17 | ctx.save_for_backward(i) 18 | return result 19 | 20 | @staticmethod 21 | def backward(ctx, grad_output): 22 | i = ctx.saved_tensors[0] 23 | sigmoid_i = torch.sigmoid(i) 24 | return grad_output * (sigmoid_i * (1 + i * (1 - sigmoid_i))) 25 | 26 | 27 | class MemoryEfficientSwish(nn.Module): 28 | def forward(self, x): 29 | return SwishImplementation.apply(x) 30 | 31 | 32 | class Swish(nn.Module): 33 | """ 34 | Implement the Swish activation function. 35 | See: https://arxiv.org/abs/1710.05941 for more details. 36 | """ 37 | def forward(self, x): 38 | return x * torch.sigmoid(x) 39 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`.
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d4/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSFixHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSFixHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d8/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSFixHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSFixHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d4.lambda-0_1/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_1/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_8/net.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from cvpods.layers import ShapeSpec 4 | from cvpods.modeling.anchor_generator import ShiftGenerator 5 | from cvpods.modeling.backbone import Backbone 6 | from cvpods.modeling.backbone.fpn import build_retinanet_resnet_fpn_backbone 7 | from cvpods.modeling.meta_arch import FCOS, FCOSDynamicHead 8 | 9 | 10 | def build_backbone(cfg, input_shape=None): 11 | """ 12 | Build a backbone from `cfg.MODEL.BACKBONE.NAME`. 
13 | 14 | Returns: 15 | an instance of :class:`Backbone` 16 | """ 17 | if input_shape is None: 18 | input_shape = ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN)) 19 | 20 | backbone = build_retinanet_resnet_fpn_backbone(cfg, input_shape) 21 | assert isinstance(backbone, Backbone) 22 | return backbone 23 | 24 | 25 | def build_shift_generator(cfg, input_shape): 26 | return ShiftGenerator(cfg, input_shape) 27 | 28 | 29 | def build_head(cfg, input_shape): 30 | return FCOSDynamicHead(cfg, input_shape) 31 | 32 | 33 | def build_model(cfg): 34 | cfg.build_backbone = build_backbone 35 | cfg.build_shift_generator = build_shift_generator 36 | cfg.build_head = build_head 37 | 38 | model = FCOS(cfg) 39 | logger = logging.getLogger(__name__) 40 | logger.info("Model:\n{}".format(model)) 41 | return model 42 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [isort] 2 | line_length = 100 3 | multi_line_output = 3 4 | balanced_wrapping = True 5 | skip = tools/test_net.py, tools/train_net.py 6 | known_standard_library = setuptools,mock 7 | known_myself = cvpods 8 | known_third_party = appdirs,colorama,easydict,portalocker,yacs,termcolor,tabulate,tqdm,psutil,pkg_resources 9 | known_data_processing = cv2,numpy,scipy,PIL,matplotlib 10 | known_datasets = pycocotools,cityscapesscripts,lvis 11 | known_deeplearning = torch,torchvision,caffe2,onnx 12 | sections = FUTURE,STDLIB,THIRDPARTY,data_processing,datasets,deeplearning,myself,FIRSTPARTY,LOCALFOLDER 13 | no_lines_before=STDLIB,THIRDPARTY,datasets 14 | default_section = FIRSTPARTY 15 | 16 | [flake8] 17 | ignore = W503, E221 18 | max-line-length = 100 19 | max-complexity = 18 20 | select = B,C,E,F,W,T4,B9 21 | exclude = build,__init__.py 22 | 23 | [pep8] 24 | ignore = W503, E203, E221, E402, E741, C901, W504, E731, F541, E722 25 | max-line-length = 100 26 | 27 | [yapf] 28 | based_on_style = pep8 29 | spaces_before_comment = 4 30 | split_before_logical_operator = true 31 | 32 | [mypy] 33 | python_version=3.6 34 | ignore_missing_imports = True 35 | warn_unused_configs = True 36 | disallow_untyped_defs = True 37 | check_untyped_defs = True 38 | warn_unused_ignores = True 39 | warn_redundant_casts = True 40 | show_column_numbers = True 41 | follow_imports = silent 42 | allow_redefinition = True 43 | ; Require all functions to be annotated 44 | disallow_incomplete_defs = True 45 | -------------------------------------------------------------------------------- /cvpods/configs/yolo_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : yolo_config.py 5 | @Time : 2020/05/07 23:55:49 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:55:49 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | PIXEL_MEAN=(0.485, 0.456, 0.406), 17 | PIXEL_STD=(0.229, 0.224, 0.225), 18 | DARKNET=dict( 19 | DEPTH=53, 20 | STEM_OUT_CHANNELS=32, 21 | WEIGHTS="s3://generalDetection/cvpods/ImageNetPretrained/custom/darknet53.mix.pth", 22 | OUT_FEATURES=["dark3", "dark4", "dark5"] 23 | ), 24 | YOLO=dict( 25 | CLASSES=80, 26 | IN_FEATURES=["dark3", "dark4", "dark5"], 27 | ANCHORS=[ 28 | [[116, 90], [156, 198], [373, 326]], 29 | [[30, 61], [62, 45], [42, 119]], 30 | [[10, 13], [16, 30], [33, 23]], 31 | ], 32 
| CONF_THRESHOLD=0.01, # TEST 33 | NMS_THRESHOLD=0.5, 34 | IGNORE_THRESHOLD=0.7, 35 | ), 36 | ), 37 | ) 38 | 39 | 40 | class YOLO3Config(BaseDetectionConfig): 41 | def __init__(self): 42 | super(YOLO3Config, self).__init__() 43 | self._register_configuration(_config_dict) 44 | 45 | 46 | config = YOLO3Config() 47 | -------------------------------------------------------------------------------- /tools/dev/run_inference_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | BIN="python tools/train_net.py" 5 | OUTPUT="inference_test_output" 6 | NUM_GPUS=2 7 | 8 | CFG_LIST=( "${@:1}" ) 9 | 10 | if [ ${#CFG_LIST[@]} -eq 0 ]; then 11 | CFG_LIST=( ./configs/quick_schedules/*inference_acc_test.yaml ) 12 | fi 13 | 14 | echo "========================================================================" 15 | echo "Configs to run:" 16 | echo "${CFG_LIST[@]}" 17 | echo "========================================================================" 18 | 19 | 20 | for cfg in "${CFG_LIST[@]}"; do 21 | echo "========================================================================" 22 | echo "Running $cfg ..." 23 | echo "========================================================================" 24 | $BIN \ 25 | --eval-only \ 26 | --num-gpus $NUM_GPUS \ 27 | --config-file "$cfg" \ 28 | OUTPUT_DIR $OUTPUT 29 | rm -rf $OUTPUT 30 | done 31 | 32 | 33 | echo "========================================================================" 34 | echo "Running demo.py ..." 35 | echo "========================================================================" 36 | DEMO_BIN="python demo/demo.py" 37 | COCO_DIR=datasets/coco/val2014 38 | mkdir -pv $OUTPUT 39 | 40 | set -v 41 | 42 | $DEMO_BIN --config-file ./configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml \ 43 | --input $COCO_DIR/COCO_val2014_0000001933* --output $OUTPUT 44 | rm -rf $OUTPUT 45 | -------------------------------------------------------------------------------- /cvpods/analyser/tide/errors/qualifiers.py: -------------------------------------------------------------------------------- 1 | # Defines qualifiers like "Extra small box" 2 | 3 | 4 | def _area(x): 5 | return x["bbox"][2] * x["bbox"][3] 6 | 7 | 8 | def _ar(x): 9 | return x["bbox"][2] / x["bbox"][3] 10 | 11 | 12 | class Qualifier: 13 | """ 14 | Creates a qualifier with the given name. 15 | 16 | test_func should be a callable object (e.g., lambda) that 17 | takes in as input an annotation object (either a ground truth or prediction) 18 | and returns whether or not that object qualifies (i.e., a bool). 
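    Example (illustrative sketch; ``x`` is any annotation dict carrying a
    COCO-style "bbox" entry of [x, y, w, h]):

        >>> small = Qualifier("Small", lambda x: _area(x) <= 32 ** 2)
        >>> small.test({"bbox": [0, 0, 10, 10]})  # area 100 <= 1024
        True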
19 | """ 20 | 21 | def __init__(self, name: str, test_func: object): 22 | self.test = test_func 23 | self.name = name 24 | 25 | # This is horrible, but I like it 26 | def _make_error_func(self, error_type): 27 | return ( 28 | ( 29 | lambda err: isinstance(err, error_type) 30 | and (self.test(err.gt) if hasattr(err, "gt") else self.test(err.pred)) 31 | ) 32 | if self.test is not None 33 | else (lambda err: isinstance(err, error_type)) 34 | ) 35 | 36 | 37 | AREA = [ 38 | Qualifier("Small", lambda x: _area(x) <= 32 ** 2), 39 | Qualifier("Medium", lambda x: 32 ** 2 < _area(x) <= 96 ** 2), 40 | Qualifier("Large", lambda x: 96 ** 2 < _area(x)), 41 | ] 42 | 43 | ASPECT_RATIO = [ 44 | Qualifier("Tall", lambda x: _ar(x) <= 0.75), 45 | Qualifier("Square", lambda x: 0.75 < _ar(x) <= 1.33), 46 | Qualifier("Wide", lambda x: 1.33 < _ar(x)), 47 | ] 48 | -------------------------------------------------------------------------------- /cvpods/layers/border_align.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | class BorderAlignFunc(Function): 10 | @staticmethod 11 | def forward(ctx, input, boxes, wh, pool_size): 12 | output = _C.border_align_forward(input, boxes, wh, pool_size) 13 | ctx.pool_size = pool_size 14 | ctx.save_for_backward(input, boxes, wh) 15 | return output 16 | 17 | @staticmethod 18 | @once_differentiable 19 | def backward(ctx, grad_output): 20 | pool_size = ctx.pool_size 21 | input, boxes, wh = ctx.saved_tensors 22 | grad_input = _C.border_align_backward( 23 | grad_output, input, boxes, wh, pool_size) 24 | return grad_input, None, None, None 25 | 26 | 27 | border_align = BorderAlignFunc.apply 28 | 29 | 30 | class BorderAlign(nn.Module): 31 | def __init__(self, pool_size): 32 | super(BorderAlign, self).__init__() 33 | self.pool_size = pool_size 34 | 35 | def forward(self, feature, boxes): 36 | feature = feature.contiguous() 37 | boxes = boxes.contiguous() 38 | wh = (boxes[:, :, 2:] - boxes[:, :, :2]).contiguous() 39 | output = border_align(feature, boxes, wh, self.pool_size) 40 | return output 41 | 42 | def __repr__(self): 43 | tmpstr = self.__class__.__name__ 44 | return tmpstr 45 | -------------------------------------------------------------------------------- /cvpods/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | from .activation_funcs import MemoryEfficientSwish, Swish 3 | from .batch_norm import FrozenBatchNorm2d, NaiveSyncBatchNorm, get_activation, get_norm 4 | from .deform_conv import DeformConv, ModulatedDeformConv 5 | from .deform_conv_with_off import DeformConvWithOff, ModulatedDeformConvWithOff 6 | from .mask_ops import paste_masks_in_image 7 | from .nms import ( 8 | batched_nms, 9 | batched_nms_rotated, 10 | batched_softnms, 11 | batched_softnms_rotated, 12 | cluster_nms, 13 | generalized_batched_nms, 14 | matrix_nms, 15 | ml_nms, 16 | nms, 17 | nms_rotated, 18 | softnms, 19 | softnms_rotated 20 | ) 21 | from .position_encoding import position_encoding_dict 22 | from .roi_align import ROIAlign, roi_align 23 | from .roi_align_rotated import ROIAlignRotated, roi_align_rotated 24 | from .shape_spec import ShapeSpec 25 | from .swap_align2nat import SwapAlign2Nat, swap_align2nat 26 | from .wrappers import ( 27 | BatchNorm2d, 28 | Conv2d, 29 | Conv2dSamePadding, 30 | ConvTranspose2d, 31 | MaxPool2dSamePadding, 32 | SeparableConvBlock, 33 | cat, 34 | interpolate 35 | ) 36 | from .masked_conv import MaskedConv2d, masked_conv2d 37 | from .tree_filter_v2 import TreeFilterV2 38 | from .dynamic_conv import DynamicConv2D, DynamicScale, DynamicBottleneck 39 | from .fix_conv import Bottleneck, ScaleConv2d 40 | 41 | __all__ = [k for k in globals().keys() if not k.startswith("_")] 42 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/SwapAlign2Nat/SwapAlign2Nat.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace cvpods { 6 | 7 | #ifdef WITH_CUDA 8 | at::Tensor SwapAlign2Nat_forward_cuda( 9 | const at::Tensor& X, 10 | const int lambda_val, 11 | const float pad_val); 12 | 13 | at::Tensor SwapAlign2Nat_backward_cuda( 14 | const at::Tensor& gY, 15 | const int lambda_val, 16 | const int batch_size, 17 | const int channel, 18 | const int height, 19 | const int width); 20 | #endif 21 | 22 | inline at::Tensor SwapAlign2Nat_forward( 23 | const at::Tensor& X, 24 | const int lambda_val, 25 | const float pad_val) { 26 | if (X.type().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return SwapAlign2Nat_forward_cuda(X, lambda_val, pad_val); 29 | #else 30 | AT_ERROR("Not compiled with GPU support"); 31 | #endif 32 | } 33 | AT_ERROR("Not implemented on the CPU"); 34 | } 35 | 36 | inline at::Tensor SwapAlign2Nat_backward( 37 | const at::Tensor& gY, 38 | const int lambda_val, 39 | const int batch_size, 40 | const int channel, 41 | const int height, 42 | const int width) { 43 | if (gY.type().is_cuda()) { 44 | #ifdef WITH_CUDA 45 | return SwapAlign2Nat_backward_cuda( 46 | gY, lambda_val, batch_size, channel, height, width); 47 | #else 48 | AT_ERROR("Not compiled with GPU support"); 49 | #endif 50 | } 51 | AT_ERROR("Not implemented on the CPU"); 52 | } 53 | 54 | } // namespace cvpods 55 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace cvpods { 6 | 7 | template <typename T> 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto widths1 = boxes1.select(1, 2).contiguous(); 13 | auto heights1 = boxes1.select(1, 3).contiguous(); 14 | auto widths2 = boxes2.select(1, 2).contiguous(); 15 | auto heights2 = boxes2.select(1, 3).contiguous(); 16 | 17 | at::Tensor areas1 = widths1 * heights1; 18 | at::Tensor areas2 = widths2 * heights2; 19 | 20 | auto num_boxes1 = boxes1.size(0); 21 | auto num_boxes2 = boxes2.size(0); 22 | 23 | for (int i = 0; i < num_boxes1; i++) { 24 | for (int j = 0; j < num_boxes2; j++) { 25 | ious[i * num_boxes2 + j] = single_box_iou_rotated<T>( 26 | boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>()); 27 | } 28 | } 29 | } 30 | 31 | at::Tensor box_iou_rotated_cpu( 32 | const at::Tensor& boxes1, 33 | const at::Tensor& boxes2) { 34 | auto num_boxes1 = boxes1.size(0); 35 | auto num_boxes2 = boxes2.size(0); 36 | at::Tensor ious = 37 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 38 | 39 | box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious); 40 | 41 | // reshape from 1d array to 2d array 42 | auto shape = std::vector<int64_t>{num_boxes1, num_boxes2}; 43 | return ious.reshape(shape); 44 | } 45 | 46 | } // namespace cvpods 47 | -------------------------------------------------------------------------------- /cvpods/configs/panoptic_seg_config.py: -------------------------------------------------------------------------------- 1 | from .rcnn_fpn_config import RCNNFPNConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | SEM_SEG_HEAD=dict( 6 | # NAME="SemSegFPNHead", 7 | IN_FEATURES=["p2", "p3", "p4", "p5"], 8 | # Label in the semantic segmentation ground truth that is ignored, 9 | # i.e., no loss is calculated for the corresponding pixel. 10 | IGNORE_VALUE=255, 11 | # Number of classes in the semantic segmentation head 12 | NUM_CLASSES=54, 13 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 14 | CONVS_DIM=128, 15 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 16 | COMMON_STRIDE=4, 17 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 18 | NORM="GN", 19 | LOSS_WEIGHT=0.5, 20 | ), 21 | PANOPTIC_FPN=dict( 22 | # Scaling of all losses from the instance detection / segmentation head. 23 | INSTANCE_LOSS_WEIGHT=1.0, 24 | # options when combining instance & semantic segmentation outputs 25 | COMBINE=dict( 26 | ENABLED=True, 27 | OVERLAP_THRESH=0.5, 28 | STUFF_AREA_LIMIT=4096, 29 | INSTANCES_CONFIDENCE_THRESH=0.5, 30 | ), 31 | ), 32 | ) 33 | ) 34 | 35 | 36 | class PanopticSegmentationConfig(RCNNFPNConfig): 37 | def __init__(self): 38 | super(PanopticSegmentationConfig, self).__init__() 39 | self._register_configuration(_config_dict) 40 | 41 | 42 | config = PanopticSegmentationConfig() 43 |
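# Usage sketch (illustrative, not part of the repository): leaf values in
# _config_dict surface as attribute-style fields on the registered config
# object, the same access pattern the net.py files above use for
# cfg.MODEL.PIXEL_MEAN:
#
#     from cvpods.configs.panoptic_seg_config import config as cfg
#     assert cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES == 54
#     assert cfg.MODEL.PANOPTIC_FPN.COMBINE.OVERLAP_THRESH == 0.5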
-------------------------------------------------------------------------------- /cvpods/configs/ssd_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | PIXEL_MEAN=[123.675, 116.28, 103.53], # RGB FORMAT 6 | PIXEL_STD=[1.0, 1.0, 1.0], 7 | VGG=dict( 8 | ARCH='D', 9 | NORM="", 10 | NUM_CLASSES=None, 11 | OUT_FEATURES=["Conv4_3", "Conv7"], 12 | POOL_ARGS=dict( 13 | pool3=(2, 2, 0, True), # k, s, p, ceil_mode 14 | pool5=(3, 1, 1, False) # k, s, p, ceil_mode 15 | ), 16 | FC_TO_CONV=True, 17 | ), 18 | SSD=dict( 19 | NUM_CLASSES=80, 20 | IN_FEATURES=["Conv4_3", "Conv7"], 21 | EXTRA_LAYER_ARCH={ 22 | # "S" marks a conv layer with stride=2; the number after it gives that layer's output channels 23 | "300": [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], 24 | "512": [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128, 256], 25 | }, 26 | IOU_THRESHOLDS=[0.5, 0.5], 27 | IOU_LABELS=[0, -1, 1], 28 | BBOX_REG_WEIGHTS=(10.0, 10.0, 5.0, 5.0), 29 | L2NORM_SCALE=20.0, 30 | # Loss parameters: 31 | LOSS_ALPHA=1.0, 32 | SMOOTH_L1_LOSS_BETA=1.0, 33 | NEGATIVE_POSITIVE_RATIO=3.0, 34 | # Inference parameters: 35 | SCORE_THRESH_TEST=0.02, 36 | NMS_THRESH_TEST=0.45, 37 | ), 38 | ) 39 | ) 40 | 41 | 42 | class SSDConfig(BaseDetectionConfig): 43 | def __init__(self): 44 | super(SSDConfig, self).__init__() 45 | self._register_configuration(_config_dict) 46 | 47 | 48 | config = SSDConfig() 49 | -------------------------------------------------------------------------------- /cvpods/configs/segm_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | SEM_SEG_HEAD=dict( 6 | # NAME="SemSegFPNHead", 7 | IN_FEATURES=["p2", "p3", "p4", "p5"], 8 | # Label in the semantic segmentation ground truth that is ignored, 9 | # i.e., no loss is calculated for the corresponding pixel. 10 | IGNORE_VALUE=255, 11 | # Number of classes in the semantic segmentation head 12 | NUM_CLASSES=54, 13 | # Number of channels in the 3x3 convs inside semantic-FPN heads. 14 | CONVS_DIM=128, 15 | # Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 16 | COMMON_STRIDE=4, 17 | # Normalization method for the convolution layers. Options: "" (no norm), "GN". 18 | NORM="GN", 19 | LOSS_WEIGHT=1.0, 20 | ), 21 | PANOPTIC_FPN=dict( 22 | # Scaling of all losses from the instance detection / segmentation head.
23 | INSTANCE_LOSS_WEIGHT=1.0, 24 | # options when combining instance & semantic segmentation outputs 25 | COMBINE=dict( 26 | ENABLED=True, 27 | OVERLAP_THRESH=0.5, 28 | STUFF_AREA_LIMIT=4096, 29 | INSTANCES_CONFIDENCE_THRESH=0.5, 30 | ), 31 | ), 32 | ), 33 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 34 | ) 35 | 36 | 37 | class SegmentationConfig(BaseDetectionConfig): 38 | def __init__(self): 39 | super(SegmentationConfig, self).__init__() 40 | self._register_configuration(_config_dict) 41 | 42 | 43 | config = SegmentationConfig() 44 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/sigmoid_focal_loss/SigmoidFocalLoss.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace cvpods { 5 | #ifdef WITH_CUDA 6 | at::Tensor SigmoidFocalLoss_forward_cuda( 7 | const at::Tensor& logits, 8 | const at::Tensor& targets, 9 | const int num_classes, 10 | const float gamma, 11 | const float alpha); 12 | 13 | at::Tensor SigmoidFocalLoss_backward_cuda( 14 | const at::Tensor& logits, 15 | const at::Tensor& targets, 16 | const at::Tensor& d_losses, 17 | const int num_classes, 18 | const float gamma, 19 | const float alpha); 20 | #endif 21 | 22 | // 23 | // Interface for Python 24 | inline at::Tensor SigmoidFocalLoss_forward( 25 | const at::Tensor& logits, 26 | const at::Tensor& targets, 27 | const int num_classes, 28 | const float gamma, 29 | const float alpha) { 30 | if (logits.type().is_cuda()) { 31 | #ifdef WITH_CUDA 32 | return SigmoidFocalLoss_forward_cuda(logits, targets, num_classes, gamma, alpha); 33 | #else 34 | AT_ERROR("Not compiled with GPU support"); 35 | #endif 36 | } 37 | AT_ERROR("Not implemented on the CPU"); 38 | } 39 | 40 | inline at::Tensor SigmoidFocalLoss_backward( 41 | const at::Tensor& logits, 42 | const at::Tensor& targets, 43 | const at::Tensor& d_losses, 44 | const int num_classes, 45 | const float gamma, 46 | const float alpha) { 47 | if (logits.type().is_cuda()) { 48 | #ifdef WITH_CUDA 49 | return SigmoidFocalLoss_backward_cuda(logits, targets, d_losses, num_classes, gamma, alpha); 50 | #else 51 | AT_ERROR("Not compiled with GPU support"); 52 | #endif 53 | } 54 | AT_ERROR("Not implemented on the CPU"); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /cvpods/configs/rcnn_fpn_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : rcnn_fpn_config.py 5 | @Time : 2020/05/07 23:55:41 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:55:41 10 | ''' 11 | 12 | from .rcnn_config import RCNNConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # BACKBONE=dict(NAME='build_resnet_backbone',), 17 | RESNETS=dict(OUT_FEATURES=["res2", "res3", "res4", "res5"],), 18 | FPN=dict(IN_FEATURES=["res2", "res3", "res4", "res5"]), 19 | ANCHOR_GENERATOR=dict( 20 | SIZES=[[32], [64], [128], [256], [512]], ASPECT_RATIOS=[[0.5, 1.0, 2.0]], 21 | ), 22 | RPN=dict( 23 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 24 | PRE_NMS_TOPK_TRAIN=2000, 25 | PRE_NMS_TOPK_TEST=1000, 26 | POST_NMS_TOPK_TRAIN=1000, 27 | POST_NMS_TOPK_TEST=1000, 28 | ), 29 | ROI_HEADS=dict( 30 | # NAME: "StandardROIHeads" 31 | IN_FEATURES=["p2", "p3", "p4", "p5"], 32 | ), 33 | ROI_BOX_HEAD=dict( 34 | # NAME: "FastRCNNConvFCHead" 35 | NUM_FC=2, 36 | 
POOLER_RESOLUTION=7, 37 | ), 38 | ROI_MASK_HEAD=dict( 39 | # NAME: "MaskRCNNConvUpsampleHead" 40 | NUM_CONV=4, 41 | POOLER_RESOLUTION=14, 42 | ), 43 | ), 44 | ) 45 | 46 | 47 | class RCNNFPNConfig(RCNNConfig): 48 | def __init__(self): 49 | super(RCNNFPNConfig, self).__init__() 50 | self._register_configuration(_config_dict) 51 | 52 | 53 | config = RCNNFPNConfig() 54 | -------------------------------------------------------------------------------- /tools/rm_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | import argparse 5 | import os 6 | import re 7 | from colorama import Fore, Style 8 | 9 | 10 | def remove_parser(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument("--start-iter", "-s", type=int, default=0, help="start iter to remove") 13 | parser.add_argument("--end-iter", "-e", type=int, default=0, help="end iter to remove") 14 | parser.add_argument("--prefix", "-p", type=str, default="model_", 15 | help="prefix of model to remove") 16 | parser.add_argument("--dir", "-d", type=str, default="/data/Outputs", 17 | help="dir to remove pth model") 18 | parser.add_argument("--real", "-r", action="store_true", 19 | help="really delete or just show what you will delete") 20 | return parser 21 | 22 | 23 | def remove_files(args): 24 | start = args.start_iter 25 | end = args.end_iter 26 | prefix = args.prefix 27 | for folder, _, files in os.walk(args.dir): 28 | # l = [x for x in f if x.endswith(".pth")] 29 | models = [f for f in files if re.search(prefix + r"[0123456789]*\.pth", f)] 30 | delete = [os.path.join(folder, model) for model in models 31 | if start <= int(model[len(prefix):-len(".pth")]) <= end] 32 | if delete: 33 | for f in delete: 34 | if args.real: 35 | print(f"remove {f}") 36 | os.remove(f) 37 | else: 38 | print(f"you may remove {f}") 39 | if not args.real: 40 | print(Fore.RED + "use --real parameter to really delete models" + Style.RESET_ALL) 41 | 42 | 43 | def main(): 44 | args = remove_parser().parse_args() 45 | remove_files(args) 46 | 47 | 48 | if __name__ == "__main__": 49 | main() 50 | -------------------------------------------------------------------------------- /cvpods/utils/benchmark/timer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # -*- coding: utf-8 -*- 4 | 5 | from time import perf_counter 6 | from typing import Optional 7 | 8 | 9 | class Timer: 10 | """ 11 | A timer which computes the time elapsed since the start/reset of the timer. 12 | """ 13 | 14 | def __init__(self): 15 | self.reset() 16 | 17 | def reset(self): 18 | """ 19 | Reset the timer. 20 | """ 21 | self._start = perf_counter() 22 | self._paused: Optional[float] = None 23 | self._total_paused = 0 24 | 25 | def pause(self): 26 | """ 27 | Pause the timer. 28 | """ 29 | if self._paused is not None: 30 | raise ValueError("Trying to pause a Timer that is already paused!") 31 | self._paused = perf_counter() 32 | 33 | def is_paused(self) -> bool: 34 | """ 35 | Returns: 36 | bool: whether the timer is currently paused 37 | """ 38 | return self._paused is not None 39 | 40 | def resume(self): 41 | """ 42 | Resume the timer. 
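        Example (illustrative sketch of the pause/resume bookkeeping):

            >>> t = Timer()
            >>> t.pause()        # freeze the clock
            >>> t.is_paused()
            True
            >>> t.resume()       # the paused interval is excluded from seconds()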
43 | """ 44 | if self._paused is None: 45 | raise ValueError("Trying to resume a Timer that is not paused!") 46 | self._total_paused += perf_counter() - self._paused 47 | self._paused = None 48 | 49 | def seconds(self) -> float: 50 | """ 51 | Returns: 52 | (float): the total number of seconds since the start/reset of the 53 | timer, excluding the time when the timer is paused. 54 | """ 55 | if self._paused is not None: 56 | end_time: float = self._paused # type: ignore 57 | else: 58 | end_time = perf_counter() 59 | return end_time - self._start - self._total_paused 60 | -------------------------------------------------------------------------------- /cvpods/configs/base_classification_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : base_classification_config.py 5 | @Time : 2020/05/07 23:56:17 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:17 10 | ''' 11 | 12 | from cvpods.configs.base_config import BaseConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | WEIGHTS="", 17 | PIXEL_MEAN=[0.406, 0.456, 0.485], # BGR 18 | PIXEL_STD=[0.225, 0.224, 0.229], 19 | BACKBONE=dict(FREEZE_AT=-1, ), # do not freeze 20 | RESNETS=dict( 21 | NUM_CLASSES=None, 22 | DEPTH=None, 23 | OUT_FEATURES=["linear"], 24 | NUM_GROUPS=1, 25 | # Options: FrozenBN, GN, "SyncBN", "BN" 26 | NORM="BN", 27 | ACTIVATION=dict( 28 | NAME="ReLU", 29 | INPLACE=True, 30 | ), 31 | # Whether init last bn weight of each BasicBlock or BottleneckBlock to 0 32 | ZERO_INIT_RESIDUAL=True, 33 | WIDTH_PER_GROUP=64, 34 | # Use True only for the original MSRA ResNet; use False for C2 and Torch models 35 | STRIDE_IN_1X1=False, 36 | RES5_DILATION=1, 37 | RES2_OUT_CHANNELS=256, 38 | STEM_OUT_CHANNELS=64, 39 | 40 | # Deep Stem 41 | DEEP_STEM=False, 42 | ), 43 | ), 44 | SOLVER=dict( 45 | IMS_PER_DEVICE=32, # defalut: 8 gpus x 32 = 256 46 | ), 47 | ) 48 | 49 | 50 | class BaseClassificationConfig(BaseConfig): 51 | def __init__(self): 52 | super(BaseClassificationConfig, self).__init__() 53 | self._register_configuration(_config_dict) 54 | 55 | 56 | config = BaseClassificationConfig() 57 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/dynamic_arch/cal_op_flops.py: -------------------------------------------------------------------------------- 1 | # Count Operation MFLOPs when fix batch to 1 2 | # @author: yanwei.li 3 | 4 | 5 | def count_Conv_flop( 6 | in_h, in_w, in_channel, out_channel, 7 | kernel_size, is_bias=False, stride=1, groups=1 8 | ): 9 | out_h = in_h // stride 10 | out_w = in_w // stride 11 | bias_ops = 1 if is_bias else 0 12 | kernel_ops = kernel_size[0] * kernel_size[1] * (in_channel // groups) 13 | delta_ops = (kernel_ops + bias_ops) * out_channel * out_h * out_w 14 | return delta_ops / 1e6 15 | 16 | 17 | def count_Linear_flop(in_num, out_num, is_bias): 18 | weight_ops = in_num * out_num 19 | bias_ops = out_num if is_bias else 0 20 | delta_ops = weight_ops + bias_ops 21 | return delta_ops / 1e6 22 | 23 | 24 | def count_BN_flop(in_h, in_w, in_channel, is_affine): 25 | multi_affine = 2 if is_affine else 1 26 | delta_ops = multi_affine * in_h * in_w * in_channel 27 | return delta_ops / 1e6 28 | 29 | 30 | def count_ReLU_flop(in_h, in_w, in_channel): 31 | delta_ops = in_h * in_w * in_channel 32 | return delta_ops / 1e6 33 | 34 | 35 | def count_Pool2d_flop(in_h, in_w, 
35 | def count_Pool2d_flop(in_h, in_w, out_channel, kernel_size, stride): 36 | out_h = in_h // stride 37 | out_w = in_w // stride 38 | kernel_ops = kernel_size[0] * kernel_size[1] 39 | delta_ops = kernel_ops * out_w * out_h * out_channel 40 | return delta_ops / 1e6 41 | 42 | 43 | def count_ConvBNReLU_flop( 44 | in_h, in_w, in_channel, out_channel, 45 | kernel_size, is_bias=False, stride=1, 46 | groups=1, is_affine=True 47 | ): 48 | flops = 0.0 49 | flops += count_Conv_flop( 50 | in_h, in_w, in_channel, out_channel, 51 | kernel_size, is_bias, stride, groups 52 | ) 53 | in_h = in_h // stride 54 | in_w = in_w // stride 55 | flops += count_BN_flop(in_h, in_w, out_channel, is_affine) 56 | flops += count_ReLU_flop(in_h, in_w, out_channel) 57 | return flops 58 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/label_smooth_ce_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class LabelSmoothCELoss(nn.Module): 6 | """ 7 | Cross-entropy loss with label smoothing. 8 | 9 | Args: 10 | epsilon: Smoothing level. Uses the one-hot label when set to 0 and a uniform label when set to 1. 11 | """ 12 | def __init__(self, epsilon): 13 | super(LabelSmoothCELoss, self).__init__() 14 | self.epsilon = epsilon 15 | self.logsoftmax = nn.LogSoftmax(dim=1) 16 | 17 | def forward(self, logits, targets): 18 | """ 19 | Args: 20 | logits: A float tensor of shape: (minibatch, C). 21 | targets: A long (int64) tensor of shape: (minibatch,). Stores the class indices 22 | in range `[0, C - 1]`. 23 | 24 | Returns: 25 | A scalar tensor. 26 | """ 27 | log_probs = self.logsoftmax(logits) 28 | targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1) 29 | targets = (1 - self.epsilon) * targets + self.epsilon / logits.shape[1] 30 | loss = (-targets * log_probs).mean(0).sum() 31 | return loss 32 | 33 | 34 | def label_smooth_ce_loss(logits, targets, epsilon): 35 | """ 36 | Cross-entropy loss with label smoothing. 37 | 38 | Args: 39 | logits: A float tensor of shape: (minibatch, C). 40 | targets: A long (int64) tensor of shape: (minibatch,). Stores the class indices 41 | in range `[0, C - 1]`. 42 | epsilon: Smoothing level. Uses the one-hot label when set to 0 and a uniform label when set to 1. 43 | 44 | Returns: 45 | A scalar tensor.
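    Example (illustrative sketch, added for clarity):

        >>> logits = torch.randn(4, 10)
        >>> targets = torch.randint(0, 10, (4,))
        >>> label_smooth_ce_loss(logits, targets, epsilon=0.1).dim()
        0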
46 | """ 47 | log_probs = nn.functional.log_softmax(logits, dim=1) 48 | targets = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1) 49 | targets = (1 - epsilon) * targets + epsilon / logits.shape[1] 50 | loss = (-targets * log_probs).mean(0).sum() 51 | return loss 52 | -------------------------------------------------------------------------------- /cvpods/configs/dynamic_routing_config.py: -------------------------------------------------------------------------------- 1 | from .base_config import BaseConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | LOAD_PROPOSALS=False, 6 | MASK_ON=False, 7 | KEYPOINT_ON=False, 8 | BACKBONE=dict(FREEZE_AT=0,), 9 | RESNETS=dict( 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | NORM="nnSyncBN", 12 | NUM_GROUPS=1, 13 | WIDTH_PER_GROUP=64, 14 | STRIDE_IN_1X1=True, 15 | RES5_DILATION=1, 16 | RES2_OUT_CHANNELS=256, 17 | STEM_OUT_CHANNELS=64, 18 | DEFORM_ON_PER_STAGE=[False, False, False, False], 19 | DEFORM_MODULATED=False, 20 | DEFORM_NUM_GROUPS=1, 21 | ), 22 | FPN=dict( 23 | IN_FEATURES=[], 24 | OUT_CHANNELS=256, 25 | NORM="", 26 | FUSE_TYPE="sum", 27 | ), 28 | SEM_SEG_HEAD=dict( 29 | # NAME="SemSegFPNHead", 30 | IN_FEATURES=[], 31 | IGNORE_VALUE=255, 32 | NUM_CLASSES=(), 33 | CONVS_DIM=256, 34 | COMMON_STRIDE=(), 35 | NORM="GN", 36 | LOSS_WEIGHT=1.0, 37 | ), 38 | ), 39 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 40 | SOLVER=dict( 41 | LR_SCHEDULER=dict( 42 | NAME="PolyLR", 43 | POLY_POWER=0.9, 44 | MAX_ITER=40000, 45 | WARMUP_ITERS=1000, 46 | WARMUP_FACTOR=0.001, 47 | WARMUP_METHOD="linear", 48 | ), 49 | OPTIMIZER=dict(BASE_LR=0.01, ), 50 | IMS_PER_BATCH=16, 51 | CHECKPOINT_PERIOD=5000, 52 | ), 53 | TEST=dict(PRECISE_BN=dict(ENABLED=True), ), 54 | ) 55 | 56 | 57 | class SemanticSegmentationConfig(BaseConfig): 58 | def __init__(self): 59 | super(SemanticSegmentationConfig, self).__init__() 60 | self._register_configuration(_config_dict) 61 | 62 | 63 | config = SemanticSegmentationConfig() 64 | -------------------------------------------------------------------------------- /cvpods/modeling/backbone/backbone.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : backbone.py 5 | @Time : 2020/05/07 23:58:08 6 | @Author : Facebook, Inc. and its affiliates. 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:58:08 10 | ''' 11 | 12 | from abc import ABCMeta, abstractmethod 13 | 14 | import torch.nn as nn 15 | 16 | from cvpods.layers import ShapeSpec 17 | 18 | __all__ = ["Backbone"] 19 | 20 | 21 | class Backbone(nn.Module, metaclass=ABCMeta): 22 | """ 23 | Abstract base class for network backbones. 24 | """ 25 | def __init__(self): 26 | """ 27 | The `__init__` method of any subclass can specify its own set of arguments. 28 | """ 29 | super().__init__() 30 | 31 | @abstractmethod 32 | def forward(self): 33 | """ 34 | Subclasses must override this method, but adhere to the same return type. 35 | 36 | Returns: 37 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 38 | """ 39 | pass 40 | 41 | @property 42 | def size_divisibility(self): 43 | """ 44 | Some backbones require the input height and width to be divisible by a 45 | specific integer. This is typically true for encoder / decoder type networks 46 | with lateral connection (e.g., FPN) for which feature maps need to match 47 | dimension in the "bottom up" and "top down" paths. 
Set to 0 if no specific 48 | input size divisibility is required. 49 | """ 50 | return 0 51 | 52 | def output_shape(self): 53 | """ 54 | Returns: 55 | dict[str->ShapeSpec] 56 | """ 57 | # this is a backward-compatible default 58 | return { 59 | name: ShapeSpec(channels=self._out_feature_channels[name], 60 | stride=self._out_feature_strides[name]) 61 | for name in self._out_features 62 | } 63 | -------------------------------------------------------------------------------- /cvpods/data/wrapped_dataset.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | from types import SimpleNamespace 4 | 5 | import numpy as np 6 | 7 | from torch.utils.data.dataset import ConcatDataset as _ConcatDataset 8 | 9 | from .registry import DATASETS 10 | 11 | 12 | @DATASETS.register() 13 | class ConcatDataset(_ConcatDataset): 14 | """A wrapper of concatenated dataset. 15 | Same as :obj:`torch.utils.data.dataset.ConcatDataset`, but 16 | concat the group flag for image aspect ratio. 17 | Args: 18 | datasets (list[:obj:`Dataset`]): A list of datasets. 19 | """ 20 | 21 | def __init__(self, datasets): 22 | super(ConcatDataset, self).__init__(datasets) 23 | if hasattr(self.datasets[0], 'aspect_ratios'): 24 | aspect_ratios = [d.aspect_ratios for d in self.datasets] 25 | self.aspect_ratios = np.concatenate(aspect_ratios) 26 | if hasattr(self.datasets[0], 'meta'): 27 | self.meta = {} 28 | for d in self.datasets: 29 | self.meta.update(d.meta) 30 | self.meta = SimpleNamespace(**self.meta) 31 | 32 | 33 | @DATASETS.register() 34 | class RepeatDataset(object): 35 | """A wrapper of repeated dataset. 36 | The length of repeated dataset will be `times` larger than the original 37 | dataset. This is useful when the data loading time is long but the dataset 38 | is small. Using RepeatDataset can reduce the data loading time between 39 | epochs. 40 | Args: 41 | dataset (:obj:`Dataset`): The dataset to be repeated. 42 | times (int): Repeat times. 
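    Example (illustrative sketch; ``base`` stands in for any map-style dataset):

        >>> repeated = RepeatDataset(base, times=3)
        >>> len(repeated) == 3 * len(base)
        True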
43 | """ 44 | 45 | def __init__(self, dataset, times): 46 | self.dataset = dataset 47 | self.times = times 48 | if hasattr(self.dataset, 'aspect_ratios'): 49 | self.aspect_ratios = np.tile(self.dataset.aspect_ratios, times) 50 | 51 | self._ori_len = len(self.dataset) 52 | 53 | def __getitem__(self, idx): 54 | return self.dataset[idx % self._ori_len] 55 | 56 | def __len__(self): 57 | return self.times * self._ori_len 58 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=4, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | ), 28 | ), 29 | DATASETS=dict( 30 | TRAIN=("coco_2017_train",), 31 | TEST=("coco_2017_val",), 32 | ), 33 | SOLVER=dict( 34 | IMS_PER_BATCH=16, 35 | BASE_LR=0.01, 36 | STEPS=(60000, 80000), 37 | MAX_ITER=90000, 38 | ), 39 | INPUT=dict( 40 | AUG=dict( 41 | TRAIN_PIPELINES=[ 42 | ("ResizeShortestEdge", 43 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 44 | ("RandomFlip", dict()), 45 | ], 46 | TEST_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 49 | ], 50 | ) 51 | ), 52 | OUTPUT_DIR=osp.join( 53 | '/data/Outputs/model_logs/cvpods_playground', 54 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 55 | ), 56 | ) 57 | 58 | 59 | class CustomFCOSConfig(FCOSConfig): 60 | def __init__(self): 61 | super(CustomFCOSConfig, self).__init__() 62 | self._register_configuration(_config_dict) 63 | 64 | 65 | config = CustomFCOSConfig() 66 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/refine.hpp: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | extern std::tuple 5 | tree_filter_refine_forward( 6 | const at::Tensor & feature_in_tensor, 7 | const at::Tensor & edge_weight_tensor, 8 | const at::Tensor & self_weight_tensor, 9 | const at::Tensor & sorted_index_tensor, 10 | const at::Tensor & sorted_parent_index_tensor, 11 | const at::Tensor & sorted_child_index_tensor 12 | ); 13 | 14 | extern at::Tensor tree_filter_refine_backward_feature( 15 | const at::Tensor & feature_in_tensor, 16 | const at::Tensor & edge_weight_tensor, 17 | const at::Tensor & self_weight_tensor, 18 | const at::Tensor & sorted_index_tensor, 19 | const at::Tensor & sorted_parent_tensor, 20 | const at::Tensor & sorted_child_tensor, 21 | const at::Tensor & feature_aggr_tensor, 22 | const at::Tensor & feature_aggr_up_tensor, 23 | const at::Tensor & grad_out_tensor 24 | ); 25 | 26 | extern at::Tensor tree_filter_refine_backward_edge_weight( 27 | const at::Tensor & feature_in_tensor, 28 | const at::Tensor & edge_weight_tensor, 29 | const at::Tensor & self_weight_tensor, 30 | const at::Tensor & sorted_index_tensor, 31 | const at::Tensor & 
sorted_parent_tensor, 32 | const at::Tensor & sorted_child_tensor, 33 | const at::Tensor & feature_aggr_tensor, 34 | const at::Tensor & feature_aggr_up_tensor, 35 | const at::Tensor & grad_out_tensor 36 | ); 37 | 38 | extern at::Tensor tree_filter_refine_backward_self_weight( 39 | const at::Tensor & feature_in_tensor, 40 | const at::Tensor & edge_weight_tensor, 41 | const at::Tensor & self_weight_tensor, 42 | const at::Tensor & sorted_index_tensor, 43 | const at::Tensor & sorted_parent_tensor, 44 | const at::Tensor & sorted_child_tensor, 45 | const at::Tensor & feature_aggr_tensor, 46 | const at::Tensor & feature_aggr_up_tensor, 47 | const at::Tensor & grad_out_tensor 48 | ); 49 | 50 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d4/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=8, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | RESIZE_METHOD="bilinear", 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | IMS_PER_BATCH=16, 36 | BASE_LR=0.01, 37 | STEPS=(60000, 80000), 38 | MAX_ITER=90000, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("ResizeShortestEdge", 44 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 45 | ("RandomFlip", dict()), 46 | ], 47 | TEST_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 50 | ], 51 | ) 52 | ), 53 | OUTPUT_DIR=osp.join( 54 | '/data/Outputs/model_logs/cvpods_playground', 55 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 56 | ), 57 | ) 58 | 59 | 60 | class CustomFCOSConfig(FCOSConfig): 61 | def __init__(self): 62 | super(CustomFCOSConfig, self).__init__() 63 | self._register_configuration(_config_dict) 64 | 65 | 66 | config = CustomFCOSConfig() 67 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.fix.d8/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | RESIZE_METHOD="bilinear", 28 | ), 29 | ), 30 | DATASETS=dict( 31 | TRAIN=("coco_2017_train",), 32 | TEST=("coco_2017_val",), 33 | ), 34 | SOLVER=dict( 35 | 
IMS_PER_BATCH=16, 36 | BASE_LR=0.01, 37 | STEPS=(60000, 80000), 38 | MAX_ITER=90000, 39 | ), 40 | INPUT=dict( 41 | AUG=dict( 42 | TRAIN_PIPELINES=[ 43 | ("ResizeShortestEdge", 44 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 45 | ("RandomFlip", dict()), 46 | ], 47 | TEST_PIPELINES=[ 48 | ("ResizeShortestEdge", 49 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 50 | ], 51 | ) 52 | ), 53 | OUTPUT_DIR=osp.join( 54 | '/data/Outputs/model_logs/cvpods_playground', 55 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 56 | ), 57 | ) 58 | 59 | 60 | class CustomFCOSConfig(FCOSConfig): 61 | def __init__(self): 62 | super(CustomFCOSConfig, self).__init__() 63 | self._register_configuration(_config_dict) 64 | 65 | 66 | config = CustomFCOSConfig() 67 | -------------------------------------------------------------------------------- /cvpods/layers/deform_conv_with_off.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | import torch 4 | import torch.nn as nn 5 | 6 | from .deform_conv import DeformConv, ModulatedDeformConv 7 | 8 | 9 | class DeformConvWithOff(nn.Module): 10 | 11 | def __init__(self, in_channels, out_channels, 12 | kernel_size=3, stride=1, padding=1, 13 | dilation=1, deformable_groups=1): 14 | super(DeformConvWithOff, self).__init__() 15 | self.offset_conv = nn.Conv2d( 16 | in_channels, 17 | deformable_groups * 2 * kernel_size * kernel_size, 18 | kernel_size=kernel_size, 19 | stride=stride, 20 | padding=padding, 21 | ) 22 | self.dcn = DeformConv( 23 | in_channels, out_channels, kernel_size=kernel_size, 24 | stride=stride, padding=padding, dilation=dilation, 25 | deformable_groups=deformable_groups, 26 | ) 27 | 28 | def forward(self, input): 29 | offset = self.offset_conv(input) 30 | output = self.dcn(input, offset) 31 | return output 32 | 33 | 34 | class ModulatedDeformConvWithOff(nn.Module): 35 | 36 | def __init__(self, in_channels, out_channels, 37 | kernel_size=3, stride=1, padding=1, 38 | dilation=1, deformable_groups=1): 39 | super(ModulatedDeformConvWithOff, self).__init__() 40 | self.offset_mask_conv = nn.Conv2d( 41 | in_channels, 42 | deformable_groups * 3 * kernel_size * kernel_size, 43 | kernel_size=kernel_size, 44 | stride=stride, 45 | padding=padding, 46 | ) 47 | self.dcnv2 = ModulatedDeformConv( 48 | in_channels, out_channels, kernel_size=kernel_size, 49 | stride=stride, padding=padding, dilation=dilation, 50 | deformable_groups=deformable_groups, 51 | ) 52 | 53 | def forward(self, input): 54 | x = self.offset_mask_conv(input) 55 | o1, o2, mask = torch.chunk(x, 3, dim=1) 56 | offset = torch.cat((o1, o2), dim=1) 57 | mask = torch.sigmoid(mask) 58 | output = self.dcnv2(input, offset, mask) 59 | return output 60 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/proposal_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import math 3 | 4 | import torch 5 | 6 | from cvpods.structures import Instances 7 | 8 | 9 | def add_ground_truth_to_proposals(gt_boxes, proposals): 10 | """ 11 | Call `add_ground_truth_to_proposals_single_image` for all images. 12 | 13 | Args: 14 | gt_boxes (list[Boxes]): list of N elements. Element i is a Boxes 15 | representing the ground-truth for image i. 16 | proposals (list[Instances]): list of N elements. Element i is an Instances 17 | representing the proposals for image i. 18 | 19 | Returns: 20 | list[Instances]: list of N Instances. Each is the proposals for the image, 21 | with fields "proposal_boxes" and "objectness_logits". 22 | """ 23 | assert gt_boxes is not None 24 | 25 | assert len(proposals) == len(gt_boxes) 26 | if len(proposals) == 0: 27 | return proposals 28 | 29 | return [ 30 | add_ground_truth_to_proposals_single_image(gt_boxes_i, proposals_i) 31 | for gt_boxes_i, proposals_i in zip(gt_boxes, proposals) 32 | ] 33 | 34 |
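# Note (added for clarity): in `add_ground_truth_to_proposals_single_image`
# below, gt_logit_value = log((1 - 1e-10) / 1e-10) ~= 23.03, the inverse
# sigmoid of (1 - 1e-10), so appended ground-truth boxes carry an objectness
# probability of effectively 1.0.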
35 | def add_ground_truth_to_proposals_single_image(gt_boxes, proposals): 36 | """ 37 | Augment `proposals` with ground-truth boxes from `gt_boxes`. 38 | 39 | Args: 40 | Same as `add_ground_truth_to_proposals`, but with gt_boxes and proposals 41 | per image. 42 | 43 | Returns: 44 | Same as `add_ground_truth_to_proposals`, but for only one image. 45 | """ 46 | device = proposals.objectness_logits.device 47 | # Concatenating gt_boxes with proposals requires them to have the same fields 48 | # Assign all ground-truth boxes an objectness logit corresponding to P(object) \approx 1. 49 | gt_logit_value = math.log((1.0 - 1e-10) / (1 - (1.0 - 1e-10))) 50 | 51 | gt_logits = gt_logit_value * torch.ones(len(gt_boxes), device=device) 52 | gt_proposal = Instances(proposals.image_size) 53 | 54 | gt_proposal.proposal_boxes = gt_boxes 55 | gt_proposal.objectness_logits = gt_logits 56 | new_proposals = Instances.cat([proposals, gt_proposal]) 57 | 58 | return new_proposals 59 | -------------------------------------------------------------------------------- /cvpods/data/samplers/grouped_batch_sampler.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import numpy as np 3 | 4 | from torch.utils.data.sampler import BatchSampler, Sampler 5 | 6 | from ..registry import SAMPLERS 7 | 8 | 9 | @SAMPLERS.register() 10 | class GroupedBatchSampler(BatchSampler): 11 | """ 12 | Wraps another sampler to yield a mini-batch of indices. 13 | It enforces that the batch only contain elements from the same group. 14 | It also tries to provide mini-batches that follow an ordering as close 15 | as possible to the ordering from the original sampler. 16 | """ 17 | 18 | def __init__(self, sampler, group_ids, batch_size): 19 | """ 20 | Args: 21 | sampler (Sampler): Base sampler. 22 | group_ids (list[int]): If the sampler produces indices in range [0, N), 23 | `group_ids` must be a list of `N` ints which contains the group id of each sample. 24 | The group ids must be a set of integers in the range [0, num_groups). 25 | batch_size (int): Size of mini-batch.
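        Example (illustrative sketch; two groups, e.g. portrait vs. landscape
        aspect ratios, with batch_size=2):

            >>> from torch.utils.data.sampler import SequentialSampler
            >>> batches = GroupedBatchSampler(SequentialSampler(range(4)), [0, 1, 0, 1], 2)
            >>> for batch in batches:
            ...     print(batch)
            [0, 2]
            [1, 3]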
26 | """ 27 | if not isinstance(sampler, Sampler): 28 | raise ValueError( 29 | "sampler should be an instance of " 30 | "torch.utils.data.Sampler, but got sampler={}".format(sampler) 31 | ) 32 | self.sampler = sampler 33 | self.group_ids = np.asarray(group_ids) 34 | assert self.group_ids.ndim == 1 35 | self.batch_size = batch_size 36 | groups = np.unique(self.group_ids).tolist() 37 | 38 | # buffer the indices of each group until batch size is reached 39 | self.buffer_per_group = {k: [] for k in groups} 40 | 41 | def __iter__(self): 42 | for idx in self.sampler: 43 | group_id = self.group_ids[idx] 44 | group_buffer = self.buffer_per_group[group_id] 45 | group_buffer.append(idx) 46 | if len(group_buffer) == self.batch_size: 47 | yield group_buffer[:] # yield a copy of the list 48 | del group_buffer[:] 49 | 50 | def __len__(self): 51 | raise NotImplementedError("len() of GroupedBatchSampler is not well-defined.") 52 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/tree_filter/rst.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | 13 | #include "boruvka_rst.hpp" 14 | 15 | static void forward_kernel(int * edge_index, float * edge_weight, int * edge_out, int vertex_count, int edge_count){ 16 | struct Graph * g = create_graph(vertex_count, edge_count); 17 | for (int i = 0; i < edge_count; ++i){ 18 | g->edge[i].src = edge_index[i * 2]; 19 | g->edge[i].dest = edge_index[i * 2 + 1]; 20 | g->edge[i].weight = edge_weight[i]; 21 | } 22 | 23 | boruvka_rst(g, edge_out); 24 | 25 | delete[] g->edge; 26 | delete[] g; 27 | } 28 | 29 | at::Tensor rst_forward( 30 | const at::Tensor & edge_index_tensor, 31 | const at::Tensor & edge_weight_tensor, 32 | int vertex_count){ 33 | unsigned batch_size = edge_index_tensor.size(0); 34 | unsigned edge_count = edge_index_tensor.size(1); 35 | 36 | auto edge_index_cpu = edge_index_tensor.cpu(); 37 | auto edge_weight_cpu = edge_weight_tensor.cpu(); 38 | auto edge_out_cpu = at::empty({batch_size, vertex_count - 1, 2}, edge_index_cpu.options()); 39 | 40 | int * edge_out = edge_out_cpu.contiguous().data(); 41 | int * edge_index = edge_index_cpu.contiguous().data(); 42 | float * edge_weight = edge_weight_cpu.contiguous().data(); 43 | 44 | // Loop for batch 45 | std::thread pids[batch_size]; 46 | for (unsigned i = 0; i < batch_size; i++){ 47 | auto edge_index_iter = edge_index + i * edge_count * 2; 48 | auto edge_weight_iter = edge_weight + i * edge_count; 49 | auto edge_out_iter = edge_out + i * (vertex_count - 1) * 2; 50 | pids[i] = std::thread(forward_kernel, edge_index_iter, edge_weight_iter, edge_out_iter, vertex_count, edge_count); 51 | } 52 | 53 | for (unsigned i = 0; i < batch_size; i++){ 54 | pids[i].join(); 55 | } 56 | 57 | auto edge_out_tensor = edge_out_cpu.to(edge_index_tensor.device()); 58 | 59 | return edge_out_tensor; 60 | } 61 | 62 | -------------------------------------------------------------------------------- /cvpods/configs/fcos_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : fcos_config.py 5 | @Time : 2020/05/07 23:56:09 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:09 10 | ''' 11 | 12 | from .base_detection_config import 
BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # META_ARCHITECTURE="RetinaNet", 17 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 18 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 19 | SHIFT_GENERATOR=dict( 20 | NUM_SHIFTS=1, 21 | # Relative offset between the center of the first shift and the top-left corner of the image 22 | # Units: fraction of feature map stride (e.g., 0.5 means half stride) 23 | # Allowed values are floats in the [0, 1) range. 24 | # Recommended value is 0.5, although it is not expected to affect model accuracy. 25 | OFFSET=0.0, 26 | ), 27 | FCOS=dict( 28 | NUM_CLASSES=80, 29 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 30 | NUM_CONVS=4, 31 | FPN_STRIDES=[8, 16, 32, 64, 128], 32 | PRIOR_PROB=0.01, 33 | CENTERNESS_ON_REG=False, 34 | NORM_REG_TARGETS=False, 35 | SCORE_THRESH_TEST=0.05, 36 | TOPK_CANDIDATES_TEST=1000, 37 | NMS_THRESH_TEST=0.6, 38 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 39 | FOCAL_LOSS_GAMMA=2.0, 40 | FOCAL_LOSS_ALPHA=0.25, 41 | IOU_LOSS_TYPE="iou", 42 | CENTER_SAMPLING_RADIUS=0.0, 43 | OBJECT_SIZES_OF_INTEREST=[ 44 | [-1, 64], 45 | [64, 128], 46 | [128, 256], 47 | [256, 512], 48 | [512, float("inf")], 49 | ], 50 | NORM_SYNC=True, 51 | BUDGET_LOSS_LAMBDA=0, 52 | ), 53 | ), 54 | ) 55 | 56 | 57 | class FCOSConfig(BaseDetectionConfig): 58 | def __init__(self): 59 | super(FCOSConfig, self).__init__() 60 | self._register_configuration(_config_dict) 61 | 62 | 63 | config = FCOSConfig() 64 | -------------------------------------------------------------------------------- /cvpods/layers/swap_align2nat.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | class _SwapAlign2Nat(Function): 10 | @staticmethod 11 | def forward(ctx, X, lambda_val, pad_val): 12 | ctx.lambda_val = lambda_val 13 | ctx.input_shape = X.size() 14 | 15 | Y = _C.swap_align2nat_forward(X, lambda_val, pad_val) 16 | return Y 17 | 18 | @staticmethod 19 | @once_differentiable 20 | def backward(ctx, gY): 21 | lambda_val = ctx.lambda_val 22 | bs, ch, h, w = ctx.input_shape 23 | 24 | gX = _C.swap_align2nat_backward(gY, lambda_val, bs, ch, h, w) 25 | 26 | return gX, None, None 27 | 28 | 29 | swap_align2nat = _SwapAlign2Nat.apply 30 | 31 | 32 | class SwapAlign2Nat(nn.Module): 33 | """ 34 | The op `SwapAlign2Nat` described in https://arxiv.org/abs/1903.12174. 35 | Given an input tensor that predicts masks of shape (N, C=VxU, H, W), 36 | applying the op returns masks of shape (N, V'xU', H', W') where 37 | the unit lengths of (V, U) and (H, W) are swapped, and the mask representation 38 | is transformed from aligned to natural. 39 | Args: 40 | lambda_val (int): the relative unit length ratio between (V, U) and (H, W); 41 | since we always have larger unit lengths for (V, U) than (H, W), 42 | lambda_val is always >= 1. 43 | pad_val (float): padding value for the values falling outside of the input 44 | tensor, defaults to -6, as sigmoid(-6) is ~0, indicating 45 | that there are no masks outside of the tensor.
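    Example (illustrative shape sketch, added here; the underlying kernel is
    CUDA-only, so this is not a runnable doctest):

        op = SwapAlign2Nat(lambda_val=2)
        # X: (N, V*U, H, W) aligned masks; plausibly Y = op(X) has shape
        # (N, (V//2)*(U//2), 2*H, 2*W), i.e. the (V, U) and (H, W) unit
        # lengths are swapped by the ratio lambda_val.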
46 | """ 47 | 48 | def __init__(self, lambda_val, pad_val=-6.0): 49 | super(SwapAlign2Nat, self).__init__() 50 | self.lambda_val = lambda_val 51 | self.pad_val = pad_val 52 | 53 | def forward(self, X): 54 | return swap_align2nat(X, self.lambda_val, self.pad_val) 55 | 56 | def __repr__(self): 57 | tmpstr = self.__class__.__name__ + "(" 58 | tmpstr += "lambda_val=" + str(self.lambda_val) 59 | tmpstr += ", pad_val=" + str(self.pad_val) 60 | tmpstr += ")" 61 | return tmpstr 62 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d4.lambda-0_1/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=8, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.1, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.0, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | 
TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_1/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.1, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /cvpods_playground/fcos.res50.1x.dynamic.d8.lambda-0_8/config.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | 3 | from cvpods.configs.fcos_config import FCOSConfig 4 | 5 | _config_dict = dict( 6 | MODEL=dict( 7 | WEIGHTS="detectron2://ImageNetPretrained/MSRA/R-50.pkl", 8 | RESNETS=dict(DEPTH=50), 9 | FCOS=dict( 10 | NUM_CONVS=16, 11 | CENTERNESS_ON_REG=True, 12 | NORM_REG_TARGETS=True, 13 | NMS_THRESH_TEST=0.6, 14 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 15 | FOCAL_LOSS_GAMMA=2.0, 16 | FOCAL_LOSS_ALPHA=0.25, 17 | IOU_LOSS_TYPE="giou", 18 | 
CENTER_SAMPLING_RADIUS=1.5, 19 | OBJECT_SIZES_OF_INTEREST=[ 20 | [-1, 64], 21 | [64, 128], 22 | [128, 256], 23 | [256, 512], 24 | [512, float("inf")], 25 | ], 26 | NORM_SYNC=True, 27 | NUM_GROUPS=1, 28 | GATE_ACTIVATION="GeReTanH", 29 | GATE_ACTIVATION_KARGS=dict(tau=1.5), 30 | RESIZE_METHOD="bilinear", 31 | BUDGET_LOSS_LAMBDA=0.8, 32 | ), 33 | ), 34 | DATASETS=dict( 35 | TRAIN=("coco_2017_train",), 36 | TEST=("coco_2017_val",), 37 | ), 38 | SOLVER=dict( 39 | IMS_PER_BATCH=16, 40 | BASE_LR=0.01, 41 | STEPS=(60000, 80000), 42 | MAX_ITER=90000, 43 | ), 44 | INPUT=dict( 45 | AUG=dict( 46 | TRAIN_PIPELINES=[ 47 | ("ResizeShortestEdge", 48 | dict(short_edge_length=(800,), max_size=1333, sample_style="choice")), 49 | ("RandomFlip", dict()), 50 | ], 51 | TEST_PIPELINES=[ 52 | ("ResizeShortestEdge", 53 | dict(short_edge_length=800, max_size=1333, sample_style="choice")), 54 | ], 55 | ) 56 | ), 57 | OUTPUT_DIR=osp.join( 58 | '/data/Outputs/model_logs/cvpods_playground', 59 | osp.split(osp.realpath(__file__))[0].split("playground/")[-1] 60 | ), 61 | ) 62 | 63 | 64 | class CustomFCOSConfig(FCOSConfig): 65 | def __init__(self): 66 | super(CustomFCOSConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = CustomFCOSConfig() 71 | -------------------------------------------------------------------------------- /tools/dev/parse_results.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 3 | 4 | # A shell script that parses metrics from the log file. 5 | # Make it easier for developers to track performance of models. 6 | 7 | LOG="$1" 8 | 9 | if [[ -z "$LOG" ]]; then 10 | echo "Usage: $0 /path/to/log/file" 11 | exit 1 12 | fi 13 | 14 | # [12/15 11:47:32] trainer INFO: Total training time: 12:15:04.446477 (0.4900 s / it) 15 | # [12/15 11:49:03] inference INFO: Total inference time: 0:01:25.326167 (0.13652186737060548 s / img per device, on 8 devices) 16 | # [12/15 11:49:03] inference INFO: Total inference pure compute time: ..... 
17 | 18 | # training time 19 | trainspeed=$(grep -o 'Overall training.*' "$LOG" | grep -Eo '\(.*\)' | grep -o '[0-9\.]*') 20 | echo "Training speed: $trainspeed s/it" 21 | 22 | # inference time: there could be multiple inference during training 23 | inferencespeed=$(grep -o 'Total inference pure.*' "$LOG" | tail -n1 | grep -Eo '\(.*\)' | grep -o '[0-9\.]*' | head -n1) 24 | echo "Inference speed: $inferencespeed s/it" 25 | 26 | # [12/15 11:47:18] trainer INFO: eta: 0:00:00 iter: 90000 loss: 0.5407 (0.7256) loss_classifier: 0.1744 (0.2446) loss_box_reg: 0.0838 (0.1160) loss_mask: 0.2159 (0.2722) loss_objectness: 0.0244 (0.0429) loss_rpn_box_reg: 0.0279 (0.0500) time: 0.4487 (0.4899) data: 0.0076 (0.0975) lr: 0.000200 max mem: 4161 27 | memory=$(grep -o 'max[_ ]mem: [0-9]*' "$LOG" | tail -n1 | grep -o '[0-9]*') 28 | echo "Training memory: $memory MB" 29 | 30 | echo "Easy to copypaste:" 31 | echo "$trainspeed","$inferencespeed","$memory" 32 | 33 | echo "------------------------------" 34 | 35 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: bbox 36 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 37 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0017,0.0024,0.0017,0.0005,0.0019,0.0011 38 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: Task: segm 39 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: AP,AP50,AP75,APs,APm,APl 40 | # [12/26 17:26:32] engine.coco_evaluation: copypaste: 0.0014,0.0021,0.0016,0.0005,0.0016,0.0011 41 | 42 | echo "COCO Results:" 43 | num_tasks=$(grep -o 'copypaste:.*Task.*' "$LOG" | sort -u | wc -l) 44 | # each task has 3 lines 45 | grep -o 'copypaste:.*' "$LOG" | cut -d ' ' -f 2- | tail -n $((num_tasks * 3)) 46 | -------------------------------------------------------------------------------- /cvpods/configs/keypoint_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | KEYPOINT_ON=True, 6 | ROI_KEYPOINT_HEAD=dict( 7 | NAME="KRCNNConvDeconvUpsampleHead", 8 | POOLER_RESOLUTION=14, 9 | POOLER_SAMPLING_RATIO=0, 10 | CONV_DIMS=tuple(512 for _ in range(8)), 11 | NUM_KEYPOINTS=17, # 17 is the number of keypoints in COCO 12 | # Images with too few (or no) keypoints are excluded from training. 13 | MIN_KEYPOINTS_PER_IMAGE=1, 14 | # Normalize by the total number of visible keypoints in the minibatch if True. 15 | # Otherwise, normalize by the total number of keypoints that could ever exist 16 | # in the minibatch. 17 | # The keypoint softmax loss is only calculated on visible keypoints. 18 | # Since the number of visible keypoints can vary significantly between 19 | # minibatches, this has the effect of up-weighting the importance of 20 | # minibatches with few visible keypoints. (Imagine the extreme case of 21 | # only one visible keypoint versus N: in the case of N, each one 22 | # contributes 1/N to the gradient compared to the single keypoint 23 | # determining the gradient direction). Instead, we can normalize the 24 | # loss by the total number of keypoints, if it were the case that all 25 | # keypoints were visible in a full minibatch. (Returning to the example, 26 | # this means that the one visible keypoint contributes as much as each 27 | # of the N keypoints.) 
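            # Illustrative arithmetic (numbers assumed for this sketch): with 17
            # COCO keypoints per instance and 10 instances in a minibatch, at most
            # 170 keypoints exist; if only 20 are visible, normalizing by visible
            # keypoints divides the loss by 20 instead of 170, up-weighting this
            # sparse minibatch by 8.5x.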
28 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS=True, 29 | # Multi-task loss weight to use for keypoints 30 | # Recommended values: 31 | # - use 1.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is True 32 | # - use 4.0 if NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS is False 33 | LOSS_WEIGHT=1.0, 34 | # Type of pooling operation applied to the incoming feature map for each RoI 35 | POOLER_TYPE="ROIAlignV2", 36 | ), 37 | ) 38 | ) 39 | 40 | 41 | class KeypointConfig(BaseDetectionConfig): 42 | def __init__(self): 43 | super(KeypointConfig, self).__init__() 44 | self._register_configuration(_config_dict) 45 | 46 | 47 | config = KeypointConfig() 48 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/activation_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | 7 | import torch.nn as nn 8 | 9 | from .jit_handles import generic_activation_jit, get_jit_model_analysis 10 | 11 | # A dictionary that maps supported operations to their activation count handles. 12 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 13 | "aten::_convolution": generic_activation_jit("conv"), 14 | "aten::addmm": generic_activation_jit("addmm"), 15 | } 16 | 17 | 18 | def activation_count( 19 | model: nn.Module, 20 | inputs: typing.Tuple[object, ...], 21 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None, 22 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 23 | """ 24 | Given a model and an input to the model, compute the total number of 25 | activations of the model. Note the input should have a batch size of 1. 26 | 27 | Args: 28 | model (nn.Module): The model to compute activation counts. 29 | inputs (tuple): Inputs that are passed to `model` to count activations. 30 | Inputs need to be in a tuple. 31 | supported_ops (dict(str,Callable) or None) : By default, we count 32 | activations for convolution and fully connected layers. Users can 33 | provide customized supported_ops if desired. 34 | 35 | Returns: 36 | tuple[defaultdict, Counter]: A dictionary that records the number of 37 | activations (in millions) for each operation and a Counter that records the 38 | number of skipped operations. 39 | """ 40 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 41 | if not supported_ops: 42 | supported_ops = _SUPPORTED_OPS.copy() 43 | 44 | # Run activation count. 45 | total_activation_count, skipped_ops = get_jit_model_analysis( 46 | model, inputs, supported_ops 47 | ) 48 | 49 | # Log for skipped operations. 50 | if len(skipped_ops) > 0: 51 | for op, freq in skipped_ops.items(): 52 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 53 | 54 | # Convert activation count to mega count. 55 | final_count = defaultdict(float) 56 | for op in total_activation_count: 57 | final_count[op] = total_activation_count[op] / 1e6 58 | 59 | return final_count, skipped_ops 60 | -------------------------------------------------------------------------------- /datasets/README.md: -------------------------------------------------------------------------------- 1 | 2 | For a few datasets that detectron2 natively supports, 3 | the datasets are assumed to exist in a directory called 4 | "datasets/", under the directory where you launch the program.
5 | They need to have the following directory structure: 6 | 7 | ## Expected dataset structure for COCO instance/keypoint detection: 8 | 9 | ``` 10 | coco/ 11 | annotations/ 12 | instances_{train,val}2017.json 13 | person_keypoints_{train,val}2017.json 14 | {train,val}2017/ 15 | # image files that are mentioned in the corresponding json 16 | ``` 17 | 18 | You can use the 2014 version of the dataset as well. 19 | 20 | Some of the builtin tests (`dev/run_*_tests.sh`) use a tiny version of the COCO dataset, 21 | which you can download with `./prepare_for_tests.sh`. 22 | 23 | ## Expected dataset structure for PanopticFPN: 24 | 25 | ``` 26 | coco/ 27 | annotations/ 28 | panoptic_{train,val}2017.json 29 | panoptic_{train,val}2017/ 30 | # png annotations 31 | panoptic_stuff_{train,val}2017/ # generated by the script mentioned below 32 | ``` 33 | 34 | Install panopticapi by: 35 | ``` 36 | pip install git+https://github.com/cocodataset/panopticapi.git 37 | ``` 38 | Then run `python prepare_panoptic_fpn.py` to extract semantic annotations from panoptic annotations. 39 | 40 | ## Expected dataset structure for LVIS instance segmentation: 41 | ``` 42 | coco/ 43 | {train,val,test}2017/ 44 | lvis/ 45 | lvis_v0.5_{train,val}.json 46 | lvis_v0.5_image_info_test.json 47 | ``` 48 | 49 | Install lvis-api by: 50 | ``` 51 | pip install git+https://github.com/lvis-dataset/lvis-api.git 52 | ``` 53 | 54 | ## Expected dataset structure for cityscapes: 55 | ``` 56 | cityscapes/ 57 | gtFine/ 58 | train/ 59 | aachen/ 60 | color.png, instanceIds.png, labelIds.png, polygons.json, 61 | labelTrainIds.png 62 | ... 63 | val/ 64 | test/ 65 | leftImg8bit/ 66 | train/ 67 | val/ 68 | test/ 69 | ``` 70 | Install cityscapes scripts by: 71 | ``` 72 | pip install git+https://github.com/mcordts/cityscapesScripts.git 73 | ``` 74 | 75 | Note: 76 | labelTrainIds.png are created by `cityscapesscripts/preparation/createTrainIdLabelImgs.py`. 77 | They are not needed for instance segmentation. 78 | 79 | ## Expected dataset structure for Pascal VOC: 80 | ``` 81 | VOC20{07,12}/ 82 | Annotations/ 83 | ImageSets/ 84 | Main/ 85 | trainval.txt 86 | test.txt 87 | # train.txt or val.txt, if you use these splits 88 | JPEGImages/ 89 | ``` 90 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/lars/adaptive_lr_cuda.cu: -------------------------------------------------------------------------------- 1 | #include "cuda.h" 2 | #include "cuda_runtime.h" 3 | #include "torch/extension.h" 4 | 5 | namespace cvpods { 6 | template <typename scalar_t> 7 | __global__ void ComputeAdaptiveLrOnDeviceAfterTypeCheck( 8 | const scalar_t &param_norm, 9 | const scalar_t &grad_norm, 10 | const scalar_t weight_decay, 11 | const scalar_t eps, 12 | const scalar_t trust_coef, 13 | scalar_t *out) { 14 | // 1. The case that `param_norm` is `zero` means all elements of the parameter 15 | // are `zero` (in general, it occurs right after the parameter is initialized 16 | // as `zero`). In this case, `adaptive_lr` will be calculated as `zero`, which 17 | // may be the reason for breaking parameter updates. In this context, we construct 18 | // LARS to use only the wrapped optimizer's algorithm when this situation occurs by 19 | // converting `adaptive_lr` to `one`. 20 | // 21 | // 2. The case that `grad_norm` is `zero` means all elements of the gradient are 22 | // `zero` (in general, it occurs when backward propagation doesn't work correctly).
23 | // In this case, it can be interpreted as there exists an exceptional situation, 24 | // which may result in inappropriate parameter updates. In this context, we 25 | // construct LARS to pass the responsibility of handling the exceptional case 26 | // to the wrapped optimizer when this exception occurs by converting `adaptive_lr` 27 | // to `one`. 28 | if (param_norm > 0 && grad_norm > 0) { 29 | scalar_t divisor = grad_norm + weight_decay * param_norm + eps; 30 | *out = param_norm / divisor * trust_coef; 31 | } else { 32 | *out = 1.0; 33 | } 34 | } 35 | 36 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor") 37 | 38 | void ComputeAdaptiveLrOnDevice( 39 | torch::Tensor param_norm, 40 | torch::Tensor grad_norm, 41 | double weight_decay, 42 | double eps, 43 | double trust_coef, 44 | torch::Tensor out) { 45 | CHECK_CUDA(param_norm); 46 | CHECK_CUDA(grad_norm); 47 | CHECK_CUDA(out); 48 | 49 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 50 | param_norm.type(), 51 | "compute_adaptive_lr_cuda", 52 | ([&] { 53 | ComputeAdaptiveLrOnDeviceAfterTypeCheck<scalar_t><<<1, 1>>>( 54 | *param_norm.data<scalar_t>(), 55 | *grad_norm.data<scalar_t>(), 56 | weight_decay, 57 | eps, 58 | trust_coef, 59 | out.data<scalar_t>()); 60 | })); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /cvpods/modeling/sampling.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import torch 3 | 4 | __all__ = ["subsample_labels"] 5 | 6 | 7 | def subsample_labels(labels, num_samples, positive_fraction, bg_label): 8 | """ 9 | Return `num_samples` (or fewer, if not enough found) 10 | random samples from `labels` which is a mixture of positives & negatives. 11 | It will try to return as many positives as possible without 12 | exceeding `positive_fraction * num_samples`, and then try to 13 | fill the remaining slots with negatives. 14 | 15 | Args: 16 | labels (Tensor): (N, ) label vector with values: 17 | * -1: ignore 18 | * bg_label: background ("negative") class 19 | * otherwise: one or more foreground ("positive") classes 20 | num_samples (int): The total number of labels with value >= 0 to return. 21 | Values that are not sampled will be filled with -1 (ignore). 22 | positive_fraction (float): The number of subsampled labels with values > 0 23 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 24 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 25 | In other words, if there are not enough positives, the sample is filled with 26 | negatives. If there are also not enough negatives, then as many elements are 27 | sampled as is possible. 28 | bg_label (int): label index of background ("negative") class. 29 | 30 | Returns: 31 | pos_idx, neg_idx (Tensor): 32 | 1D vector of indices. The total length of both is `num_samples` or fewer.
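    Example (illustrative)::

        >>> labels = torch.tensor([-1, 0, 0, 0, 1, 2, 0])
        >>> pos_idx, neg_idx = subsample_labels(labels, num_samples=4, positive_fraction=0.5, bg_label=0)
        >>> # pos_idx is a permutation of {4, 5}; neg_idx holds 2 random indices from {1, 2, 3, 6}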
33 | """ 34 | positive = torch.nonzero((labels != -1) & (labels != bg_label), as_tuple=False).squeeze(1) 35 | negative = torch.nonzero(labels == bg_label, as_tuple=False).squeeze(1) 36 | 37 | num_pos = int(num_samples * positive_fraction) 38 | # protect against not enough positive examples 39 | num_pos = min(positive.numel(), num_pos) 40 | num_neg = num_samples - num_pos 41 | # protect against not enough negative examples 42 | num_neg = min(negative.numel(), num_neg) 43 | 44 | # randomly select positive and negative examples 45 | perm1 = torch.randperm(positive.numel(), device=positive.device)[:num_pos] 46 | perm2 = torch.randperm(negative.numel(), device=negative.device)[:num_neg] 47 | 48 | pos_idx = positive[perm1] 49 | neg_idx = negative[perm2] 50 | return pos_idx, neg_idx 51 | -------------------------------------------------------------------------------- /cvpods/configs/pointrend_config.py: -------------------------------------------------------------------------------- 1 | from .rcnn_fpn_config import RCNNFPNConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | ROI_HEADS=dict( 6 | # NAME="PointRendROIHeads", 7 | IN_FEATURES=["p2", "p3", "p4", "p5"], 8 | ), 9 | ROI_BOX_HEAD=dict( 10 | TRAIN_ON_PRED_BOXES=True, 11 | ), 12 | ROI_MASK_HEAD=dict( 13 | # NAME="CoarseMaskHead", 14 | # Names of the input feature maps to be used by a coarse mask head. 15 | IN_FEATURES=["p2"], 16 | FC_DIM=1024, 17 | NUM_FC=2, 18 | # The side size of a coarse mask head prediction. 19 | OUTPUT_SIDE_RESOLUTION=7, 20 | # True if point head is used. 21 | POINT_HEAD_ON=True, 22 | ), 23 | POINT_HEAD=dict( 24 | # Names of the input feature maps to be used by a mask point head. 25 | IN_FEATURES=["p2"], 26 | NUM_CLASSES=80, 27 | FC_DIM=256, 28 | NUM_FC=3, 29 | # Number of points sampled during training for a mask point head. 30 | TRAIN_NUM_POINTS=14 * 14, 31 | # Oversampling parameter for PointRend point sampling during training. 32 | # Parameter `k` in the original paper. 33 | OVERSAMPLE_RATIO=3, 34 | # Importance sampling parameter for PointRend point sampling during training. 35 | # Parameter `beta` in the original paper. 36 | IMPORTANCE_SAMPLE_RATIO=0.75, 37 | # Number of subdivision steps during inference. 38 | SUBDIVISION_STEPS=5, 39 | # Maximum number of points selected at each subdivision step (N). 40 | SUBDIVISION_NUM_POINTS=28 * 28, 41 | CLS_AGNOSTIC_MASK=False, 42 | # If True, then coarse prediction features are used as input for each layer 43 | # in PointRend's MLP.
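            # Illustrative arithmetic for the sampling knobs above (scheme as
            # assumed from the PointRend paper): with TRAIN_NUM_POINTS N = 196,
            # OVERSAMPLE_RATIO k = 3 and IMPORTANCE_SAMPLE_RATIO beta = 0.75,
            # k*N = 588 candidate points are sampled, the beta*N = 147 most
            # uncertain are kept, and the remaining (1 - beta)*N = 49 are drawn
            # uniformly at random.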
44 | COARSE_PRED_EACH_LAYER=True, 45 | # COARSE_SEM_SEG_HEAD_NAME="SemSegFPNHead" 46 | ), 47 | ), 48 | INPUT=dict( 49 | # PointRend for instance segmentation does not work with "polygon" mask_format 50 | MASK_FORMAT="bitmask", 51 | ), 52 | DATALOADER=dict(FILTER_EMPTY_ANNOTATIONS=False,), 53 | ) 54 | 55 | 56 | class PointRendRCNNFPNConfig(RCNNFPNConfig): 57 | def __init__(self): 58 | super(PointRendRCNNFPNConfig, self).__init__() 59 | self._register_configuration(_config_dict) 60 | 61 | 62 | config = PointRendRCNNFPNConfig() 63 | -------------------------------------------------------------------------------- /cvpods/configs/solo_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | MASK_ON=True, 6 | PIXEL_MEAN=[123.675, 116.28, 103.53], # RGB FORMAT 7 | PIXEL_STD=[1.0, 1.0, 1.0], 8 | RESNETS=dict( 9 | DEPTH=50, 10 | OUT_FEATURES=["res2", "res3", "res4", "res5"], 11 | ), 12 | FPN=dict( 13 | IN_FEATURES=["res2", "res3", "res4", "res5"], 14 | OUT_CHANNELS=256, 15 | ), 16 | SOLO=dict( 17 | NUM_CLASSES=80, 18 | IN_FEATURES=["p2", "p3", "p4", "p5", "p6"], 19 | NUM_GRIDS=[40, 36, 24, 16, 12], # per level 20 | SCALE_RANGES=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)), 21 | FEATURE_STRIDES=[8, 8, 16, 32, 32], 22 | # Given a gt: (cx, cy, w, h), the center region is controlled by 23 | # constant scale factors sigma: (cx, cy, sigma*w, sigma*h) 24 | SIGMA=0.2, 25 | HEAD=dict( 26 | TYPE="SOLOHead", # "SOLOHead", "DecoupledSOLOHead" 27 | SEG_FEAT_CHANNELS=256, 28 | STACKED_CONVS=7, 29 | PRIOR_PROB=0.01, 30 | NORM="GN", 31 | # The following two items are useful in the "DecoupledSOLOLightHead" 32 | USE_DCN_IN_TOWER=False, 33 | DCN_TYPE=None, 34 | ), 35 | # Loss parameters: 36 | LOSS_INS=dict( 37 | TYPE='DiceLoss', 38 | LOSS_WEIGHT=3.0 39 | ), 40 | LOSS_CAT=dict( 41 | TYPE='FocalLoss', 42 | GAMMA=2.0, 43 | ALPHA=0.25, 44 | LOSS_WEIGHT=1.0, 45 | ), 46 | # Inference parameters: 47 | SCORE_THRESH_TEST=0.1, 48 | MASK_THRESH_TEST=0.5, 49 | # NMS parameters: 50 | NMS_PER_IMAGE=500, 51 | NMS_KERNEL='gaussian', # gaussian/linear 52 | NMS_SIGMA=2.0, 53 | UPDATE_THRESH=0.05, 54 | DETECTIONS_PER_IMAGE=100, 55 | ), 56 | ), 57 | INPUT=dict( 58 | # SOLO for instance segmentation does not work with "polygon" mask_format 59 | MASK_FORMAT="bitmask", 60 | ) 61 | ) 62 | 63 | 64 | class SOLOConfig(BaseDetectionConfig): 65 | def __init__(self): 66 | super(SOLOConfig, self).__init__() 67 | self._register_configuration(_config_dict) 68 | 69 | 70 | config = SOLOConfig() 71 | -------------------------------------------------------------------------------- /cvpods/utils/dump/history_buffer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from typing import List, Tuple 5 | 6 | import numpy as np 7 | 8 | 9 | class HistoryBuffer: 10 | """ 11 | Track a series of scalar values and provide access to smoothed values over a 12 | window or the global average of the series. 13 | """ 14 | 15 | def __init__(self, max_length: int = 1000000): 16 | """ 17 | Args: 18 | max_length: maximal number of values that can be stored in the 19 | buffer. When the capacity of the buffer is exhausted, old 20 | values will be removed.
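        Example (illustrative)::

            >>> buf = HistoryBuffer(max_length=3)
            >>> for v in [1.0, 2.0, 3.0, 4.0]:
            ...     buf.update(v)          # 1.0 is evicted once 4.0 arrives
            >>> buf.latest(), buf.avg(2), buf.global_avg()
            (4.0, 3.5, 2.5)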
21 | """ 22 | self._max_length: int = max_length 23 | self._data: List[Tuple[float, float]] = [] # (value, iteration) pairs 24 | self._count: int = 0 25 | self._global_avg: float = 0 26 | 27 | def update(self, value: float, iteration: float = None): 28 | """ 29 | Add a new scalar value produced at a certain iteration. If the length 30 | of the buffer exceeds self._max_length, the oldest element will be 31 | removed from the buffer. 32 | """ 33 | if iteration is None: 34 | iteration = self._count 35 | if len(self._data) == self._max_length: 36 | self._data.pop(0) 37 | self._data.append((value, iteration)) 38 | 39 | self._count += 1 40 | self._global_avg += (value - self._global_avg) / self._count 41 | 42 | def latest(self): 43 | """ 44 | Return the latest scalar value added to the buffer. 45 | """ 46 | return self._data[-1][0] 47 | 48 | def median(self, window_size: int): 49 | """ 50 | Return the median of the latest `window_size` values in the buffer. 51 | """ 52 | return np.median([x[0] for x in self._data[-window_size:]]) 53 | 54 | def avg(self, window_size: int): 55 | """ 56 | Return the mean of the latest `window_size` values in the buffer. 57 | """ 58 | return np.mean([x[0] for x in self._data[-window_size:]]) 59 | 60 | def global_avg(self): 61 | """ 62 | Return the mean of all the elements in the buffer. Note that this 63 | includes those getting removed due to limited buffer storage. 64 | """ 65 | return self._global_avg 66 | 67 | def values(self): 68 | """ 69 | Returns: 70 | list[(number, iteration)]: content of the current buffer. 71 | """ 72 | return self._data 73 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/nms_rotated/nms_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | #include "../box_iou_rotated/box_iou_rotated_utils.h" 3 | #include "nms_rotated.h" 4 | 5 | namespace cvpods { 6 | 7 | template <typename scalar_t> 8 | at::Tensor nms_rotated_cpu_kernel( 9 | const at::Tensor& dets, 10 | const at::Tensor& scores, 11 | const float iou_threshold) { 12 | // nms_rotated_cpu_kernel is modified from torchvision's nms_cpu_kernel, 13 | // however, the code in this function is much shorter because 14 | // we delegate the IoU computation for rotated boxes to 15 | // the single_box_iou_rotated function in box_iou_rotated_utils.h 16 | AT_ASSERTM(!dets.type().is_cuda(), "dets must be a CPU tensor"); 17 | AT_ASSERTM(!scores.type().is_cuda(), "scores must be a CPU tensor"); 18 | AT_ASSERTM( 19 | dets.type() == scores.type(), "dets should have the same type as scores"); 20 | 21 | if (dets.numel() == 0) { 22 | return at::empty({0}, dets.options().dtype(at::kLong)); 23 | } 24 | 25 | auto order_t = std::get<1>(scores.sort(0, /* descending=*/true)); 26 | 27 | auto ndets = dets.size(0); 28 | at::Tensor suppressed_t = at::zeros({ndets}, dets.options().dtype(at::kByte)); 29 | at::Tensor keep_t = at::zeros({ndets}, dets.options().dtype(at::kLong)); 30 | 31 | auto suppressed = suppressed_t.data_ptr<uint8_t>(); 32 | auto keep = keep_t.data_ptr<int64_t>(); 33 | auto order = order_t.data_ptr<int64_t>(); 34 | 35 | int64_t num_to_keep = 0; 36 | 37 | for (int64_t _i = 0; _i < ndets; _i++) { 38 | auto i = order[_i]; 39 | if (suppressed[i] == 1) { 40 | continue; 41 | } 42 | 43 | keep[num_to_keep++] = i; 44 | 45 | for (int64_t _j = _i + 1; _j < ndets; _j++) { 46 | auto j = order[_j]; 47 | if (suppressed[j] == 1) { 48 | continue; 49 | } 50 | 51 | auto ovr = single_box_iou_rotated<scalar_t>( 52 | dets[i].data_ptr<scalar_t>(), dets[j].data_ptr<scalar_t>()); 53 | if (ovr >= iou_threshold) { 54 | suppressed[j] = 1; 55 | } 56 | } 57 | } 58 | return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); 59 | } 60 | 61 | at::Tensor nms_rotated_cpu( 62 | const at::Tensor& dets, 63 | const at::Tensor& scores, 64 | const float iou_threshold) { 65 | auto result = at::empty({0}, dets.options()); 66 | 67 | AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_rotated", [&] { 68 | result = nms_rotated_cpu_kernel<scalar_t>(dets, scores, iou_threshold); 69 | }); 70 | return result; 71 | } 72 | 73 | } // namespace cvpods 74 | -------------------------------------------------------------------------------- /cvpods/utils/visualizer/show.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : show.py 5 | @Time : 2020/05/07 23:58:35 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:58:35 10 | ''' 11 | 12 | import copy 13 | import pylab as plt 14 | 15 | import numpy as np 16 | 17 | 18 | def draw_box(ax, vertices, color='black'): 19 | """ 20 | Draw box with color. 21 | 22 | Args: 23 | ax (matplotlib.axes.Axes): axes to draw the box on 24 | vertices (ndarray): vertex coordinates of shape (N x 2) 25 | color (str): plotted color 26 | """ 27 | connections = [ 28 | [0, 1], 29 | [1, 2], 30 | [2, 3], 31 | [3, 0], 32 | ] 33 | for connection in connections: 34 | ax.plot(*vertices[:, connection], c=color, lw=5) 35 | 36 | 37 | def visualize_feature_maps( 38 | fm, 39 | boxes=[], 40 | keypoints=[], 41 | stride=1, 42 | save_filename=None 43 | ): 44 | """ 45 | Visualize feature map with boxes or key points.
46 | 47 | Args: 48 | fm (torch.Tensor): feature map of shape H x W x c, where c is the number of channels 49 | boxes (ndarray): boxes to be visualized. 50 | keypoints (ndarray): key points to be visualized 51 | stride (int): used to normalize boxes or keypoints 52 | save_filename (str, optional): if set, path to save the figure to; otherwise the figure is shown 53 | """ 54 | nc = np.ceil(np.sqrt(fm.shape[2])) # column 55 | nr = np.ceil(fm.shape[2] / nc) # row 56 | nc = int(nc) 57 | nr = int(nr) 58 | plt.figure(figsize=(64, 64)) 59 | for i in range(fm.shape[2]): 60 | ax = plt.subplot(nr, nc, i + 1) 61 | ax.imshow(fm[:, :, i], cmap='jet') 62 | 63 | for obj in boxes: 64 | box = copy.deepcopy(obj) / stride 65 | draw_box(ax, box, color='g') 66 | 67 | for pts_score in keypoints: 68 | pts = pts_score[:8] 69 | pts = pts / stride 70 | for i in range(4): 71 | ax.plot(pts[2 * i + 1], pts[2 * i + 0], 'r*') 72 | ax.plot([pts[1], pts[3]], [pts[0], pts[2]], c='y', lw=5) 73 | ax.plot([pts[3], pts[5]], [pts[2], pts[4]], c='g', lw=5) 74 | ax.plot([pts[5], pts[7]], [pts[4], pts[6]], c='b', lw=5) 75 | ax.plot([pts[7], pts[1]], [pts[6], pts[0]], c='r', lw=5) 76 | 77 | # plt.colorbar() 78 | ax.axis('off') 79 | if save_filename: 80 | plt.savefig(save_filename) 81 | else: 82 | plt.show() 83 | plt.close() 84 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/flop_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | 3 | import logging 4 | import typing 5 | from collections import defaultdict 6 | 7 | import torch.nn as nn 8 | 9 | from .jit_handles import ( 10 | addmm_flop_jit, 11 | conv_flop_jit, 12 | einsum_flop_jit, 13 | get_jit_model_analysis, 14 | matmul_flop_jit 15 | ) 16 | 17 | # A dictionary that maps supported operations to their flop count jit handles. 18 | _SUPPORTED_OPS: typing.Dict[str, typing.Callable] = { 19 | "aten::addmm": addmm_flop_jit, 20 | "aten::_convolution": conv_flop_jit, 21 | "aten::einsum": einsum_flop_jit, 22 | "aten::matmul": matmul_flop_jit, 23 | } 24 | 25 | 26 | def flop_count( 27 | model: nn.Module, 28 | inputs: typing.Tuple[object, ...], 29 | supported_ops: typing.Union[typing.Dict[str, typing.Callable], None] = None, 30 | ) -> typing.Tuple[typing.DefaultDict[str, float], typing.Counter[str]]: 31 | """ 32 | Given a model and an input to the model, compute the Gflops of the given 33 | model. Note the input should have a batch size of 1. 34 | 35 | Args: 36 | model (nn.Module): The model to compute flop counts. 37 | inputs (tuple): Inputs that are passed to `model` to count flops. 38 | Inputs need to be in a tuple. 39 | supported_ops (dict(str,Callable) or None) : By default, we count flops 40 | for convolution layers, fully connected layers, torch.matmul and 41 | torch.einsum operations. We define a FLOP as a single atomic 42 | Multiply-Add. Users can provide customized supported_ops for 43 | counting flops if desired. 44 | 45 | Returns: 46 | tuple[defaultdict, Counter]: A dictionary that records the number of 47 | gflops for each operation and a Counter that records the number of 48 | skipped operations. 49 | """ 50 | assert isinstance(inputs, tuple), "Inputs need to be in a tuple." 51 | if not supported_ops: 52 | supported_ops = _SUPPORTED_OPS.copy() 53 | 54 | # Run flop count. 55 | total_flop_counter, skipped_ops = get_jit_model_analysis( 56 | model, inputs, supported_ops 57 | ) 58 | 59 | # Log for skipped operations.
60 | if len(skipped_ops) > 0: 61 | for op, freq in skipped_ops.items(): 62 | logging.warning("Skipped operation {} {} time(s)".format(op, freq)) 63 | 64 | # Convert flop count to gigaflops. 65 | final_count = defaultdict(float) 66 | for op in total_flop_counter: 67 | final_count[op] = total_flop_counter[op] / 1e9 68 | 69 | return final_count, skipped_ops 70 | -------------------------------------------------------------------------------- /tools/caffe2_converter.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import argparse 3 | import os 4 | 5 | from cvpods.checkpoint import DetectionCheckpointer 6 | from cvpods.config import get_cfg 7 | from cvpods.data import build_detection_test_loader 8 | from cvpods.evaluation import COCOEvaluator, inference_on_dataset, print_csv_format 9 | from cvpods.export import add_export_config, export_caffe2_model 10 | from cvpods.modeling import build_model 11 | from cvpods.utils import setup_logger 12 | 13 | 14 | def setup_cfg(args): 15 | cfg = get_cfg() 16 | # cuda context is initialized before creating dataloader, so we don't fork anymore 17 | cfg.DATALOADER.NUM_WORKERS = 0 18 | cfg = add_export_config(cfg) 19 | cfg.merge_from_file(args.config_file) 20 | cfg.merge_from_list(args.opts) 21 | cfg.freeze() 22 | return cfg 23 | 24 | 25 | if __name__ == "__main__": 26 | parser = argparse.ArgumentParser(description="Convert a model to Caffe2") 27 | parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file") 28 | parser.add_argument("--run-eval", action="store_true") 29 | parser.add_argument("--output", help="output directory for the converted caffe2 model") 30 | parser.add_argument( 31 | "opts", 32 | help="Modify config options using the command-line", 33 | default=None, 34 | nargs=argparse.REMAINDER, 35 | ) 36 | args = parser.parse_args() 37 | logger = setup_logger() 38 | logger.info("Command line arguments: " + str(args)) 39 | 40 | cfg = setup_cfg(args) 41 | 42 | # create a torch model 43 | torch_model = build_model(cfg) 44 | DetectionCheckpointer(torch_model).resume_or_load(cfg.MODEL.WEIGHTS) 45 | 46 | # get a sample data 47 | data_loader = build_detection_test_loader(cfg, cfg.DATASETS.TEST[0]) 48 | first_batch = next(iter(data_loader)) 49 | 50 | # convert and save caffe2 model 51 | caffe2_model = export_caffe2_model(cfg, torch_model, first_batch) 52 | caffe2_model.save_protobuf(args.output) 53 | # draw the caffe2 graph 54 | caffe2_model.save_graph(os.path.join(args.output, "model.svg"), inputs=first_batch) 55 | 56 | # run evaluation with the converted model 57 | if args.run_eval: 58 | dataset = cfg.DATASETS.TEST[0] 59 | data_loader = build_detection_test_loader(cfg, dataset) 60 | # NOTE: hard-coded evaluator. 
change to the evaluator for your dataset 61 | evaluator = COCOEvaluator(dataset, cfg, True, args.output) 62 | metrics = inference_on_dataset(caffe2_model, data_loader, evaluator) 63 | print_csv_format(metrics) 64 | -------------------------------------------------------------------------------- /cvpods/modeling/meta_arch/imagenet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | from cvpods.layers import ShapeSpec 5 | from cvpods.structures import ImageList 6 | 7 | 8 | def accuracy(output, target, topk=(1,)): 9 | """Computes the accuracy over the k top predictions for the specified values of k""" 10 | with torch.no_grad(): 11 | maxk = max(topk) 12 | batch_size = target.size(0) 13 | 14 | _, pred = output.topk(maxk, 1, True, True) 15 | pred = pred.t() 16 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 17 | 18 | res = [] 19 | for k in topk: 20 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 21 | res.append(correct_k.mul_(100.0 / batch_size)) 22 | return res 23 | 24 | 25 | class Classification(nn.Module): 26 | """ 27 | ImageNet classification module. 28 | Weights of this model can be used as pretrained weights of any models in cvpods. 29 | """ 30 | def __init__(self, cfg): 31 | super(Classification, self).__init__() 32 | 33 | self.device = torch.device(cfg.MODEL.DEVICE) 34 | 35 | self.network = cfg.build_backbone( 36 | cfg, input_shape=ShapeSpec(channels=len(cfg.MODEL.PIXEL_MEAN))) 37 | 38 | self.loss_evaluator = nn.CrossEntropyLoss() 39 | 40 | pixel_mean = torch.Tensor(cfg.MODEL.PIXEL_MEAN).to(self.device).view( 41 | 3, 1, 1) 42 | pixel_std = torch.Tensor(cfg.MODEL.PIXEL_STD).to(self.device).view( 43 | 3, 1, 1) 44 | self.normalizer = lambda x: (x - pixel_mean) / pixel_std 45 | 46 | self.to(self.device) 47 | 48 | def forward(self, batched_inputs): 49 | images = self.preprocess_image(batched_inputs) 50 | 51 | preds = self.network(images.tensor)["linear"] 52 | 53 | if self.training: 54 | labels = torch.tensor([gi["category_id"] for gi in batched_inputs]).cuda() 55 | losses = self.loss_evaluator(preds, labels) 56 | acc1, acc5 = accuracy(preds, labels, topk=(1, 5)) 57 | 58 | return { 59 | "loss_cls": losses, 60 | "Acc@1": acc1, 61 | "Acc@5": acc5, 62 | } 63 | else: 64 | return preds 65 | 66 | def preprocess_image(self, batched_inputs): 67 | """ 68 | Normalize, pad and batch the input images. 
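        Example (illustrative; assumes a constructed `model` of this class)::

            >>> batched_inputs = [{"image": torch.rand(3, 224, 224) * 255}]
            >>> images = model.preprocess_image(batched_inputs)
            >>> # `images` is an ImageList, padded to the backbone's size_divisibility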
69 | """ 70 | images = [x["image"].float().to(self.device) for x in batched_inputs] 71 | images = [self.normalizer(x.div(255)) for x in images] 72 | images = ImageList.from_tensors(images, self.network.size_divisibility) 73 | return images 74 | -------------------------------------------------------------------------------- /cvpods/configs/retinanet_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | ''' 4 | @File : retinanet_config.py 5 | @Time : 2020/05/07 23:56:02 6 | @Author : Benjin Zhu 7 | @Contact : poodarchu@gmail.com 8 | @Last Modified by : Benjin Zhu 9 | @Last Modified time : 2020/05/07 23:56:02 10 | ''' 11 | 12 | from .base_detection_config import BaseDetectionConfig 13 | 14 | _config_dict = dict( 15 | MODEL=dict( 16 | # Backbone NAME: "build_retinanet_resnet_fpn_backbone" 17 | RESNETS=dict(OUT_FEATURES=["res3", "res4", "res5"]), 18 | FPN=dict(IN_FEATURES=["res3", "res4", "res5"]), 19 | ANCHOR_GENERATOR=dict( 20 | SIZES=[ 21 | [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] 22 | for x in [32, 64, 128, 256, 512] 23 | ] 24 | ), 25 | RETINANET=dict( 26 | # This is the number of foreground classes. 27 | NUM_CLASSES=80, 28 | IN_FEATURES=["p3", "p4", "p5", "p6", "p7"], 29 | # Convolutions to use in the cls and bbox tower 30 | # NOTE: this doesn't include the last conv for logits 31 | NUM_CONVS=4, 32 | # IoU overlap ratio [bg, fg] for labeling anchors. 33 | # Anchors with < bg are labeled negative (0) 34 | # Anchors with >= bg and < fg are ignored (-1) 35 | # Anchors with >= fg are labeled positive (1) 36 | IOU_THRESHOLDS=[0.4, 0.5], 37 | IOU_LABELS=[0, -1, 1], 38 | # Prior prob for rare case (i.e. foreground) at the beginning of training. 39 | # This is used to set the bias for the logits layer of the classifier subnet. 40 | # This improves training stability in the case of heavy class imbalance. 41 | PRIOR_PROB=0.01, 42 | # Inference cls score threshold, only anchors with score > INFERENCE_TH are 43 | # considered for inference (to improve speed) 44 | SCORE_THRESH_TEST=0.05, 45 | TOPK_CANDIDATES_TEST=1000, 46 | NMS_THRESH_TEST=0.5, 47 | # Weights on (dx, dy, dw, dh) for normalizing Retinanet anchor regression targets 48 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 49 | # Loss parameters 50 | FOCAL_LOSS_GAMMA=2.0, 51 | FOCAL_LOSS_ALPHA=0.25, 52 | SMOOTH_L1_LOSS_BETA=0.1, 53 | ), 54 | ), 55 | ) 56 | 57 | 58 | class RetinaNetConfig(BaseDetectionConfig): 59 | def __init__(self): 60 | super(RetinaNetConfig, self).__init__() 61 | self._register_configuration(_config_dict) 62 | 63 | 64 | config = RetinaNetConfig() 65 | -------------------------------------------------------------------------------- /cvpods/evaluation/testing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | import pprint 4 | import sys 5 | from collections import OrderedDict 6 | from collections.abc import Mapping 7 | import numpy as np 8 | 9 | 10 | def print_csv_format(results): 11 | """ 12 | Print main metrics in a format similar to Detectron, 13 | so that they are easy to copypaste into a spreadsheet.
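    Example of the emitted log lines (values are made up)::

        copypaste: Task: bbox
        copypaste: AP,AP50,AP75,APs,APm,APl
        copypaste: 39.2000,58.1000,42.6000,22.4000,42.9000,51.3000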
14 | 15 | Args: 16 | results (OrderedDict[dict]): task_name -> {metric -> score} 17 | """ 18 | assert isinstance(results, OrderedDict), results # unordered results cannot be properly printed 19 | logger = logging.getLogger(__name__) 20 | for task, res in results.items(): 21 | # Don't print "AP-category" metrics since they are usually not tracked. 22 | important_res = [(k, v) for k, v in res.items() if "-" not in k] 23 | logger.info("copypaste: Task: {}".format(task)) 24 | logger.info("copypaste: " + ",".join([k[0] for k in important_res])) 25 | logger.info("copypaste: " + ",".join(["{0:.4f}".format(k[1]) for k in important_res])) 26 | 27 | 28 | def verify_results(cfg, results): 29 | """ 30 | Args: 31 | results (OrderedDict[dict]): task_name -> {metric -> score} 32 | 33 | Returns: 34 | bool: whether the verification succeeds or not 35 | """ 36 | expected_results = cfg.TEST.EXPECTED_RESULTS 37 | if not len(expected_results): 38 | return True 39 | 40 | ok = True 41 | for task, metric, expected, tolerance in expected_results: 42 | actual = results[task][metric] 43 | if not np.isfinite(actual): 44 | ok = False 45 | diff = abs(actual - expected) 46 | if diff > tolerance: 47 | ok = False 48 | 49 | logger = logging.getLogger(__name__) 50 | if not ok: 51 | logger.error("Result verification failed!") 52 | logger.error("Expected Results: " + str(expected_results)) 53 | logger.error("Actual Results: " + pprint.pformat(results)) 54 | 55 | sys.exit(1) 56 | else: 57 | logger.info("Results verification passed.") 58 | return ok 59 | 60 | 61 | def flatten_results_dict(results): 62 | """ 63 | Expand a hierarchical dict of scalars into a flat dict of scalars. 64 | If results[k1][k2][k3] = v, the returned dict will have the entry 65 | {"k1/k2/k3": v}. 66 | 67 | Args: 68 | results (dict): 69 | """ 70 | r = {} 71 | for k, v in results.items(): 72 | if isinstance(v, Mapping): 73 | v = flatten_results_dict(v) 74 | for kk, vv in v.items(): 75 | r[k + "/" + kk] = vv 76 | else: 77 | r[k] = v 78 | return r 79 | -------------------------------------------------------------------------------- /cvpods/utils/registry.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | 3 | from typing import Dict, Optional 4 | from tabulate import tabulate 5 | 6 | 7 | class Registry(object): 8 | """ 9 | The registry that provides name -> object mapping, to support third-party 10 | users' custom modules. 11 | To create a registry (e.g. a backbone registry): 12 | .. code-block:: python 13 | BACKBONE_REGISTRY = Registry('BACKBONE') 14 | To register an object: 15 | .. code-block:: python 16 | @BACKBONE_REGISTRY.register() 17 | class MyBackbone(): 18 | ... 19 | Or: 20 | .. code-block:: python 21 | BACKBONE_REGISTRY.register(MyBackbone) 22 | """ 23 | 24 | def __init__(self, name: str) -> None: 25 | """ 26 | Args: 27 | name (str): the name of this registry 28 | """ 29 | self._name: str = name 30 | self._obj_map: Dict[str, object] = {} 31 | 32 | def _do_register(self, name: str, obj: object) -> None: 33 | assert ( 34 | name not in self._obj_map 35 | ), "An object named '{}' was already registered in '{}' registry!".format( 36 | name, self._name 37 | ) 38 | self._obj_map[name] = obj 39 | 40 | def register(self, obj: object = None, name: str = None) -> Optional[object]: 41 | """ 42 | Register the given object under the name `obj.__name__` (or the given `name`). 43 | Can be used as either a decorator or not. See docstring of this class for usage.
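        Example (illustrative)::

            >>> @BACKBONE_REGISTRY.register(name="my_backbone")
            ... class MyBackbone:
            ...     pass
            >>> BACKBONE_REGISTRY.get("my_backbone") is MyBackbone
            True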
44 | """ 45 | if obj is None: 46 | # used as a decorator 47 | def deco(func_or_class: object) -> object: 48 | nonlocal name 49 | if name is None: 50 | name = func_or_class.__name__ # pyre-ignore 51 | self._do_register(name, func_or_class) 52 | return func_or_class 53 | 54 | return deco 55 | 56 | # used as a function call 57 | if name is None: 58 | name = obj.__name__ # pyre-ignore 59 | self._do_register(name, obj) 60 | 61 | def get(self, name: str) -> object: 62 | ret = self._obj_map.get(name) 63 | if ret is None: 64 | raise KeyError( 65 | "No object named '{}' found in '{}' registry!".format(name, self._name) 66 | ) 67 | return ret 68 | 69 | def __contains__(self, name: str) -> bool: 70 | return name in self._obj_map 71 | 72 | def __repr__(self) -> str: 73 | table_headers = ["Names", "Objects"] 74 | table = tabulate(self._obj_map.items(), headers=table_headers, tablefmt="fancy_grid") 75 | return "Registry of {}:\n".format(self._name) + table 76 | -------------------------------------------------------------------------------- /cvpods/configs/efficientdet_config.py: -------------------------------------------------------------------------------- 1 | from .base_detection_config import BaseDetectionConfig 2 | 3 | _config_dict = dict( 4 | MODEL=dict( 5 | PIXEL_MEAN=[0.485, 0.456, 0.406], # mean value from ImageNet 6 | PIXEL_STD=[0.229, 0.224, 0.225], 7 | EFFICIENTNET=dict( 8 | MODEL_NAME="efficientnet-b0", # default setting for EfficientDet-D0 9 | NORM="BN", 10 | BN_MOMENTUM=1 - 0.99, 11 | BN_EPS=1e-3, 12 | DROP_CONNECT_RATE=1 - 0.8, # survival_prob = 0.8 13 | DEPTH_DIVISOR=8, 14 | MIN_DEPTH=None, 15 | NUM_CLASSES=None, 16 | FIX_HEAD_STEAM=False, 17 | MEMORY_EFFICIENT_SWISH=True, 18 | OUT_FEATURES=["stage4", "stage6", "stage8"], 19 | ), 20 | BIFPN=dict( 21 | IN_FEATURES=["stage4", "stage6", "stage8"], 22 | NORM="BN", 23 | BN_MOMENTUM=0.01, # 1 - 0.99 24 | BN_EPS=1e-3, 25 | MEMORY_EFFICIENT_SWISH=True, 26 | INPUT_SIZE=512, # default setting for EfficientDet-D0 27 | NUM_LAYERS=3, # default setting for EfficientDet-D0 28 | OUT_CHANNELS=60, # default setting for EfficientDet-D0 29 | FUSE_TYPE="fast", # select in ["softmax", "fast", "sum"] 30 | ), 31 | EFFICIENTDET=dict( 32 | IN_FEATURES=[f"p{i}" for i in range(3, 8)], # p3-p7 33 | NUM_CLASSES=80, 34 | FREEZE_BACKBONE=False, 35 | FREEZE_BN=False, 36 | HEAD=dict( 37 | NUM_CONV=3, # default setting for EfficientDet-D0 38 | NORM="BN", 39 | BN_MOMENTUM=1 - 0.99, 40 | BN_EPS=1e-3, 41 | PRIOR_PROB=0.01, 42 | MEMORY_EFFICIENT_SWISH=True, 43 | ), 44 | IOU_THRESHOLDS=[0.5, 0.5], 45 | IOU_LABELS=[0, -1, 1], 46 | SCORE_THRESH_TEST=0.05, 47 | TOPK_CANDIDATES_TEST=1000, 48 | NMS_THRESH_TEST=0.5, 49 | BBOX_REG_WEIGHTS=(1.0, 1.0, 1.0, 1.0), 50 | FOCAL_LOSS_GAMMA=1.5, 51 | FOCAL_LOSS_ALPHA=0.25, 52 | SMOOTH_L1_LOSS_BETA=0.1, 53 | REG_NORM=4.0, 54 | BOX_LOSS_WEIGHT=50.0, 55 | ), 56 | ANCHOR_GENERATOR=dict( 57 | SIZES=[ 58 | [x, x * 2 ** (1.0 / 3), x * 2 ** (2.0 / 3)] 59 | for x in [4 * 2**i for i in range(3, 8)] 60 | ] 61 | ), 62 | ), 63 | ) 64 | 65 | 66 | class EfficientDetConfig(BaseDetectionConfig): 67 | def __init__(self): 68 | super(EfficientDetConfig, self).__init__() 69 | self._register_configuration(_config_dict) 70 | 71 | 72 | config = EfficientDetConfig() 73 | -------------------------------------------------------------------------------- /cvpods/modeling/nn_utils/parameter_count.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | 3 | import typing 4 | from collections import defaultdict 5 | import tabulate 6 | 7 | from torch import nn 8 | 9 | 10 | def parameter_count(model: nn.Module) -> typing.DefaultDict[str, int]: 11 | """ 12 | Count parameters of a model and its submodules. 13 | 14 | Args: 15 | model: a torch module 16 | 17 | Returns: 18 | dict (str-> int): the key is either a parameter name or a module name. 19 | The value is the number of elements in the parameter, or in all 20 | parameters of the module. The key "" corresponds to the total 21 | number of parameters of the model. 22 | """ 23 | r = defaultdict(int) 24 | for name, prm in model.named_parameters(): 25 | size = prm.numel() 26 | name = name.split(".") 27 | for k in range(0, len(name) + 1): 28 | prefix = ".".join(name[:k]) 29 | r[prefix] += size 30 | return r 31 | 32 | 33 | def parameter_count_table(model: nn.Module, max_depth: int = 3) -> str: 34 | """ 35 | Format the parameter count of the model (and its submodules or parameters) 36 | in a nice table. 37 | 38 | Args: 39 | model: a torch module 40 | max_depth (int): maximum depth to recursively print submodules or 41 | parameters 42 | 43 | Returns: 44 | str: the table to be printed 45 | """ 46 | count: typing.DefaultDict[str, int] = parameter_count(model) 47 | param_shape: typing.Dict[str, typing.Tuple] = { 48 | k: tuple(v.shape) for k, v in model.named_parameters() 49 | } 50 | 51 | table: typing.List[typing.Tuple] = [] 52 | 53 | def format_size(x: int) -> str: 54 | if x > 1e5: 55 | return "{:.1f}M".format(x / 1e6) 56 | if x > 1e2: 57 | return "{:.1f}K".format(x / 1e3) 58 | return str(x) 59 | 60 | def fill(lvl: int, prefix: str) -> None: 61 | if lvl >= max_depth: 62 | return 63 | for name, v in count.items(): 64 | if name.count(".") == lvl and name.startswith(prefix): 65 | indent = " " * (lvl + 1) 66 | if name in param_shape: 67 | table.append((indent + name, indent + str(param_shape[name]))) 68 | else: 69 | table.append((indent + name, indent + format_size(v))) 70 | fill(lvl + 1, name + ".") 71 | 72 | table.append(("model", format_size(count.pop("")))) 73 | fill(0, "") 74 | 75 | old_ws = tabulate.PRESERVE_WHITESPACE 76 | tabulate.PRESERVE_WHITESPACE = True 77 | tab = tabulate.tabulate( 78 | table, headers=["name", "#elements or shape"], tablefmt="pipe" 79 | ) 80 | tabulate.PRESERVE_WHITESPACE = old_ws 81 | return tab 82 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/circle_loss.py: -------------------------------------------------------------------------------- 1 | # Authors: YiFan Sun, Changmao Cheng 2 | from typing import Tuple 3 | 4 | import torch 5 | from torch import nn 6 | from torch.nn.functional import cross_entropy 7 | 8 | 9 | class ClassificationCircleLoss(nn.Module): 10 | """Circle loss for class-level labels as described in the paper 11 | `"Circle Loss: A Unified Perspective of Pair Similarity Optimization" <https://arxiv.org/abs/2002.10857>`_ 12 | 13 | Args: 14 | scale (float): the scale factor. Default: 256.0 15 | margin (float): the relax margin value. Default: 0.25 16 | circle_center (tuple[float]): the center of the circle (logit_ap, logit_an). Default: (1, 0) 17 | reduction (string, optional): Specifies the reduction to apply to the output: 18 | ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, 19 | ``'mean'``: the sum of the output will be divided by the number of 20 | elements in the output, ``'sum'``: the output will be summed.
Default: ``'mean'`` 21 | """ 22 | 23 | def __init__( 24 | self, 25 | scale: float = 256.0, 26 | margin: float = 0.25, 27 | circle_center: Tuple[float, float] = (1, 0), 28 | reduction: str = "mean", 29 | ) -> None: 30 | super(ClassificationCircleLoss, self).__init__() 31 | self.scale = scale 32 | self.margin = margin 33 | self.circle_center = circle_center 34 | self.reduction = reduction 35 | 36 | def forward(self, logits: torch.Tensor, targets: torch.LongTensor) -> torch.Tensor: 37 | r""" 38 | 39 | Args: 40 | logits (torch.Tensor): The predicted logits before softmax, 41 | namely :math:`\cos \theta` in the above equation, with shape of :math:`(N, C)` 42 | targets (torch.LongTensor): The ground-truth label long vector, 43 | namely :math:`y` in the above equation, with shape of :math:`(N,)` 44 | 45 | Returns: 46 | torch.Tensor: loss 47 | the computed loss 48 | """ 49 | 50 | mask = torch.zeros(logits.shape, dtype=torch.bool, device=logits.device).scatter_( 51 | dim=1, index=targets.unsqueeze(1), value=1 52 | ) 53 | positive_weighting = torch.clamp( 54 | self.circle_center[0] + self.margin - logits.detach(), min=0) 55 | negative_weighting = torch.clamp( 56 | logits.detach() - self.circle_center[1] + self.margin, min=0) 57 | logits = torch.where( 58 | mask, 59 | self.scale * positive_weighting * (logits - (self.circle_center[0] - self.margin)), 60 | self.scale * negative_weighting * (logits - self.circle_center[1] - self.margin), 61 | ) 62 | loss = cross_entropy(input=logits, target=targets, reduction=self.reduction) 63 | 64 | return loss 65 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | AccessModifierOffset: -1 2 | AlignAfterOpenBracket: AlwaysBreak 3 | AlignConsecutiveAssignments: false 4 | AlignConsecutiveDeclarations: false 5 | AlignEscapedNewlinesLeft: true 6 | AlignOperands: false 7 | AlignTrailingComments: false 8 | AllowAllParametersOfDeclarationOnNextLine: false 9 | AllowShortBlocksOnASingleLine: false 10 | AllowShortCaseLabelsOnASingleLine: false 11 | AllowShortFunctionsOnASingleLine: Empty 12 | AllowShortIfStatementsOnASingleLine: false 13 | AllowShortLoopsOnASingleLine: false 14 | AlwaysBreakAfterReturnType: None 15 | AlwaysBreakBeforeMultilineStrings: true 16 | AlwaysBreakTemplateDeclarations: true 17 | BinPackArguments: false 18 | BinPackParameters: false 19 | BraceWrapping: 20 | AfterClass: false 21 | AfterControlStatement: false 22 | AfterEnum: false 23 | AfterFunction: false 24 | AfterNamespace: false 25 | AfterObjCDeclaration: false 26 | AfterStruct: false 27 | AfterUnion: false 28 | BeforeCatch: false 29 | BeforeElse: false 30 | IndentBraces: false 31 | BreakBeforeBinaryOperators: None 32 | BreakBeforeBraces: Attach 33 | BreakBeforeTernaryOperators: true 34 | BreakConstructorInitializersBeforeComma: false 35 | BreakAfterJavaFieldAnnotations: false 36 | BreakStringLiterals: false 37 | ColumnLimit: 80 38 | CommentPragmas: '^ IWYU pragma:' 39 | ConstructorInitializerAllOnOneLineOrOnePerLine: true 40 | ConstructorInitializerIndentWidth: 4 41 | ContinuationIndentWidth: 4 42 | Cpp11BracedListStyle: true 43 | DerivePointerAlignment: false 44 | DisableFormat: false 45 | ForEachMacros: [ FOR_EACH, FOR_EACH_ENUMERATE, FOR_EACH_KV, FOR_EACH_R, FOR_EACH_RANGE, ] 46 | IncludeCategories: 47 | - Regex: '^<.*\.h(pp)?>' 48 | Priority: 1 49 | - Regex: '^<.*' 50 | Priority: 2 51 | - Regex: '.*' 52 | Priority: 3 53 | IndentCaseLabels: true 54 | IndentWidth: 2 
55 | IndentWrappedFunctionNames: false 56 | KeepEmptyLinesAtTheStartOfBlocks: false 57 | MacroBlockBegin: '' 58 | MacroBlockEnd: '' 59 | MaxEmptyLinesToKeep: 1 60 | NamespaceIndentation: None 61 | ObjCBlockIndentWidth: 2 62 | ObjCSpaceAfterProperty: false 63 | ObjCSpaceBeforeProtocolList: false 64 | PenaltyBreakBeforeFirstCallParameter: 1 65 | PenaltyBreakComment: 300 66 | PenaltyBreakFirstLessLess: 120 67 | PenaltyBreakString: 1000 68 | PenaltyExcessCharacter: 1000000 69 | PenaltyReturnTypeOnItsOwnLine: 200 70 | PointerAlignment: Left 71 | ReflowComments: true 72 | SortIncludes: true 73 | SpaceAfterCStyleCast: false 74 | SpaceBeforeAssignmentOperators: true 75 | SpaceBeforeParens: ControlStatements 76 | SpaceInEmptyParentheses: false 77 | SpacesBeforeTrailingComments: 1 78 | SpacesInAngles: false 79 | SpacesInContainerLiterals: true 80 | SpacesInCStyleCastParentheses: false 81 | SpacesInParentheses: false 82 | SpacesInSquareBrackets: false 83 | Standard: Cpp11 84 | TabWidth: 8 85 | UseTab: Never 86 | -------------------------------------------------------------------------------- /cvpods/modeling/proposal_generator/rrpn.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | import logging 3 | from typing import Dict 4 | 5 | import torch 6 | 7 | from cvpods.layers import ShapeSpec 8 | 9 | from ..box_regression import Box2BoxTransformRotated 10 | from .build import PROPOSAL_GENERATOR_REGISTRY 11 | from .rpn import RPN 12 | from .rrpn_outputs import RRPNOutputs, find_top_rrpn_proposals 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | 17 | @PROPOSAL_GENERATOR_REGISTRY.register() 18 | class RRPN(RPN): 19 | """ 20 | Rotated RPN subnetwork. 21 | Please refer to https://arxiv.org/pdf/1703.01086.pdf for the original RRPN paper: 22 | Ma, J., Shao, W., Ye, H., Wang, L., Wang, H., Zheng, Y., & Xue, X. (2018). 23 | Arbitrary-oriented scene text detection via rotation proposals. 24 | IEEE Transactions on Multimedia, 20(11), 3111-3122. 25 | """ 26 | 27 | def __init__(self, cfg, input_shape: Dict[str, ShapeSpec]): 28 | super().__init__(cfg, input_shape) 29 | self.box2box_transform = Box2BoxTransformRotated(weights=cfg.MODEL.RPN.BBOX_REG_WEIGHTS) 30 | 31 | def forward(self, images, features, gt_instances=None): 32 | # same signature as RPN.forward 33 | gt_boxes = [x.gt_boxes for x in gt_instances] if gt_instances is not None else None 34 | del gt_instances 35 | features = [features[f] for f in self.in_features] 36 | pred_objectness_logits, pred_anchor_deltas = self.rpn_head(features) 37 | anchors = self.anchor_generator(features) 38 | 39 | outputs = RRPNOutputs( 40 | self.box2box_transform, 41 | self.anchor_matcher, 42 | self.batch_size_per_image, 43 | self.positive_fraction, 44 | images, 45 | pred_objectness_logits, 46 | pred_anchor_deltas, 47 | anchors, 48 | self.boundary_threshold, 49 | gt_boxes, 50 | self.smooth_l1_beta, 51 | ) 52 | 53 | if self.training: 54 | losses = outputs.losses() 55 | else: 56 | losses = {} 57 | 58 | with torch.no_grad(): 59 | # Find the top proposals by applying NMS and removing boxes that 60 | # are too small. The proposals are treated as fixed for approximate 61 | # joint training with roi heads. This approach ignores the derivative 62 | # w.r.t. the proposal boxes’ coordinates that are also network 63 | # responses, so is approximate. 
64 | proposals = find_top_rrpn_proposals( 65 | outputs.predict_proposals(), 66 | outputs.predict_objectness_logits(), 67 | images, 68 | self.nms_thresh, 69 | self.pre_nms_topk[self.training], 70 | self.post_nms_topk[self.training], 71 | self.min_box_side_len, 72 | self.training, 73 | ) 74 | 75 | return proposals, losses 76 | -------------------------------------------------------------------------------- /cvpods/utils/memory.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import logging 5 | from contextlib import contextmanager 6 | from functools import wraps 7 | 8 | import torch 9 | 10 | __all__ = ["retry_if_cuda_oom"] 11 | 12 | 13 | @contextmanager 14 | def _ignore_torch_cuda_oom(): 15 | """ 16 | A context which ignores CUDA OOM exceptions from pytorch. 17 | """ 18 | try: 19 | yield 20 | except RuntimeError as e: 21 | # NOTE: the string may change? 22 | if "CUDA out of memory. " in str(e): 23 | pass 24 | else: 25 | raise 26 | 27 | 28 | def retry_if_cuda_oom(func): 29 | r""" 30 | Makes a function retry itself after encountering 31 | pytorch's CUDA OOM error. 32 | It will first retry after calling `torch.cuda.empty_cache()`. 33 | 34 | If that still fails, it will then retry by trying to convert inputs to CPUs. 35 | In this case, it expects the function to dispatch to a CPU implementation. 36 | The return values may become CPU tensors as well, and it is the user's 37 | responsibility to convert them back to CUDA tensors if needed. 38 | 39 | Args: 40 | func: a stateless callable that takes tensor-like objects as arguments 41 | 42 | Returns: 43 | a callable which retries `func` if OOM is encountered. 44 | 45 | Examples: 46 | 47 | .. code-block:: python 48 | 49 | output = retry_if_cuda_oom(some_torch_function)(input1, input2) 50 | # output may be on CPU even if inputs are on GPU 51 | 52 | Note: 53 | 1. When converting inputs to CPU, it will only look at each argument and check 54 | if it has `.device` and `.to` for conversion. Nested structures of tensors 55 | are not supported. 56 | 57 | 2. Since the function might be called more than once, it has to be 58 | stateless. 59 | """ 60 | 61 | def maybe_to_cpu(x): 62 | try: 63 | like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to") 64 | except AttributeError: 65 | like_gpu_tensor = False 66 | if like_gpu_tensor: 67 | return x.to(device="cpu") 68 | else: 69 | return x 70 | 71 | @wraps(func) 72 | def wrapped(*args, **kwargs): 73 | with _ignore_torch_cuda_oom(): 74 | return func(*args, **kwargs) 75 | 76 | # Clear cache and retry 77 | torch.cuda.empty_cache() 78 | with _ignore_torch_cuda_oom(): 79 | return func(*args, **kwargs) 80 | 81 | # Try on CPU. This slows down the code significantly, therefore print a notice. 82 | logger = logging.getLogger(__name__) 83 | logger.info("Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))) 84 | new_args = (maybe_to_cpu(x) for x in args) 85 | new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()} 86 | return func(*new_args, **new_kwargs) 87 | 88 | return wrapped 89 | -------------------------------------------------------------------------------- /cvpods/modeling/roi_heads/box_head.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 2 | import numpy as np 3 | 4 | import torch 5 | from torch import nn 6 | from torch.nn import functional as F 7 | 8 | from cvpods.layers import Conv2d, ShapeSpec, get_norm 9 | from cvpods.modeling.nn_utils import weight_init 10 | 11 | 12 | """ 13 | Registry for box heads, which make box predictions from per-region features. 14 | 15 | The registered object will be called with `obj(cfg, input_shape)`. 16 | """ 17 | 18 | 19 | class FastRCNNConvFCHead(nn.Module): 20 | """ 21 | A head with several 3x3 conv layers (each followed by norm & relu) and 22 | several fc layers (each followed by relu). 23 | """ 24 | 25 | def __init__(self, cfg, input_shape: ShapeSpec): 26 | """ 27 | The following attributes are parsed from config: 28 | num_conv, num_fc: the number of conv/fc layers 29 | conv_dim/fc_dim: the dimension of the conv/fc layers 30 | norm: normalization for the conv layers 31 | """ 32 | super().__init__() 33 | 34 | # fmt: off 35 | num_conv = cfg.MODEL.ROI_BOX_HEAD.NUM_CONV 36 | conv_dim = cfg.MODEL.ROI_BOX_HEAD.CONV_DIM 37 | num_fc = cfg.MODEL.ROI_BOX_HEAD.NUM_FC 38 | fc_dim = cfg.MODEL.ROI_BOX_HEAD.FC_DIM 39 | norm = cfg.MODEL.ROI_BOX_HEAD.NORM 40 | # fmt: on 41 | assert num_conv + num_fc > 0 42 | 43 | self._output_size = (input_shape.channels, input_shape.height, input_shape.width) 44 | 45 | self.conv_norm_relus = [] 46 | for k in range(num_conv): 47 | conv = Conv2d( 48 | self._output_size[0], 49 | conv_dim, 50 | kernel_size=3, 51 | padding=1, 52 | bias=not norm, 53 | norm=get_norm(norm, conv_dim), 54 | activation=F.relu, 55 | ) 56 | self.add_module("conv{}".format(k + 1), conv) 57 | self.conv_norm_relus.append(conv) 58 | self._output_size = (conv_dim, self._output_size[1], self._output_size[2]) 59 | 60 | self.fcs = [] 61 | for k in range(num_fc): 62 | fc = nn.Linear(np.prod(self._output_size), fc_dim) 63 | self.add_module("fc{}".format(k + 1), fc) 64 | self.fcs.append(fc) 65 | self._output_size = fc_dim 66 | 67 | for layer in self.conv_norm_relus: 68 | weight_init.c2_msra_fill(layer) 69 | for layer in self.fcs: 70 | weight_init.c2_xavier_fill(layer) 71 | 72 | def forward(self, x): 73 | for layer in self.conv_norm_relus: 74 | x = layer(x) 75 | if len(self.fcs): 76 | if x.dim() > 2: 77 | x = torch.flatten(x, start_dim=1) 78 | for layer in self.fcs: 79 | x = F.relu(layer(x)) 80 | return x 81 | 82 | @property 83 | def output_size(self): 84 | return self._output_size 85 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/vision_detectron.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #include "ROIAlign/ROIAlign.h" 3 | #include "ROIAlignRotated/ROIAlignRotated.h" 4 | #include "box_iou_rotated/box_iou_rotated.h" 5 | #include "deformable/deform_conv.h" 6 | #include "nms_rotated/nms_rotated.h" 7 | #include "tree_filter/refine.hpp" 8 | #include "tree_filter/mst.hpp" 9 | #include "tree_filter/rst.hpp" 10 | #include "tree_filter/bfs.hpp" 11 | 12 | namespace detectron2 { 13 | 14 | // similar to 15 | // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp 16 | std::string get_compiler_version() { 17 | std::ostringstream ss; 18 | #if defined(__GNUC__) 19 | #ifndef __clang__ 20 | { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } 21 | #endif 22 | #endif 23 | 24 | #if defined(__clang_major__) 25 | { 26 | ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." 
27 | << __clang_patchlevel__; 28 | } 29 | #endif 30 | 31 | #if defined(_MSC_VER) 32 | { ss << "MSVC " << _MSC_FULL_VER; } 33 | #endif 34 | return ss.str(); 35 | } 36 | 37 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 38 | m.def("get_compiler_version", &get_compiler_version, "get_compiler_version"); 39 | 40 | m.def("box_iou_rotated", &box_iou_rotated, "IoU for rotated boxes"); 41 | 42 | m.def("deform_conv_forward", &deform_conv_forward, "deform_conv_forward"); 43 | m.def( 44 | "deform_conv_backward_input", 45 | &deform_conv_backward_input, 46 | "deform_conv_backward_input"); 47 | m.def( 48 | "deform_conv_backward_filter", 49 | &deform_conv_backward_filter, 50 | "deform_conv_backward_filter"); 51 | m.def( 52 | "modulated_deform_conv_forward", 53 | &modulated_deform_conv_forward, 54 | "modulated_deform_conv_forward"); 55 | m.def( 56 | "modulated_deform_conv_backward", 57 | &modulated_deform_conv_backward, 58 | "modulated_deform_conv_backward"); 59 | 60 | m.def("nms_rotated", &nms_rotated, "NMS for rotated boxes"); 61 | 62 | m.def("roi_align_forward", &ROIAlign_forward, "ROIAlign_forward"); 63 | m.def("roi_align_backward", &ROIAlign_backward, "ROIAlign_backward"); 64 | 65 | m.def( 66 | "roi_align_rotated_forward", 67 | &ROIAlignRotated_forward, 68 | "Forward pass for Rotated ROI-Align Operator"); 69 | m.def( 70 | "roi_align_rotated_backward", 71 | &ROIAlignRotated_backward, 72 | "Backward pass for Rotated ROI-Align Operator"); 73 | 74 | m.def("rst_forward", &rst_forward, "rst forward"); 75 | m.def("mst_forward", &mst_forward, "mst forward"); 76 | m.def("bfs_forward", &bfs_forward, "bfs forward"); 77 | m.def("refine_forward", &refine_forward, "refine forward"); 78 | m.def("refine_backward_feature", &refine_backward_feature, "refine backward wrt feature"); 79 | m.def("refine_backward_edge_weight", &refine_backward_edge_weight, "refine backward wrt edge weight"); 80 | m.def("refine_backward_self_weight", &refine_backward_self_weight, "refine backward wrt self weight"); 81 | } 82 | 83 | } // namespace detectron2 84 | -------------------------------------------------------------------------------- /cvpods/layers/psroi_pool.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | 5 | from cvpods import _C 6 | 7 | 8 | class _PSROIPool(Function): 9 | @staticmethod 10 | def forward(ctx, features, rois, output_size, spatial_scale, group_size, output_dim): 11 | ctx.pooled_width = int(output_size[0]) 12 | ctx.pooled_height = int(output_size[1]) 13 | ctx.spatial_scale = float(spatial_scale) 14 | ctx.group_size = int(group_size) 15 | ctx.output_dim = int(output_dim) 16 | 17 | batch_size, num_channels, data_height, data_width = features.size() 18 | num_rois = rois.size()[0] 19 | mapping_channel = torch.zeros(num_rois, ctx.output_dim, 20 | ctx.pooled_height, ctx.pooled_width).int() 21 | mapping_channel = mapping_channel.to(features.device) 22 | output = _C.psroi_pooling_forward_cuda( 23 | features, rois, mapping_channel, 24 | ctx.pooled_height, ctx.pooled_width, 25 | ctx.spatial_scale, ctx.group_size, ctx.output_dim 26 | ) 27 | ctx.output = output 28 | ctx.mapping_channel = mapping_channel 29 | ctx.rois = rois 30 | ctx.feature_size = features.size() 31 | 32 | return output 33 | 34 | @staticmethod 35 | def backward(ctx, grad_output): 36 | assert(ctx.feature_size is not None and grad_output.is_cuda) 37 | 38 | batch_size, num_channels, data_height, data_width = ctx.feature_size 39 | 40 | grad_input = 
_C.psroi_pooling_backward_cuda( 41 | grad_output, ctx.rois, ctx.mapping_channel, 42 | batch_size, num_channels, data_height, data_width, 43 | ctx.spatial_scale 44 | # ctx.pooled_height, ctx.pooled_width, ctx.spatial_scale, ctx.output_dim 45 | ) 46 | return grad_input, None, None, None, None, None 47 | 48 | 49 | psroi_pool = _PSROIPool.apply 50 | 51 | 52 | class PSROIPool(nn.Module): 53 | def __init__(self, output_size, spatial_scale, group_size, output_dim): 54 | super(PSROIPool, self).__init__() 55 | self.output_size = output_size 56 | self.spatial_scale = spatial_scale 57 | self.group_size = group_size 58 | self.output_dim = output_dim 59 | 60 | def forward(self, input, rois): 61 | """ 62 | Args: 63 | input: NCHW images 64 | rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy. 65 | """ 66 | assert rois.dim() == 2 and rois.size(1) == 5 67 | return psroi_pool( 68 | input, rois, self.output_size, self.spatial_scale, self.group_size, self.output_dim 69 | ) 70 | 71 | def __repr__(self): 72 | tmpstr = self.__class__.__name__ + "(" 73 | tmpstr += "output_size=" + str(self.output_size) 74 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 75 | tmpstr += ", group_size=" + str(self.group_size) 76 | tmpstr += ", output_dim=" + str(self.output_dim) 77 | tmpstr += ")" 78 | return tmpstr 79 | -------------------------------------------------------------------------------- /cvpods/utils/file/download.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | import logging 5 | import os 6 | import shutil 7 | from typing import Callable, Optional 8 | from urllib import request 9 | 10 | 11 | def download( 12 | url: str, dir: str, *, filename: Optional[str] = None, progress: bool = True 13 | ) -> str: 14 | """ 15 | Download a file from a given URL to a directory. If file exists, will not 16 | overwrite the existing file. 17 | 18 | Args: 19 | url (str): 20 | dir (str): the directory to download the file 21 | filename (str or None): the basename to save the file. 22 | Will use the name in the URL if not given. 23 | progress (bool): whether to use tqdm to draw a progress bar. 24 | 25 | Returns: 26 | str: the path to the downloaded file or the existing one. 27 | """ 28 | os.makedirs(dir, exist_ok=True) 29 | if filename is None: 30 | filename = url.split("/")[-1] 31 | assert len(filename), "Cannot obtain filename from url {}".format(url) 32 | fpath = os.path.join(dir, filename) 33 | logger = logging.getLogger(__name__) 34 | 35 | if os.path.isfile(fpath): 36 | logger.info("File {} exists! Skipping download.".format(filename)) 37 | return fpath 38 | 39 | tmp = fpath + ".tmp" # download to a tmp file first, to be more atomic. 
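    # urllib.request.urlretrieve calls its reporthook as
    # hook(block_count, block_size, total_size); the tqdm adapter defined below
    # converts those block counts into incremental byte updates on the bar.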
40 | try: 41 | logger.info("Downloading from {} ...".format(url)) 42 | if progress: 43 | import tqdm 44 | 45 | def hook(t: tqdm.tqdm) -> Callable[[int, int, Optional[int]], None]: 46 | last_b = [0] 47 | 48 | def inner( 49 | b: int, bsize: int, tsize: Optional[int] = None 50 | ) -> None: 51 | if tsize is not None: 52 | t.total = tsize 53 | t.update((b - last_b[0]) * bsize) # type: ignore 54 | last_b[0] = b 55 | 56 | return inner 57 | 58 | with tqdm.tqdm( # type: ignore 59 | unit="B", unit_scale=True, miniters=1, desc=filename, leave=True 60 | ) as t: 61 | tmp, _ = request.urlretrieve( 62 | url, filename=tmp, reporthook=hook(t) 63 | ) 64 | 65 | else: 66 | tmp, _ = request.urlretrieve(url, filename=tmp) 67 | statinfo = os.stat(tmp) 68 | size = statinfo.st_size 69 | if size == 0: 70 | raise IOError("Downloaded an empty file from {}!".format(url)) 71 | # download to tmp first and move to fpath, to make this function more 72 | # atomic. 73 | shutil.move(tmp, fpath) 74 | except IOError: 75 | logger.error("Failed to download {}".format(url)) 76 | raise 77 | finally: 78 | try: 79 | os.unlink(tmp) 80 | except IOError: 81 | pass 82 | 83 | logger.info( 84 | "Successfully downloaded " + fpath + ". " + str(size) + " bytes." 85 | ) 86 | return fpath 87 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/sigmoid_focal_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | 6 | from cvpods import _C 7 | 8 | 9 | # TODO: Use JIT to replace CUDA implementation in the future. 10 | class _SigmoidFocalLoss(Function): 11 | 12 | @staticmethod 13 | def forward(ctx, logits, targets, gamma, alpha): 14 | """ 15 | Sigmoid Focal Loss forward function 16 | 17 | Args: 18 | ctx: 19 | logits (torch.Tensor): predicted logits 20 | targets (torch.Tensor): ground-truth class labels 21 | gamma (float): focal loss gamma 22 | alpha (float): focal loss alpha 23 | """ 24 | ctx.save_for_backward(logits, targets) 25 | num_classes = logits.shape[1] 26 | ctx.num_classes = num_classes 27 | ctx.gamma = gamma 28 | ctx.alpha = alpha 29 | 30 | losses = _C.sigmoid_focalloss_forward( 31 | logits, targets, num_classes, gamma, alpha 32 | ) 33 | return losses 34 | 35 | @staticmethod 36 | @once_differentiable 37 | def backward(ctx, d_loss): 38 | logits, targets = ctx.saved_tensors 39 | num_classes = ctx.num_classes 40 | gamma = ctx.gamma 41 | alpha = ctx.alpha 42 | d_loss = d_loss.contiguous() 43 | d_logits = _C.sigmoid_focalloss_backward( 44 | logits, targets, d_loss, num_classes, gamma, alpha 45 | ) 46 | return d_logits, None, None, None, None 47 | 48 | 49 | sigmoid_focal_loss_cuda = _SigmoidFocalLoss.apply 50 | 51 | 52 | def sigmoid_focal_loss_cpu(logits, targets, gamma, alpha):
 53 | """ 54 | CPU version of Sigmoid Focal Loss, equivalent to :class:`_SigmoidFocalLoss`.
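    For a label tensor ``t`` (1-based class indices, with 0 for background and
    negative values ignored) and per-class probabilities ``p_c = sigmoid(logit_c)``,
    the per-entry loss is ``-alpha * (1 - p_c) ** gamma * log(p_c)`` for the
    target class and ``-(1 - alpha) * p_c ** gamma * log(1 - p_c)`` for all
    other classes.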
55 | 56 | """ 57 | num_classes = logits.shape[1] 58 | gamma = gamma[0] if isinstance(gamma, (list, tuple)) else gamma # accept plain floats or singleton sequences 59 | alpha = alpha[0] if isinstance(alpha, (list, tuple)) else alpha 60 | dtype = targets.dtype 61 | device = targets.device 62 | class_range = torch.arange(1, num_classes + 1, dtype=dtype, device=device).unsqueeze(0) 63 | 64 | t = targets.unsqueeze(1) 65 | p = torch.sigmoid(logits) 66 | term1 = (1 - p) ** gamma * torch.log(p) 67 | term2 = p ** gamma * torch.log(1 - p) 68 | return -(t == class_range).float() * term1 * alpha - \ 69 | ((t != class_range) * (t >= 0)).float() * term2 * (1 - alpha) 70 | 71 | 72 | class SigmoidFocalLoss(nn.Module): 73 | 74 | def __init__(self, gamma, alpha): 75 | super(SigmoidFocalLoss, self).__init__() 76 | self.gamma = gamma 77 | self.alpha = alpha 78 | 79 | def forward(self, logits, targets): 80 | # device = logits.device 81 | if logits.is_cuda: 82 | loss_func = sigmoid_focal_loss_cuda 83 | else: 84 | loss_func = sigmoid_focal_loss_cpu 85 | 86 | loss = loss_func(logits, targets, self.gamma, self.alpha) 87 | return loss.sum() 88 | 89 | def __repr__(self): 90 | tmpstr = self.__class__.__name__ + "(" 91 | tmpstr += "gamma=" + str(self.gamma) 92 | tmpstr += ", alpha=" + str(self.alpha) 93 | tmpstr += ")" 94 | return tmpstr 95 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/lars/adaptive_lr.h: -------------------------------------------------------------------------------- 1 | #include "torch/extension.h" 2 | 3 | namespace cvpods { 4 | template <typename scalar_t> 5 | void ComputeAdaptiveLrAfterTypeCheck( 6 | const scalar_t &param_norm, 7 | const scalar_t &grad_norm, 8 | const scalar_t weight_decay, 9 | const scalar_t eps, 10 | const scalar_t trust_coef, 11 | scalar_t *out) { 12 | // 1. The case that `param_norm` is `zero` means all elements of the parameter 13 | // are `zero` (in general, right after the parameter is initialized 14 | // as `zero`). In this case, `adaptive_lr` will be calculated as `zero`, which 15 | // may break parameter updates. In this context, we construct 16 | // LARS to use only the wrapped optimizer's algorithm when this situation occurs, by 17 | // converting `adaptive_lr` to `one`. 18 | // 19 | // 2. The case that `grad_norm` is `zero` means all elements of the gradient are 20 | // `zero` (in general, it occurs when backward propagation doesn't work correctly). 21 | // In this case, it can be interpreted as an exceptional situation, 22 | // which may result in inappropriate parameter updates. In this context, we 23 | // construct LARS to pass the responsibility of handling the exceptional case 24 | // to the wrapped optimizer when this exception occurs, by converting `adaptive_lr` 25 | // to `one`.
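  // In the regular case below, the LARS local learning rate is
  //   adaptive_lr = trust_coef * param_norm / (grad_norm + weight_decay * param_norm + eps)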
26 | if (param_norm > 0 && grad_norm > 0) { 27 | scalar_t divisor = grad_norm + weight_decay * param_norm + eps; 28 | *out = param_norm / divisor * trust_coef; 29 | } else { 30 | *out = 1.0; 31 | } 32 | } 33 | 34 | // CUDA function interface 35 | void ComputeAdaptiveLrOnDevice( 36 | torch::Tensor param_norm, 37 | torch::Tensor grad_norm, 38 | double weight_decay, 39 | double eps, 40 | double trust_coef, 41 | torch::Tensor out); 42 | 43 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous") 44 | #define CHECK_CPU(x) AT_ASSERTM(!x.type().is_cuda(), #x " must be a CPU tensor") 45 | 46 | torch::Tensor ComputeAdaptiveLr( 47 | torch::Tensor param_norm, 48 | torch::Tensor grad_norm, 49 | double weight_decay, 50 | double eps, 51 | double trust_coef, 52 | torch::Tensor out) { 53 | CHECK_CONTIGUOUS(param_norm); 54 | CHECK_CONTIGUOUS(grad_norm); 55 | CHECK_CONTIGUOUS(out); 56 | 57 | if (param_norm.type().is_cuda() && grad_norm.type().is_cuda()) { 58 | ComputeAdaptiveLrOnDevice( 59 | param_norm, 60 | grad_norm, 61 | weight_decay, 62 | eps, 63 | trust_coef, 64 | out); 65 | } else { 66 | CHECK_CPU(param_norm); 67 | CHECK_CPU(grad_norm); 68 | CHECK_CPU(out); 69 | 70 | AT_DISPATCH_FLOATING_TYPES_AND_HALF( 71 | param_norm.type(), 72 | "compute_adaptive_lr_cpu", 73 | ([&] { 74 | ComputeAdaptiveLrAfterTypeCheck<scalar_t>( 75 | *param_norm.data<scalar_t>(), 76 | *grad_norm.data<scalar_t>(), 77 | weight_decay, 78 | eps, 79 | trust_coef, 80 | out.data<scalar_t>()); 81 | })); 82 | } 83 | 84 | return out; 85 | } 86 | 87 | } 88 | -------------------------------------------------------------------------------- /cvpods/engine/predictor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | from copy import deepcopy 4 | 5 | import torch 6 | 7 | from cvpods.checkpoint import DetectionCheckpointer 8 | from cvpods.data import build_transform_gen 9 | 10 | __all__ = ["DefaultPredictor"] 11 | 12 | 13 | class DefaultPredictor: 14 | """ 15 | Create a simple end-to-end predictor with the given config that runs on a 16 | single device for a single input image. 17 | Compared to using the model directly, this class makes the following additions: 18 | 19 | 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. 20 | 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. 21 | 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. 22 | 4. Take one input image and produce a single output, instead of a batch. 23 | 24 | If you'd like to do anything more fancy, please refer to its source code 25 | as examples to build and use the model manually. 26 | 27 | Attributes: 28 | metadata (Metadata): the metadata of the underlying dataset, obtained from 29 | cfg.DATASETS.TEST. 30 | 31 | Examples: 32 | ..
code-block:: python 33 | 34 | pred = DefaultPredictor(cfg) 35 | inputs = cv2.imread("input.jpg") 36 | outputs = pred(inputs) 37 | """ 38 | def __init__(self, cfg, meta): 39 | self.cfg = deepcopy(cfg) 40 | if self.cfg.MODEL.DEVICE.startswith("cuda:"): 41 | torch.cuda.set_device(self.cfg.MODEL.DEVICE) 42 | self.cfg.MODEL.DEVICE = "cuda" 43 | self.model = cfg.build_model(self.cfg) 44 | self.model.eval() 45 | self.metadata = meta 46 | 47 | checkpointer = DetectionCheckpointer(self.model) 48 | checkpointer.load(cfg.MODEL.WEIGHTS) 49 | 50 | self.transform_gen = build_transform_gen(cfg.INPUT.AUG.TEST_PIPELINES) 51 | 52 | self.input_format = cfg.INPUT.FORMAT 53 | assert self.input_format in ["RGB", "BGR"], self.input_format 54 | 55 | def __call__(self, original_image): 56 | """ 57 | Args: 58 | original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). 59 | 60 | Returns: 61 | predictions (dict): 62 | the output of the model for one image only. 63 | See :doc:`/tutorials/models` for details about the format. 64 | """ 65 | with torch.no_grad( 66 | ): # https://github.com/sphinx-doc/sphinx/issues/4258 67 | # Apply pre-processing to image. 68 | if self.input_format == "RGB": 69 | # whether the model expects BGR inputs or RGB 70 | original_image = original_image[:, :, ::-1] 71 | height, width = original_image.shape[:2] 72 | 73 | image = original_image 74 | for tfm_gen in self.transform_gen: 75 | image = tfm_gen.get_transform(image).apply_image(image) 76 | 77 | image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1)) 78 | 79 | inputs = {"image": image, "height": height, "width": width} 80 | predictions = self.model([inputs])[0] 81 | return predictions 82 | -------------------------------------------------------------------------------- /cvpods/layers/position_encoding.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | """ 3 | Various positional encodings for the transformer. 4 | """ 5 | import math 6 | 7 | import torch 8 | from torch import nn 9 | 10 | 11 | class PositionEmbeddingSine(nn.Module): 12 | """ 13 | This is a more standard version of the position embedding, very similar to the one 14 | used by the Attention is all you need paper, generalized to work on images. 
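    Each spatial axis is encoded independently: the cumulative sum of the
    unmasked positions along an axis gives each location's coordinate, which is
    then expanded into ``num_pos_feats`` sine/cosine channels; the y and x
    encodings are concatenated along the channel dimension.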
15 | """ 16 | 17 | def __init__(self, num_pos_feats=64, temperature=10000, normalize=False, scale=None): 18 | super().__init__() 19 | self.num_pos_feats = num_pos_feats 20 | self.temperature = temperature 21 | self.normalize = normalize 22 | if scale is not None and normalize is False: 23 | raise ValueError("normalize should be True if scale is passed") 24 | if scale is None: 25 | scale = 2 * math.pi 26 | self.scale = scale 27 | 28 | def forward(self, x, mask): 29 | not_mask = ~mask 30 | y_embed = not_mask.cumsum(1, dtype=torch.float32) 31 | x_embed = not_mask.cumsum(2, dtype=torch.float32) 32 | 33 | if self.normalize: 34 | eps = 1e-6 35 | y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale 36 | x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale 37 | 38 | dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device) 39 | dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats) 40 | 41 | pos_x = x_embed[:, :, :, None] / dim_t 42 | pos_y = y_embed[:, :, :, None] / dim_t 43 | pos_x = torch.stack( 44 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4 45 | ).flatten(3) 46 | pos_y = torch.stack( 47 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4 48 | ).flatten(3) 49 | pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2) 50 | return pos 51 | 52 | 53 | class PositionEmbeddingLearned(nn.Module): 54 | """ 55 | Absolute pos embedding, learned. 56 | """ 57 | 58 | def __init__(self, num_pos_feats=256): 59 | super().__init__() 60 | self.row_embed = nn.Embedding(50, num_pos_feats) 61 | self.col_embed = nn.Embedding(50, num_pos_feats) 62 | self.reset_parameters() 63 | 64 | def reset_parameters(self): 65 | nn.init.uniform_(self.row_embed.weight) 66 | nn.init.uniform_(self.col_embed.weight) 67 | 68 | def forward(self, tensor_list): 69 | x = tensor_list.tensors 70 | h, w = x.shape[-2:] 71 | i = torch.arange(w, device=x.device) 72 | j = torch.arange(h, device=x.device) 73 | x_emb = self.col_embed(i) 74 | y_emb = self.row_embed(j) 75 | pos = ( 76 | torch.cat( 77 | [x_emb.unsqueeze(0).repeat(h, 1, 1), y_emb.unsqueeze(1).repeat(1, w, 1)], dim=-1 78 | ) 79 | .permute(2, 0, 1) 80 | .unsqueeze(0) 81 | .repeat(x.shape[0], 1, 1, 1) 82 | ) 83 | return pos 84 | 85 | 86 | position_encoding_dict = {"sine": PositionEmbeddingSine, "learned": PositionEmbeddingLearned} 87 | -------------------------------------------------------------------------------- /cvpods/modeling/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 2 | import torch 3 | 4 | 5 | def smooth_l1_loss(input, 6 | target, 7 | beta: float, 8 | reduction: str = "none", 9 | size_average=False): 10 | """ 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 
32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 57 | """ 58 | if beta < 1e-5: 59 | # if beta == 0, then torch.where will result in nan gradients when 60 | # the chain rule is applied due to pytorch implementation details 61 | # (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 62 | # zeros, rather than "no gradient"). To avoid this issue, we define 63 | # small values of beta to be exactly l1 loss. 64 | loss = torch.abs(input - target) 65 | else: 66 | n = torch.abs(input - target) 67 | cond = n < beta 68 | loss = torch.where(cond, 0.5 * n**2 / beta, n - 0.5 * beta) 69 | 70 | if reduction == "mean" or size_average: 71 | loss = loss.mean() 72 | elif reduction == "sum": 73 | loss = loss.sum() 74 | 75 | return loss 76 | -------------------------------------------------------------------------------- /cvpods/solver/scheduler_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | # -*- coding:utf-8 -*- 3 | 4 | from torch.optim import lr_scheduler 5 | 6 | from cvpods.utils.registry import Registry 7 | 8 | from .lr_scheduler import PolyLR, WarmupCosineLR, WarmupMultiStepLR 9 | 10 | SCHEDULER_BUILDER = Registry("LRScheduler builder") 11 | 12 | 13 | @SCHEDULER_BUILDER.register() 14 | class BaseSchedulerBuilder: 15 | 16 | @staticmethod 17 | def build(optimizer, cfg): 18 | raise NotImplementedError 19 | 20 | 21 | @SCHEDULER_BUILDER.register() 22 | class WarmupMultiStepLRBuilder(BaseSchedulerBuilder): 23 | 24 | @staticmethod 25 | def build(optimizer, cfg): 26 | scheduler = WarmupMultiStepLR( 27 | optimizer, 28 | cfg.SOLVER.LR_SCHEDULER.STEPS, 29 | cfg.SOLVER.LR_SCHEDULER.GAMMA, 30 | warmup_factor=cfg.SOLVER.LR_SCHEDULER.WARMUP_FACTOR, 31 | warmup_iters=cfg.SOLVER.LR_SCHEDULER.WARMUP_ITERS, 32 | warmup_method=cfg.SOLVER.LR_SCHEDULER.WARMUP_METHOD, 33 | ) 34 | return scheduler 35 | 36 | 37 | @SCHEDULER_BUILDER.register() 38 | class WarmupCosineLRBuilder(BaseSchedulerBuilder): 39 | 40 | @staticmethod 41 | def build(optimizer, cfg): 42 | max_iter = cfg.SOLVER.LR_SCHEDULER.MAX_ITER 43 | max_epoch = cfg.SOLVER.LR_SCHEDULER.MAX_EPOCH 44 | iters_per_epoch = None if max_epoch is None else max_iter // max_epoch 45 | 46 | scheduler = WarmupCosineLR( 47 | optimizer, 48 | cfg.SOLVER.LR_SCHEDULER.MAX_ITER, 49 | warmup_factor=cfg.SOLVER.LR_SCHEDULER.WARMUP_FACTOR, 50 | warmup_iters=cfg.SOLVER.LR_SCHEDULER.WARMUP_ITERS, 51 | 
warmup_method=cfg.SOLVER.LR_SCHEDULER.WARMUP_METHOD, 52 | iters_per_epoch=iters_per_epoch, 53 | ) 54 | return scheduler 55 | 56 | 57 | @SCHEDULER_BUILDER.register() 58 | class PolyLRBuilder(BaseSchedulerBuilder): 59 | 60 | @staticmethod 61 | def build(optimizer, cfg): 62 | return PolyLR( 63 | optimizer, 64 | cfg.SOLVER.LR_SCHEDULER.MAX_ITER, 65 | cfg.SOLVER.LR_SCHEDULER.POLY_POWER, 66 | warmup_factor=cfg.SOLVER.LR_SCHEDULER.WARMUP_FACTOR, 67 | warmup_iters=cfg.SOLVER.LR_SCHEDULER.WARMUP_ITERS, 68 | warmup_method=cfg.SOLVER.LR_SCHEDULER.WARMUP_METHOD, 69 | ) 70 | 71 | 72 | @SCHEDULER_BUILDER.register() 73 | class LambdaLRBuilder(BaseSchedulerBuilder): 74 | 75 | @staticmethod 76 | def build(optimizer, cfg): 77 | return lr_scheduler.LambdaLR( 78 | optimizer, 79 | cfg.SOLVER.LR_SCHEDULER.LAMBDA_SCHEDULE 80 | ) 81 | 82 | 83 | @SCHEDULER_BUILDER.register() 84 | class OneCycleLRBuilder(BaseSchedulerBuilder): 85 | 86 | @staticmethod 87 | def build(optimizer, cfg): 88 | return lr_scheduler.OneCycleLR( 89 | optimizer, 90 | cfg.SOLVER.LR_SCHEDULER.MAX_LR, 91 | total_steps=cfg.SOLVER.LR_SCHEDULER.MAX_ITER, 92 | pct_start=cfg.SOLVER.LR_SCHEDULER.PCT_START, 93 | base_momentum=cfg.SOLVER.LR_SCHEDULER.BASE_MOM, 94 | max_momentum=cfg.SOLVER.LR_SCHEDULER.MAX_MOM, 95 | div_factor=cfg.SOLVER.LR_SCHEDULER.DIV_FACTOR 96 | ) 97 | -------------------------------------------------------------------------------- /cvpods/layers/csrc/ROIAlignRotated/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include <torch/types.h> 4 | 5 | namespace cvpods { 6 | 7 | at::Tensor ROIAlignRotated_forward_cpu( 8 | const at::Tensor& input, 9 | const at::Tensor& rois, 10 | const float spatial_scale, 11 | const int pooled_height, 12 | const int pooled_width, 13 | const int sampling_ratio); 14 | 15 | at::Tensor ROIAlignRotated_backward_cpu( 16 | const at::Tensor& grad, 17 | const at::Tensor& rois, 18 | const float spatial_scale, 19 | const int pooled_height, 20 | const int pooled_width, 21 | const int batch_size, 22 | const int channels, 23 | const int height, 24 | const int width, 25 | const int sampling_ratio); 26 | 27 | #ifdef WITH_CUDA 28 | at::Tensor ROIAlignRotated_forward_cuda( 29 | const at::Tensor& input, 30 | const at::Tensor& rois, 31 | const float spatial_scale, 32 | const int pooled_height, 33 | const int pooled_width, 34 | const int sampling_ratio); 35 | 36 | at::Tensor ROIAlignRotated_backward_cuda( 37 | const at::Tensor& grad, 38 | const at::Tensor& rois, 39 | const float spatial_scale, 40 | const int pooled_height, 41 | const int pooled_width, 42 | const int batch_size, 43 | const int channels, 44 | const int height, 45 | const int width, 46 | const int sampling_ratio); 47 | #endif 48 | 49 | // Interface for Python 50 | inline at::Tensor ROIAlignRotated_forward( 51 | const at::Tensor& input, 52 | const at::Tensor& rois, 53 | const float spatial_scale, 54 | const int pooled_height, 55 | const int pooled_width, 56 | const int sampling_ratio) { 57 | if (input.type().is_cuda()) { 58 | #ifdef WITH_CUDA 59 | return ROIAlignRotated_forward_cuda( 60 | input, 61 | rois, 62 | spatial_scale, 63 | pooled_height, 64 | pooled_width, 65 | sampling_ratio); 66 | #else 67 | AT_ERROR("Not compiled with GPU support"); 68 | #endif 69 | } 70 | return ROIAlignRotated_forward_cpu( 71 | input, rois, spatial_scale, pooled_height, pooled_width, sampling_ratio); 72 | } 73 | 74 | inline at::Tensor
ROIAlignRotated_backward( 75 | const at::Tensor& grad, 76 | const at::Tensor& rois, 77 | const float spatial_scale, 78 | const int pooled_height, 79 | const int pooled_width, 80 | const int batch_size, 81 | const int channels, 82 | const int height, 83 | const int width, 84 | const int sampling_ratio) { 85 | if (grad.type().is_cuda()) { 86 | #ifdef WITH_CUDA 87 | return ROIAlignRotated_backward_cuda( 88 | grad, 89 | rois, 90 | spatial_scale, 91 | pooled_height, 92 | pooled_width, 93 | batch_size, 94 | channels, 95 | height, 96 | width, 97 | sampling_ratio); 98 | #else 99 | AT_ERROR("Not compiled with GPU support"); 100 | #endif 101 | } 102 | return ROIAlignRotated_backward_cpu( 103 | grad, 104 | rois, 105 | spatial_scale, 106 | pooled_height, 107 | pooled_width, 108 | batch_size, 109 | channels, 110 | height, 111 | width, 112 | sampling_ratio); 113 | } 114 | 115 | } // namespace cvpods 116 | -------------------------------------------------------------------------------- /cvpods/modeling/postprocessing.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch.nn import functional as F 3 | 4 | from cvpods.layers import paste_masks_in_image 5 | from cvpods.structures import Instances 6 | 7 | 8 | def detector_postprocess(results, output_height, output_width, mask_threshold=0.5): 9 | """ 10 | Resize the output instances. 11 | The input images are often resized when entering an object detector. 12 | As a result, we often need the outputs of the detector in a different 13 | resolution from its inputs. 14 | 15 | This function will resize the raw outputs of an R-CNN detector 16 | to produce outputs according to the desired output resolution. 17 | 18 | Args: 19 | results (Instances): the raw outputs from the detector. 20 | `results.image_size` contains the input image resolution the detector sees. 21 | This object might be modified in-place. 22 | output_height, output_width: the desired output resolution. 23 | 24 | Returns: 25 | Instances: the resized output from the model, based on the output resolution 26 | """ 27 | scale_x, scale_y = (output_width / results.image_size[1], output_height / results.image_size[0]) 28 | results = Instances((output_height, output_width), **results.get_fields()) 29 | 30 | if results.has("pred_boxes"): 31 | output_boxes = results.pred_boxes 32 | elif results.has("proposal_boxes"): 33 | output_boxes = results.proposal_boxes 34 | 35 | output_boxes.scale(scale_x, scale_y) 36 | output_boxes.clip(results.image_size) 37 | 38 | results = results[output_boxes.nonempty()] 39 | 40 | if results.has("pred_masks"): 41 | results.pred_masks = paste_masks_in_image( 42 | results.pred_masks[:, 0, :, :], # N, 1, M, M 43 | results.pred_boxes, 44 | results.image_size, 45 | threshold=mask_threshold, 46 | ) 47 | 48 | if results.has("pred_keypoints"): 49 | results.pred_keypoints[:, :, 0] *= scale_x 50 | results.pred_keypoints[:, :, 1] *= scale_y 51 | 52 | return results 53 | 54 | 55 | def sem_seg_postprocess(result, img_size, output_height, output_width): 56 | """ 57 | Return semantic segmentation predictions in the original resolution. 58 | 59 | The input images are often resized when entering a semantic segmentor. Moreover, in some 60 | cases, they are also padded inside the segmentor to be divisible by the maximum network stride. 61 | As a result, we often need the predictions of the segmentor in a different 62 | resolution from its inputs.
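    Concretely, this function first crops the prediction back to the unpadded
    ``img_size`` region and then bilinearly resizes it to
    ``(output_height, output_width)``.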
63 | 64 | Args: 65 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 66 | where C is the number of classes, and H, W are the height and width of the prediction. 67 | img_size (tuple): the image size that the segmentor takes as input. 68 | output_height, output_width: the desired output resolution. 69 | 70 | Returns: 71 | semantic segmentation prediction (Tensor): A tensor of the shape 72 | (C, output_height, output_width) that contains per-pixel soft predictions. 73 | """ 74 | result = result[:, : img_size[0], : img_size[1]].expand(1, -1, -1, -1) 75 | result = F.interpolate( 76 | result, size=(output_height, output_width), mode="bilinear", align_corners=False 77 | )[0] 78 | return result 79 | -------------------------------------------------------------------------------- /cvpods/layers/roi_align_rotated.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | from torch import nn 3 | from torch.autograd import Function 4 | from torch.autograd.function import once_differentiable 5 | from torch.nn.modules.utils import _pair 6 | 7 | from cvpods import _C 8 | 9 | 10 | class _ROIAlignRotated(Function): 11 | @staticmethod 12 | def forward(ctx, input, roi, output_size, spatial_scale, sampling_ratio): 13 | ctx.save_for_backward(roi) 14 | ctx.output_size = _pair(output_size) 15 | ctx.spatial_scale = spatial_scale 16 | ctx.sampling_ratio = sampling_ratio 17 | ctx.input_shape = input.size() 18 | output = _C.roi_align_rotated_forward( 19 | input, roi, spatial_scale, output_size[0], output_size[1], sampling_ratio 20 | ) 21 | return output 22 | 23 | @staticmethod 24 | @once_differentiable 25 | def backward(ctx, grad_output): 26 | rois, = ctx.saved_tensors 27 | output_size = ctx.output_size 28 | spatial_scale = ctx.spatial_scale 29 | sampling_ratio = ctx.sampling_ratio 30 | bs, ch, h, w = ctx.input_shape 31 | grad_input = _C.roi_align_rotated_backward( 32 | grad_output, 33 | rois, 34 | spatial_scale, 35 | output_size[0], 36 | output_size[1], 37 | bs, 38 | ch, 39 | h, 40 | w, 41 | sampling_ratio, 42 | ) 43 | return grad_input, None, None, None, None, None 44 | 45 | 46 | roi_align_rotated = _ROIAlignRotated.apply 47 | 48 | 49 | class ROIAlignRotated(nn.Module): 50 | def __init__(self, output_size, spatial_scale, sampling_ratio): 51 | """ 52 | Args: 53 | output_size (tuple): h, w 54 | spatial_scale (float): scale the input boxes by this number 55 | sampling_ratio (int): number of input samples to take for each output 56 | sample. 0 to take samples densely. 57 | 58 | Note: 59 | ROIAlignRotated supports continuous coordinates by default: 60 | Given a continuous coordinate c, its two neighboring pixel indices (in our 61 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 62 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 63 | from the underlying signal at continuous coordinates 0.5 and 1.5). 64 | """ 65 | super(ROIAlignRotated, self).__init__() 66 | self.output_size = output_size 67 | self.spatial_scale = spatial_scale 68 | self.sampling_ratio = sampling_ratio 69 | 70 | def forward(self, input, rois): 71 | """ 72 | Args: 73 | input: NCHW images 74 | rois: Bx6 boxes. First column is the index into N. 75 | The other 5 columns are (x_ctr, y_ctr, width, height, angle_degrees).
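                Box coordinates are given in the input image coordinate system
                and are multiplied by the ``spatial_scale`` given at construction
                to map them onto the feature map.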
76 | """ 77 | assert rois.dim() == 2 and rois.size(1) == 6 78 | return roi_align_rotated( 79 | input, rois, self.output_size, self.spatial_scale, self.sampling_ratio 80 | ) 81 | 82 | def __repr__(self): 83 | tmpstr = self.__class__.__name__ + "(" 84 | tmpstr += "output_size=" + str(self.output_size) 85 | tmpstr += ", spatial_scale=" + str(self.spatial_scale) 86 | tmpstr += ", sampling_ratio=" + str(self.sampling_ratio) 87 | tmpstr += ")" 88 | return tmpstr 89 | --------------------------------------------------------------------------------