├── .gitattributes ├── Debug └── CfgDefaults.yaml ├── Detectron2 ├── Base.cpp ├── Base.h ├── CfgDefaults.yaml ├── Data │ ├── BuiltinDataset.cpp │ ├── BuiltinDataset.h │ ├── BuiltinMeta.cpp │ ├── BuiltinMeta.h │ ├── MetadataCatalog.cpp │ ├── MetadataCatalog.h │ ├── ResizeShortestEdge.cpp │ ├── ResizeShortestEdge.h │ ├── ResizeTransform.cpp │ ├── ResizeTransform.h │ ├── Transform.cpp │ ├── Transform.h │ ├── TransformGen.cpp │ └── TransformGen.h ├── Detectron2.cpp ├── Detectron2.h ├── Detectron2.vcxproj ├── Detectron2.vcxproj.filters ├── Detectron2.vcxproj.user ├── Detectron2Includes.h ├── Import │ ├── Baseline │ │ ├── model_final_997cc7.cpp │ │ ├── model_final_a3ec72.cpp │ │ ├── model_final_cafdb1.cpp │ │ ├── model_final_f10217.cpp │ │ └── model_final_f6e8b1.cpp │ ├── ImportBaseline.py │ ├── ModelImporter.cpp │ └── ModelImporter.h ├── LICENSE ├── MetaArch │ ├── GeneralizedRCNN.cpp │ ├── GeneralizedRCNN.h │ ├── MetaArch.cpp │ ├── MetaArch.h │ ├── PanopticFPN.cpp │ ├── PanopticFPN.h │ ├── ProposalNetwork.cpp │ ├── ProposalNetwork.h │ ├── SemanticSegmentor.cpp │ └── SemanticSegmentor.h ├── Modules │ ├── Backbone.h │ ├── BatchNorm │ │ ├── BatchNorm.cpp │ │ ├── BatchNorm.h │ │ ├── BatchNorm2d.h │ │ ├── FrozenBatchNorm2d.cpp │ │ ├── FrozenBatchNorm2d.h │ │ ├── GroupNorm.h │ │ ├── NaiveSyncBatchNorm.cpp │ │ └── NaiveSyncBatchNorm.h │ ├── Conv │ │ ├── ConvBn2d.cpp │ │ ├── ConvBn2d.h │ │ ├── DeformConv.cpp │ │ ├── DeformConv.h │ │ ├── ModulatedDeformConv.cpp │ │ └── ModulatedDeformConv.h │ ├── FPN │ │ ├── FPN.cpp │ │ ├── FPN.h │ │ ├── LastLevelMaxPool.cpp │ │ ├── LastLevelMaxPool.h │ │ ├── LastLevelP6P7.cpp │ │ ├── LastLevelP6P7.h │ │ ├── SemSegFPNHead.cpp │ │ ├── SemSegFPNHead.h │ │ └── TopBlock.h │ ├── Opeartors │ │ ├── DeformConvOp.cpp │ │ ├── DeformConvOp.h │ │ ├── ModulatedDeformConvOp.cpp │ │ ├── ModulatedDeformConvOp.h │ │ ├── NewEmptyTensorOp.cpp │ │ └── NewEmptyTensorOp.h │ ├── ROIHeads │ │ ├── BaseKeypointRCNNHead.cpp │ │ ├── BaseKeypointRCNNHead.h │ │ ├── 
BaseMaskRCNNHead.cpp │ │ ├── BaseMaskRCNNHead.h │ │ ├── CascadeROIHeads.cpp │ │ ├── CascadeROIHeads.h │ │ ├── FastRCNNConvFCHead.cpp │ │ ├── FastRCNNConvFCHead.h │ │ ├── FastRCNNOutputLayers.cpp │ │ ├── FastRCNNOutputLayers.h │ │ ├── FastRCNNOutputs.cpp │ │ ├── FastRCNNOutputs.h │ │ ├── KRCNNConvDeconvUpsampleHead.cpp │ │ ├── KRCNNConvDeconvUpsampleHead.h │ │ ├── MaskRCNNConvUpsampleHead.cpp │ │ ├── MaskRCNNConvUpsampleHead.h │ │ ├── ROIHeads.cpp │ │ ├── ROIHeads.h │ │ ├── RROIHeads.cpp │ │ ├── RROIHeads.h │ │ ├── Res5ROIHeads.cpp │ │ ├── Res5ROIHeads.h │ │ ├── RotatedFastRCNNOutputLayers.cpp │ │ ├── RotatedFastRCNNOutputLayers.h │ │ ├── StandardROIHeads.cpp │ │ └── StandardROIHeads.h │ ├── ROIPooler │ │ ├── ROIAlign.cpp │ │ ├── ROIAlign.h │ │ ├── ROIAlignRotated.cpp │ │ ├── ROIAlignRotated.h │ │ ├── ROIPool.cpp │ │ ├── ROIPool.h │ │ ├── ROIPooler.cpp │ │ ├── ROIPooler.h │ │ └── ROIPoolerLevel.h │ ├── RPN │ │ ├── AnchorGenerator.cpp │ │ ├── AnchorGenerator.h │ │ ├── DefaultAnchorGenerator.cpp │ │ ├── DefaultAnchorGenerator.h │ │ ├── RPN.cpp │ │ ├── RPN.h │ │ ├── RPNOutputs.cpp │ │ ├── RPNOutputs.h │ │ ├── RRPN.cpp │ │ ├── RRPN.h │ │ ├── RotatedAnchorGenerator.cpp │ │ ├── RotatedAnchorGenerator.h │ │ ├── StandardRPNHead.cpp │ │ └── StandardRPNHead.h │ └── ResNet │ │ ├── BasicBlock.cpp │ │ ├── BasicBlock.h │ │ ├── BasicStem.cpp │ │ ├── BasicStem.h │ │ ├── BottleneckBlock.cpp │ │ ├── BottleneckBlock.h │ │ ├── CNNBlockBase.cpp │ │ ├── CNNBlockBase.h │ │ ├── DeformBottleneckBlock.cpp │ │ ├── DeformBottleneckBlock.h │ │ ├── ResNet.cpp │ │ └── ResNet.h ├── Structures │ ├── BitMasks.cpp │ ├── BitMasks.h │ ├── Box2BoxTransform.cpp │ ├── Box2BoxTransform.h │ ├── Boxes.cpp │ ├── Boxes.h │ ├── GenericMask.cpp │ ├── GenericMask.h │ ├── ImageList.cpp │ ├── ImageList.h │ ├── Instances.cpp │ ├── Instances.h │ ├── Keypoints.cpp │ ├── Keypoints.h │ ├── MaskOps.cpp │ ├── MaskOps.h │ ├── Masks.h │ ├── Matcher.cpp │ ├── Matcher.h │ ├── NMS.cpp │ ├── NMS.h │ ├── PanopticSegment.cpp │ 
├── PanopticSegment.h │ ├── PolygonMasks.cpp │ ├── PolygonMasks.h │ ├── PostProcessing.cpp │ ├── PostProcessing.h │ ├── RotatedBoxes.cpp │ ├── RotatedBoxes.h │ ├── Sampling.cpp │ ├── Sampling.h │ ├── Sequence.cpp │ ├── Sequence.h │ ├── ShapeSpec.cpp │ └── ShapeSpec.h ├── Utils │ ├── AsyncPredictor.cpp │ ├── AsyncPredictor.h │ ├── Canvas.h │ ├── CfgNode.cpp │ ├── CfgNode.h │ ├── DefaultPredictor.cpp │ ├── DefaultPredictor.h │ ├── DefaultTrainer.cpp │ ├── DefaultTrainer.h │ ├── EventStorage.cpp │ ├── EventStorage.h │ ├── File.cpp │ ├── File.h │ ├── Predictor.h │ ├── Timer.cpp │ ├── Timer.h │ ├── TrainerBase.h │ ├── Utils.cpp │ ├── Utils.h │ ├── VideoAnalyzer.cpp │ ├── VideoAnalyzer.h │ ├── VideoVisualizer.cpp │ ├── VideoVisualizer.h │ ├── VisColor.cpp │ ├── VisColor.h │ ├── VisImage.cpp │ ├── VisImage.h │ ├── Visualizer.cpp │ ├── Visualizer.h │ ├── cvCanvas.cpp │ ├── cvCanvas.h │ ├── utils_train.cpp │ └── utils_train.hpp ├── VisualizationDemo.cpp ├── VisualizationDemo.h ├── coco │ ├── 1 │ │ ├── data.cpp │ │ └── data.hpp │ ├── data.cpp │ ├── data.hpp │ ├── json.hpp │ ├── mask.cpp │ ├── mask.h │ ├── maskApi.c │ └── maskApi.h ├── detectron2 │ ├── ROIAlign │ │ ├── ROIAlign.h │ │ ├── ROIAlign_cpu.cpp │ │ └── ROIAlign_cuda.cu │ ├── ROIAlignRotated │ │ ├── ROIAlignRotated.h │ │ ├── ROIAlignRotated_cpu.cpp │ │ └── ROIAlignRotated_cuda.cu │ ├── ROIPool │ │ ├── ROIPool.h │ │ ├── ROIPool_cpu.cpp │ │ └── ROIPool_cuda.cu │ ├── box_iou_rotated │ │ ├── box_iou_rotated.h │ │ ├── box_iou_rotated_cpu.cpp │ │ ├── box_iou_rotated_cuda.cu │ │ └── box_iou_rotated_utils.h │ ├── cuda_version.cu │ ├── deformable │ │ ├── deform_conv.h │ │ ├── deform_conv_cuda.cu │ │ └── deform_conv_cuda_kernel.cu │ ├── nms │ │ ├── cuda_helpers.h │ │ ├── nms.h │ │ ├── nms_cpu.cpp │ │ └── nms_cuda.cu │ ├── nms_rotated │ │ ├── nms_rotated.h │ │ ├── nms_rotated_cpu.cpp │ │ └── nms_rotated_cuda.cu │ └── vision.cpp ├── fvcore │ ├── config.cpp │ ├── config.h │ ├── fvcore.cpp │ ├── fvcore.h │ ├── yacs.cpp │ └── 
yacs.h ├── trainDemo.cpp └── trainDemo.h ├── Detectron2_Project.sln ├── Detectron2_Project.vcxproj ├── Detectron2_Project.vcxproj.filters ├── Detectron2_Project.vcxproj.user ├── Detectron2_test.cpp ├── Detectron2_train.cpp ├── NetLib2 ├── NetLib2.vcxproj ├── NetLib2.vcxproj.filters └── NetLib2.vcxproj.user ├── README.md ├── configs ├── Base-RCNN-C4.yaml ├── Base-RCNN-DilatedC5.yaml ├── Base-RCNN-FPN.yaml ├── Base-RetinaNet.yaml ├── COCO-Detection │ ├── fast_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_101_C4_3x.yaml │ ├── faster_rcnn_R_101_DC5_3x.yaml │ ├── faster_rcnn_R_101_FPN_3x.yaml │ ├── faster_rcnn_R_50_C4_1x.yaml │ ├── faster_rcnn_R_50_C4_3x.yaml │ ├── faster_rcnn_R_50_DC5_1x.yaml │ ├── faster_rcnn_R_50_DC5_3x.yaml │ ├── faster_rcnn_R_50_FPN_1x.yaml │ ├── faster_rcnn_R_50_FPN_3x.yaml │ ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml │ ├── retinanet_R_101_FPN_3x.yaml │ ├── retinanet_R_50_FPN_1x.yaml │ ├── retinanet_R_50_FPN_3x.yaml │ ├── rpn_R_50_C4_1x.yaml │ └── rpn_R_50_FPN_1x.yaml ├── COCO-InstanceSegmentation │ ├── mask_rcnn_R_101_C4_3x.yaml │ ├── mask_rcnn_R_101_DC5_3x.yaml │ ├── mask_rcnn_R_101_FPN_3x.yaml │ ├── mask_rcnn_R_50_C4_1x.yaml │ ├── mask_rcnn_R_50_C4_3x.yaml │ ├── mask_rcnn_R_50_DC5_1x.yaml │ ├── mask_rcnn_R_50_DC5_3x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x_giou.yaml │ ├── mask_rcnn_R_50_FPN_3x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-Keypoints │ ├── Base-Keypoint-RCNN-FPN.yaml │ ├── keypoint_rcnn_R_101_FPN_3x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_3x.yaml │ └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml ├── COCO-PanopticSegmentation │ ├── Base-Panoptic-FPN.yaml │ ├── panoptic_fpn_R_101_3x.yaml │ ├── panoptic_fpn_R_50_1x.yaml │ └── panoptic_fpn_R_50_3x.yaml ├── Cityscapes │ └── mask_rcnn_R_50_FPN.yaml ├── Detectron1-Comparisons │ ├── faster_rcnn_R_50_FPN_noaug_1x.yaml │ ├── keypoint_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_R_50_FPN_noaug_1x.yaml ├── LVISv0.5-InstanceSegmentation │ ├── 
mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── LVISv1-InstanceSegmentation │ ├── mask_rcnn_R_101_FPN_1x.yaml │ ├── mask_rcnn_R_50_FPN_1x.yaml │ └── mask_rcnn_X_101_32x8d_FPN_1x.yaml ├── Misc │ ├── cascade_mask_rcnn_R_50_FPN_1x.yaml │ ├── cascade_mask_rcnn_R_50_FPN_3x.yaml │ ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml │ ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml │ ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml │ ├── mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml │ ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml │ ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml │ ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml │ ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml │ └── semantic_R_50_FPN_1x.yaml ├── PascalVOC-Detection │ ├── faster_rcnn_R_50_C4.yaml │ └── faster_rcnn_R_50_FPN.yaml └── quick_schedules │ ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml │ ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── fast_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_instant_test.yaml │ ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml │ ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml │ ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml │ ├── mask_rcnn_R_50_C4_inference_acc_test.yaml │ ├── mask_rcnn_R_50_C4_instant_test.yaml │ ├── mask_rcnn_R_50_C4_training_acc_test.yaml │ ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_instant_test.yaml │ ├── mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml │ ├── mask_rcnn_R_50_FPN_training_acc_test.yaml │ ├── panoptic_fpn_R_50_inference_acc_test.yaml │ ├── panoptic_fpn_R_50_instant_test.yaml │ ├── panoptic_fpn_R_50_training_acc_test.yaml │ ├── retinanet_R_50_FPN_inference_acc_test.yaml │ ├── 
retinanet_R_50_FPN_instant_test.yaml │ ├── rpn_R_50_FPN_inference_acc_test.yaml │ ├── rpn_R_50_FPN_instant_test.yaml │ ├── semantic_R_50_FPN_inference_acc_test.yaml │ ├── semantic_R_50_FPN_instant_test.yaml │ └── semantic_R_50_FPN_training_acc_test.yaml └── output ├── result.jpg └── weixin.jpg /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /Detectron2/Base.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | -------------------------------------------------------------------------------- /Detectron2/Base.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include 27 | -------------------------------------------------------------------------------- /Detectron2/Data/BuiltinMeta.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "MetadataCatalog.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from data/datasets/builtin_meta.py 9 | 10 | class BuiltinMeta { 11 | public: 12 | static Metadata _get_builtin_metadata(const std::string &dataset_name); 13 | 14 | private: 15 | static void _get_coco_instances_meta(Metadata &metadata); 16 | 17 | // Returns metadata for "separated" version of the panoptic segmentation dataset. 
18 | static void _get_coco_panoptic_separated_meta(Metadata &metadata); 19 | 20 | static void _get_coco_person_meta(Metadata &metadata); 21 | static void _get_cityscapes_meta(Metadata &metadata); 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /Detectron2/Data/MetadataCatalog.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "MetadataCatalog.h" 3 | 4 | using namespace std; 5 | using namespace Detectron2; 6 | 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | static unordered_map _NAME_TO_META = {}; 10 | 11 | Metadata MetadataCatalog::get(const std::string &name) { 12 | assert(!name.empty()); 13 | auto iter = _NAME_TO_META.find(name); 14 | if (iter != _NAME_TO_META.end()) { 15 | return iter->second; 16 | } 17 | else { 18 | auto m = make_shared(); 19 | m->name = name; 20 | _NAME_TO_META[name] = m; 21 | return m; 22 | } 23 | } 24 | 25 | std::vector MetadataCatalog::list() { 26 | std::vector ret; 27 | ret.reserve(_NAME_TO_META.size()); 28 | for (auto iter : _NAME_TO_META) { 29 | ret.push_back(iter.first); 30 | } 31 | return ret; 32 | } 33 | -------------------------------------------------------------------------------- /Detectron2/Data/ResizeShortestEdge.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ResizeShortestEdge.h" 3 | 4 | #include "ResizeTransform.h" 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | ResizeShortestEdge::ResizeShortestEdge(int short_edge_length, int64_t max_size, const std::string &sample_style, 13 | Transform::Interp interp) : 14 | ResizeShortestEdge({ short_edge_length, short_edge_length }, 
max_size, sample_style, interp) { 15 | } 16 | 17 | ResizeShortestEdge::ResizeShortestEdge(const std::vector &short_edge_length, int64_t max_size, 18 | const std::string &sample_style, Transform::Interp interp) : 19 | m_short_edge_length(short_edge_length), 20 | m_max_size(max_size), 21 | m_is_range(sample_style == "range"), 22 | m_interp(interp) 23 | { 24 | assert(sample_style == "range" || sample_style == "choice"); 25 | } 26 | 27 | std::shared_ptr ResizeShortestEdge::get_transform(torch::Tensor img) { 28 | auto h = img.size(0); 29 | auto w = img.size(1); 30 | 31 | int64_t size; 32 | if (m_is_range) { 33 | size = torch::randint(m_short_edge_length[0], m_short_edge_length[1] + 1, 1).item(); 34 | } 35 | else { 36 | size = m_short_edge_length[torch::randint(0, m_short_edge_length.size(), 1).item()]; 37 | } 38 | if (size == 0) { 39 | return make_shared(); 40 | } 41 | 42 | auto scale_h = (float)size / h; 43 | auto scale_w = (float)size / w; 44 | float newh, neww; 45 | newh = scale_h * h; 46 | neww = scale_w * w; 47 | return make_shared(h, w, int(newh + 0.5), int(neww + 0.5), m_interp); 48 | } 49 | -------------------------------------------------------------------------------- /Detectron2/Data/ResizeShortestEdge.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "TransformGen.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from data/transform/transform_gen.py 9 | 10 | /** 11 | Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. 12 | If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. 13 | */ 14 | class ResizeShortestEdge : public TransformGen { 15 | public: 16 | /** 17 | short_edge_length (list[int]): If ``sample_style=="range"``, 18 | a [min, max] interval from which to sample the shortest edge length. 
19 | If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. 20 | max_size (int): maximum allowed longest edge length. 21 | sample_style (str): either "range" or "choice". 22 | */ 23 | ResizeShortestEdge(int short_edge_length, int64_t max_size = INT64_MAX, 24 | const std::string &sample_style = "range", Transform::Interp interp = Transform::kBILINEAR); 25 | ResizeShortestEdge(const std::vector &short_edge_length, int64_t max_size = INT64_MAX, 26 | const std::string &sample_style = "range", Transform::Interp interp = Transform::kBILINEAR); 27 | 28 | virtual std::shared_ptr get_transform(torch::Tensor img) override; 29 | 30 | private: 31 | std::vector m_short_edge_length; 32 | int64_t m_max_size; 33 | std::string m_sample_style; 34 | bool m_is_range; 35 | Transform::Interp m_interp; 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /Detectron2/Data/ResizeTransform.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ResizeTransform.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | ResizeTransform::ResizeTransform(int h, int w, int new_h, int new_w, Interp interp) : 13 | m_h(h), m_w(w), m_new_h(new_h), m_new_w(new_w), m_interp(interp) { 14 | } 15 | 16 | torch::Tensor ResizeTransform::apply_image(torch::Tensor img, Interp interp) { 17 | assert(img.size(0) == m_h && img.size(1) == m_w); 18 | assert(img.dim() <= 4); 19 | if (interp == kNone) { // doh' original code didn't do this when img.dtype() != torch::kUInt8 20 | interp = m_interp; 21 | } 22 | 23 | if (img.dtype() == torch::kUInt8) { 24 | cv::Mat mimg = image_to_mat(img); 25 | cv::resize(mimg, mimg, { m_new_w, m_new_h }, 0.0, 0.0, interp); 26 | img = image_to_tensor(mimg); 27 | } 28 
| else { 29 | auto shape = torch::tensor(img.sizes()); 30 | auto shape_4d = shape.index({ Slice(None, 2) }) + torch::tensor({ 1 }) * (4 - shape.size(0)) + 31 | shape.index({ Slice(2, None) }); 32 | img = img.view(vectorize(shape_4d)).permute({ 2, 3, 0, 1 }); // hw(c) -> nchw 33 | 34 | auto options = nn::functional::InterpolateFuncOptions() 35 | .size(vector{ m_new_h, m_new_w }).align_corners(false); 36 | switch (interp) { 37 | case kNEAREST: options.mode(torch::kNearest); break; 38 | case kBILINEAR: options.mode(torch::kBilinear); break; 39 | case kBICUBIC: options.mode(torch::kBicubic); break; 40 | default: assert(false); break; 41 | } 42 | img = nn::functional::interpolate(img, options); 43 | shape.index_put_({ Slice(None, 2) }, torch::tensor({ m_new_h, m_new_w })); 44 | img = img.permute({ 2, 3, 0, 1 }).view(vectorize(shape)); // nchw -> hw(c) 45 | } 46 | return img; 47 | } 48 | 49 | torch::Tensor ResizeTransform::apply_coords(torch::Tensor coords) { 50 | coords.index_put_({ Colon, 0 }, coords.index({ Colon, 0 }) * ((float)m_new_w / m_w)); 51 | coords.index_put_({ Colon, 1 }, coords.index({ Colon, 1 }) * ((float)m_new_h / m_h)); 52 | return coords; 53 | } 54 | 55 | torch::Tensor ResizeTransform::apply_segmentation(torch::Tensor segmentation) { 56 | return apply_image(segmentation, kNEAREST); 57 | } 58 | 59 | std::shared_ptr ResizeTransform::inverse() { 60 | return make_shared(m_new_h, m_new_w, m_h, m_w, m_interp); 61 | } 62 | -------------------------------------------------------------------------------- /Detectron2/Data/ResizeTransform.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Transform.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from data/transform/transform.py 9 | 10 | /** 11 | Resize the image to a target size. 
12 | */ 13 | class ResizeTransform : public Transform { 14 | public: 15 | /** 16 | h, w (int): original image size 17 | new_h, new_w (int): new image size 18 | interp: PIL interpolation methods, defaults to bilinear. 19 | */ 20 | ResizeTransform(int h, int w, int new_h, int new_w, Interp interp = kBILINEAR); 21 | 22 | virtual torch::Tensor apply_image(torch::Tensor img, Interp interp = kNone) override; 23 | virtual torch::Tensor apply_coords(torch::Tensor coords) override; 24 | virtual torch::Tensor apply_segmentation(torch::Tensor segmentation) override; 25 | virtual std::shared_ptr inverse() override; 26 | 27 | private: 28 | int m_h; 29 | int m_w; 30 | int m_new_h; 31 | int m_new_w; 32 | Interp m_interp; 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /Detectron2/Data/Transform.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "Transform.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | void Transform::_set_attributes(const std::unordered_map ¶ms) { 11 | for (auto iter : params) { 12 | auto &key = iter.first; 13 | if (key != "self" && key.find('_') != 0) { 14 | m_attrs[key] = iter.second; 15 | } 16 | } 17 | } 18 | 19 | torch::Tensor Transform::apply_box(torch::Tensor box) { 20 | // Indexes of converting (x0, y0, x1, y1) box into 4 coordinates of 21 | // ([x0, y0], [x1, y0], [x0, y1], [x1, y1]). 
22 | Tensor idxs = torch::tensor(vector{ 0, 1, 2, 1, 0, 3, 2, 3 }); 23 | auto coords = box.reshape({ -1, 4 }).index({ Colon, idxs }).reshape({ -1, 2 }); 24 | coords = apply_coords(coords).reshape({ -1, 4, 2 }); 25 | auto minxy = coords.min_values(1); 26 | auto maxxy = coords.max_values(1); 27 | auto trans_boxes = torch::cat({ minxy, maxxy }, 1); 28 | return trans_boxes; 29 | } 30 | 31 | TensorVec Transform::apply_polygons(const TensorVec &polygons) { 32 | TensorVec ret; 33 | ret.reserve(polygons.size()); 34 | for (auto &p : polygons) { 35 | ret.push_back(apply_coords(p)); 36 | } 37 | return ret; 38 | } 39 | 40 | std::shared_ptr Transform::inverse() { 41 | assert(false); 42 | return nullptr; 43 | } 44 | 45 | std::string Transform::repr() const { 46 | assert(false); 47 | return ""; 48 | } 49 | 50 | -------------------------------------------------------------------------------- /Detectron2/Data/TransformGen.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "TransformGen.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | torch::Tensor TransformGen::_rand_range(double low, double *high, IntArrayRef size) { 11 | if (high == nullptr) { 12 | *high = low; 13 | low = 0; 14 | } 15 | return torch::rand(size).uniform_(low, *high); 16 | } 17 | 18 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 19 | 20 | TransformGen::TransformGen(const std::unordered_map ¶ms) { 21 | for (auto iter : params) { 22 | auto &key = iter.first; 23 | if (key != "self" && key.find('_') != 0) { 24 | m_attrs[key] = iter.second; 25 | } 26 | } 27 | } 28 | 29 | std::string TransformGen::repr() const { 30 | assert(false); 31 | return ""; 32 | } 33 | 
-------------------------------------------------------------------------------- /Detectron2/Data/TransformGen.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Transform.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from data/transform/transform_gen.py 9 | 10 | /** 11 | TransformGen takes an image of type uint8 in range [0, 255], or 12 | floating point in range [0, 1] or [0, 255] as input. 13 | 14 | It creates a :class:`Transform` based on the given image, sometimes with randomness. 15 | The transform can then be used to transform images 16 | or other data (boxes, points, annotations, etc.) associated with it. 17 | 18 | The assumption made in this class 19 | is that the image itself is sufficient to instantiate a transform. 20 | When this assumption is not true, you need to create the transforms by your own. 21 | 22 | A list of `TransformGen` can be applied with :func:`apply_transform_gens`. 23 | */ 24 | class TransformGen { 25 | public: 26 | TransformGen(const std::unordered_map ¶ms = {}); 27 | virtual ~TransformGen() {} 28 | 29 | virtual std::shared_ptr get_transform(torch::Tensor img) = 0; 30 | 31 | /** 32 | Produce something like: 33 | "MyTransformGen(field1={self.field1}, field2={self.field2})" 34 | */ 35 | std::string repr() const; 36 | std::string str() const { 37 | return repr(); 38 | } 39 | 40 | protected: 41 | std::unordered_map m_attrs; 42 | 43 | // Uniform float random number between low and high. 
44 | static torch::Tensor _rand_range(double low = 1.0, double *high = nullptr, torch::IntArrayRef size = {}); 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /Detectron2/Detectron2.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "Detectron2.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | bool Detectron2::cudaEnabled() { 11 | return torch::cuda::is_available(); 12 | } 13 | 14 | void Detectron2::retry_if_cuda_oom(std::function func) { 15 | //~! we're not doing any retry yet 16 | func(); 17 | } 18 | 19 | int Detectron2::IntLog2(int exp) { 20 | int n = 0; 21 | while (exp > 1) { 22 | assert(exp % 2 == 0); 23 | exp /= 2; 24 | n++; 25 | } 26 | assert(exp == 1); 27 | return n; 28 | } 29 | 30 | int Detectron2::IntExp2(int n) { 31 | int result = 1; 32 | for (int i = 0; i < n; i++) { 33 | result *= 2; 34 | } 35 | return result; 36 | } 37 | 38 | std::string Detectron2::FormatString(const char *fmt, int d) { 39 | char buf[256]; 40 | snprintf(buf, sizeof(buf), fmt, d); 41 | return buf; 42 | } 43 | 44 | std::string Detectron2::FormatString(const char *fmt, double f) { 45 | char buf[256]; 46 | snprintf(buf, sizeof(buf), fmt, f); 47 | return buf; 48 | } 49 | 50 | torch::Tensor Detectron2::slice_range(int64_t start, int64_t end, int64_t step) { 51 | vector range; 52 | range.reserve((end - start) / step + 1); 53 | for (int64_t i = start; i < end; i += step) { 54 | range.push_back(i); 55 | } 56 | return torch::tensor(range); 57 | } 58 | 59 | std::vector Detectron2::vectorize(const torch::Tensor &t) { 60 | assert(t.dim() == 1); 61 | vector ret; 62 | ret.reserve(t.numel()); 63 | for (int i = 0; i < t.numel(); i++) { 64 | ret.push_back(t[i].item()); 65 | } 66 | return ret; 67 | } 68 | 
"""Convert a pickled Detectron2 baseline checkpoint for the C++ importer.

For the model named on the command line (or the default below) this script
reads ``<checkpoints>/<model>.pkl`` and produces two files:

* ``<cwd>\\Baseline\\<model>.cpp`` - a generated ``ModelImporter::import_<model>()``
  that registers every exported tensor with ``Add(name, element_count)``.
* ``<checkpoints>/<model>.data`` - the raw bytes of those tensors, written
  back to back in the same order.

Tensors whose key starts with ``roi_heads.box_predictor`` are not exported.
"""
import numpy
import os
import pickle
import re
import sys
from numpy import array

# Example: python ImportBaseline.py model_final_997cc7
# The model name may be passed as the first argument; without one the
# previously hard-coded default is used.
modelName = sys.argv[1] if len(sys.argv) > 1 else 'model_final_f6e8b1'

# checkpoints = os.getenv('D2_CHECKPOINTS_DIR') + '\\'
checkpoints = 'D:\\libtorch\\detectron2_project\\Detectron2\\Import\\'

# Offsets are computed with a fixed element size; the assert in the loop
# fails if a tensor's dtype does not match it.
BYTES_PER_ELEMENT = 4

# Keys with this prefix are left out of the export.
SKIPPED_PREFIX = 'roi_heads.box_predictor'

cppFileName = os.getcwd() + '\\Baseline\\' + modelName + '.cpp'
fdataFileName = checkpoints + modelName + '.data'

# Load the checkpoint before creating any output so a missing or broken .pkl
# does not leave truncated .cpp/.data files behind.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only run this on checkpoints obtained from a trusted source.
with open(checkpoints + modelName + '.pkl', 'rb') as fpkl:
    loaded = pickle.load(fpkl)
model = loaded["model"]

# 'with' guarantees both outputs are flushed and closed, also on error.
with open(cppFileName, 'w') as fcpp, open(fdataFileName, 'wb') as fdata:
    fcpp.write('#include "Base.h"\n')
    # NOTE(review): the header name after '#include' is empty in the reviewed
    # text; confirm the intended header against an existing Baseline/*.cpp.
    fcpp.write('#include \n')
    fcpp.write('\n')
    fcpp.write('using namespace Detectron2;\n')
    fcpp.write('\n')
    fcpp.write('/' * 119)
    fcpp.write('\n')
    fcpp.write('\n')

    fcpp.write('std::string ModelImporter::import_' + modelName + '() {\n')
    offset = 0
    for key in model:
        if key.startswith(SKIPPED_PREFIX):
            continue
        # Other filters that were tried:
        # if key.startswith('roi_heads'):
        #     continue
        # if key.startswith('proposal_generator'):
        #     continue

        data = model[key]
        numel = data.size
        data = data.reshape([numel])

        # The trailing comment records the byte offset of this tensor in .data.
        fcpp.write('\tAdd("' + key + '", ' + str(numel) + '); // ' + str(offset) + '\n')
        fdata.write(data.tobytes())
        offset += numel * BYTES_PER_ELEMENT
        assert fdata.tell() == offset, "{} != {}".format(fdata.tell(), offset)

    fcpp.write('\n')
    fcpp.write('\treturn DataDir() + "\\\\' + modelName + '.data";\n')
    fcpp.write('}\n')
std::string DataDir(); 37 | 38 | public: 39 | ModelImporter(Model model); 40 | ModelImporter(const std::string &filename); 41 | 42 | bool HasData() const { return m_fdata.get() != nullptr; } 43 | 44 | void Import(const std::string &name, torch::nn::Conv2d &conv, Fill fill) const; 45 | void Import(const std::string &name, torch::nn::ConvTranspose2d &conv, Fill fill) const; 46 | void Import(const std::string &name, torch::nn::Linear &fc, Fill fill) const; 47 | 48 | void Initialize(const std::string &name, torch::Tensor &tensor) const; 49 | 50 | int ReportUnimported(const std::string &prefix = "") const; 51 | 52 | private: 53 | // implemented in generated files by ImportBaseline.py 54 | std::string import_model_final_f10217(); 55 | std::string import_model_final_f6e8b1(); 56 | std::string import_model_final_a3ec72(); 57 | std::string import_model_final_997cc7(); 58 | std::string import_model_final_cafdb1(); 59 | 60 | std::unordered_map> m_sections; 61 | int m_size; 62 | void Add(const char *name, int count); 63 | 64 | std::string m_fullpath; 65 | std::shared_ptr m_fdata; 66 | 67 | mutable std::unordered_set m_imported; 68 | }; 69 | } -------------------------------------------------------------------------------- /Detectron2/MetaArch/ProposalNetwork.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ProposalNetwork.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | ProposalNetworkImpl::ProposalNetworkImpl(CfgNode &cfg) : MetaArchImpl(cfg) { 11 | } 12 | 13 | std::tuple ProposalNetworkImpl::forward( 14 | const std::vector &batched_inputs) { 15 | auto images = preprocess_image(batched_inputs, m_backbone->size_divisibility()); 16 | auto features = m_backbone(images.tensor()); 17 | 18 | InstancesList gt_instances = 
get_gt_instances(batched_inputs); 19 | 20 | InstancesList proposals; TensorMap proposal_losses; 21 | tie(proposals, proposal_losses) = m_proposal_generator(images, features, gt_instances); 22 | 23 | // In training, the proposals are not useful at all but we generate them anyway. 24 | // This makes RPN-only models about 5% slower. 25 | if (is_training()) { 26 | return { InstancesList{}, proposal_losses }; 27 | } 28 | 29 | return { _postprocess(proposals, batched_inputs, images.image_sizes()), {} }; 30 | } -------------------------------------------------------------------------------- /Detectron2/MetaArch/ProposalNetwork.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "MetaArch.h" 4 | #include 5 | #include 6 | 7 | namespace Detectron2 8 | { 9 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 10 | // converted from modeling/meta_arch/rcnn.py 11 | 12 | // A meta architecture that only predicts object proposals. 13 | class ProposalNetworkImpl : public MetaArchImpl { 14 | public: 15 | ProposalNetworkImpl(CfgNode &cfg); 16 | 17 | /** 18 | Args: 19 | Same as in :class:`GeneralizedRCNN.forward` 20 | 21 | Returns: 22 | list[dict]: 23 | Each dict is the output for one input image. 24 | The dict contains one key "proposals" whose value is a 25 | :class:`Instances` with keys "proposal_boxes" and "objectness_logits". 
26 | */ 27 | virtual std::tuple 28 | forward(const std::vector &batched_inputs) override; 29 | }; 30 | TORCH_MODULE(ProposalNetwork); 31 | } -------------------------------------------------------------------------------- /Detectron2/MetaArch/SemanticSegmentor.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "SemanticSegmentor.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | SemanticSegmentorImpl::SemanticSegmentorImpl(CfgNode &cfg) : MetaArchImpl(cfg) { 13 | m_sem_seg_head = make_shared(cfg, m_backbone->output_shapes()); 14 | register_module("sem_seg_head", m_sem_seg_head); 15 | } 16 | 17 | void SemanticSegmentorImpl::initialize(const ModelImporter &importer, const std::string &prefix) { 18 | MetaArchImpl::initialize(importer, prefix); 19 | m_sem_seg_head->initialize(importer, "sem_seg_head"); 20 | } 21 | 22 | std::tuple SemanticSegmentorImpl::forward( 23 | const std::vector &batched_inputs) { 24 | auto images = preprocess_image(batched_inputs, m_backbone->size_divisibility()); 25 | auto features = m_backbone(images.tensor()); 26 | 27 | auto gt_sem_seg = get_gt_sem_seg(batched_inputs, m_sem_seg_head->ignore_value()); 28 | Tensor results; 29 | TensorMap losses; 30 | tie(results, losses) = m_sem_seg_head(features, gt_sem_seg); 31 | 32 | if (is_training()) { 33 | return { InstancesList{}, losses }; 34 | } 35 | 36 | int count = batched_inputs.size(); 37 | assert(results.size(0) == count); 38 | auto &image_sizes = images.image_sizes(); 39 | assert(image_sizes.size() == count); 40 | 41 | InstancesList processed_results; 42 | for (int i = 0; i < count; i++) { 43 | auto result = results[i]; 44 | auto &input_per_image = batched_inputs[i]; 45 | auto &image_size = image_sizes[i]; 46 | 47 | int height = 
input_per_image.height ? *input_per_image.height : image_size.height; 48 | int width = input_per_image.width ? *input_per_image.width : image_size.width; 49 | 50 | auto sem_seg_r = PostProcessing::sem_seg_postprocess(result, image_size, height, width); 51 | auto m = make_shared(ImageSize{ height, width }); 52 | m->set("sem_seg", sem_seg_r); 53 | processed_results.push_back(m); 54 | } 55 | return { processed_results, {} }; 56 | } 57 | -------------------------------------------------------------------------------- /Detectron2/MetaArch/SemanticSegmentor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "MetaArch.h" 4 | #include 5 | 6 | namespace Detectron2 7 | { 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | // converted from modeling/meta_arch/semantic_seg.py 10 | 11 | // semantic segmentation heads, which make semantic segmentation predictions from feature maps. 12 | // Main class for semantic segmentation architectures. 13 | class SemanticSegmentorImpl : public MetaArchImpl { 14 | public: 15 | SemanticSegmentorImpl(CfgNode &cfg); 16 | 17 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 18 | 19 | /** 20 | Args: 21 | batched_inputs: a list, batched outputs of :class:`DatasetMapper`. 22 | Each item in the list contains the inputs for one image. 23 | 24 | For now, each item in the list is a dict that contains: 25 | 26 | * "image": Tensor, image in (C, H, W) format. 27 | * "sem_seg": semantic segmentation ground truth 28 | * Other information that's included in the original dicts, such as: 29 | "height", "width" (int): the output resolution of the model, used in inference. 30 | See :meth:`postprocess` for details. 31 | 32 | Returns: 33 | list[dict]: 34 | Each dict is the output for one input image. 
35 | The dict contains one key "sem_seg" whose value is a 36 | Tensor that represents the 37 | per-pixel segmentation predicted by the head. 38 | The prediction has shape KxHxW that represents the logits of 39 | each class for each pixel. 40 | */ 41 | virtual std::tuple 42 | forward(const std::vector &batched_inputs) override; 43 | 44 | private: 45 | SemSegFPNHead m_sem_seg_head{ nullptr }; 46 | }; 47 | TORCH_MODULE(SemanticSegmentor); 48 | } -------------------------------------------------------------------------------- /Detectron2/Modules/Backbone.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/backbone.py 9 | 10 | // Abstract base class for network backbones. 11 | class BackboneImpl : public torch::nn::Module { 12 | public: 13 | virtual ~BackboneImpl() {} 14 | 15 | const ShapeSpec::Map &output_shapes() const { 16 | return m_output_shapes; 17 | } 18 | 19 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) = 0; 20 | 21 | /** 22 | Subclasses must override this method, but adhere to the same return type. 23 | 24 | Returns: 25 | dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor 26 | */ 27 | virtual TensorMap forward(torch::Tensor x) = 0; 28 | 29 | /** 30 | Some backbones require the input height and width to be divisible by a 31 | specific integer. This is typically true for encoder / decoder type networks 32 | with lateral connection (e.g., FPN) for which feature maps need to match 33 | dimension in the "bottom up" and "top down" paths. Set to 0 if no specific 34 | input size divisibility is required.
35 | */ 36 | virtual int size_divisibility() { 37 | return 0; 38 | } 39 | 40 | protected: 41 | ShapeSpec::Map m_output_shapes; 42 | }; 43 | TORCH_MODULE(Backbone); 44 | } -------------------------------------------------------------------------------- /Detectron2/Modules/BatchNorm/BatchNorm.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "BatchNorm.h" 3 | 4 | #include "BatchNorm2d.h" 5 | #include "FrozenBatchNorm2d.h" 6 | #include "GroupNorm.h" 7 | #include "NaiveSyncBatchNorm.h" 8 | 9 | using namespace std; 10 | using namespace torch; 11 | using namespace Detectron2; 12 | 13 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 14 | 15 | BatchNorm::Type BatchNorm::GetType(const std::string &name) { 16 | static map lookup_table{ 17 | { "", kNone }, 18 | { "BN", kBN }, 19 | { "SyncBN", kSyncBN }, 20 | { "FrozenBN", kFrozenBN }, 21 | { "GN", kGN }, 22 | { "nnSyncBN", nnSyncBN }, 23 | { "naiveSyncBN", naiveSyncBN } 24 | }; 25 | auto iter = lookup_table.find(name); 26 | assert(iter != lookup_table.end()); 27 | return iter->second; 28 | } 29 | 30 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 31 | 32 | BatchNorm::BatchNorm(BatchNorm::Type type, int out_channels) { 33 | switch (type) { 34 | case kNone: break; 35 | case kBN: reset(new BatchNorm2dImpl(out_channels)); break; 36 | case kFrozenBN: reset(new FrozenBatchNorm2dImpl(out_channels)); break; 37 | case naiveSyncBN: reset(new NaiveSyncBatchNormImpl(out_channels)); break; 38 | case kGN: reset(new GroupNormImpl(nn::GroupNormOptions(32, out_channels))); break; 39 | 40 | case kSyncBN: //"SyncBN": NaiveSyncBatchNorm if TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, 41 | case nnSyncBN: // return nn.SyncBatchNorm(out_channels); 42 | default: 43 | assert(false); 44 | } 45 | } 46 | 47 | void 
BatchNormImpl::initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill) { 48 | if (importer.HasData()) { 49 | importer.Initialize(prefix + ".weight", get_weight()); 50 | importer.Initialize(prefix + ".bias", get_bias()); 51 | if (get_running_mean()) { 52 | importer.Initialize(prefix + ".running_mean", *get_running_mean()); 53 | importer.Initialize(prefix + ".running_var", *get_running_var()); 54 | } 55 | } 56 | else { 57 | ModelImporter::FillTensor(get_weight(), fill); 58 | ModelImporter::FillTensor(get_bias(), ModelImporter::kZeroFill); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /Detectron2/Modules/BatchNorm/BatchNorm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/batch_norm.py 9 | 10 | class BatchNormImpl { 11 | public: 12 | virtual ~BatchNormImpl() {} 13 | 14 | virtual torch::Tensor &get_weight() = 0; 15 | virtual torch::Tensor &get_bias() = 0; 16 | virtual torch::Tensor *get_running_mean() = 0; 17 | virtual torch::Tensor *get_running_var() = 0; 18 | 19 | virtual void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill); 20 | virtual torch::Tensor forward(torch::Tensor x) = 0; 21 | }; 22 | 23 | class BatchNorm : public std::shared_ptr { 24 | public: 25 | enum Type { 26 | kNone, 27 | 28 | kBN, // BatchNorm2d, Fixed in https://github.com/pytorch/pytorch/pull/36382 29 | kSyncBN, // NaiveSyncBatchNorm if TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm, 30 | kFrozenBN, // FrozenBatchNorm2d, 31 | kGN, // lambda channels : nn.GroupNorm(32, channels), 32 | 33 | // for debugging: 34 | nnSyncBN, // nn.SyncBatchNorm, 35 | naiveSyncBN // NaiveSyncBatchNorm, 36 | }; 37 | 38 | static Type
GetType(const std::string &name); 39 | 40 | public: 41 | /** 42 | Args: 43 | norm (str or callable): either one of BN, SyncBN, FrozenBN, GN; 44 | or a callable that takes a channel number and returns 45 | the normalization layer as a nn.Module. 46 | 47 | Returns: 48 | nn.Module or None: the normalization layer 49 | */ 50 | BatchNorm(std::nullptr_t) {} 51 | BatchNorm(Type type, int out_channels); 52 | 53 | template 54 | std::shared_ptr as() { 55 | return std::dynamic_pointer_cast(*this); 56 | } 57 | ModulePtr asModule() { return as(); } 58 | 59 | torch::Tensor operator()(torch::Tensor x) { 60 | return get()->forward(x); 61 | } 62 | }; 63 | } 64 | -------------------------------------------------------------------------------- /Detectron2/Modules/BatchNorm/BatchNorm2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "BatchNorm.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // This is to overwrite torch::nn::BatchNorm2dImpl and torch::nn::BatchNorm2d to have BatchNormImpl interface. 
9 | 10 | class BatchNorm2dImpl : public torch::nn::BatchNorm2dImpl, public BatchNormImpl { 11 | public: 12 | BatchNorm2dImpl(const torch::nn::BatchNorm2dOptions &options) : torch::nn::BatchNorm2dImpl(options) {} 13 | 14 | // implementing BatchNormImpl 15 | virtual torch::Tensor &get_weight() override { return weight; } 16 | virtual torch::Tensor &get_bias() override { return bias; } 17 | virtual torch::Tensor *get_running_mean() override { return &running_mean; } 18 | virtual torch::Tensor *get_running_var() override { return &running_var; } 19 | virtual torch::Tensor forward(torch::Tensor x) override { 20 | return torch::nn::BatchNorm2dImpl::forward(x); 21 | } 22 | }; 23 | TORCH_MODULE(BatchNorm2d); 24 | } 25 | -------------------------------------------------------------------------------- /Detectron2/Modules/BatchNorm/FrozenBatchNorm2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "BatchNorm.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/batch_norm.py 9 | 10 | /** 11 | BatchNorm2d where the batch statistics and the affine parameters are fixed. 12 | 13 | It contains non-trainable buffers called 14 | "weight" and "bias", "running_mean", "running_var", 15 | initialized to perform identity transformation. 16 | 17 | The pre-trained backbone models from Caffe2 only contain "weight" and "bias", 18 | which are computed from the original four parameters of BN. 19 | The affine transform `x * weight + bias` will perform the equivalent 20 | computation of `(x - running_mean) / sqrt(running_var) * weight + bias`. 21 | When loading a backbone model from Caffe2, "running_mean" and "running_var" 22 | will be left unchanged as identity transformation. 23 | 24 | Other pre-trained backbone models may contain all 4 parameters. 
25 | 26 | The forward is implemented by `F.batch_norm(..., training=False)`. 27 | */ 28 | class FrozenBatchNorm2dImpl : public torch::nn::Module, public BatchNormImpl { 29 | public: 30 | /** 31 | Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm. 32 | 33 | Args: 34 | module (torch.nn.Module): 35 | 36 | Returns: 37 | If module is BatchNorm/SyncBatchNorm, returns a new module. 38 | Otherwise, in-place convert module and return it. 39 | 40 | Similar to convert_sync_batchnorm in 41 | https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py 42 | */ 43 | static ModulePtr convert_frozen_batchnorm(const ModulePtr &mod); 44 | 45 | public: 46 | FrozenBatchNorm2dImpl(int num_features, double eps = 1e-5); 47 | 48 | std::string toString() const; 49 | 50 | // implementing BatchNormImpl 51 | virtual torch::Tensor &get_weight() override { return m_weight; } 52 | virtual torch::Tensor &get_bias() override { return m_bias; } 53 | virtual torch::Tensor *get_running_mean() override { return &m_running_mean; } 54 | virtual torch::Tensor *get_running_var() override { return &m_running_var; } 55 | virtual torch::Tensor forward(torch::Tensor x) override; 56 | 57 | private: 58 | int m_num_features; 59 | double m_eps; 60 | 61 | torch::Tensor m_weight; 62 | torch::Tensor m_bias; 63 | torch::Tensor m_running_mean; 64 | torch::Tensor m_running_var; 65 | }; 66 | TORCH_MODULE(FrozenBatchNorm2d); 67 | } 68 | -------------------------------------------------------------------------------- /Detectron2/Modules/BatchNorm/GroupNorm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "BatchNorm.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // This is to overwrite torch::nn::GroupNormImpl and torch::nn::GroupNorm to have BatchNormImpl interface. 
9 | 10 | class GroupNormImpl : public torch::nn::GroupNormImpl, public BatchNormImpl { 11 | public: 12 | GroupNormImpl(const torch::nn::GroupNormOptions &options) : torch::nn::GroupNormImpl(options) {} 13 | 14 | // implementing BatchNormImpl 15 | virtual torch::Tensor &get_weight() override { return weight; } 16 | virtual torch::Tensor &get_bias() override { return bias; } 17 | virtual torch::Tensor *get_running_mean() override { return nullptr; } 18 | virtual torch::Tensor *get_running_var() override { return nullptr; } 19 | virtual torch::Tensor forward(torch::Tensor x) override { 20 | return torch::nn::GroupNormImpl::forward(x); 21 | } 22 | }; 23 | TORCH_MODULE(GroupNorm); 24 | } 25 | -------------------------------------------------------------------------------- /Detectron2/Modules/BatchNorm/NaiveSyncBatchNorm.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "BatchNorm2d.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/batch_norm.py 9 | 10 | /** 11 | In PyTorch<=1.5, `nn.SyncBatchNorm` has incorrect gradient 12 | when the batch size on each worker is different. 13 | (e.g., when scale augmentation is used, or when it is applied to mask head). 14 | 15 | This is a slower but correct alternative to `nn.SyncBatchNorm`. 16 | 17 | Note: 18 | There isn't a single definition of Sync BatchNorm. 19 | 20 | When ``stats_mode==""``, this module computes overall statistics by using 21 | statistics of each worker with equal weight. The result is true statistics 22 | of all samples (as if they are all on one worker) only when all workers 23 | have the same (N, H, W). This mode does not support inputs with zero batch size. 24 | 25 | When ``stats_mode=="N"``, this module computes overall statistics by weighting 26 | the statistics of each worker by their ``N``. 
The result is true statistics 27 | of all samples (as if they are all on one worker) only when all workers 28 | have the same (H, W). It is slower than ``stats_mode==""``. 29 | 30 | Even though the result of this module may not be the true statistics of all samples, 31 | it may still be reasonable because it might be preferable to assign equal weights 32 | to all workers, regardless of their (H, W) dimension, instead of putting larger weight 33 | on larger images. From preliminary experiments, little difference is found between such 34 | a simplified implementation and an accurate computation of overall mean & variance. 35 | */ 36 | class NaiveSyncBatchNormImpl : public BatchNorm2dImpl { 37 | public: 38 | NaiveSyncBatchNormImpl(const torch::nn::BatchNorm2dOptions &options, const std::string &stats_mode = ""); 39 | 40 | virtual torch::Tensor forward(torch::Tensor x) override; 41 | 42 | private: 43 | std::string m_stats_mode; 44 | }; 45 | TORCH_MODULE(NaiveSyncBatchNorm); 46 | } 47 | -------------------------------------------------------------------------------- /Detectron2/Modules/Conv/ConvBn2d.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ConvBn2d.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | ConvBn2dImpl::ConvBn2dImpl(const torch::nn::Conv2dOptions &options, BatchNorm::Type norm, bool activation) 11 | : m_activation(activation) { 12 | m_conv = torch::nn::Conv2d(options); 13 | register_module("conv", m_conv); 14 | 15 | m_bn = BatchNorm(norm, options.out_channels()); 16 | if (m_bn) { 17 | register_module("bn", m_bn.asModule()); 18 | } 19 | } 20 | 21 | void ConvBn2dImpl::initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill) { 22 | importer.Import(prefix, m_conv, fill); 23
| if (m_bn) { 24 | m_bn->initialize(importer, prefix + ".norm", fill); 25 | } 26 | } 27 | 28 | torch::Tensor ConvBn2dImpl::forward(torch::Tensor x) { 29 | x = m_conv(x); 30 | if (m_bn) { 31 | x = m_bn(x); 32 | } 33 | if (m_activation) { 34 | x = relu(x); 35 | } 36 | return x; 37 | } 38 | -------------------------------------------------------------------------------- /Detectron2/Modules/Conv/ConvBn2d.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/wrappers.py 9 | 10 | // A wrapper around :class:`torch.nn.Conv2d` to support empty inputs and more features. 11 | class ConvBn2dImpl : public torch::nn::Module { 12 | public: 13 | /** 14 | Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`: 15 | 16 | Args: 17 | norm (nn.Module, optional): a normalization layer 18 | activation (callable(Tensor) -> Tensor): a callable activation function 19 | 20 | It assumes that norm layer is used before activation. 
21 | */ 22 | ConvBn2dImpl(const torch::nn::Conv2dOptions &options, BatchNorm::Type norm = BatchNorm::kNone, 23 | bool activation = false); 24 | void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill); 25 | 26 | torch::Tensor forward(torch::Tensor x); 27 | 28 | public: 29 | torch::nn::Conv2d m_conv{ nullptr }; 30 | BatchNorm m_bn{ nullptr }; 31 | bool m_activation; // relu 32 | }; 33 | TORCH_MODULE(ConvBn2d); 34 | } -------------------------------------------------------------------------------- /Detectron2/Modules/Conv/DeformConv.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/deform_conv.py 9 | 10 | class DeformConvImpl : public torch::nn::Module { 11 | public: 12 | /** 13 | Deformable convolution from :paper:`deformconv`. 14 | 15 | Arguments are similar to :class:`Conv2D`. Extra arguments: 16 | 17 | Args: 18 | deformable_groups (int): number of groups used in deformable convolution. 
19 | norm (nn.Module, optional): a normalization layer 20 | activation (callable(Tensor) -> Tensor): a callable activation function 21 | */ 22 | DeformConvImpl(int in_channels, int out_channels, int kernel_size, int stride, int padding, 23 | int dilation, int groups, int deformable_groups, bool bias, BatchNorm::Type norm, bool activation = false); 24 | void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill); 25 | 26 | torch::Tensor forward(torch::Tensor x, torch::Tensor offset); 27 | 28 | std::string extra_repr() const; 29 | 30 | public: 31 | int m_in_channels; 32 | int m_out_channels; 33 | std::vector m_kernel_size; 34 | int m_stride; 35 | int m_padding; 36 | int m_dilation; 37 | int m_groups; 38 | int m_deformable_groups; 39 | bool m_bias; 40 | BatchNorm m_bn; 41 | bool m_activation; // relu 42 | 43 | torch::Tensor m_weight; 44 | }; 45 | TORCH_MODULE(DeformConv); 46 | } -------------------------------------------------------------------------------- /Detectron2/Modules/Conv/ModulatedDeformConv.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/deform_conv.py 9 | 10 | class ModulatedDeformConvImpl : public torch::nn::Module { 11 | public: 12 | /** 13 | Modulated deformable convolution from :paper:`deformconv2`. 14 | 15 | Arguments are similar to :class:`Conv2D`. Extra arguments: 16 | 17 | Args: 18 | deformable_groups (int): number of groups used in deformable convolution. 
19 | norm (nn.Module, optional): a normalization layer 20 | activation (callable(Tensor) -> Tensor): a callable activation function 21 | */ 22 | ModulatedDeformConvImpl(int in_channels, int out_channels, int kernel_size, int stride, int padding, 23 | int dilation, int groups, int deformable_groups, bool bias, BatchNorm::Type norm, bool activation = false); 24 | void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill); 25 | 26 | torch::Tensor forward(torch::Tensor x, torch::Tensor offset, torch::Tensor mask); 27 | 28 | std::string extra_repr() const; 29 | 30 | public: 31 | int m_in_channels; 32 | int m_out_channels; 33 | std::vector m_kernel_size; 34 | int m_stride; 35 | int m_padding; 36 | int m_dilation; 37 | int m_groups; 38 | int m_deformable_groups; 39 | bool m_with_bias; 40 | BatchNorm m_bn; 41 | bool m_activation; // relu 42 | 43 | torch::Tensor m_weight; 44 | torch::Tensor m_bias; 45 | }; 46 | TORCH_MODULE(ModulatedDeformConv); 47 | } -------------------------------------------------------------------------------- /Detectron2/Modules/FPN/LastLevelMaxPool.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "LastLevelMaxPool.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | LastLevelMaxPoolImpl::LastLevelMaxPoolImpl() { 11 | m_num_levels = 1; 12 | m_in_feature = "p5"; 13 | } 14 | 15 | void LastLevelMaxPoolImpl::initialize(const ModelImporter &importer, const std::string &prefix) { 16 | // do nothing 17 | } 18 | 19 | TensorVec LastLevelMaxPoolImpl::forward(torch::Tensor x) { 20 | torch::nn::functional::MaxPool2dFuncOptions options(1); 21 | return { torch::nn::functional::max_pool2d(x, options.stride(2).padding(0)) }; 22 | } 23 | 
-------------------------------------------------------------------------------- /Detectron2/Modules/FPN/LastLevelMaxPool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "TopBlock.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/fpn.py 9 | 10 | // This module is used in the original FPN to generate a downsampled P6 feature from P5. 11 | class LastLevelMaxPoolImpl : public TopBlockImpl { 12 | public: 13 | LastLevelMaxPoolImpl(); 14 | 15 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 16 | virtual TensorVec forward(torch::Tensor x) override; 17 | }; 18 | } -------------------------------------------------------------------------------- /Detectron2/Modules/FPN/LastLevelP6P7.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "LastLevelP6P7.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | LastLevelP6P7Impl::LastLevelP6P7Impl(int64_t in_channels, int64_t out_channels, const char *in_feature) : 11 | m_p6(nn::Conv2dOptions(in_channels, out_channels, 3).stride(2).padding(1)), 12 | m_p7(nn::Conv2dOptions(out_channels, out_channels, 3).stride(2).padding(1)) 13 | { 14 | register_module("p6", m_p6); 15 | register_module("p7", m_p7); 16 | 17 | m_num_levels = 2; 18 | m_in_feature = in_feature; 19 | } 20 | 21 | void LastLevelP6P7Impl::initialize(const ModelImporter &importer, const std::string &prefix) { 22 | importer.Import(prefix + ".p6", m_p6, ModelImporter::kCaffe2XavierFill); 23 | importer.Import(prefix + ".p7", m_p7, ModelImporter::kCaffe2XavierFill); 24 | } 25 | 26 | 
TensorVec LastLevelP6P7Impl::forward(torch::Tensor c5) { 27 | auto x6 = m_p6(c5); 28 | auto x7 = m_p7(relu(x6)); 29 | return { x6, x7 }; 30 | } 31 | -------------------------------------------------------------------------------- /Detectron2/Modules/FPN/LastLevelP6P7.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "TopBlock.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/fpn.py 9 | 10 | // This module is used in RetinaNet to generate extra layers, P6 and P7 from C5 feature. 11 | class LastLevelP6P7Impl : public TopBlockImpl { 12 | public: 13 | LastLevelP6P7Impl(int64_t in_channels, int64_t out_channels, const char *in_feature); 14 | 15 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 16 | virtual TensorVec forward(torch::Tensor c5) override; 17 | 18 | private: 19 | torch::nn::Conv2d m_p6; 20 | torch::nn::Conv2d m_p7; 21 | }; 22 | } -------------------------------------------------------------------------------- /Detectron2/Modules/FPN/SemSegFPNHead.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | namespace Detectron2 8 | { 9 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 10 | // converted from modeling/meta_arch/semantic_seg.py 11 | 12 | /** 13 | A semantic segmentation head described in :paper:`PanopticFPN`. 14 | It takes FPN features as input and merges information from all 15 | levels of the FPN into single output. 
16 | */ 17 | class SemSegFPNHeadImpl : public torch::nn::Module { 18 | public: 19 | SemSegFPNHeadImpl(CfgNode &cfg, const ShapeSpec::Map &input_shapes); 20 | 21 | void initialize(const ModelImporter &importer, const std::string &prefix); 22 | 23 | int ignore_value() const { return m_ignore_value; } 24 | 25 | /** 26 | Returns: 27 | In training, returns (None, dict of losses) 28 | In inference, returns (CxHxW logits, {}) 29 | */ 30 | std::tuple forward(const TensorMap &features, const torch::Tensor &targets); 31 | 32 | private: 33 | std::vector m_in_features; 34 | int m_ignore_value; // Label in the semantic segmentation ground truth that is ignored, i.e., no loss is 35 | // calculated for the corresponding pixel. 36 | int m_common_stride; // Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride. 37 | float m_loss_weight; 38 | torch::nn::functional::InterpolateFuncOptions m_interpolate_options; 39 | 40 | std::vector m_scale_heads; 41 | ConvBn2d m_predictor{ nullptr }; 42 | 43 | torch::Tensor layers(const TensorMap &features); 44 | TensorMap losses(torch::Tensor predictions, torch::Tensor targets); 45 | }; 46 | TORCH_MODULE(SemSegFPNHead); 47 | } -------------------------------------------------------------------------------- /Detectron2/Modules/FPN/TopBlock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/fpn.py 9 | 10 | class TopBlockImpl : public torch::nn::Module { 11 | public: 12 | virtual ~TopBlockImpl() {} 13 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) = 0; 14 | virtual TensorVec forward(torch::Tensor x) = 0; 15 | 16 | int num_levels() const { return m_num_levels; } 17 | std::string in_feature() const { return m_in_feature; } 18 | 19 |
protected: 20 | int m_num_levels; // the number of extra FPN levels added by this block 21 | std::string m_in_feature; // a string representing its input feature (e.g., p5). 22 | }; 23 | TORCH_MODULE(TopBlock); 24 | } -------------------------------------------------------------------------------- /Detectron2/Modules/Opeartors/DeformConvOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/deform_conv.py 9 | 10 | class _DeformConv : public torch::autograd::Function<_DeformConv> { 11 | public: 12 | static torch::autograd::variable_list forward(torch::autograd::AutogradContext *ctx, 13 | torch::Tensor input, torch::Tensor offset, torch::Tensor weight, 14 | int64_t stride = 1, int64_t padding = 0, int64_t dilation = 1, int64_t groups = 1, 15 | int64_t deformable_groups = 1, int64_t im2col_step = 64); 16 | 17 | //! @once_differentiable 18 | static torch::autograd::variable_list backward(torch::autograd::AutogradContext *ctx, 19 | torch::autograd::variable_list grad_output); 20 | 21 | private: 22 | static std::vector _output_size(torch::Tensor input, torch::Tensor weight, 23 | const std::vector &stride, const std::vector &padding, 24 | const std::vector &dilation); 25 | 26 | //! @lru_cache(maxsize=128) 27 | /** 28 | Calculate proper im2col step size, which should be divisible by input_size and not larger 29 | than prefer_size. Meanwhile the step size should be as large as possible to be more 30 | efficient. So we choose the largest one among all divisors of input_size which are smaller 31 | than prefer_size. 32 | :param input_size: input batch size . 33 | :param default_size: default preferred im2col step size. 34 | :return: the largest proper step size. 
35 | */ 36 | static int64_t _cal_im2col_step(int64_t input_size, int64_t default_size); 37 | }; 38 | using deform_conv = _DeformConv; 39 | } -------------------------------------------------------------------------------- /Detectron2/Modules/Opeartors/ModulatedDeformConvOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/deform_conv.py 9 | 10 | class _ModulatedDeformConv : public torch::autograd::Function<_ModulatedDeformConv> { 11 | public: 12 | static torch::autograd::variable_list forward(torch::autograd::AutogradContext *ctx, 13 | torch::Tensor input, torch::Tensor offset, torch::Tensor mask, torch::Tensor weight, torch::Tensor bias, 14 | int64_t stride = 1, int64_t padding = 0, int64_t dilation = 1, int64_t groups = 1, 15 | int64_t deformable_groups = 1); 16 | 17 | static torch::autograd::variable_list backward(torch::autograd::AutogradContext *ctx, 18 | torch::autograd::variable_list grad_output); 19 | 20 | private: 21 | static int _infer_shape(torch::autograd::AutogradContext *ctx, torch::Tensor input, torch::Tensor weight); 22 | }; 23 | using modulated_deform_conv = _ModulatedDeformConv; 24 | } -------------------------------------------------------------------------------- /Detectron2/Modules/Opeartors/NewEmptyTensorOp.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "NewEmptyTensorOp.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | torch::autograd::variable_list _NewEmptyTensorOp::forward(torch::autograd::AutogradContext *ctx, 11 | const torch::Tensor &x, 
IntArrayRef new_shape) { 12 | ctx->saved_data["shape"] = x.sizes(); 13 | return { x.new_empty(new_shape) }; 14 | } 15 | 16 | torch::autograd::variable_list _NewEmptyTensorOp::backward(torch::autograd::AutogradContext *ctx, 17 | torch::autograd::variable_list grad) { 18 | auto shape = ctx->saved_data["shape"].toIntVector(); 19 | return { grad[0].new_empty(shape), Tensor() }; 20 | } 21 | -------------------------------------------------------------------------------- /Detectron2/Modules/Opeartors/NewEmptyTensorOp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/wrappers.py 9 | 10 | class _NewEmptyTensorOp : public torch::autograd::Function<_NewEmptyTensorOp> { 11 | public: 12 | static torch::autograd::variable_list forward(torch::autograd::AutogradContext *ctx, 13 | const torch::Tensor &x, torch::IntArrayRef new_shape); 14 | 15 | static torch::autograd::variable_list backward(torch::autograd::AutogradContext *ctx, 16 | torch::autograd::variable_list grad); 17 | }; 18 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIHeads/FastRCNNConvFCHead.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace Detectron2 7 | { 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | // converted from modeling/roi_heads/box_head.py 10 | 11 | // FastRCNNConvFCHead: makes box predictions from per-region features. 12 | class FastRCNNConvFCHeadImpl : public torch::nn::Module { 13 | public: 14 | // input_shape: shape of the input feature. 
15 | FastRCNNConvFCHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape); 16 | void initialize(const ModelImporter &importer, const std::string &prefix); 17 | 18 | // ShapeSpec: the output feature shape 19 | ShapeSpec output_shape() const { 20 | return m_output_size; 21 | } 22 | 23 | torch::Tensor forward(torch::Tensor x); 24 | 25 | private: 26 | ShapeSpec m_output_size; 27 | std::vector m_conv_norm_relus; 28 | std::vector m_fcs; 29 | }; 30 | TORCH_MODULE(FastRCNNConvFCHead); 31 | using BoxHead = FastRCNNConvFCHead; 32 | 33 | // Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`. 34 | BoxHead build_box_head(CfgNode &cfg, const ShapeSpec &input_shape); 35 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIHeads/KRCNNConvDeconvUpsampleHead.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "KRCNNConvDeconvUpsampleHead.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | KRCNNConvDeconvUpsampleHeadImpl::KRCNNConvDeconvUpsampleHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape) : 13 | BaseKeypointRCNNHeadImpl(cfg) 14 | { 15 | auto conv_dims = cfg["MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS"].as>(); 16 | 17 | // default up_scale to 2 (this can be made an option) 18 | int up_scale = 2; 19 | int in_channels = input_shape.channels; 20 | 21 | for (int idx = 0; idx < conv_dims.size(); idx++) { 22 | auto &layer_channels = conv_dims[idx]; 23 | auto module = ConvBn2d(nn::Conv2dOptions(in_channels, layer_channels, 3).stride(1).padding(1)); 24 | register_module(FormatString("conv_fcn%d", idx + 1), module); 25 | m_blocks.push_back(module); 26 | in_channels = layer_channels; 27 | } 28 | 29 | int deconv_kernel = 4; 30 | m_score_lowres = 
nn::ConvTranspose2d(nn::ConvTranspose2dOptions(in_channels, m_num_keypoints, deconv_kernel) 31 | .stride(2).padding(deconv_kernel / 2 - 1)); 32 | register_module("score_lowres", m_score_lowres); 33 | m_up_scale = up_scale; 34 | } 35 | 36 | void KRCNNConvDeconvUpsampleHeadImpl::initialize(const ModelImporter &importer, const std::string &prefix) { 37 | for (int i = 0; i < m_blocks.size(); i++) { 38 | m_blocks[i]->initialize(importer, prefix + FormatString(".conv_fcn%d", i + 1), ModelImporter::kCaffe2MSRAFill); 39 | } 40 | importer.Import(prefix + ".score_lowres", m_score_lowres, ModelImporter::kCaffe2MSRAFill); 41 | } 42 | 43 | torch::Tensor KRCNNConvDeconvUpsampleHeadImpl::layers(torch::Tensor x) { 44 | for (auto &layer : m_blocks) { 45 | x = relu(layer(x)); 46 | } 47 | x = m_score_lowres(x); 48 | auto options = nn::functional::InterpolateFuncOptions() 49 | .scale_factor(vector{ (double)m_up_scale, (double)m_up_scale }) 50 | .mode(torch::kBilinear).align_corners(false); 51 | x = Keypoints::interpolate(x, options); 52 | return x; 53 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIHeads/KRCNNConvDeconvUpsampleHead.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "BaseKeypointRCNNHead.h" 4 | #include 5 | 6 | namespace Detectron2 7 | { 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | // converted from modeling/roi_heads/keypoint_head.py 10 | 11 | /** 12 | A standard keypoint head containing a series of 3x3 convs, followed by 13 | a transpose convolution and bilinear interpolation for upsampling. 14 | */ 15 | class KRCNNConvDeconvUpsampleHeadImpl : public BaseKeypointRCNNHeadImpl { 16 | public: 17 | /** 18 | NOTE: this interface is experimental. 
19 | 20 | Args: 21 | input_shape (ShapeSpec): shape of the input feature 22 | conv_dims: an iterable of output channel counts for each conv in the head 23 | e.g. (512, 512, 512) for three convs outputting 512 channels. 24 | */ 25 | KRCNNConvDeconvUpsampleHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape); 26 | 27 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 28 | virtual torch::Tensor layers(torch::Tensor x) override; 29 | 30 | private: 31 | int m_up_scale; 32 | std::vector m_blocks; 33 | torch::nn::ConvTranspose2d m_score_lowres{ nullptr }; 34 | }; 35 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIHeads/MaskRCNNConvUpsampleHead.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "BaseMaskRCNNHead.h" 4 | #include 5 | 6 | namespace Detectron2 7 | { 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | // converted from modeling/roi_heads/mask_head.py 10 | 11 | /** 12 | A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`). 13 | Predictions are made with a final 1x1 conv layer. 14 | */ 15 | class MaskRCNNConvUpsampleHeadImpl : public BaseMaskRCNNHeadImpl { 16 | public: 17 | /** 18 | NOTE: this interface is experimental. 19 | 20 | Args: 21 | input_shape (ShapeSpec): shape of the input feature 22 | num_classes (int): the number of classes. 1 if using class agnostic prediction. 23 | conv_dims (list[int]): a list of N>0 integers representing the output dimensions 24 | of N-1 conv layers and the last upsample layer. 25 | conv_norm (str or callable): normalization for the conv layers. 26 | See :func:`detectron2.layers.get_norm` for supported types. 
27 | */ 28 | MaskRCNNConvUpsampleHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape); 29 | 30 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 31 | virtual torch::Tensor layers(torch::Tensor x) override; 32 | 33 | private: 34 | int m_num_classes; 35 | std::vector m_conv_norm_relus; 36 | torch::nn::ConvTranspose2d m_deconv{ nullptr }; 37 | ConvBn2d m_predictor{ nullptr }; 38 | }; 39 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIHeads/RROIHeads.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "StandardROIHeads.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/roi_heads/rotated_fast_rcnn.py 9 | 10 | /** 11 | This class is used by Rotated Fast R-CNN to detect rotated boxes. 12 | For now, it only supports box predictions but not mask or keypoints. 13 | */ 14 | class RROIHeadsImpl : public StandardROIHeadsImpl { 15 | public: 16 | RROIHeadsImpl(CfgNode &cfg); 17 | void Create(CfgNode &cfg, const ShapeSpec::Map &input_shapes); 18 | 19 | /** 20 | Prepare some proposals to be used to train the RROI heads. 21 | It performs box matching between `proposals` and `targets`, and assigns 22 | training labels to the proposals. 23 | It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes, 24 | with a fraction of positives that is no larger than `self.positive_sample_fraction. 25 | 26 | Args: 27 | See :meth:`StandardROIHeads.forward` 28 | 29 | Returns: 30 | list[Instances]: length `N` list of `Instances`s containing the proposals 31 | sampled for training. 
Each `Instances` has the following fields: 32 | - proposal_boxes: the rotated proposal boxes 33 | - gt_boxes: the ground-truth rotated boxes that the proposal is assigned to 34 | (this is only meaningful if the proposal has a label > 0; if label = 0 35 | then the ground-truth box is random) 36 | - gt_classes: the ground-truth classification label for each proposal 37 | */ 38 | InstancesList label_and_sample_proposals(InstancesList &proposals, const InstancesList &targets); 39 | 40 | private: 41 | virtual void _init_box_head(CfgNode &cfg, const ShapeSpec::Map &input_shapes) override; 42 | }; 43 | } 44 | -------------------------------------------------------------------------------- /Detectron2/Modules/ROIHeads/Res5ROIHeads.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include "ROIHeads.h" 5 | #include "FastRCNNOutputLayers.h" 6 | #include "BaseMaskRCNNHead.h" 7 | 8 | namespace Detectron2 9 | { 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | // converted from modeling/roi_heads/roi_heads.py 12 | 13 | // The ROIHeads in a typical "C4" R-CNN model, where the box and mask head share the cropping and the per-region 14 | // feature computation by a Res5 block. 15 | class Res5ROIHeadsImpl : public ROIHeadsImpl { 16 | public: 17 | Res5ROIHeadsImpl(CfgNode &cfg, const ShapeSpec::Map &input_shapes); 18 | 19 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 20 | 21 | virtual std::tuple forward(const ImageList &images, const TensorMap &features, 22 | InstancesList &proposals, const InstancesList &targets = {}) override; 23 | 24 | /** 25 | Use the given boxes in `instances` to produce other (non-box) per-ROI outputs. 26 | 27 | Args: 28 | features: same as in `forward()` 29 | instances (list[Instances]): instances to predict other outputs. 
Expect the keys 30 | "pred_boxes" and "pred_classes" to exist. 31 | 32 | Returns: 33 | instances (Instances): 34 | the same `Instances` object, with extra 35 | fields such as `pred_masks` or `pred_keypoints`. 36 | */ 37 | virtual InstancesList forward_with_given_boxes(const TensorMap &features, InstancesList &instances) override; 38 | 39 | private: 40 | std::vector m_in_features; 41 | bool m_mask_on; 42 | ROIPooler m_pooler{ nullptr }; 43 | FastRCNNOutputLayers m_box_predictor{ nullptr }; 44 | torch::nn::Sequential m_res5; 45 | MaskHead m_mask_head{ nullptr }; 46 | 47 | int _build_res5_block(CfgNode &cfg); 48 | torch::Tensor _shared_roi_transform(const TensorVec &features, const BoxesList &boxes); 49 | TensorVec select_features(const TensorMap &features); 50 | }; 51 | TORCH_MODULE(Res5ROIHeads); 52 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIPooler/ROIAlign.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ROIAlign.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | ROIAlignImpl::ROIAlignImpl(const Size2D &output_size, float spatial_scale, int sampling_ratio, bool aligned) : 13 | m_output_size(output_size), 14 | m_spatial_scale(spatial_scale), 15 | m_sampling_ratio(sampling_ratio), 16 | m_aligned(aligned) 17 | { 18 | } 19 | 20 | Tensor ROIAlignImpl::forward(const Tensor &input, const Tensor &rois) { 21 | assert(rois.dim() == 2 and rois.size(1) == 5); 22 | return detectron2::ROIAlign_forward(input, rois, m_spatial_scale, m_output_size.height, m_output_size.width, 23 | m_sampling_ratio, m_aligned); 24 | } 25 | 26 | std::string ROIAlignImpl::toString() const { 27 | std::string tmpstr = "ROIAlign("; 28 | tmpstr += "output_size=(" + 
torch::str(m_output_size.height) + ", " + torch::str(m_output_size.width) + ")"; 29 | tmpstr += ", spatial_scale=" + torch::str(m_spatial_scale); 30 | tmpstr += ", sampling_ratio=" + torch::str(m_sampling_ratio); 31 | tmpstr += ", aligned=" + torch::str(m_aligned); 32 | tmpstr += ")"; 33 | return tmpstr; 34 | } 35 | -------------------------------------------------------------------------------- /Detectron2/Modules/ROIPooler/ROIAlign.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ROIPoolerLevel.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/roi_align.py 9 | 10 | class ROIAlignImpl : public ROIPoolerLevelImpl { 11 | public: 12 | /** 13 | Args: 14 | output_size (tuple): h, w 15 | spatial_scale (float): scale the input boxes by this number 16 | sampling_ratio (int): number of inputs samples to take for each output 17 | sample. 0 to take samples densely. 18 | aligned (bool): if False, use the legacy implementation in 19 | Detectron. If True, align the results more perfectly. 20 | 21 | Note: 22 | The meaning of aligned=True: 23 | 24 | Given a continuous coordinate c, its two neighboring pixel indices (in our 25 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 26 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 27 | from the underlying signal at continuous coordinates 0.5 and 1.5). But the original 28 | roi_align (aligned=False) does not subtract the 0.5 when computing neighboring 29 | pixel indices and therefore it uses pixels with a slightly incorrect alignment 30 | (relative to our pixel model) when performing bilinear interpolation. 31 | 32 | With `aligned=True`, 33 | we first appropriately scale the ROI and then shift it by -0.5 34 | prior to calling roi_align. 
This produces the correct neighbors; see 35 | detectron2/tests/test_roi_align.py for verification. 36 | 37 | The difference does not make a difference to the model's performance if 38 | ROIAlign is used together with conv layers. 39 | */ 40 | ROIAlignImpl(const Size2D &output_size, float spatial_scale, int sampling_ratio, bool aligned = true); 41 | 42 | // input: NCHW images 43 | // rois : Bx5 boxes.First column is the index into N.The other 4 columns are xyxy. 44 | virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) override; 45 | virtual std::string toString() const override; 46 | 47 | private: 48 | Size2D m_output_size; 49 | float m_spatial_scale; 50 | int m_sampling_ratio; 51 | bool m_aligned; 52 | }; 53 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIPooler/ROIAlignRotated.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ROIAlignRotated.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | ROIAlignRotatedImpl::ROIAlignRotatedImpl(const Size2D &output_size, float spatial_scale, 13 | int sampling_ratio) : 14 | m_output_size(output_size), 15 | m_spatial_scale(spatial_scale), 16 | m_sampling_ratio(sampling_ratio) 17 | { 18 | } 19 | 20 | Tensor ROIAlignRotatedImpl::forward(const Tensor &input, const Tensor &rois) { 21 | assert(rois.dim() == 2 and rois.size(1) == 6); 22 | return detectron2::ROIAlignRotated_forward(input, rois, m_spatial_scale, 23 | m_output_size.height, m_output_size.width, m_sampling_ratio); 24 | } 25 | 26 | std::string ROIAlignRotatedImpl::toString() const { 27 | std::string tmpstr = "ROIAlign("; 28 | tmpstr += "output_size=(" + torch::str(m_output_size.height) + ", " + 
torch::str(m_output_size.width) + ")"; 29 | tmpstr += ", spatial_scale=" + torch::str(m_spatial_scale); 30 | tmpstr += ", sampling_ratio=" + torch::str(m_sampling_ratio); 31 | tmpstr += ")"; 32 | return tmpstr; 33 | } 34 | -------------------------------------------------------------------------------- /Detectron2/Modules/ROIPooler/ROIAlignRotated.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ROIPoolerLevel.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/roi_align_rotated.py 9 | 10 | class ROIAlignRotatedImpl : public ROIPoolerLevelImpl { 11 | public: 12 | /** 13 | Args: 14 | output_size (tuple): h, w 15 | spatial_scale (float): scale the input boxes by this number 16 | sampling_ratio (int): number of inputs samples to take for each output 17 | sample. 0 to take samples densely. 18 | 19 | Note: 20 | ROIAlignRotated supports continuous coordinate by default: 21 | Given a continuous coordinate c, its two neighboring pixel indices (in our 22 | pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example, 23 | c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled 24 | from the underlying signal at continuous coordinates 0.5 and 1.5). 25 | */ 26 | ROIAlignRotatedImpl(const Size2D &output_size, float spatial_scale, int sampling_ratio); 27 | 28 | // input: NCHW images 29 | // rois : Bx6 boxes.First column is the index into N. The other 5 columns are 30 | // (x_ctr, y_ctr, width, height, angle_degrees). 
31 | virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) override; 32 | virtual std::string toString() const override; 33 | 34 | private: 35 | Size2D m_output_size; 36 | float m_spatial_scale; 37 | int m_sampling_ratio; 38 | }; 39 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIPooler/ROIPool.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "ROIPoolerLevel.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from https://github.com/pytorch/vision torchvision/ops/roi_pool.py 9 | 10 | /** 11 | Performs Region of Interest (RoI) Pool operator described in Fast R-CNN 12 | 13 | Arguments: 14 | input (Tensor[N, C, H, W]): input tensor 15 | boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2) 16 | format where the regions will be taken from. If a single Tensor is passed, 17 | then the first column should contain the batch index. If a list of Tensors 18 | is passed, then each Tensor will correspond to the boxes for an element i 19 | in a batch 20 | output_size (int or Tuple[int, int]): the size of the output after the cropping 21 | is performed, as (height, width) 22 | spatial_scale (float): a scaling factor that maps the input coordinates to 23 | the box coordinates. 
Default: 1.0 24 | 25 | Returns: 26 | output (Tensor[K, C, output_size[0], output_size[1]]) 27 | */ 28 | torch::Tensor roi_pool(const torch::Tensor &input, const torch::Tensor &boxes, const Size2D &output_size, 29 | float spatial_scale = 1.0); 30 | torch::Tensor roi_pool(const torch::Tensor &input, const BoxesList &boxes, const Size2D &output_size, 31 | float spatial_scale = 1.0); 32 | 33 | class RoIPoolImpl : public ROIPoolerLevelImpl { 34 | public: 35 | RoIPoolImpl(const Size2D &output_size, float spatial_scale); 36 | 37 | virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) override; 38 | virtual std::string toString() const override; 39 | 40 | private: 41 | Size2D m_output_size; 42 | float m_spatial_scale; 43 | }; 44 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ROIPooler/ROIPoolerLevel.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | class ROIPoolerLevelImpl : public torch::nn::Module { 10 | public: 11 | virtual ~ROIPoolerLevelImpl() {} 12 | virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) = 0; 13 | virtual std::string toString() const = 0; 14 | }; 15 | TORCH_MODULE(ROIPoolerLevel); 16 | } -------------------------------------------------------------------------------- /Detectron2/Modules/RPN/AnchorGenerator.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "AnchorGenerator.h" 3 | 4 | #include "DefaultAnchorGenerator.h" 5 | #include "RotatedAnchorGenerator.h" 6 | 7 | using namespace std; 8 | using namespace torch; 9 | using namespace Detectron2; 10 | 11 | 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 12 | 13 | AnchorGenerator Detectron2::build_anchor_generator(CfgNode &cfg, const ShapeSpec::Vec &input_shapes) { 14 | auto anchor_generator = cfg["MODEL.ANCHOR_GENERATOR.NAME"].as(); 15 | if (anchor_generator == "DefaultAnchorGenerator") { 16 | return shared_ptr(new DefaultAnchorGeneratorImpl(cfg, input_shapes)); 17 | } 18 | if (anchor_generator == "RotatedAnchorGenerator") { 19 | return shared_ptr(new RotatedAnchorGeneratorImpl(cfg, input_shapes)); 20 | } 21 | assert(false); 22 | return nullptr; 23 | } 24 | 25 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 26 | 27 | TensorVec AnchorGeneratorImpl::_create_grid_offsets(const pair &size, int stride, float offset, 28 | torch::Device device) { 29 | int grid_height = size.first; 30 | int grid_width = size.second; 31 | 32 | auto options = TensorOptions(torch::kFloat32).device(device); 33 | auto shifts_x = torch::arange(offset * stride, grid_width * stride, stride, options); 34 | auto shifts_y = torch::arange(offset * stride, grid_height * stride, stride, options); 35 | auto vars = torch::meshgrid({ shifts_y, shifts_x }); 36 | 37 | auto shift_y = vars[0]; 38 | auto shift_x = vars[1]; 39 | shift_x = shift_x.reshape(-1); 40 | shift_y = shift_y.reshape(-1); 41 | return { shift_x, shift_y }; 42 | } 43 | 44 | vector> AnchorGeneratorImpl::_broadcast_params(const vector> ¶ms, int num_features) { 45 | assert(!params.empty()); 46 | if (params.size() == 1) { 47 | return vector>(num_features, params[0]); 48 | } 49 | assert(params.size() == num_features); 50 | return params; 51 | } 52 | 53 | vector> AnchorGeneratorImpl::_broadcast_params(const vector ¶ms, int num_features) { 54 | return vector>(num_features, params); 55 | } 56 | 57 | void AnchorGeneratorImpl::register_cell_anchors(const TensorVec &cell_anchors) { 58 | for (int i = 0; i < 
cell_anchors.size(); i++) { 59 | register_buffer(FormatString("%d", i), cell_anchors[i]); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /Detectron2/Modules/RPN/DefaultAnchorGenerator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "AnchorGenerator.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/anchor_generator.py 9 | 10 | // DefaultAnchorGenerator: Computes anchors in the standard ways described in https://arxiv.org/abs/1506.01497 11 | class DefaultAnchorGeneratorImpl : public AnchorGeneratorImpl { 12 | public: 13 | /** 14 | sizes: list of anchor sizes (i.e. sqrt of anchor area) to use for the i-th feature map. Anchor sizes are 15 | given in absolute lengths in units of the input image; they do not dynamically scale if input image 16 | size changes. 17 | aspect_ratios: list of aspect ratios (i.e. height / width) to use for anchors. Same "broadcast" rule for 18 | `sizes` applies. 19 | strides: stride of each input feature. 20 | offset: Relative offset between the center of the first anchor and the top-left corner of the image. Value 21 | has to be in [0, 1). Recommend to use 0.5, which means half stride. 
22 | */ 23 | DefaultAnchorGeneratorImpl(CfgNode &cfg, const ShapeSpec::Vec &input_shapes); 24 | DefaultAnchorGeneratorImpl(const std::vector &strides, const std::vector> &sizes, 25 | const std::vector> &aspect_ratios, float offset = 0.5); 26 | 27 | virtual std::vector num_anchors() const override; 28 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 29 | virtual BoxesList forward(const TensorVec &features) override; 30 | 31 | private: 32 | std::vector> m_sizes; 33 | std::vector> m_aspect_ratios; 34 | std::vector m_strides; 35 | int m_num_features; 36 | float m_offset; 37 | 38 | // num_features of tensors of shape(len(sizes) * len(aspect_ratios), 4) storing anchor boxes in XYXY format. 39 | TensorVec m_cell_anchors; 40 | }; 41 | } -------------------------------------------------------------------------------- /Detectron2/Modules/RPN/RotatedAnchorGenerator.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "AnchorGenerator.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/anchor_generator.py 9 | 10 | // RotatedAnchorGenerator: Computes rotated anchors used by Rotated RPN (RRPN), 11 | // described in https://arxiv.org/abs/1703.01086 "Arbitrary-Oriented Scene Text Detection via Rotation Proposals" 12 | class RotatedAnchorGeneratorImpl : public AnchorGeneratorImpl { 13 | public: 14 | // angles: list of angles (in degrees CCW) to use for anchors. Same "broadcast" rule for `sizes` applies. 
15 | RotatedAnchorGeneratorImpl(CfgNode &cfg, const ShapeSpec::Vec &input_shapes); 16 | RotatedAnchorGeneratorImpl(const std::vector &strides, const std::vector> &sizes, 17 | const std::vector> &aspect_ratios, const std::vector> &angles, 18 | float offset = 0.5); 19 | 20 | virtual std::vector num_anchors() const override; 21 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 22 | virtual BoxesList forward(const TensorVec &features) override; 23 | 24 | private: 25 | std::vector> m_sizes; 26 | std::vector> m_aspect_ratios; 27 | std::vector> m_angles; 28 | std::vector m_strides; 29 | int m_num_features; 30 | float m_offset; 31 | 32 | // num_features of tensors of shape (len(sizes) * len(aspect_ratios) * len(angles), 5) 33 | // storing anchor boxes in(x_ctr, y_ctr, w, h, angle) format. 34 | TensorVec m_cell_anchors; 35 | }; 36 | } -------------------------------------------------------------------------------- /Detectron2/Modules/RPN/StandardRPNHead.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "StandardRPNHead.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | RPNHead Detectron2::build_rpn_head(CfgNode &cfg, const ShapeSpec::Vec &input_shapes) { 11 | auto name = cfg["MODEL.RPN.HEAD_NAME"].as(); 12 | if (name == "StandardRPNHead") { 13 | return make_shared(cfg, input_shapes); 14 | } 15 | assert(false); 16 | return nullptr; 17 | } 18 | 19 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 20 | 21 | StandardRPNHeadImpl::StandardRPNHeadImpl(CfgNode &cfg, const ShapeSpec::Vec &input_shapes) { 22 | auto anchor_generator = build_anchor_generator(cfg, input_shapes); 23 | int box_dim = anchor_generator->box_dim(); 24 
| auto anchors = anchor_generator->num_anchors(); 25 | int num_anchors = anchors[0]; 26 | for (int i = 1; i < anchors.size(); i++) { 27 | assert(anchors[i] == num_anchors); 28 | } 29 | 30 | auto in_channels = ShapeSpec::channels_single(input_shapes); 31 | m_conv = ConvBn2d(nn::Conv2dOptions(in_channels, in_channels, 3).padding(1)); 32 | register_module("conv", m_conv); 33 | m_objectness_logits = ConvBn2d(nn::Conv2dOptions(in_channels, num_anchors, 1)); 34 | register_module("objectness_logits", m_objectness_logits); 35 | m_anchor_deltas = ConvBn2d(nn::Conv2dOptions(in_channels, num_anchors * box_dim, 1)); 36 | register_module("anchor_deltas", m_anchor_deltas); 37 | } 38 | 39 | void StandardRPNHeadImpl::initialize(const ModelImporter &importer, const std::string &prefix) { 40 | m_conv->initialize(importer, prefix + ".conv", ModelImporter::kNormalFill2); 41 | m_objectness_logits->initialize(importer, prefix + ".objectness_logits", ModelImporter::kNormalFill2); 42 | m_anchor_deltas->initialize(importer, prefix + ".anchor_deltas", ModelImporter::kNormalFill2); 43 | } 44 | 45 | vector StandardRPNHeadImpl::forward(const TensorVec &features) { 46 | TensorVec pred_objectness_logits; 47 | TensorVec pred_anchor_deltas; 48 | for (auto x : features) { 49 | x = relu(m_conv(x)); 50 | pred_objectness_logits.push_back(m_objectness_logits(x)); 51 | pred_anchor_deltas.push_back(m_anchor_deltas(x)); 52 | } 53 | return { pred_objectness_logits, pred_anchor_deltas }; 54 | } 55 | -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/BasicBlock.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "BasicBlock.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | BasicBlockImpl::BasicBlockImpl(int 
in_channels, int out_channels, int stride, BatchNorm::Type norm) : 11 | CNNBlockBaseImpl(in_channels, out_channels, stride), 12 | m_convbn1(nn::Conv2dOptions(in_channels, out_channels, 3).stride(stride).padding(1).bias(false), norm), 13 | m_convbn2(nn::Conv2dOptions(out_channels, out_channels, 3).stride(1).padding(1).bias(false), norm) { // only conv1 applies `stride`; conv2 stays at stride 1 so the main path downsamples once, like the projection shortcut 14 | register_module("conv1", m_convbn1); 15 | register_module("conv2", m_convbn2); 16 | if (in_channels != out_channels) { // projection shortcut is created only when the channel count changes 17 | m_shortcut = ConvBn2d(nn::Conv2dOptions(in_channels, out_channels, 1).stride(stride).bias(false), norm); 18 | register_module("shortcut", m_shortcut); 19 | } 20 | } 21 | 22 | void BasicBlockImpl::initialize(const ModelImporter &importer, const std::string &prefix) { 23 | if (m_shortcut) { 24 | m_shortcut->initialize(importer, prefix + ".shortcut", ModelImporter::kCaffe2MSRAFill); 25 | } 26 | m_convbn1->initialize(importer, prefix + ".conv1", ModelImporter::kCaffe2MSRAFill); 27 | m_convbn2->initialize(importer, prefix + ".conv2", ModelImporter::kCaffe2MSRAFill); 28 | } 29 | 30 | torch::Tensor BasicBlockImpl::forward(torch::Tensor x) { 31 | auto out = m_convbn1(x); 32 | out = relu(out); 33 | out = m_convbn2(out); 34 | 35 | torch::Tensor shortcut; 36 | if (m_shortcut) { 37 | shortcut = m_shortcut(x); 38 | } 39 | else { 40 | shortcut = x; 41 | } 42 | 43 | out += shortcut; 44 | out = relu(out); 45 | return out; 46 | } 47 | -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/BasicBlock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "CNNBlockBase.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/resnet.py 9 | 10 | /** 11 | The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`, 12 | with two 3x3 conv layers and a 
projection shortcut if needed. 13 | */ 14 | class BasicBlockImpl : public CNNBlockBaseImpl { 15 | public: 16 | /** 17 | in_channels (int): Number of input channels. 18 | out_channels (int): Number of output channels. 19 | stride (int): Stride for the first conv. 20 | norm (str or callable): normalization for all conv layers. 21 | See :func:`layers.get_norm` for supported format. 22 | */ 23 | BasicBlockImpl(int in_channels, int out_channels, int stride, BatchNorm::Type norm); 24 | 25 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 26 | virtual torch::Tensor forward(torch::Tensor x) override; 27 | 28 | private: 29 | ConvBn2d m_shortcut{ nullptr }; 30 | ConvBn2d m_convbn1; 31 | ConvBn2d m_convbn2; 32 | }; 33 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/BasicStem.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "BasicStem.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | BasicStemImpl::BasicStemImpl(int in_channels, int out_channels, BatchNorm::Type norm) 11 | : CNNBlockBaseImpl(in_channels, out_channels, 4), m_in_channels(in_channels), 12 | m_convbn1(nn::Conv2dOptions(in_channels, out_channels, 7).stride(2).padding(3).bias(false), norm) { 13 | register_module("conv1", m_convbn1); 14 | } 15 | 16 | void BasicStemImpl::initialize(const ModelImporter &importer, const std::string &prefix) { 17 | m_convbn1->initialize(importer, prefix + ".conv1", ModelImporter::kCaffe2MSRAFill); 18 | } 19 | 20 | torch::Tensor BasicStemImpl::forward(torch::Tensor x) { 21 | x = m_convbn1(x); 22 | x = relu_(x); 23 | x = max_pool2d(x, 3, 2, 1); 24 | return x; 25 | } 26 | 
-------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/BasicStem.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "CNNBlockBase.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/resnet.py 9 | 10 | // The standard ResNet stem (layers before the first residual block). 11 | class BasicStemImpl : public CNNBlockBaseImpl { 12 | public: 13 | // norm (str or callable): norm after the first conv layer. 14 | // See :func:`layers.get_norm` for supported format. 15 | BasicStemImpl(int in_channels, int out_channels, BatchNorm::Type norm); 16 | 17 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; // initializes conv1 under prefix + ".conv1" 18 | virtual torch::Tensor forward(torch::Tensor x) override; // conv1 -> in-place relu -> max_pool2d(3, 2, 1) 19 | 20 | private: 21 | int m_in_channels; 22 | 23 | ConvBn2d m_convbn1; // 7x7, stride 2, padding 3, no bias; registered as "conv1" 24 | }; 25 | TORCH_MODULE(BasicStem); 26 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/BottleneckBlock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "CNNBlockBase.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/resnet.py 9 | 10 | /** 11 | The standard bottleneck residual block used by ResNet-50, 101 and 152 12 | defined in :paper:`ResNet`. It contains 3 conv layers with kernels 13 | 1x1, 3x3, 1x1, and a projection shortcut if needed. 14 | */ 15 | class BottleneckBlockImpl : public CNNBlockBaseImpl { 16 | public: 17 | /** 18 | bottleneck_channels (int): number of output channels for the 3x3 19 | "bottleneck" conv layers. 
20 | num_groups (int): number of groups for the 3x3 conv layer. 21 | norm (str or callable): normalization for all conv layers. 22 | See :func:`layers.get_norm` for supported format. 23 | stride_in_1x1 (bool): when stride>1, whether to put stride in the 24 | first 1x1 convolution or the bottleneck 3x3 convolution. 25 | dilation (int): the dilation rate of the 3x3 conv layer. 26 | */ 27 | BottleneckBlockImpl(int in_channels, int out_channels, int bottleneck_channels, int stride, 28 | int num_groups, BatchNorm::Type norm, bool stride_in_1x1, int dilation = 1); 29 | 30 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 31 | virtual torch::Tensor forward(torch::Tensor x) override; 32 | 33 | private: 34 | ConvBn2d m_shortcut{ nullptr }; 35 | ConvBn2d m_convbn1; 36 | ConvBn2d m_convbn2; 37 | ConvBn2d m_convbn3; 38 | }; 39 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/CNNBlockBase.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "CNNBlockBase.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | CNNBlockBaseImpl::CNNBlockBaseImpl(int in_channels, int out_channels, int stride) : 13 | m_in_channels(in_channels), m_out_channels(out_channels), m_stride(stride) { 14 | } 15 | 16 | void CNNBlockBaseImpl::freeze() { 17 | for (auto p : parameters()) { 18 | p.set_requires_grad(false); 19 | } 20 | auto self = shared_from_this(); 21 | FrozenBatchNorm2dImpl::convert_frozen_batchnorm(self); 22 | } 23 | -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/CNNBlockBase.h: -------------------------------------------------------------------------------- 1 | #pragma 
once 2 | 3 | #include 4 | #include 5 | 6 | namespace Detectron2 7 | { 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | // converted from layers/blocks.py 10 | 11 | /** 12 | A CNN block is assumed to have input channels, output channels and a stride. 13 | The input and output of `forward()` method must be NCHW tensors. 14 | The method can perform arbitrary computation but must match the given 15 | channels and stride specification. 16 | */ 17 | class CNNBlockBaseImpl : public torch::nn::Module { 18 | public: 19 | /** 20 | The `__init__` method of any subclass should also contain these arguments. 21 | 22 | Args: 23 | in_channels (int): 24 | out_channels (int): 25 | stride (int): 26 | */ 27 | CNNBlockBaseImpl(int in_channels, int out_channels, int stride); 28 | virtual ~CNNBlockBaseImpl() {} 29 | 30 | int stride() const { return m_stride; } 31 | int out_channels() const { return m_out_channels; } 32 | 33 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) = 0; 34 | virtual torch::Tensor forward(torch::Tensor x) = 0; 35 | 36 | /** 37 | Make this block not trainable. 
38 | This method sets all parameters to `requires_grad=False`, 39 | and converts all BatchNorm layers to FrozenBatchNorm. 40 | */ 41 | void freeze(); 42 | 43 | protected: 44 | int m_in_channels; 45 | int m_out_channels; 46 | int m_stride; 47 | }; 48 | TORCH_MODULE(CNNBlockBase); 49 | } -------------------------------------------------------------------------------- /Detectron2/Modules/ResNet/DeformBottleneckBlock.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "CNNBlockBase.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/backbone/resnet.py 9 | 10 | // Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv` in the 3x3 convolution. 11 | class DeformBottleneckBlockImpl : public CNNBlockBaseImpl { 12 | public: 13 | DeformBottleneckBlockImpl(int in_channels, int out_channels, int bottleneck_channels, int stride, 14 | int num_groups, BatchNorm::Type norm, bool stride_in_1x1, int dilation, 15 | bool deform_modulated, int deform_num_groups); 16 | 17 | virtual void initialize(const ModelImporter &importer, const std::string &prefix) override; 18 | virtual torch::Tensor forward(torch::Tensor x) override; 19 | 20 | private: 21 | bool m_deform_modulated; 22 | 23 | ConvBn2d m_shortcut{ nullptr }; 24 | ConvBn2d m_convbn1; 25 | ConvBn2d m_convbn2_offset; // NOTE(review): presumably the conv that predicts offsets for m_convbn2 - confirm in DeformBottleneckBlock.cpp 26 | ModulePtr m_convbn2{ nullptr }; 27 | ConvBn2d m_convbn3; 28 | }; 29 | } -------------------------------------------------------------------------------- /Detectron2/Structures/BitMasks.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "BitMasks.h" 3 | 4 | #include 5 | 6 | using namespace std; 7 | using namespace torch; 8 | using namespace Detectron2; 9 | 10 | 
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | BitMasks::BitMasks(const torch::Tensor &tensor) { 13 | m_tensor = tensor.to(torch::kBool); 14 | assert(m_tensor.dim() == 3); 15 | m_image_size = { (int)m_tensor.size(1), (int)m_tensor.size(2) }; 16 | } 17 | 18 | BitMasks::BitMasks(const BitMasks &bitmasks) : m_tensor(bitmasks.m_tensor), m_image_size(bitmasks.m_image_size) { 19 | } 20 | 21 | BitMasks BitMasks::operator[](int64_t item) const { 22 | return m_tensor[item].unsqueeze(0); // re-add the leading axis: the BitMasks(Tensor) constructor asserts a 3-D tensor, which view({ 1, -1 }) did not produce 23 | } 24 | 25 | BitMasks BitMasks::operator[](ArrayRef item) const { 26 | auto m = m_tensor.index(item); 27 | assert(m.dim() == 3); 28 | return m; 29 | } 30 | 31 | std::string BitMasks::toString() const { 32 | string s = "BitMasks("; 33 | s += FormatString("num_instances=%d)", size()); 34 | return s; 35 | } 36 | 37 | SequencePtr BitMasks::slice(int64_t start, int64_t end) const { 38 | auto sliced = m_tensor.slice(0, start, end); 39 | return std::shared_ptr(new BitMasks(sliced)); 40 | } 41 | 42 | SequencePtr BitMasks::index(torch::Tensor item) const { 43 | auto selected = m_tensor.index(item); 44 | return std::shared_ptr(new BitMasks(selected)); 45 | } 46 | 47 | SequencePtr BitMasks::cat(const std::vector &seqs, int total) const { 48 | TensorVec tensors; 49 | tensors.reserve(seqs.size()); 50 | for (auto &seq : seqs) { 51 | Tensor t = dynamic_pointer_cast(seq)->m_tensor; 52 | tensors.push_back(t); 53 | } 54 | auto aggregated = torch::cat(tensors); 55 | assert(aggregated.size(0) == total); 56 | return std::shared_ptr(new BitMasks(aggregated)); 57 | } 58 | 59 | torch::Tensor BitMasks::crop_and_resize(torch::Tensor boxes, int mask_size) { 60 | assert(boxes.size(0) == size()); 61 | auto device = m_tensor.device(); 62 | 63 | auto batch_inds = torch::arange(size(), device).to(boxes.dtype()).index({ Colon, None }); 64 | auto rois = torch::cat({ batch_inds, boxes }, 1); // Nx5 65 | 66 | auto bit_masks = 
m_tensor.to(torch::kFloat32); 67 | rois = rois.to(device); 68 | auto output = ( 69 | ROIAlignImpl({ mask_size, mask_size }, 1.0, 0, true) 70 | .forward(bit_masks.index({ Colon, None, Colon, Colon }), rois) 71 | .squeeze(1) 72 | ); 73 | output = (output >= 0.5); 74 | return output; 75 | } 76 | -------------------------------------------------------------------------------- /Detectron2/Structures/GenericMask.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "PolygonMasks.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from utils/visualizer.py 9 | 10 | /** 11 | Attribute: 12 | polygons (list[ndarray]): list[ndarray]: polygons for this mask. 13 | Each ndarray has format [x, y, x, y, ...] 14 | mask (ndarray): a binary mask 15 | */ 16 | class GenericMask { 17 | public: 18 | static torch::Tensor toCocoMask(const std::vector> &masks); 19 | 20 | static std::tuple mask_to_polygons(const torch::Tensor &mask); 21 | 22 | static std::vector> 23 | _convert_masks(const BitMasks &m, int height, int width); 24 | static std::vector> 25 | _convert_masks(const PolygonMasks &m, int height, int width); 26 | 27 | public: 28 | GenericMask(const torch::Tensor &mask, int height, int width); 29 | GenericMask(const TensorVec &polygons, int height, int width); 30 | GenericMask(const mask_util::MaskObject &obj, int height, int width); 31 | 32 | torch::Tensor polygons_to_mask(const TensorVec &polygons); 33 | 34 | torch::Tensor mask(); 35 | TensorVec polygons(); 36 | bool has_holes(); 37 | 38 | float area() const { 39 | return m_mask.sum().item(); 40 | } 41 | 42 | torch::Tensor bbox() const; 43 | 44 | private: 45 | int m_height; 46 | int m_width; 47 | torch::Tensor m_mask; 48 | TensorVec m_polygons; 49 | bool m_has_mask; 50 | bool m_has_polygons; 51 | int m_has_holes; 52 | }; 53 | } 54 | 
-------------------------------------------------------------------------------- /Detectron2/Structures/ImageList.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from structures/image_list.py 9 | 10 | /** 11 | Structure that holds a list of images (of possibly 12 | varying sizes) as a single tensor. 13 | This works by padding the images to the same size, 14 | and storing in a field the original sizes of each image 15 | 16 | Attributes: 17 | image_sizes (list[tuple[int, int]]): each tuple is (h, w) 18 | */ 19 | class ImageList { 20 | public: 21 | /** 22 | Args: 23 | tensors: a tuple or list of `torch.Tensors`, each of shape (Hi, Wi) or 24 | (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded 25 | to the same shape with `pad_value`. 26 | size_divisibility (int): If `size_divisibility > 0`, add padding to ensure 27 | the common height and width is divisible by `size_divisibility`. 28 | This depends on the model and many models need a divisibility of 32. 29 | pad_value (float): value to pad 30 | 31 | Returns: 32 | an `ImageList`. 33 | */ 34 | static ImageList from_tensors(const TensorVec &tensors, int size_divisibility = 0, double pad_value = 0.0); 35 | 36 | public: 37 | /** 38 | tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 39 | image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can 40 | be smaller than (H, W) due to padding. 
41 | */ 42 | ImageList(torch::Tensor tensor, std::vector image_sizes) : 43 | m_tensor(tensor), m_image_sizes(std::move(image_sizes)) { 44 | } 45 | 46 | int length() const { 47 | return m_image_sizes.size(); 48 | } 49 | const std::vector &image_sizes() const { 50 | return m_image_sizes; 51 | } 52 | 53 | torch::Tensor tensor() const { 54 | return m_tensor; 55 | } 56 | torch::Device device() const { 57 | return m_tensor.device(); 58 | } 59 | 60 | ImageList to(torch::Device device) { 61 | std::vector image_sizes = m_image_sizes; 62 | return ImageList(m_tensor.to(device), std::move(image_sizes)); 63 | } 64 | 65 | /** 66 | Access the individual image in its original size. 67 | 68 | Returns: 69 | Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 70 | */ 71 | torch::Tensor get(int64_t idx); 72 | 73 | private: 74 | torch::Tensor m_tensor; 75 | std::vector m_image_sizes; 76 | }; 77 | } 78 | -------------------------------------------------------------------------------- /Detectron2/Structures/Masks.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Sequence.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | // Base class for BitMasks or PolygonMasks, so we can store in Instances with polymorphism. 
10 | class Masks : public Sequence { 11 | public: 12 | virtual ~Masks() {} 13 | 14 | virtual torch::Tensor crop_and_resize(torch::Tensor boxes, int mask_size) = 0; 15 | }; 16 | } -------------------------------------------------------------------------------- /Detectron2/Structures/PanopticSegment.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "PanopticSegment.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | int PanopticSegment::size() const { 11 | return infos.size(); 12 | } 13 | 14 | std::string PanopticSegment::toString() const { 15 | // TODO: infos 16 | return seg.toString(); 17 | } 18 | 19 | SequencePtr PanopticSegment::slice(int64_t start, int64_t end) const { 20 | assert(false); 21 | return nullptr; 22 | } 23 | 24 | SequencePtr PanopticSegment::index(torch::Tensor item) const { 25 | assert(false); 26 | return nullptr; 27 | } 28 | 29 | SequencePtr PanopticSegment::cat(const std::vector &seqs, int total) const { 30 | assert(false); 31 | return nullptr; 32 | } 33 | -------------------------------------------------------------------------------- /Detectron2/Structures/PanopticSegment.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Sequence.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | struct SegmentInfo { 10 | int id; 11 | bool isthing; 12 | float score; 13 | int category_id; 14 | int instance_id; 15 | float area; 16 | }; 17 | 18 | class PanopticSegment : public Sequence { 19 | public: 20 | torch::Tensor seg; 21 | std::vector infos; 22 | 23 | // implementing Sequence 24 | virtual int size() const override; 25 | virtual 
std::string toString() const override; 26 | virtual SequencePtr slice(int64_t start, int64_t end) const override; 27 | virtual SequencePtr index(torch::Tensor item) const override; 28 | virtual SequencePtr cat(const std::vector &seqs, int total) const override; 29 | }; 30 | } -------------------------------------------------------------------------------- /Detectron2/Structures/PostProcessing.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "PostProcessing.h" 3 | 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | using namespace torch; 9 | using namespace Detectron2; 10 | 11 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 12 | 13 | InstancesPtr PostProcessing::detector_postprocess(const InstancesPtr &results_, 14 | int output_height, int output_width, float mask_threshold) { 15 | auto scale_x = (float)output_width / results_->image_size().width; 16 | auto scale_y = (float)output_height / results_->image_size().height; 17 | InstancesPtr results(new Instances({ output_height, output_width }, results_->move_fields())); 18 | 19 | Tensor toutput_boxes; 20 | if (results->has("pred_boxes")) { 21 | toutput_boxes = results->getTensor("pred_boxes"); 22 | } 23 | else if (results->has("proposal_boxes")) { 24 | toutput_boxes = results->getTensor("proposal_boxes"); 25 | } 26 | auto output_boxes = Boxes::boxes(toutput_boxes); 27 | output_boxes->scale(scale_x, scale_y); 28 | output_boxes->clip(results->image_size()); 29 | 30 | results = (*results)[output_boxes->nonempty()]; 31 | 32 | if (results->has("pred_masks")) { 33 | retry_if_cuda_oom([&]() { 34 | results->set("pred_masks", 35 | MaskOps::paste_masks_in_image( 36 | results->getTensor("pred_masks").index({ Colon, 0, Colon, Colon }), // N, 1, M, M 37 | results->getTensor("pred_boxes"), 38 | results->image_size(), 39 | mask_threshold)); 40 | }); 41 | } 42 | if 
(results->has("pred_keypoints")) { 43 | Tensor t = results->getTensor("pred_keypoints"); 44 | t.index_put_({ Colon, Colon, 0 }, t.index({ Colon, Colon, 0 }) * scale_x); 45 | t.index_put_({ Colon, Colon, 1 }, t.index({ Colon, Colon, 1 }) * scale_y); 46 | results->set("pred_keypoints", t); // this isn't necessary in theory 47 | } 48 | return results; 49 | } 50 | 51 | torch::Tensor PostProcessing::sem_seg_postprocess(torch::Tensor result, const ImageSize &img_size, 52 | int output_height, int output_width) { 53 | auto sliceImageSizes = vector{ 54 | Colon, 55 | Slice(None, img_size.height), 56 | Slice(None, img_size.width) 57 | }; 58 | result = result.index(sliceImageSizes).expand({ 1, -1, -1, -1 }); 59 | auto options = nn::functional::InterpolateFuncOptions() 60 | .size(vector{ output_height, output_width }) 61 | .mode(torch::kBilinear) 62 | .align_corners(false); 63 | return nn::functional::interpolate(result, options)[0];; 64 | } 65 | -------------------------------------------------------------------------------- /Detectron2/Structures/PostProcessing.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Instances.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/postprocessing.py 9 | 10 | class PostProcessing { 11 | public: 12 | /** 13 | Resize the output instances. 14 | The input images are often resized when entering an object detector. 15 | As a result, we often need the outputs of the detector in a different 16 | resolution from its inputs. 17 | 18 | This function will resize the raw outputs of an R-CNN detector 19 | to produce outputs according to the desired output resolution. 20 | 21 | Args: 22 | results (Instances): the raw outputs from the detector. 23 | `results.image_size` contains the input image resolution the detector sees. 
24 | This object might be modified in-place. 25 | output_height, output_width: the desired output resolution. mask_threshold: threshold forwarded to MaskOps::paste_masks_in_image when `pred_masks` is present. 26 | 27 | Returns: 28 | Instances: the resized output from the model, based on the output resolution 29 | */ 30 | static InstancesPtr detector_postprocess(const InstancesPtr &results, 31 | int output_height, int output_width, float mask_threshold = 0.5); 32 | 33 | /** 34 | Return semantic segmentation predictions in the original resolution. 35 | 36 | The input images are often resized when entering semantic segmentor. Moreover, in some 37 | cases, they are also padded inside segmentor to be divisible by maximum network stride. 38 | As a result, we often need the predictions of the segmentor in a different 39 | resolution from its inputs. 40 | 41 | Args: 42 | result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W), 43 | where C is the number of classes, and H, W are the height and width of the prediction. 44 | img_size (tuple): image size that segmentor is taking as input. 45 | output_height, output_width: the desired output resolution. 46 | 47 | Returns: 48 | semantic segmentation prediction (Tensor): A tensor of the shape 49 | (C, output_height, output_width) that contains per-pixel soft predictions. 
50 | */ 51 | static torch::Tensor sem_seg_postprocess(torch::Tensor result, const ImageSize &img_size, 52 | int output_height, int output_width); 53 | }; 54 | } -------------------------------------------------------------------------------- /Detectron2/Structures/Sampling.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "Utils/Utils.h" 3 | #include "Sampling.h" 4 | 5 | using namespace std; 6 | using namespace torch; 7 | using namespace Detectron2; 8 | 9 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 10 | 11 | std::tuple 12 | Detectron2::subsample_labels(const torch::Tensor &labels, int num_samples, float positive_fraction, int bg_label) { 13 | auto positive = torch::nonzero((labels != -1).bitwise_and(labels != bg_label)).index({ Colon, 0 }); 14 | auto negative = torch::nonzero(labels == bg_label).index({ Colon, 0 }); 15 | 16 | auto num_pos = int64_t(num_samples * positive_fraction); 17 | // protect against not enough positive examples 18 | num_pos = min(positive.numel(), num_pos); 19 | auto num_neg = num_samples - num_pos; 20 | // protect against not enough negative examples 21 | num_neg = min(negative.numel(), num_neg); 22 | 23 | // randomly select positive and negative examples 24 | auto perm1 = torch::randperm(positive.numel(), positive.device()).index({ Slice(None, num_pos) }).toType(torch::kLong); 25 | auto perm2 = torch::randperm(negative.numel(), negative.device()).index({ Slice(None, num_neg) }).toType(torch::kLong); 26 | 27 | auto pos_idx = positive.index(perm1); 28 | auto neg_idx = negative.index(perm2); 29 | return { pos_idx, neg_idx }; 30 | } 31 | -------------------------------------------------------------------------------- /Detectron2/Structures/Sampling.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 
7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from modeling/sampling.py 9 | 10 | /** 11 | Return `num_samples` (or fewer, if not enough found) 12 | random samples from `labels` which is a mixture of positives & negatives. 13 | It will try to return as many positives as possible without 14 | exceeding `positive_fraction * num_samples`, and then try to 15 | fill the remaining slots with negatives. 16 | 17 | Args: 18 | labels (Tensor): (N, ) label vector with values: 19 | * -1: ignore 20 | * bg_label: background ("negative") class 21 | * otherwise: one or more foreground ("positive") classes 22 | num_samples (int): The total number of labels with value >= 0 to return. 23 | Values that are not sampled will be filled with -1 (ignore). 24 | positive_fraction (float): The number of subsampled labels with values > 0 25 | is `min(num_positives, int(positive_fraction * num_samples))`. The number 26 | of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`. 27 | In other words, if there are not enough positives, the sample is filled with 28 | negatives. If there are also not enough negatives, then as many elements are 29 | sampled as is possible. 30 | bg_label (int): label index of background ("negative") class. 31 | 32 | Returns: 33 | pos_idx, neg_idx (Tensor): 34 | 1D vector of indices. The total length of both is `num_samples` or fewer. 
35 | */ 36 | std::tuple 37 | subsample_labels(const torch::Tensor &labels, int num_samples, float positive_fraction, int bg_label); 38 | } -------------------------------------------------------------------------------- /Detectron2/Structures/Sequence.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "Sequence.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | SequencePtr SequenceTensor::slice(int64_t start, int64_t end) const { 11 | auto sliced = m_data.slice(0, start, end); 12 | return std::shared_ptr(new SequenceTensor(sliced)); 13 | } 14 | 15 | SequencePtr SequenceTensor::index(torch::Tensor item) const { 16 | auto selected = m_data.index(item); 17 | return std::shared_ptr(new SequenceTensor(selected)); 18 | } 19 | 20 | SequencePtr SequenceTensor::cat(const std::vector &seqs, int total) const { 21 | TensorVec tensors; 22 | tensors.reserve(seqs.size()); 23 | for (auto &seq : seqs) { 24 | Tensor t = dynamic_pointer_cast(seq)->m_data; 25 | tensors.push_back(t); 26 | } 27 | auto aggregated = torch::cat(tensors); 28 | assert(aggregated.size(0) == total); 29 | return std::shared_ptr(new SequenceTensor(aggregated)); 30 | } 31 | -------------------------------------------------------------------------------- /Detectron2/Structures/ShapeSpec.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "ShapeSpec.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | ShapeSpec::Vec ShapeSpec::filter(const Map &shapes, const std::vector &names) { 11 | Vec filtered; 12 | 
filtered.reserve(names.size()); 13 | for (const auto &name : names) { // iterate by reference: avoids copying every name string 14 | auto iter = shapes.find(name); 15 | assert(iter != shapes.end()); 16 | auto &shape = iter->second; 17 | filtered.push_back(shape); 18 | } 19 | return filtered; 20 | } 21 | 22 | int ShapeSpec::channels_single(const Vec &shapes) { 23 | assert(!shapes.empty()); 24 | int ret = shapes[0].channels; 25 | for (size_t i = 1; i < shapes.size(); i++) { // size_t matches size() and avoids a signed/unsigned comparison 26 | assert(shapes[i].channels == ret); 27 | } 28 | return ret; 29 | } 30 | 31 | std::vector ShapeSpec::channels_vec(const Vec &shapes) { 32 | vector ret; 33 | ret.reserve(shapes.size()); 34 | for (const auto &shape : shapes) { 35 | ret.push_back(shape.channels); 36 | } 37 | return ret; 38 | } 39 | 40 | std::vector ShapeSpec::strides_vec(const ShapeSpec::Vec &shapes) { 41 | vector ret; 42 | ret.reserve(shapes.size()); 43 | for (const auto &shape : shapes) { 44 | ret.push_back(shape.stride); 45 | } 46 | return ret; 47 | } 48 | -------------------------------------------------------------------------------- /Detectron2/Structures/ShapeSpec.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Boxes.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from layers/shape_spec.py 9 | 10 | /** 11 | A simple structure that contains basic shape specification about a tensor. 12 | It is often used as the auxiliary inputs/outputs of models, 13 | to obtain the shape inference ability among pytorch modules. 
14 | 15 | Attributes: 16 | channels: 17 | height: 18 | width: 19 | stride: 20 | */ 21 | struct ShapeSpec { 22 | int channels; 23 | int height; 24 | int width; 25 | int stride; 26 | int index; 27 | 28 | using Map = std::unordered_map; 29 | using Vec = std::vector; 30 | 31 | int64_t prod() const { return int64_t(channels) * height * width; } // widen first so the product is computed in 64 bits, not in int 32 | static Vec filter(const Map &shapes, const std::vector &names); 33 | static std::vector channels_vec(const Vec &shapes); 34 | static int channels_single(const Vec &shapes); 35 | static std::vector strides_vec(const Vec &shapes); 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /Detectron2/Utils/AsyncPredictor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Predictor.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from demo/predictor.py 9 | 10 | /** 11 | A predictor that runs the model asynchronously, possibly on >1 GPUs. 12 | Because rendering the visualization takes a considerable amount of time, 13 | this helps improve throughput when rendering videos. 
14 | */ 15 | class AsyncPredictor : public Predictor { 16 | public: 17 | /** 18 | cfg (CfgNode): 19 | num_gpus (int): if 0, will run on CPU 20 | */ 21 | AsyncPredictor(const CfgNode &cfg, int num_gpus = 1); 22 | 23 | int64_t len() const { return m_put_idx - m_get_idx; } 24 | int default_buffer_size() const { return m_procs.size() * 5; } 25 | 26 | void put(torch::Tensor image); 27 | InstancesPtr get(); 28 | InstancesPtr operator()(torch::Tensor image) { return predict(image); } 29 | virtual InstancesPtr predict(torch::Tensor original_image) override { 30 | put(original_image); 31 | return get(); 32 | } 33 | 34 | void shutdown(); 35 | 36 | private: 37 | std::mutex m_task_queue_mutex; 38 | std::condition_variable m_task_queue_ready; 39 | std::list> m_task_queue; 40 | std::mutex m_result_queue_mutex; 41 | std::list> m_result_queue; 42 | std::vector> m_procs; 43 | 44 | int m_put_idx; 45 | int m_get_idx; 46 | std::mutex m_result_rank_mutex; 47 | std::list> m_result_rank; 48 | }; 49 | } 50 | -------------------------------------------------------------------------------- /Detectron2/Utils/Canvas.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | class Canvas { 8 | public: 9 | enum Alignment { 10 | kLeft, 11 | kCenter, 12 | kRight, 13 | 14 | kTop, 15 | kMiddle, 16 | kBottom, 17 | }; 18 | 19 | // https://matplotlib.org/3.1.0/gallery/lines_bars_and_markers/linestyles.html 20 | enum LineStyle { 21 | kSolid, // '-' 22 | kDotted, // '.' 23 | kDashed, // '--' 24 | kDashDot, // '-.' 
25 | }; 26 | 27 | // RGB or RGBA 28 | using Color3 = std::vector; 29 | using Color4 = std::vector; 30 | 31 | public: 32 | virtual ~Canvas() {} 33 | 34 | // buffer, width, height, alpha 35 | virtual std::tuple SaveToTensor() = 0; 36 | 37 | struct DrawLine2DOptions { 38 | float line_width; 39 | Color3 color; 40 | LineStyle line_style; 41 | }; 42 | virtual void DrawLine2D(const std::vector &x_data, const std::vector &y_data, 43 | const DrawLine2DOptions &options) = 0; 44 | 45 | struct DrawRectangleOptions { 46 | bool fill = false; 47 | Color3 edge_color; 48 | float line_width; 49 | float alpha; 50 | LineStyle line_style; 51 | }; 52 | virtual void DrawRectangle(int x, int y, int width, int height, const DrawRectangleOptions &options) = 0; 53 | 54 | struct DrawPolygonOptions { 55 | bool fill = false; 56 | Color4 face_color; 57 | Color4 edge_color; 58 | float line_width; 59 | }; 60 | virtual void DrawPolygon(const torch::Tensor &segment, const DrawPolygonOptions &options) = 0; 61 | 62 | struct DrawCircleOptions { 63 | bool fill = false; 64 | Color3 color; 65 | }; 66 | virtual void DrawCircle(int x, int y, int radius, const DrawCircleOptions &options) = 0; 67 | 68 | struct DrawTextOptions { 69 | float font_size = 8; 70 | Color3 font_color; 71 | const char *font_family = "arial"; 72 | Color3 bbox_color; 73 | float bbox_alpha = 1.0; 74 | float bbox_padding = 0.0; 75 | Color3 edge_color; 76 | Alignment vertical_alignment = kTop; 77 | Alignment horizontal_alignment = kLeft; 78 | int zorder = 0; 79 | float rotation = 0.0f; 80 | }; 81 | virtual void DrawText(int x, int y, const std::string &text, const DrawTextOptions &options) = 0; 82 | 83 | // img in torch::kFloat32 with alpha 84 | virtual void DrawImage(const torch::Tensor &img) = 0; 85 | }; 86 | } 87 | -------------------------------------------------------------------------------- /Detectron2/Utils/CfgNode.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | 
#include "CfgNode.h" 3 | #include 4 | #include 5 | #include "File.h" 6 | 7 | using namespace std; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | int s_latest_ver = 0; 13 | CfgNode CfgNode::get_cfg() { 14 | static CfgNode _C; 15 | if (s_latest_ver == 0) { 16 | //auto defaultConfigDir = getenv("D2_CONFIGS_DEFAULT_DIR"); 17 | char buf[256]; 18 | if (getcwd(buf, sizeof(buf)) == nullptr) { buf[0] = '\0'; } // on failure (e.g. path too long) buf is unspecified; fall back to an empty base 19 | auto defaultConfigDir = File::ComposeFilename(buf, "\\Debug\\"); 20 | cout << "defaultConfigDir:" << defaultConfigDir << endl; 21 | if (_access(defaultConfigDir.c_str(), 0) == -1) 22 | { 23 | cout << "defaultConfigDir no exist" << endl; 24 | } 25 | assert(!defaultConfigDir.empty()); // c_str() is never null, so check the content instead 26 | // This yaml was created by dumping _C into yaml from config/defaults.py. 27 | _C = load_cfg_from_yaml_file(File::ComposeFilename(defaultConfigDir, "CfgDefaults.yaml")); 28 | s_latest_ver = _C["VERSION"].as(); 29 | } 30 | return _C.clone(); 31 | } 32 | 33 | static CfgNode s_global_cfg; 34 | void CfgNode::set_global_cfg(const CfgNode &cfg) { 35 | s_global_cfg = cfg; 36 | } 37 | 38 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 39 | 40 | CfgNode::CfgNode(YAML::Node init_dict) : fvcore::CfgNode(init_dict) { 41 | } 42 | 43 | void CfgNode::merge_from_file(const std::string &cfg_filename, bool allow_unsafe) { 44 | CfgNode loaded_cfg = fvcore::CfgNode::load_yaml_with_base(cfg_filename, allow_unsafe); 45 | 46 | auto ver = m_dict["VERSION"].as(); 47 | 48 | // CfgNode.merge_from_file is only allowed on a config object of latest version! 49 | assert(s_latest_ver == ver); 50 | 51 | auto loaded_ver = loaded_cfg["VERSION"].as(); 52 | /*~! 
53 | if loaded_ver is None: 54 | from .compat import guess_version 55 | loaded_ver = guess_version(loaded_cfg, cfg_filename) 56 | */ 57 | assert(loaded_ver <= ver); // Cannot merge a v{loaded_ver} config into a v{self.VERSION} config. 58 | 59 | if (loaded_ver == ver) { 60 | merge_from_other_cfg(loaded_cfg); 61 | } 62 | else { 63 | assert(false); 64 | //~! didn't convert config upgrade/downgrade 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /Detectron2/Utils/CfgNode.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from config/config.py 9 | 10 | /** 11 | The same as `fvcore.common.config.CfgNode`, but different in: 12 | 13 | 1. Use unsafe yaml loading by default. 14 | Note that this may lead to arbitrary code execution: you must not 15 | load a config file from untrusted sources before manually inspecting 16 | the content of the file. 17 | 2. Support config versioning. 18 | When attempting to merge an old config, it will convert the old config automatically. 19 | */ 20 | class CfgNode : public fvcore::CfgNode { 21 | public: 22 | /** 23 | Get a copy of the default config. 24 | 25 | Returns: 26 | a detectron2 CfgNode instance. 27 | */ 28 | static CfgNode get_cfg(); 29 | 30 | /** 31 | Let the global config point to the given cfg. 32 | 33 | Assume that the given "cfg" has the key "KEY", after calling 34 | `set_global_cfg(cfg)`, the key can be accessed by: 35 | 36 | .. code-block:: python 37 | 38 | from detectron2.config import global_cfg 39 | print(global_cfg.KEY) 40 | 41 | By using a hacky global config, you can access these configs anywhere, 42 | without having to pass the config object or the values deep into the code. 
43 | This is a hacky feature introduced for quick prototyping / research exploration. 44 | */ 45 | static void set_global_cfg(const CfgNode &cfg); 46 | 47 | public: 48 | CfgNode(YAML::Node init_dict = {}); 49 | 50 | // Note that the default value of allow_unsafe is changed to True 51 | void merge_from_file(const std::string &cfg_filename, bool allow_unsafe = true); 52 | }; 53 | } 54 | -------------------------------------------------------------------------------- /Detectron2/Utils/DefaultPredictor.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "DefaultPredictor.h" 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | using namespace std; 9 | using namespace torch; 10 | using namespace Detectron2; 11 | 12 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 13 | 14 | DefaultPredictor::DefaultPredictor(const CfgNode &cfg) : m_model(nullptr) { 15 | m_cfg = cfg.clone(); // cfg can be modified by model 16 | { 17 | Timer timer("build_model"); 18 | m_model = build_model(m_cfg); 19 | } 20 | m_model->eval(); 21 | 22 | std::string m_weight = cfg["MODEL.WEIGHTS"].as(""); 23 | //torch::load(m_model, m_weight); 24 | auto name = CfgNode::parseTuple(cfg["DATASETS.TEST"], { "" })[0]; 25 | m_metadata = MetadataCatalog::get(name); 26 | { 27 | Timer timer("load_checkpoint"); 28 | m_model->load_checkpoint(m_weight, true); 29 | //m_model->load_checkpoint(cfg["MODEL.WEIGHTS"].as(""), false); 30 | } 31 | m_transform_gen = shared_ptr(new ResizeShortestEdge( 32 | { cfg["INPUT.MIN_SIZE_TEST"].as(), cfg["INPUT.MIN_SIZE_TEST"].as() }, 33 | cfg["INPUT.MAX_SIZE_TEST"].as() 34 | )); 35 | 36 | m_input_format = cfg["INPUT.FORMAT"].as(); 37 | assert(m_input_format == "RGB" || m_input_format == "BGR"); 38 | } 39 | 40 | InstancesPtr DefaultPredictor::predict(torch::Tensor original_image) { 41 | 42 | torch::NoGradGuard guard; // 
https://github.com/sphinx-doc/sphinx/issues/4258 43 | 44 | // Apply pre-processing to image. 45 | if (m_input_format == "RGB") { 46 | // whether the model expects BGR inputs or RGB 47 | original_image = torch::flip(original_image, { -1 }); 48 | } 49 | auto height = original_image.size(0); 50 | auto width = original_image.size(1); 51 | auto image = m_transform_gen->get_transform(original_image)->apply_image(original_image); 52 | image = image.to(torch::kFloat32).permute({ 2, 0, 1 }); 53 | torch::Device _device = torch::cuda::is_available() ? torch::Device(DeviceType::CUDA, 0) : torch::Device(DeviceType::CPU); // fall back to CPU when no CUDA device exists instead of throwing 54 | std::vector inputs(1); 55 | inputs[0].image = image.to(_device); 56 | inputs[0].height = make_shared(height); 57 | inputs[0].width = make_shared(width); 58 | InstancesPtr predictions; 59 | { 60 | Timer timer("forward"); 61 | predictions = get<0>(m_model->forward(inputs))[0]; 62 | } 63 | return predictions; 64 | } -------------------------------------------------------------------------------- /Detectron2/Utils/DefaultPredictor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Predictor.h" 4 | #include "VisImage.h" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace Detectron2 10 | { 11 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 12 | // converted from engine/defaults.py 13 | 14 | /** 15 | Create a simple end-to-end predictor with the given config that runs on 16 | single device for a single input image. 17 | 18 | Compared to using the model directly, this class does the following additions: 19 | 20 | 1. Load checkpoint from `cfg.MODEL.WEIGHTS`. 21 | 2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`. 22 | 3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`. 23 | 4. Take one input image and produce a single output, instead of a batch. 
24 | 25 | If you'd like to do anything more fancy, please refer to its source code 26 | as examples to build and use the model manually. 27 | 28 | Attributes: 29 | metadata (Metadata): the metadata of the underlying dataset, obtained from 30 | cfg.DATASETS.TEST. 31 | 32 | Examples: 33 | 34 | .. code-block:: python 35 | 36 | pred = DefaultPredictor(cfg) 37 | inputs = cv2.imread("input.jpg") 38 | outputs = pred(inputs) 39 | */ 40 | class DefaultPredictor : public Predictor { 41 | public: 42 | DefaultPredictor(const CfgNode &cfg); 43 | 44 | /** 45 | Args: 46 | original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). 47 | 48 | Returns: 49 | predictions (dict): 50 | the output of the model for one image only. 51 | See :doc:`/tutorials/models` for details about the format. 52 | */ 53 | virtual InstancesPtr predict(torch::Tensor original_image) override; 54 | InstancesPtr predict_1(torch::Tensor original_image); 55 | protected: 56 | CfgNode m_cfg; 57 | MetaArch m_model; 58 | Metadata m_metadata; 59 | std::shared_ptr m_transform_gen; 60 | std::string m_input_format; 61 | }; 62 | } 63 | -------------------------------------------------------------------------------- /Detectron2/Utils/DefaultTrainer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include "TrainerBase.h" 4 | #include "utils_train.hpp" 5 | #include 6 | #include 7 | #include 8 | namespace Detectron2 9 | { 10 | class DefaultTrainer : public TrainerBase { 11 | public: 12 | DefaultTrainer(const CfgNode& cfg); 13 | virtual void train() override; 14 | void LoadData(std::vector& img_datas, 15 | std::vector& inputs, int& img_data_i); 16 | float get_lr(); 17 | void warmup_lr(); 18 | void set_lr(float lr); 19 | torch::Tensor sum_loss(TensorMap& loss_map); 20 | protected: 21 | CfgNode m_cfg; 22 | MetaArch m_model; 23 | Metadata m_metadata; 24 | std::shared_ptr m_transform_gen; 25 | std::string m_input_format; 26 | 27 | private: 28 | 
int batch_size; 29 | int max_iter; 30 | 31 | float base_lr; 32 | float base_momentum; 33 | float base_weight_decay; 34 | std::shared_ptr _optimizer{ nullptr }; 35 | std::shared_ptr _dataset{ nullptr }; 36 | ProgressTracker _pg_tracker; 37 | int _warmup_steps; 38 | float _warmup_start; 39 | int total_epochs; 40 | std::set decay_epochs; 41 | torch::Device _device = torch::Device(torch::kCPU); 42 | int train_path_size; 43 | std::vector decay_step; 44 | }; 45 | } -------------------------------------------------------------------------------- /Detectron2/Utils/File.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | class File { 8 | public: 9 | static std::string GetCwd(); 10 | static void SetCwd(const std::string &cwd); 11 | 12 | static bool IsAbsolutePath(const std::string &pathname); 13 | static bool IsDir(const std::string &pathname); 14 | static bool IsFile(const std::string &pathname); 15 | 16 | static std::string Dirname(const std::string &pathname); 17 | static std::string Basename(const std::string &pathname); 18 | static std::string ComposeFilename(const std::string &dirname, const std::string &basename); 19 | static std::string ReplaceExtension(const std::string &pathname, const std::string &new_extension); 20 | 21 | public: 22 | File(const std::string &fullpath, bool read = true); 23 | void Close(); 24 | 25 | std::string Read(); 26 | void Write(const std::string &content); 27 | 28 | void Seek(int offset); 29 | int ReadInt(); 30 | void Read(char *buf, size_t total); 31 | void Write(const char *buf, size_t total); 32 | 33 | private: 34 | std::string m_filename; 35 | FILE *m_file; 36 | 37 | void Verify(bool expr); 38 | }; 39 | } 40 | -------------------------------------------------------------------------------- /Detectron2/Utils/Predictor.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 
4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | class Predictor { 10 | public: 11 | virtual ~Predictor() {} 12 | 13 | /** 14 | Args: 15 | original_image (np.ndarray): an image of shape (H, W, C) (in BGR order). 16 | 17 | Returns: 18 | predictions (dict): 19 | the output of the model for one image only. 20 | See :doc:`/tutorials/models` for details about the format. 21 | */ 22 | virtual InstancesPtr predict(torch::Tensor original_image) = 0; 23 | }; 24 | } 25 | -------------------------------------------------------------------------------- /Detectron2/Utils/Timer.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "Timer.h" 3 | 4 | #include 5 | #include 6 | 7 | using namespace std; 8 | using namespace Detectron2; 9 | 10 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 11 | 12 | class QueryPerformanceTimer { 13 | public: 14 | QueryPerformanceTimer() { 15 | LARGE_INTEGER li; 16 | auto ret = QueryPerformanceFrequency(&li); 17 | assert(ret); 18 | m_freq = double(li.QuadPart) / 1000.0; 19 | QueryPerformanceCounter(&li); 20 | m_counter0 = li.QuadPart; 21 | } 22 | 23 | double get_counter() { 24 | LARGE_INTEGER li; 25 | QueryPerformanceCounter(&li); 26 | return double(li.QuadPart - m_counter0) / m_freq; 27 | } 28 | 29 | private: 30 | double m_freq = 0.0; 31 | __int64 m_counter0 = 0; 32 | }; 33 | static QueryPerformanceTimer s_hp_timer; 34 | 35 | Timer::Timer(const std::string &name) : m_name(name) { 36 | m_t0 = (int)s_hp_timer.get_counter(); 37 | } 38 | 39 | Timer::~Timer() { 40 | char buf[256]; 41 | snprintf(buf, sizeof(buf), ">>>>>>> %s: %dms\n", m_name.c_str(), (int)s_hp_timer.get_counter() - m_t0); 42 | cout << buf; 43 | } 44 | -------------------------------------------------------------------------------- 
/Detectron2/Utils/Timer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /** 8 | * ms-level timing of blocks. In Python, this is equivalent to: 9 | * 10 | * import time 11 | * t0 = time.time() 12 | * ... 13 | * print(">>>>>>> {}: {:.2f}ms".format("some name", (time.time() - t0) * 1000)) 14 | */ 15 | class Timer { 16 | public: 17 | Timer(const std::string &name); 18 | ~Timer(); 19 | 20 | private: 21 | std::string m_name; 22 | int m_t0; 23 | }; 24 | } 25 | -------------------------------------------------------------------------------- /Detectron2/Utils/TrainerBase.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | class TrainerBase { 8 | public: 9 | virtual ~TrainerBase() {} 10 | 11 | virtual void train() = 0; 12 | }; 13 | } -------------------------------------------------------------------------------- /Detectron2/Utils/Utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | // assert in debug build; throw in release build 10 | void verify(bool expr); 11 | 12 | // string functions 13 | std::vector tokenize(const std::string &input, char delimiter); 14 | std::string lower(const std::string &s); 15 | bool endswith(const std::string &s, const std::string &ending); 16 | std::string replace_all(const std::string &s, const std::string &src, const std::string &target); 17 | 18 | // image functions 19 | torch::Tensor mat_to_tensor(const cv::Mat &mat); 20 | cv::Mat image_to_mat(const torch::Tensor &t); 21 | torch::Tensor image_to_tensor(const cv::Mat &mat); 22 | torch::Tensor read_image(const std::string &pathname, const std::string 
&format = ""); 23 | } 24 | -------------------------------------------------------------------------------- /Detectron2/Utils/VideoAnalyzer.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "VideoAnalyzer.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | VideoAnalyzer::VideoAnalyzer() { 11 | } 12 | 13 | VideoAnalyzer::~VideoAnalyzer() { 14 | } 15 | 16 | void VideoAnalyzer::on_instance_predictions(cv::Mat frame, const InstancesPtr &predictions, 17 | const std::vector &keypoint_names) { 18 | } 19 | 20 | void VideoAnalyzer::on_sem_seg(cv::Mat frame, const torch::Tensor &sem_seg) { 21 | } 22 | 23 | void VideoAnalyzer::on_panoptic_seg_predictions(cv::Mat frame, const torch::Tensor &panoptic_seg, 24 | const std::vector &segments_info) { 25 | } 26 | -------------------------------------------------------------------------------- /Detectron2/Utils/VideoAnalyzer.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | 6 | namespace Detectron2 7 | { 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | // converted from utils/video_visualizer.py 10 | 11 | class VideoAnalyzer { 12 | public: 13 | VideoAnalyzer(); 14 | virtual ~VideoAnalyzer(); 15 | 16 | virtual void on_instance_predictions(cv::Mat frame, const InstancesPtr &predictions, 17 | const std::vector &keypoint_names); 18 | virtual void on_sem_seg(cv::Mat frame, const torch::Tensor &sem_seg); 19 | virtual void on_panoptic_seg_predictions(cv::Mat frame, const torch::Tensor &panoptic_seg, 20 | const std::vector &segments_info); 21 | }; 22 | } 23 | -------------------------------------------------------------------------------- 
/Detectron2/Utils/cvCanvas.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Canvas.h" 4 | 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | 9 | class cvCanvas : public Canvas { 10 | public: 11 | cvCanvas(int height, int width); 12 | 13 | // implementing Canvas 14 | virtual std::tuple SaveToTensor() override; 15 | virtual void DrawLine2D(const std::vector &x_data, const std::vector &y_data, 16 | const DrawLine2DOptions &options) override; 17 | virtual void DrawRectangle(int x, int y, int width, int height, const DrawRectangleOptions &options) override; 18 | virtual void DrawPolygon(const torch::Tensor &segment, const DrawPolygonOptions &options) override; 19 | virtual void DrawCircle(int x, int y, int radius, const DrawCircleOptions &options) override; 20 | virtual void DrawText(int x, int y, const std::string &text, const DrawTextOptions &options) override; 21 | virtual void DrawImage(const torch::Tensor &img) override; 22 | 23 | protected: 24 | cv::Mat m_canvas; 25 | 26 | static cv::Scalar cvColor(const std::vector &c); 27 | static cv::Scalar cvColor(const Color3 &c, float alpha); 28 | static int cvLineWidth(float line_width); 29 | static int cvLineType(LineStyle line_style); 30 | static double cvFontScale(float font_size); 31 | }; 32 | } -------------------------------------------------------------------------------- /Detectron2/coco/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. */ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. 
*/ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /Detectron2/detectron2/box_iou_rotated/box_iou_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor box_iou_rotated_cpu( 8 | const at::Tensor& boxes1, 9 | const at::Tensor& boxes2); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor box_iou_rotated_cuda( 13 | const at::Tensor& boxes1, 14 | const at::Tensor& boxes2); 15 | #endif 16 | 17 | // Interface for Python 18 | // inline is needed to prevent multiple function definitions when this header is 19 | // included by different cpps 20 | inline at::Tensor box_iou_rotated( 21 | const at::Tensor& boxes1, 22 | const at::Tensor& boxes2) { 23 | assert(boxes1.device().is_cuda() == boxes2.device().is_cuda()); 24 | if (boxes1.device().is_cuda()) { 25 | #ifdef WITH_CUDA 26 | return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous()); 27 | #else 28 | AT_ERROR("Not compiled with GPU support"); 29 | #endif 30 | } 31 | 32 | return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous()); 33 | } 34 | 35 | } // namespace detectron2 36 | -------------------------------------------------------------------------------- /Detectron2/detectron2/box_iou_rotated/box_iou_rotated_cpu.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 2 | #include "box_iou_rotated.h" 3 | #include "box_iou_rotated_utils.h" 4 | 5 | namespace detectron2 { 6 | 7 | template 8 | void box_iou_rotated_cpu_kernel( 9 | const at::Tensor& boxes1, 10 | const at::Tensor& boxes2, 11 | at::Tensor& ious) { 12 | auto num_boxes1 = boxes1.size(0); 13 | auto num_boxes2 = boxes2.size(0); 14 | 15 | for (int i = 0; i < num_boxes1; i++) { 16 | for (int j = 0; j < num_boxes2; j++) { 17 | ious[i * num_boxes2 + j] = single_box_iou_rotated( 18 | boxes1[i].data_ptr(), boxes2[j].data_ptr()); 19 | } 20 | } 21 | } 22 | 23 | at::Tensor box_iou_rotated_cpu( 24 | // input must be contiguous: 25 | const at::Tensor& boxes1, 26 | const at::Tensor& boxes2) { 27 | auto num_boxes1 = boxes1.size(0); 28 | auto num_boxes2 = boxes2.size(0); 29 | at::Tensor ious = 30 | at::empty({num_boxes1 * num_boxes2}, boxes1.options().dtype(at::kFloat)); 31 | 32 | box_iou_rotated_cpu_kernel(boxes1, boxes2, ious); 33 | 34 | // reshape from 1d array to 2d array 35 | auto shape = std::vector{num_boxes1, num_boxes2}; 36 | return ious.reshape(shape); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /Detectron2/detectron2/cuda_version.cu: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 
2 | 3 | #include 4 | 5 | namespace detectron2 { 6 | int get_cudart_version() { 7 | return CUDART_VERSION; 8 | } 9 | } // namespace detectron2 10 | -------------------------------------------------------------------------------- /Detectron2/detectron2/nms/cuda_helpers.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define CUDA_1D_KERNEL_LOOP(i, n) \ 4 | for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \ 5 | i += (blockDim.x * gridDim.x)) 6 | 7 | template 8 | constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) { 9 | return (n + m - 1) / m; 10 | } 11 | -------------------------------------------------------------------------------- /Detectron2/detectron2/nms/nms.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | 4 | namespace detectron2 { 5 | 6 | at::Tensor nms_cpu( 7 | const at::Tensor& dets, 8 | const at::Tensor& scores, 9 | const double iou_threshold); 10 | 11 | #ifdef WITH_CUDA 12 | at::Tensor nms_cuda( 13 | const at::Tensor& dets, 14 | const at::Tensor& scores, 15 | const double iou_threshold); 16 | #endif 17 | 18 | // Interface for Python 19 | // inline is needed to prevent multiple function definitions when this header is 20 | // included by different cpps 21 | inline at::Tensor nms( 22 | const at::Tensor& dets, 23 | const at::Tensor& scores, 24 | const float iou_threshold) { 25 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 26 | if (dets.device().is_cuda()) { 27 | #ifdef WITH_CUDA 28 | return nms_cuda( 29 | dets.contiguous(), scores.contiguous(), iou_threshold); 30 | #else 31 | AT_ERROR("Not compiled with GPU support"); 32 | #endif 33 | } 34 | 35 | return nms_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); 36 | } 37 | 38 | } // namespace detectron2 39 | -------------------------------------------------------------------------------- 
/Detectron2/detectron2/nms_rotated/nms_rotated.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved 2 | #pragma once 3 | #include 4 | 5 | namespace detectron2 { 6 | 7 | at::Tensor nms_rotated_cpu( 8 | const at::Tensor& dets, 9 | const at::Tensor& scores, 10 | const float iou_threshold); 11 | 12 | #ifdef WITH_CUDA 13 | at::Tensor nms_rotated_cuda( 14 | const at::Tensor& dets, 15 | const at::Tensor& scores, 16 | const float iou_threshold); 17 | #endif 18 | 19 | // Interface for Python 20 | // inline is needed to prevent multiple function definitions when this header is 21 | // included by different cpps 22 | inline at::Tensor nms_rotated( 23 | const at::Tensor& dets, 24 | const at::Tensor& scores, 25 | const float iou_threshold) { 26 | assert(dets.device().is_cuda() == scores.device().is_cuda()); 27 | if (dets.device().is_cuda()) { 28 | #ifdef WITH_CUDA 29 | return nms_rotated_cuda( 30 | dets.contiguous(), scores.contiguous(), iou_threshold); 31 | #else 32 | AT_ERROR("Not compiled with GPU support"); 33 | #endif 34 | } 35 | 36 | return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold); 37 | } 38 | 39 | } // namespace detectron2 40 | -------------------------------------------------------------------------------- /Detectron2/fvcore/config.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "yacs.h" 4 | 5 | namespace Detectron2 { namespace fvcore 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from fvcore/common/config.py 9 | 10 | /** 11 | Our own extended version of :class:`yacs.config.CfgNode`. 12 | It contains the following extra features: 13 | 14 | 1. 
The :meth:`merge_from_file` method supports the "_BASE_" key, 15 | which allows the new CfgNode to inherit all the attributes from the 16 | base configuration file. 17 | 2. Keys that start with "COMPUTED_" are treated as insertion-only 18 | "computed" attributes. They can be inserted regardless of whether 19 | the CfgNode is frozen or not. 20 | 3. With "allow_unsafe=True", it supports pyyaml tags that evaluate 21 | expressions in config. See examples in 22 | https://pyyaml.org/wiki/PyYAMLDocumentation#yaml-tags-and-python-types 23 | Note that this may lead to arbitrary code execution: you must not 24 | load a config file from untrusted sources before manually inspecting 25 | the content of the file. 26 | */ 27 | class CfgNode : public yacs::CfgNode { 28 | public: 29 | /** 30 | Just like `yaml.load(open(filename))`, but inherit attributes from its 31 | `_BASE_`. 32 | 33 | Args: 34 | filename (str): the file name of the current config. Will be used to 35 | find the base config file. 36 | allow_unsafe (bool): whether to allow loading the config file with 37 | `yaml.unsafe_load`. 38 | 39 | Returns: 40 | (dict): the loaded yaml 41 | */ 42 | static YAML::Node load_yaml_with_base(const std::string &filename, bool allow_unsafe = false); 43 | 44 | public: 45 | CfgNode(YAML::Node init_dict = {}); 46 | 47 | virtual void set(const std::string &name, YAML::Node val) override; 48 | 49 | /** 50 | Merge configs from a given yaml file. 51 | 52 | Args: 53 | cfg_filename: the file name of the yaml config. 54 | allow_unsafe: whether to allow loading the config file with 55 | `yaml.unsafe_load`. 56 | */ 57 | void merge_from_file(const std::string &cfg_filename, bool allow_unsafe = false); 58 | 59 | // Forward the following calls to base, but with a check on the BASE_KEY. 60 | /** 61 | Args: 62 | cfg_other (CfgNode): configs to merge from. 63 | */ 64 | void merge_from_other_cfg(const CfgNode &cfg_other); 65 | 66 | /** 67 | Args: 68 | cfg_list (list): list of configs to merge from. 
69 | */ 70 | void merge_from_list(const OptionList &cfg_list); 71 | }; 72 | }} 73 | -------------------------------------------------------------------------------- /Detectron2/fvcore/fvcore.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "fvcore.h" 3 | 4 | using namespace std; 5 | using namespace torch; 6 | using namespace Detectron2; 7 | 8 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 9 | 10 | torch::Tensor fvcore::smooth_l1_loss(const torch::Tensor &input, const torch::Tensor &target, 11 | float beta, torch::Reduction::Reduction reduction) { 12 | Tensor loss; 13 | if (beta < 1e-5) { 14 | // if beta == 0, then torch.where will result in nan gradients when 15 | // the chain rule is applied due to pytorch implementation details 16 | // (the False branch "0.5 * n ** 2 / 0" has an incoming gradient of 17 | // zeros, rather than "no gradient"). To avoid this issue, we define 18 | // small values of beta to be exactly l1 loss. 
19 | loss = torch::abs(input - target); 20 | } 21 | else { 22 | auto n = torch::abs(input - target); 23 | auto cond = n < beta; 24 | loss = torch::where(cond, 0.5 * (n * n) / beta, n - 0.5 * beta); 25 | } 26 | 27 | if (reduction == Reduction::Mean) { 28 | loss = loss.mean(); 29 | } 30 | else if (reduction == Reduction::Sum) { 31 | loss = loss.sum(); 32 | } 33 | return loss; 34 | } 35 | -------------------------------------------------------------------------------- /Detectron2/fvcore/fvcore.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | namespace Detectron2 { namespace fvcore 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from fvcore/nn/smooth_l1_loss.py 9 | 10 | /** 11 | Smooth L1 loss defined in the Fast R-CNN paper as: 12 | 13 | | 0.5 * x ** 2 / beta if abs(x) < beta 14 | smoothl1(x) = | 15 | | abs(x) - 0.5 * beta otherwise, 16 | 17 | where x = input - target. 18 | 19 | Smooth L1 loss is related to Huber loss, which is defined as: 20 | 21 | | 0.5 * x ** 2 if abs(x) < beta 22 | huber(x) = | 23 | | beta * (abs(x) - 0.5 * beta) otherwise 24 | 25 | Smooth L1 loss is equal to huber(x) / beta. This leads to the following 26 | differences: 27 | 28 | - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss 29 | converges to a constant 0 loss. 30 | - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss 31 | converges to L2 loss. 32 | - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant 33 | slope of 1. For Huber loss, the slope of the L1 segment is beta. 34 | 35 | Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta 36 | portion replaced with a quadratic function such that at abs(x) = beta, its 37 | slope is 1. The quadratic segment smooths the L1 loss near x = 0. 
38 | 39 | Args: 40 | input (Tensor): input tensor of any shape 41 | target (Tensor): target value tensor with the same shape as input 42 | beta (float): L1 to L2 change point. 43 | For beta values < 1e-5, L1 loss is computed. 44 | reduction: 'none' | 'mean' | 'sum' 45 | 'none': No reduction will be applied to the output. 46 | 'mean': The output will be averaged. 47 | 'sum': The output will be summed. 48 | 49 | Returns: 50 | The loss with the reduction option applied. 51 | 52 | Note: 53 | PyTorch's builtin "Smooth L1 loss" implementation does not actually 54 | implement Smooth L1 loss, nor does it implement Huber loss. It implements 55 | the special case of both in which they are equal (beta=1). 56 | See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss. 57 | */ 58 | torch::Tensor smooth_l1_loss(const torch::Tensor &input, const torch::Tensor &target, 59 | float beta, torch::Reduction::Reduction reduction); 60 | }} 61 | -------------------------------------------------------------------------------- /Detectron2/trainDemo.cpp: -------------------------------------------------------------------------------- 1 | #include "Base.h" 2 | #include "trainDemo.h" 3 | #include 4 | #include 5 | using namespace Detectron2; 6 | 7 | CfgNode Trainer::setup_cfg(const std::string& config_file, const CfgNode::OptionList& opts, 8 | float confidence_threshold) { 9 | // load config from file and command-line arguments 10 | auto cfg = CfgNode::get_cfg(); 11 | cfg.merge_from_file(config_file); 12 | cfg.merge_from_list(opts); 13 | // Set score_threshold for builtin models 14 | cfg["MODEL.RETINANET.SCORE_THRESH_TEST"] = confidence_threshold; 15 | cfg["MODEL.ROI_HEADS.SCORE_THRESH_TEST"] = confidence_threshold; 16 | cfg["MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH"] = confidence_threshold; 17 | cfg.freeze(); 18 | return cfg; 19 | } 20 | 21 | void Trainer::start(const Options& options) { 22 | auto cfg = setup_cfg(options.config_file, options.opts, 
options.confidence_threshold); 23 | BuiltinDataset::register_all(); 24 | Trainer m_Trainer(cfg); 25 | m_Trainer.run_train(); 26 | //m_Trainer. 27 | } 28 | 29 | Trainer::Trainer(const CfgNode& cfg, ColorMode instance_mode, bool parallel) : 30 | m_cpu_device(torch::kCPU), m_instance_mode(instance_mode), m_parallel(parallel) 31 | { 32 | auto name = CfgNode::parseTuple(cfg["DATASETS.TEST"], { "__unused" })[0]; 33 | m_metadata = MetadataCatalog::get(name); 34 | if (parallel) { 35 | //int num_gpu = torch::cuda::device_count(); 36 | //m_TrainerBase = make_shared(cfg, num_gpu); 37 | } 38 | else { 39 | m_TrainerBase = std::make_shared(cfg); 40 | } 41 | } 42 | 43 | void Trainer::run_train() { 44 | //VisImage vis_output; 45 | m_TrainerBase->train(); 46 | 47 | } -------------------------------------------------------------------------------- /Detectron2/trainDemo.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | namespace Detectron2 6 | { 7 | /////////////////////////////////////////////////////////////////////////////////////////////////////////////////// 8 | // converted from tools/train_net.py 9 | 10 | class Trainer { 11 | public: 12 | struct Options { 13 | std::string config_file // path to config file 14 | = "configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml"; 15 | bool webcam = false; // Take inputs from webcam 16 | std::vector input; // A list of space separated input images 17 | // or a single glob pattern such as 'directory/*.jpg' 18 | CfgNode::OptionList opts; // Modify config options using the command-line 'KEY VALUE' pairs 19 | float confidence_threshold = 0.5; // Minimum score for instance predictions to be shown 20 | }; 21 | static void start(const Options& options); 22 | 23 | static CfgNode setup_cfg(const std::string& config_file, const CfgNode::OptionList& opts, 24 | float confidence_threshold); 25 | 26 | void run_train(); 27 | public: 28 | 
Trainer(const CfgNode& cfg, ColorMode instance_mode = ColorMode::kIMAGE, bool parallel = false); 29 | private: 30 | Metadata m_metadata; 31 | torch::Device m_cpu_device; 32 | ColorMode m_instance_mode; 33 | bool m_parallel; 34 | std::shared_ptr m_TrainerBase; 35 | }; 36 | } -------------------------------------------------------------------------------- /Detectron2_Project.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {4FC737F1-C7A5-4376-A066-2A32D752A2FF} 6 | cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx 7 | 8 | 9 | {93995380-89BD-4b04-88EB-625FBE52EBFB} 10 | h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd 11 | 12 | 13 | {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 14 | rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms 15 | 16 | 17 | 18 | 19 | 源文件 20 | 21 | 22 | -------------------------------------------------------------------------------- /Detectron2_Project.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /Detectron2_test.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "assert.h" 4 | using namespace Detectron2; 5 | using namespace std; 6 | 7 | void demo() { 8 | int selected = 0; // <-- change this number to choose different demo 9 | 10 | static const char* models[] = { 11 | //"COCO-Detection/faster_rcnn_R_50_FPN_3x/137851257/model_final_f6e8b1.pkl" 12 | //"COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl", 13 | "COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl" 14 | //"COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl", 15 | //"COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl", 16 | 
//"COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl" 17 | }; 18 | string model = models[selected]; 19 | auto tokens = tokenize(model, '/'); 20 | 21 | string configDir = "D:\\libtorch\\detectron2_project\\configs\\"; 22 | VisualizationDemo::Options options; 23 | options.config_file = File::ComposeFilename(configDir, tokens[0] + "\\" + tokens[1] + ".yaml"); 24 | vector m_file; 25 | cv::glob("F:\\data\\faster_rcnn\\images\\train\\",m_file); 26 | for (int i = 0;i< m_file.size();i++) 27 | { 28 | options.input.push_back(m_file[i]); 29 | } 30 | options.output = "D:\\libtorch\\detectron2_project\\output\\"; 31 | //options.output = "predict"; 32 | //options.opts = { {"MODEL.WEIGHTS", YAML::Node("detectron2://" + model) } }; 33 | //try { 34 | VisualizationDemo::start(options); 35 | //} 36 | //catch (const std::exception& e) { 37 | // const char* msg = e.what(); 38 | // std::cerr << msg; 39 | //} 40 | } 41 | 42 | int main() 43 | { 44 | demo(); 45 | } -------------------------------------------------------------------------------- /Detectron2_train.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "assert.h" 4 | using namespace Detectron2; 5 | using namespace std; 6 | 7 | void demo() { 8 | int selected = 0; // <-- change this number to choose different demo 9 | 10 | static const char* models[] = { 11 | //"COCO-Detection/faster_rcnn_R_50_FPN_3x/137851257/model_final_f6e8b1.pkl" 12 | //"COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl", 13 | "COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl" 14 | //"COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl", 15 | //"COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl", 16 | //"COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl" 17 | }; 18 | string model = models[selected]; 19 | auto tokens = 
tokenize(model, '/'); 20 | 21 | string configDir = "D:\\libtorch\\detectron2_project\\configs\\"; 22 | Trainer::Options options; 23 | options.config_file = File::ComposeFilename(configDir, tokens[0] + "\\" + tokens[1] + ".yaml"); 24 | 25 | //options.output = "predict"; 26 | options.opts = { {"MODEL.WEIGHTS", YAML::Node("detectron2://" + model) } }; 27 | //try { 28 | Trainer::start(options); 29 | //} 30 | //catch (const std::exception& e) { 31 | // const char* msg = e.what(); 32 | // std::cerr << msg; 33 | //} 34 | } 35 | 36 | int main() 37 | { 38 | demo(); 39 | } -------------------------------------------------------------------------------- /NetLib2/NetLib2.vcxproj.filters: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | {c57bc6e1-7389-4540-b7b0-083317f9900d} 6 | 7 | 8 | 9 | 10 | detectron2 11 | 12 | 13 | detectron2 14 | 15 | 16 | detectron2 17 | 18 | 19 | detectron2 20 | 21 | 22 | detectron2 23 | 24 | 25 | detectron2 26 | 27 | 28 | detectron2 29 | 30 | 31 | detectron2 32 | 33 | 34 | detectron2 35 | 36 | 37 | -------------------------------------------------------------------------------- /NetLib2/NetLib2.vcxproj.user: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | -------------------------------------------------------------------------------- /configs/Base-RCNN-C4.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RPN: 4 | PRE_NMS_TOPK_TEST: 6000 5 | POST_NMS_TOPK_TEST: 1000 6 | ROI_HEADS: 7 | NAME: "Res5ROIHeads" 8 | DATASETS: 9 | TRAIN: ("coco_2017_train",) 10 | TEST: ("coco_2017_val",) 11 | SOLVER: 12 | IMS_PER_BATCH: 16 13 | BASE_LR: 0.02 14 | STEPS: (60000, 80000) 15 | MAX_ITER: 90000 16 | INPUT: 17 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 18 | VERSION: 2 19 | -------------------------------------------------------------------------------- 
/configs/Base-RCNN-DilatedC5.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | RESNETS: 4 | OUT_FEATURES: ["res5"] 5 | RES5_DILATION: 2 6 | RPN: 7 | IN_FEATURES: ["res5"] 8 | PRE_NMS_TOPK_TEST: 6000 9 | POST_NMS_TOPK_TEST: 1000 10 | ROI_HEADS: 11 | NAME: "StandardROIHeads" 12 | IN_FEATURES: ["res5"] 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_FC: 2 16 | POOLER_RESOLUTION: 7 17 | ROI_MASK_HEAD: 18 | NAME: "MaskRCNNConvUpsampleHead" 19 | NUM_CONV: 4 20 | POOLER_RESOLUTION: 14 21 | DATASETS: 22 | TRAIN: ("coco_2017_train",) 23 | TEST: ("coco_2017_val",) 24 | SOLVER: 25 | IMS_PER_BATCH: 16 26 | BASE_LR: 0.02 27 | STEPS: (60000, 80000) 28 | MAX_ITER: 90000 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 31 | VERSION: 2 32 | -------------------------------------------------------------------------------- /configs/Base-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "GeneralizedRCNN" 3 | BACKBONE: 4 | NAME: "build_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res2", "res3", "res4", "res5"] 7 | FPN: 8 | IN_FEATURES: ["res2", "res3", "res4", "res5"] 9 | ANCHOR_GENERATOR: 10 | SIZES: [[32], [64], [128], [256], [512]] # One size for each in feature map 11 | ASPECT_RATIOS: [[0.5, 1.0, 2.0]] # Three aspect ratios (same for all in feature maps) 12 | RPN: 13 | IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"] 14 | PRE_NMS_TOPK_TRAIN: 2000 # Per FPN level 15 | PRE_NMS_TOPK_TEST: 1000 # Per FPN level 16 | # Detectron1 uses 2000 proposals per-batch, 17 | # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue) 18 | # which is approximately 1000 proposals per-image since the default batch size for FPN is 2. 
19 | POST_NMS_TOPK_TRAIN: 1000 20 | POST_NMS_TOPK_TEST: 1000 21 | ROI_HEADS: 22 | NAME: "StandardROIHeads" 23 | IN_FEATURES: ["p2", "p3", "p4", "p5"] 24 | NUM_CLASSES: 1 25 | ROI_BOX_HEAD: 26 | NAME: "FastRCNNConvFCHead" 27 | NUM_FC: 2 28 | POOLER_RESOLUTION: 7 29 | CLS_AGNOSTIC_BBOX_REG: True 30 | ROI_MASK_HEAD: 31 | NAME: "MaskRCNNConvUpsampleHead" 32 | NUM_CONV: 4 33 | POOLER_RESOLUTION: 14 34 | DATASETS: 35 | TRAIN: ("coco_2017_train",) 36 | TEST: ("coco_2017_val",) 37 | SOLVER: 38 | IMS_PER_BATCH: 4 39 | BASE_LR: 0.002 40 | STEPS: (60000, 80000) 41 | MAX_ITER: 90000 42 | INPUT: 43 | MAX_SIZE_TRAIN: 608 44 | MAX_SIZE_TEST: 608 45 | MIN_SIZE_TRAIN: (608, 608) 46 | MIN_SIZE_TEST: 608 47 | -------------------------------------------------------------------------------- /configs/Base-RetinaNet.yaml: -------------------------------------------------------------------------------- 1 | MODEL: 2 | META_ARCHITECTURE: "RetinaNet" 3 | BACKBONE: 4 | NAME: "build_retinanet_resnet_fpn_backbone" 5 | RESNETS: 6 | OUT_FEATURES: ["res3", "res4", "res5"] 7 | ANCHOR_GENERATOR: 8 | SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"] 9 | FPN: 10 | IN_FEATURES: ["res3", "res4", "res5"] 11 | RETINANET: 12 | IOU_THRESHOLDS: [0.4, 0.5] 13 | IOU_LABELS: [0, -1, 1] 14 | SMOOTH_L1_LOSS_BETA: 0.0 15 | DATASETS: 16 | TRAIN: ("coco_2017_train",) 17 | TEST: ("coco_2017_val",) 18 | SOLVER: 19 | IMS_PER_BATCH: 16 20 | BASE_LR: 0.01 # Note that RetinaNet uses a different default learning rate 21 | STEPS: (60000, 80000) 22 | MAX_ITER: 90000 23 | INPUT: 24 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 25 | VERSION: 2 26 | -------------------------------------------------------------------------------- /configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 
4 | MASK_ON: False 5 | LOAD_PROPOSALS: True 6 | RESNETS: 7 | DEPTH: 50 8 | PROPOSAL_GENERATOR: 9 | NAME: "PrecomputedProposals" 10 | DATASETS: 11 | TRAIN: ("coco_2017_train",) 12 | PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", ) 13 | TEST: ("coco_2017_val",) 14 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 15 | DATALOADER: 16 | # proposals are part of the dataset_dicts, and take a lot of RAM 17 | NUM_WORKERS: 2 18 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "F:\\data\\centernetv2_data\\weight\\epoch_300.pt" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 101 7 | DATASETS: 8 | TRAIN: ("F:\\data\\centernetv2_data\\annotations\\train.json","F:\\data\\centernetv2_data\\images\\train\\",) 9 | TEST: ("coco_2017_val",) 10 | SOLVER: 
11 | STEPS: (10000,14000,18000,) 12 | MAX_ITER: 20000 13 | WARMUP_ITERS: 4000 14 | WARMUP_FACTOR: 0.001 15 | VERSION: 2 16 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: 
"../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: False 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | 
-------------------------------------------------------------------------------- /configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RetinaNet.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | PRE_NMS_TOPK_TEST: 12000 10 | POST_NMS_TOPK_TEST: 2000 11 | -------------------------------------------------------------------------------- /configs/COCO-Detection/rpn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "ProposalNetwork" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: False 6 | RESNETS: 7 | DEPTH: 50 8 | RPN: 9 | POST_NMS_TOPK_TEST: 2000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | 
-------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | RPN: 8 | BBOX_REG_LOSS_TYPE: "giou" 9 | BBOX_REG_LOSS_WEIGHT: 2.0 10 | ROI_BOX_HEAD: 11 | BBOX_REG_LOSS_TYPE: "giou" 12 | BBOX_REG_LOSS_WEIGHT: 10.0 13 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | SOLVER: 8 | STEPS: (210000, 250000) 9 | MAX_ITER: 270000 10 | -------------------------------------------------------------------------------- /configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 5 | PIXEL_STD: [57.375, 57.120, 58.395] 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | SOLVER: 12 | STEPS: (210000, 250000) 13 | MAX_ITER: 270000 14 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | KEYPOINT_ON: True 4 | ROI_HEADS: 5 | NUM_CLASSES: 1 6 | ROI_BOX_HEAD: 7 | SMOOTH_L1_BETA: 0.5 # Keypoint AP degrades (though box AP improves) when using plain L1 loss 8 | RPN: 9 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2. 10 | # 1000 proposals per-image is found to hurt box AP. 11 | # Therefore we increase it to 1500 per-image. 
12 | POST_NMS_TOPK_TRAIN: 1500 13 | DATASETS: 14 | TRAIN: ("keypoints_coco_2017_train",) 15 | TEST: ("keypoints_coco_2017_val",) 16 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 101 10 | SOLVER: 11 | STEPS: (210000, 250000) 12 | MAX_ITER: 270000 13 | -------------------------------------------------------------------------------- 
/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | MASK_ON: True 5 | SEM_SEG_HEAD: 6 | LOSS_WEIGHT: 0.5 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_separated",) 9 | TEST: ("coco_2017_val_panoptic_separated",) 10 | DATALOADER: 11 | FILTER_EMPTY_ANNOTATIONS: False 12 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | RESNETS: 5 | DEPTH: 101 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | -------------------------------------------------------------------------------- /configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "Base-Panoptic-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | RESNETS: 5 | DEPTH: 50 6 | SOLVER: 7 | STEPS: (210000, 250000) 8 | MAX_ITER: 270000 9 | -------------------------------------------------------------------------------- /configs/Cityscapes/mask_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | # For 
better, more stable performance, initialize from COCO 5 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 6 | MASK_ON: True 7 | ROI_HEADS: 8 | NUM_CLASSES: 8 9 | # This is similar to the setting used in the Mask R-CNN paper, Appendix A 10 | # But there are some differences, e.g., we did not initialize the output 11 | # layer using the corresponding classes from COCO. 12 | INPUT: 13 | MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024) 14 | MIN_SIZE_TRAIN_SAMPLING: "choice" 15 | MIN_SIZE_TEST: 1024 16 | MAX_SIZE_TRAIN: 2048 17 | MAX_SIZE_TEST: 2048 18 | DATASETS: 19 | TRAIN: ("cityscapes_fine_instance_seg_train",) 20 | TEST: ("cityscapes_fine_instance_seg_val",) 21 | SOLVER: 22 | BASE_LR: 0.01 23 | STEPS: (18000,) 24 | MAX_ITER: 24000 25 | IMS_PER_BATCH: 8 26 | TEST: 27 | EVAL_PERIOD: 8000 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 
9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | INPUT: 16 | # no scale augmentation 17 | MIN_SIZE_TRAIN: (800, ) 18 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1 9 | ROI_KEYPOINT_HEAD: 10 | POOLER_RESOLUTION: 14 11 | POOLER_SAMPLING_RATIO: 2 12 | POOLER_TYPE: "ROIAlign" 13 | # Detectron1 uses smooth L1 loss with some magic beta values. 14 | # The defaults are changed to L1 loss in Detectron2. 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 17 | POOLER_SAMPLING_RATIO: 2 18 | POOLER_TYPE: "ROIAlign" 19 | RPN: 20 | SMOOTH_L1_BETA: 0.1111 21 | # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2 22 | # 1000 proposals per-image is found to hurt box AP. 23 | # Therefore we increase it to 1500 per-image. 24 | POST_NMS_TOPK_TRAIN: 1500 25 | DATASETS: 26 | TRAIN: ("keypoints_coco_2017_train",) 27 | TEST: ("keypoints_coco_2017_val",) 28 | -------------------------------------------------------------------------------- /configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | # Detectron1 uses smooth L1 loss with some magic beta values. 8 | # The defaults are changed to L1 loss in Detectron2. 
9 | RPN: 10 | SMOOTH_L1_BETA: 0.1111 11 | ROI_BOX_HEAD: 12 | SMOOTH_L1_BETA: 1.0 13 | POOLER_SAMPLING_RATIO: 2 14 | POOLER_TYPE: "ROIAlign" 15 | ROI_MASK_HEAD: 16 | POOLER_SAMPLING_RATIO: 2 17 | POOLER_TYPE: "ROIAlign" 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 1230 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v0.5_train",) 14 | TEST: ("lvis_v0.5_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | DATALOADER: 18 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 19 | REPEAT_THRESHOLD: 0.001 20 | -------------------------------------------------------------------------------- /configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1230 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v0.5_train",) 18 | TEST: ("lvis_v0.5_val",) 19 | TEST: 20 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 21 | DATALOADER: 22 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 23 | REPEAT_THRESHOLD: 0.001 24 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 101 7 | ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | 
ROI_HEADS: 8 | NUM_CLASSES: 1203 9 | SCORE_THRESH_TEST: 0.0001 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | DATASETS: 13 | TRAIN: ("lvis_v1_train",) 14 | TEST: ("lvis_v1_val",) 15 | TEST: 16 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 17 | SOLVER: 18 | STEPS: (120000, 160000) 19 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 20 | DATALOADER: 21 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 22 | REPEAT_THRESHOLD: 0.001 23 | -------------------------------------------------------------------------------- /configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl" 4 | PIXEL_STD: [57.375, 57.120, 58.395] 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False # this is a C2 model 8 | NUM_GROUPS: 32 9 | WIDTH_PER_GROUP: 8 10 | DEPTH: 101 11 | ROI_HEADS: 12 | NUM_CLASSES: 1203 13 | SCORE_THRESH_TEST: 0.0001 14 | INPUT: 15 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 16 | DATASETS: 17 | TRAIN: ("lvis_v1_train",) 18 | TEST: ("lvis_v1_val",) 19 | SOLVER: 20 | STEPS: (120000, 160000) 21 | MAX_ITER: 180000 # 180000 * 16 / 100000 ~ 28.8 epochs 22 | TEST: 23 | DETECTIONS_PER_IMAGE: 300 # LVIS allows up to 300 24 | DATALOADER: 25 | SAMPLER_TRAIN: "RepeatFactorTrainingSampler" 26 | REPEAT_THRESHOLD: 0.001 27 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | 
-------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NAME: CascadeROIHeads 9 | ROI_BOX_HEAD: 10 | CLS_AGNOSTIC_BBOX_REG: True 11 | RPN: 12 | POST_NMS_TOPK_TRAIN: 2000 13 | SOLVER: 14 | STEPS: (210000, 250000) 15 | MAX_ITER: 270000 16 | -------------------------------------------------------------------------------- /configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | MASK_ON: True 4 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k" 5 | RESNETS: 6 | STRIDE_IN_1X1: False # this is a C2 model 7 | NUM_GROUPS: 32 8 | WIDTH_PER_GROUP: 8 9 | DEPTH: 152 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | ROI_HEADS: 12 | NAME: "CascadeROIHeads" 13 | ROI_BOX_HEAD: 14 | NAME: "FastRCNNConvFCHead" 15 | NUM_CONV: 4 16 | NUM_FC: 1 17 | NORM: "GN" 18 | CLS_AGNOSTIC_BBOX_REG: True 19 | ROI_MASK_HEAD: 20 | NUM_CONV: 8 21 | NORM: "GN" 22 | RPN: 23 | POST_NMS_TOPK_TRAIN: 2000 24 | SOLVER: 25 | IMS_PER_BATCH: 128 26 | STEPS: (35000, 45000) 27 | MAX_ITER: 50000 28 | BASE_LR: 0.16 29 | INPUT: 30 | MIN_SIZE_TRAIN: (640, 864) 31 | MIN_SIZE_TRAIN_SAMPLING: "range" 32 | MAX_SIZE_TRAIN: 1440 33 | CROP: 34 | ENABLED: True 35 | TEST: 36 | EVAL_PERIOD: 2500 37 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | 
RESNETS: 6 | DEPTH: 50 7 | ROI_BOX_HEAD: 8 | CLS_AGNOSTIC_BBOX_REG: True 9 | ROI_MASK_HEAD: 10 | CLS_AGNOSTIC_MASK: True 11 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5 8 | DEFORM_MODULATED: False 9 | SOLVER: 10 | STEPS: (210000, 250000) 11 | MAX_ITER: 270000 12 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "GN" 8 | STRIDE_IN_1X1: False 9 | FPN: 10 | NORM: "GN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "GN" 16 | ROI_MASK_HEAD: 17 | NORM: "GN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | -------------------------------------------------------------------------------- /configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | 
_BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | NORM: "SyncBN" 8 | STRIDE_IN_1X1: True 9 | FPN: 10 | NORM: "SyncBN" 11 | ROI_BOX_HEAD: 12 | NAME: "FastRCNNConvFCHead" 13 | NUM_CONV: 4 14 | NUM_FC: 1 15 | NORM: "SyncBN" 16 | ROI_MASK_HEAD: 17 | NORM: "SyncBN" 18 | SOLVER: 19 | # 3x schedule 20 | STEPS: (210000, 250000) 21 | MAX_ITER: 270000 22 | TEST: 23 | PRECISE_BN: 24 | ENABLED: True 25 | -------------------------------------------------------------------------------- /configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml: -------------------------------------------------------------------------------- 1 | # A large PanopticFPN for demo purposes. 2 | # Use GN on backbone to support semantic seg. 3 | # Use Cascade + Deform Conv to improve localization. 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml" 5 | MODEL: 6 | WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN" 7 | RESNETS: 8 | DEPTH: 101 9 | NORM: "GN" 10 | DEFORM_ON_PER_STAGE: [False, True, True, True] 11 | STRIDE_IN_1X1: False 12 | FPN: 13 | NORM: "GN" 14 | ROI_HEADS: 15 | NAME: CascadeROIHeads 16 | ROI_BOX_HEAD: 17 | CLS_AGNOSTIC_BBOX_REG: True 18 | ROI_MASK_HEAD: 19 | NORM: "GN" 20 | RPN: 21 | POST_NMS_TOPK_TRAIN: 2000 22 | SOLVER: 23 | STEPS: (105000, 125000) 24 | MAX_ITER: 135000 25 | IMS_PER_BATCH: 32 26 | BASE_LR: 0.04 27 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | # Train from random initialization. 4 | WEIGHTS: "" 5 | # It makes sense to divide by STD when training from scratch 6 | # But it seems to make no difference on the results and C2's models didn't do this. 7 | # So we keep things consistent with C2. 
8 | # PIXEL_STD: [57.375, 57.12, 58.395] 9 | MASK_ON: True 10 | BACKBONE: 11 | FREEZE_AT: 0 12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 13 | # to learn what you need for training from scratch. 14 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 20 | -------------------------------------------------------------------------------- /configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml" 2 | MODEL: 3 | PIXEL_STD: [57.375, 57.12, 58.395] 4 | WEIGHTS: "" 5 | MASK_ON: True 6 | RESNETS: 7 | STRIDE_IN_1X1: False 8 | BACKBONE: 9 | FREEZE_AT: 0 10 | SOLVER: 11 | # 9x schedule 12 | IMS_PER_BATCH: 64 # 4x the standard 13 | STEPS: (187500, 197500) # last 60/4==15k and last 20/4==5k 14 | MAX_ITER: 202500 # 90k * 9 / 4 15 | BASE_LR: 0.08 16 | TEST: 17 | EVAL_PERIOD: 2500 18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883 19 | # to learn what you need for training from scratch. 
20 | -------------------------------------------------------------------------------- /configs/Misc/semantic_R_50_FPN_1x.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_train_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: False 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | NUM_CLASSES: 20 9 | INPUT: 10 | MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800) 11 | MIN_SIZE_TEST: 800 12 | DATASETS: 13 | TRAIN: ('voc_2007_trainval', 'voc_2012_trainval') 14 | TEST: ('voc_2007_test',) 15 | SOLVER: 16 | STEPS: (12000, 16000) 17 | MAX_ITER: 18000 # 17.4 epochs 18 | 
WARMUP_ITERS: 100 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP", 43.87, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml" 2 | DATASETS: 3 | TRAIN: ("coco_2017_val_100",) 4 | TEST: ("coco_2017_val_100",) 5 | SOLVER: 6 | BASE_LR: 0.005 7 | STEPS: (30,) 8 | MAX_ITER: 40 9 | IMS_PER_BATCH: 4 10 | DATALOADER: 11 | NUM_WORKERS: 2 12 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | 
PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 7 | TEST: ("coco_2017_val_100",) 8 | PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", ) 9 | SOLVER: 10 | BASE_LR: 0.005 11 | STEPS: (30,) 12 | MAX_ITER: 40 13 | IMS_PER_BATCH: 4 14 | DATALOADER: 15 | NUM_WORKERS: 2 16 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl" 4 | DATASETS: 5 | TEST: ("keypoints_coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | ROI_HEADS: 6 | NUM_CLASSES: 1 7 | DATASETS: 8 | TRAIN: ("keypoints_coco_2017_val_100",) 9 | TEST: ("keypoints_coco_2017_val_100",) 10 | SOLVER: 11 | BASE_LR: 0.005 12 | STEPS: (30,) 13 | MAX_ITER: 40 14 | IMS_PER_BATCH: 4 15 | DATALOADER: 16 | NUM_WORKERS: 2 17 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | 
DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False 14 | LOSS_WEIGHT: 4.0 15 | ROI_BOX_HEAD: 16 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 17 | RPN: 18 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 19 | DATASETS: 20 | TRAIN: ("keypoints_coco_2017_val",) 21 | TEST: ("keypoints_coco_2017_val",) 22 | INPUT: 23 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 24 | SOLVER: 25 | WARMUP_FACTOR: 0.33333333 26 | WARMUP_ITERS: 100 27 | STEPS: (5500, 5800) 28 | MAX_ITER: 6000 29 | TEST: 30 | EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]] 31 | -------------------------------------------------------------------------------- /configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | KEYPOINT_ON: True 5 | RESNETS: 6 | DEPTH: 50 7 | ROI_HEADS: 8 | BATCH_SIZE_PER_IMAGE: 256 9 | NUM_CLASSES: 1 10 | ROI_KEYPOINT_HEAD: 11 | POOLER_RESOLUTION: 14 12 | POOLER_SAMPLING_RATIO: 2 13 | ROI_BOX_HEAD: 14 | SMOOTH_L1_BETA: 1.0 # Keypoint AP degrades when using plain L1 loss 15 | RPN: 16 | SMOOTH_L1_BETA: 0.2 # Keypoint AP degrades when using plain L1 loss 17 | DATASETS: 18 | TRAIN: ("keypoints_coco_2017_val",) 19 | TEST: ("keypoints_coco_2017_val",) 20 | INPUT: 21 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 22 | SOLVER: 23 | WARMUP_FACTOR: 0.33333333 24 | WARMUP_ITERS: 100 25 | STEPS: (5500, 5800) 26 | MAX_ITER: 6000 27 | TEST: 28 | EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]] 29 | -------------------------------------------------------------------------------- 
/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | CLIP_GRADIENTS: 14 | ENABLED: True 15 | CLIP_TYPE: "value" 16 | CLIP_VALUE: 1.0 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.001 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-C4.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | IMS_PER_BATCH: 8 # base uses 16 17 | WARMUP_FACTOR: 0.33333 18 | WARMUP_ITERS: 100 19 | STEPS: (11000, 11600) 20 | MAX_ITER: 12000 21 | TEST: 22 | EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]] 23 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP", 42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]] 8 | AUG: 9 | ENABLED: True 10 | MIN_SIZES: (700, 800) # to save some time 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml: 
-------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | MASK_ON: True 5 | DATASETS: 6 | TRAIN: ("coco_2017_val_100",) 7 | TEST: ("coco_2017_val_100",) 8 | SOLVER: 9 | BASE_LR: 0.005 10 | STEPS: (30,) 11 | MAX_ITER: 40 12 | IMS_PER_BATCH: 4 13 | DATALOADER: 14 | NUM_WORKERS: 2 15 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml" 2 | MODEL: 3 | ROI_BOX_HEAD: 4 | TRAIN_ON_PRED_BOXES: True 5 | TEST: 6 | EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]] 7 | -------------------------------------------------------------------------------- /configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | ROI_HEADS: 5 | BATCH_SIZE_PER_IMAGE: 256 6 | MASK_ON: True 7 | DATASETS: 8 | TRAIN: ("coco_2017_val",) 9 | TEST: ("coco_2017_val",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (600,) 12 | MAX_SIZE_TRAIN: 1000 13 | MIN_SIZE_TEST: 800 14 | MAX_SIZE_TEST: 1000 15 | SOLVER: 16 | WARMUP_FACTOR: 0.3333333 17 | WARMUP_ITERS: 100 18 | STEPS: (5500, 5800) 19 | MAX_ITER: 6000 20 | TEST: 21 | EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]] 22 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml" 2 | MODEL: 3 | WEIGHTS: 
"detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100_panoptic_separated",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_100_panoptic_separated",) 12 | TEST: ("coco_2017_val_100_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.005 15 | STEPS: (30,) 16 | MAX_ITER: 40 17 | IMS_PER_BATCH: 4 18 | DATALOADER: 19 | NUM_WORKERS: 1 20 | -------------------------------------------------------------------------------- /configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "PanopticFPN" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | MASK_ON: True 6 | RESNETS: 7 | DEPTH: 50 8 | SEM_SEG_HEAD: 9 | LOSS_WEIGHT: 0.5 10 | DATASETS: 11 | TRAIN: ("coco_2017_val_panoptic_separated",) 12 | TEST: ("coco_2017_val_panoptic_separated",) 13 | SOLVER: 14 | BASE_LR: 0.01 15 | WARMUP_FACTOR: 0.001 16 | WARMUP_ITERS: 500 17 | STEPS: (5500,) 18 | MAX_ITER: 7000 19 | TEST: 20 | EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]] 21 | -------------------------------------------------------------------------------- 
/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | BASE_LR: 0.005 9 | STEPS: (30,) 10 | MAX_ITER: 40 11 | IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl" 4 | DATASETS: 5 | TEST: ("coco_2017_val_100",) 6 | TEST: 7 | EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]] 8 | -------------------------------------------------------------------------------- /configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml" 2 | MODEL: 3 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 4 | DATASETS: 5 | TRAIN: ("coco_2017_val_100",) 6 | TEST: ("coco_2017_val_100",) 7 | SOLVER: 8 | STEPS: (30,) 9 | MAX_ITER: 40 10 | BASE_LR: 0.005 11 
| IMS_PER_BATCH: 4 12 | DATALOADER: 13 | NUM_WORKERS: 2 14 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: 10 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]] 11 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_100_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_100_panoptic_stuffonly",) 10 | INPUT: 11 | MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800) 12 | SOLVER: 13 | BASE_LR: 0.005 14 | STEPS: (30,) 15 | MAX_ITER: 40 16 | IMS_PER_BATCH: 4 17 | DATALOADER: 18 | NUM_WORKERS: 2 19 | -------------------------------------------------------------------------------- /configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml: -------------------------------------------------------------------------------- 1 | _BASE_: "../Base-RCNN-FPN.yaml" 2 | MODEL: 3 | META_ARCHITECTURE: "SemanticSegmentor" 4 | WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl" 5 | RESNETS: 6 | DEPTH: 50 7 | DATASETS: 8 | TRAIN: ("coco_2017_val_panoptic_stuffonly",) 9 | TEST: ("coco_2017_val_panoptic_stuffonly",) 10 | SOLVER: 11 | BASE_LR: 0.01 12 | WARMUP_FACTOR: 0.001 13 | 
WARMUP_ITERS: 300 14 | STEPS: (5500,) 15 | MAX_ITER: 7000 16 | TEST: 17 | EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]] 18 | INPUT: 19 | # no scale augmentation 20 | MIN_SIZE_TRAIN: (800, ) 21 | -------------------------------------------------------------------------------- /output/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ttanzhiqiang/Detectron2_Project/91bf05ab3b0be0a8e20a244b2729f576160d6953/output/result.jpg -------------------------------------------------------------------------------- /output/weixin.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ttanzhiqiang/Detectron2_Project/91bf05ab3b0be0a8e20a244b2729f576160d6953/output/weixin.jpg --------------------------------------------------------------------------------