├── .gitattributes
├── Debug
    └── CfgDefaults.yaml
├── Detectron2
    ├── Base.cpp
    ├── Base.h
    ├── CfgDefaults.yaml
    ├── Data
    │   ├── BuiltinDataset.cpp
    │   ├── BuiltinDataset.h
    │   ├── BuiltinMeta.cpp
    │   ├── BuiltinMeta.h
    │   ├── MetadataCatalog.cpp
    │   ├── MetadataCatalog.h
    │   ├── ResizeShortestEdge.cpp
    │   ├── ResizeShortestEdge.h
    │   ├── ResizeTransform.cpp
    │   ├── ResizeTransform.h
    │   ├── Transform.cpp
    │   ├── Transform.h
    │   ├── TransformGen.cpp
    │   └── TransformGen.h
    ├── Detectron2.cpp
    ├── Detectron2.h
    ├── Detectron2.vcxproj
    ├── Detectron2.vcxproj.filters
    ├── Detectron2.vcxproj.user
    ├── Detectron2Includes.h
    ├── Import
    │   ├── Baseline
    │   │   ├── model_final_997cc7.cpp
    │   │   ├── model_final_a3ec72.cpp
    │   │   ├── model_final_cafdb1.cpp
    │   │   ├── model_final_f10217.cpp
    │   │   └── model_final_f6e8b1.cpp
    │   ├── ImportBaseline.py
    │   ├── ModelImporter.cpp
    │   └── ModelImporter.h
    ├── LICENSE
    ├── MetaArch
    │   ├── GeneralizedRCNN.cpp
    │   ├── GeneralizedRCNN.h
    │   ├── MetaArch.cpp
    │   ├── MetaArch.h
    │   ├── PanopticFPN.cpp
    │   ├── PanopticFPN.h
    │   ├── ProposalNetwork.cpp
    │   ├── ProposalNetwork.h
    │   ├── SemanticSegmentor.cpp
    │   └── SemanticSegmentor.h
    ├── Modules
    │   ├── Backbone.h
    │   ├── BatchNorm
    │   │   ├── BatchNorm.cpp
    │   │   ├── BatchNorm.h
    │   │   ├── BatchNorm2d.h
    │   │   ├── FrozenBatchNorm2d.cpp
    │   │   ├── FrozenBatchNorm2d.h
    │   │   ├── GroupNorm.h
    │   │   ├── NaiveSyncBatchNorm.cpp
    │   │   └── NaiveSyncBatchNorm.h
    │   ├── Conv
    │   │   ├── ConvBn2d.cpp
    │   │   ├── ConvBn2d.h
    │   │   ├── DeformConv.cpp
    │   │   ├── DeformConv.h
    │   │   ├── ModulatedDeformConv.cpp
    │   │   └── ModulatedDeformConv.h
    │   ├── FPN
    │   │   ├── FPN.cpp
    │   │   ├── FPN.h
    │   │   ├── LastLevelMaxPool.cpp
    │   │   ├── LastLevelMaxPool.h
    │   │   ├── LastLevelP6P7.cpp
    │   │   ├── LastLevelP6P7.h
    │   │   ├── SemSegFPNHead.cpp
    │   │   ├── SemSegFPNHead.h
    │   │   └── TopBlock.h
    │   ├── Opeartors
    │   │   ├── DeformConvOp.cpp
    │   │   ├── DeformConvOp.h
    │   │   ├── ModulatedDeformConvOp.cpp
    │   │   ├── ModulatedDeformConvOp.h
    │   │   ├── NewEmptyTensorOp.cpp
    │   │   └── NewEmptyTensorOp.h
    │   ├── ROIHeads
    │   │   ├── BaseKeypointRCNNHead.cpp
    │   │   ├── BaseKeypointRCNNHead.h
    │   │   ├── BaseMaskRCNNHead.cpp
    │   │   ├── BaseMaskRCNNHead.h
    │   │   ├── CascadeROIHeads.cpp
    │   │   ├── CascadeROIHeads.h
    │   │   ├── FastRCNNConvFCHead.cpp
    │   │   ├── FastRCNNConvFCHead.h
    │   │   ├── FastRCNNOutputLayers.cpp
    │   │   ├── FastRCNNOutputLayers.h
    │   │   ├── FastRCNNOutputs.cpp
    │   │   ├── FastRCNNOutputs.h
    │   │   ├── KRCNNConvDeconvUpsampleHead.cpp
    │   │   ├── KRCNNConvDeconvUpsampleHead.h
    │   │   ├── MaskRCNNConvUpsampleHead.cpp
    │   │   ├── MaskRCNNConvUpsampleHead.h
    │   │   ├── ROIHeads.cpp
    │   │   ├── ROIHeads.h
    │   │   ├── RROIHeads.cpp
    │   │   ├── RROIHeads.h
    │   │   ├── Res5ROIHeads.cpp
    │   │   ├── Res5ROIHeads.h
    │   │   ├── RotatedFastRCNNOutputLayers.cpp
    │   │   ├── RotatedFastRCNNOutputLayers.h
    │   │   ├── StandardROIHeads.cpp
    │   │   └── StandardROIHeads.h
    │   ├── ROIPooler
    │   │   ├── ROIAlign.cpp
    │   │   ├── ROIAlign.h
    │   │   ├── ROIAlignRotated.cpp
    │   │   ├── ROIAlignRotated.h
    │   │   ├── ROIPool.cpp
    │   │   ├── ROIPool.h
    │   │   ├── ROIPooler.cpp
    │   │   ├── ROIPooler.h
    │   │   └── ROIPoolerLevel.h
    │   ├── RPN
    │   │   ├── AnchorGenerator.cpp
    │   │   ├── AnchorGenerator.h
    │   │   ├── DefaultAnchorGenerator.cpp
    │   │   ├── DefaultAnchorGenerator.h
    │   │   ├── RPN.cpp
    │   │   ├── RPN.h
    │   │   ├── RPNOutputs.cpp
    │   │   ├── RPNOutputs.h
    │   │   ├── RRPN.cpp
    │   │   ├── RRPN.h
    │   │   ├── RotatedAnchorGenerator.cpp
    │   │   ├── RotatedAnchorGenerator.h
    │   │   ├── StandardRPNHead.cpp
    │   │   └── StandardRPNHead.h
    │   └── ResNet
    │   │   ├── BasicBlock.cpp
    │   │   ├── BasicBlock.h
    │   │   ├── BasicStem.cpp
    │   │   ├── BasicStem.h
    │   │   ├── BottleneckBlock.cpp
    │   │   ├── BottleneckBlock.h
    │   │   ├── CNNBlockBase.cpp
    │   │   ├── CNNBlockBase.h
    │   │   ├── DeformBottleneckBlock.cpp
    │   │   ├── DeformBottleneckBlock.h
    │   │   ├── ResNet.cpp
    │   │   └── ResNet.h
    ├── Structures
    │   ├── BitMasks.cpp
    │   ├── BitMasks.h
    │   ├── Box2BoxTransform.cpp
    │   ├── Box2BoxTransform.h
    │   ├── Boxes.cpp
    │   ├── Boxes.h
    │   ├── GenericMask.cpp
    │   ├── GenericMask.h
    │   ├── ImageList.cpp
    │   ├── ImageList.h
    │   ├── Instances.cpp
    │   ├── Instances.h
    │   ├── Keypoints.cpp
    │   ├── Keypoints.h
    │   ├── MaskOps.cpp
    │   ├── MaskOps.h
    │   ├── Masks.h
    │   ├── Matcher.cpp
    │   ├── Matcher.h
    │   ├── NMS.cpp
    │   ├── NMS.h
    │   ├── PanopticSegment.cpp
    │   ├── PanopticSegment.h
    │   ├── PolygonMasks.cpp
    │   ├── PolygonMasks.h
    │   ├── PostProcessing.cpp
    │   ├── PostProcessing.h
    │   ├── RotatedBoxes.cpp
    │   ├── RotatedBoxes.h
    │   ├── Sampling.cpp
    │   ├── Sampling.h
    │   ├── Sequence.cpp
    │   ├── Sequence.h
    │   ├── ShapeSpec.cpp
    │   └── ShapeSpec.h
    ├── Utils
    │   ├── AsyncPredictor.cpp
    │   ├── AsyncPredictor.h
    │   ├── Canvas.h
    │   ├── CfgNode.cpp
    │   ├── CfgNode.h
    │   ├── DefaultPredictor.cpp
    │   ├── DefaultPredictor.h
    │   ├── DefaultTrainer.cpp
    │   ├── DefaultTrainer.h
    │   ├── EventStorage.cpp
    │   ├── EventStorage.h
    │   ├── File.cpp
    │   ├── File.h
    │   ├── Predictor.h
    │   ├── Timer.cpp
    │   ├── Timer.h
    │   ├── TrainerBase.h
    │   ├── Utils.cpp
    │   ├── Utils.h
    │   ├── VideoAnalyzer.cpp
    │   ├── VideoAnalyzer.h
    │   ├── VideoVisualizer.cpp
    │   ├── VideoVisualizer.h
    │   ├── VisColor.cpp
    │   ├── VisColor.h
    │   ├── VisImage.cpp
    │   ├── VisImage.h
    │   ├── Visualizer.cpp
    │   ├── Visualizer.h
    │   ├── cvCanvas.cpp
    │   ├── cvCanvas.h
    │   ├── utils_train.cpp
    │   └── utils_train.hpp
    ├── VisualizationDemo.cpp
    ├── VisualizationDemo.h
    ├── coco
    │   ├── 1
    │   │   ├── data.cpp
    │   │   └── data.hpp
    │   ├── data.cpp
    │   ├── data.hpp
    │   ├── json.hpp
    │   ├── mask.cpp
    │   ├── mask.h
    │   ├── maskApi.c
    │   └── maskApi.h
    ├── detectron2
    │   ├── ROIAlign
    │   │   ├── ROIAlign.h
    │   │   ├── ROIAlign_cpu.cpp
    │   │   └── ROIAlign_cuda.cu
    │   ├── ROIAlignRotated
    │   │   ├── ROIAlignRotated.h
    │   │   ├── ROIAlignRotated_cpu.cpp
    │   │   └── ROIAlignRotated_cuda.cu
    │   ├── ROIPool
    │   │   ├── ROIPool.h
    │   │   ├── ROIPool_cpu.cpp
    │   │   └── ROIPool_cuda.cu
    │   ├── box_iou_rotated
    │   │   ├── box_iou_rotated.h
    │   │   ├── box_iou_rotated_cpu.cpp
    │   │   ├── box_iou_rotated_cuda.cu
    │   │   └── box_iou_rotated_utils.h
    │   ├── cuda_version.cu
    │   ├── deformable
    │   │   ├── deform_conv.h
    │   │   ├── deform_conv_cuda.cu
    │   │   └── deform_conv_cuda_kernel.cu
    │   ├── nms
    │   │   ├── cuda_helpers.h
    │   │   ├── nms.h
    │   │   ├── nms_cpu.cpp
    │   │   └── nms_cuda.cu
    │   ├── nms_rotated
    │   │   ├── nms_rotated.h
    │   │   ├── nms_rotated_cpu.cpp
    │   │   └── nms_rotated_cuda.cu
    │   └── vision.cpp
    ├── fvcore
    │   ├── config.cpp
    │   ├── config.h
    │   ├── fvcore.cpp
    │   ├── fvcore.h
    │   ├── yacs.cpp
    │   └── yacs.h
    ├── trainDemo.cpp
    └── trainDemo.h
├── Detectron2_Project.sln
├── Detectron2_Project.vcxproj
├── Detectron2_Project.vcxproj.filters
├── Detectron2_Project.vcxproj.user
├── Detectron2_test.cpp
├── Detectron2_train.cpp
├── NetLib2
    ├── NetLib2.vcxproj
    ├── NetLib2.vcxproj.filters
    └── NetLib2.vcxproj.user
├── README.md
├── configs
    ├── Base-RCNN-C4.yaml
    ├── Base-RCNN-DilatedC5.yaml
    ├── Base-RCNN-FPN.yaml
    ├── Base-RetinaNet.yaml
    ├── COCO-Detection
    │   ├── fast_rcnn_R_50_FPN_1x.yaml
    │   ├── faster_rcnn_R_101_C4_3x.yaml
    │   ├── faster_rcnn_R_101_DC5_3x.yaml
    │   ├── faster_rcnn_R_101_FPN_3x.yaml
    │   ├── faster_rcnn_R_50_C4_1x.yaml
    │   ├── faster_rcnn_R_50_C4_3x.yaml
    │   ├── faster_rcnn_R_50_DC5_1x.yaml
    │   ├── faster_rcnn_R_50_DC5_3x.yaml
    │   ├── faster_rcnn_R_50_FPN_1x.yaml
    │   ├── faster_rcnn_R_50_FPN_3x.yaml
    │   ├── faster_rcnn_X_101_32x8d_FPN_3x.yaml
    │   ├── retinanet_R_101_FPN_3x.yaml
    │   ├── retinanet_R_50_FPN_1x.yaml
    │   ├── retinanet_R_50_FPN_3x.yaml
    │   ├── rpn_R_50_C4_1x.yaml
    │   └── rpn_R_50_FPN_1x.yaml
    ├── COCO-InstanceSegmentation
    │   ├── mask_rcnn_R_101_C4_3x.yaml
    │   ├── mask_rcnn_R_101_DC5_3x.yaml
    │   ├── mask_rcnn_R_101_FPN_3x.yaml
    │   ├── mask_rcnn_R_50_C4_1x.yaml
    │   ├── mask_rcnn_R_50_C4_3x.yaml
    │   ├── mask_rcnn_R_50_DC5_1x.yaml
    │   ├── mask_rcnn_R_50_DC5_3x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x_giou.yaml
    │   ├── mask_rcnn_R_50_FPN_3x.yaml
    │   └── mask_rcnn_X_101_32x8d_FPN_3x.yaml
    ├── COCO-Keypoints
    │   ├── Base-Keypoint-RCNN-FPN.yaml
    │   ├── keypoint_rcnn_R_101_FPN_3x.yaml
    │   ├── keypoint_rcnn_R_50_FPN_1x.yaml
    │   ├── keypoint_rcnn_R_50_FPN_3x.yaml
    │   └── keypoint_rcnn_X_101_32x8d_FPN_3x.yaml
    ├── COCO-PanopticSegmentation
    │   ├── Base-Panoptic-FPN.yaml
    │   ├── panoptic_fpn_R_101_3x.yaml
    │   ├── panoptic_fpn_R_50_1x.yaml
    │   └── panoptic_fpn_R_50_3x.yaml
    ├── Cityscapes
    │   └── mask_rcnn_R_50_FPN.yaml
    ├── Detectron1-Comparisons
    │   ├── faster_rcnn_R_50_FPN_noaug_1x.yaml
    │   ├── keypoint_rcnn_R_50_FPN_1x.yaml
    │   └── mask_rcnn_R_50_FPN_noaug_1x.yaml
    ├── LVISv0.5-InstanceSegmentation
    │   ├── mask_rcnn_R_101_FPN_1x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   └── mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── LVISv1-InstanceSegmentation
    │   ├── mask_rcnn_R_101_FPN_1x.yaml
    │   ├── mask_rcnn_R_50_FPN_1x.yaml
    │   └── mask_rcnn_X_101_32x8d_FPN_1x.yaml
    ├── Misc
    │   ├── cascade_mask_rcnn_R_50_FPN_1x.yaml
    │   ├── cascade_mask_rcnn_R_50_FPN_3x.yaml
    │   ├── cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml
    │   ├── mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml
    │   ├── mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml
    │   ├── mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml
    │   ├── mask_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── mask_rcnn_R_50_FPN_3x_syncbn.yaml
    │   ├── panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml
    │   ├── scratch_mask_rcnn_R_50_FPN_3x_gn.yaml
    │   ├── scratch_mask_rcnn_R_50_FPN_9x_gn.yaml
    │   ├── scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml
    │   └── semantic_R_50_FPN_1x.yaml
    ├── PascalVOC-Detection
    │   ├── faster_rcnn_R_50_C4.yaml
    │   └── faster_rcnn_R_50_FPN.yaml
    └── quick_schedules
    │   ├── cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── cascade_mask_rcnn_R_50_FPN_instant_test.yaml
    │   ├── fast_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── fast_rcnn_R_50_FPN_instant_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_instant_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml
    │   ├── keypoint_rcnn_R_50_FPN_training_acc_test.yaml
    │   ├── mask_rcnn_R_50_C4_GCV_instant_test.yaml
    │   ├── mask_rcnn_R_50_C4_inference_acc_test.yaml
    │   ├── mask_rcnn_R_50_C4_instant_test.yaml
    │   ├── mask_rcnn_R_50_C4_training_acc_test.yaml
    │   ├── mask_rcnn_R_50_DC5_inference_acc_test.yaml
    │   ├── mask_rcnn_R_50_FPN_inference_acc_test.yaml
    │   ├── mask_rcnn_R_50_FPN_instant_test.yaml
    │   ├── mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml
    │   ├── mask_rcnn_R_50_FPN_training_acc_test.yaml
    │   ├── panoptic_fpn_R_50_inference_acc_test.yaml
    │   ├── panoptic_fpn_R_50_instant_test.yaml
    │   ├── panoptic_fpn_R_50_training_acc_test.yaml
    │   ├── retinanet_R_50_FPN_inference_acc_test.yaml
    │   ├── retinanet_R_50_FPN_instant_test.yaml
    │   ├── rpn_R_50_FPN_inference_acc_test.yaml
    │   ├── rpn_R_50_FPN_instant_test.yaml
    │   ├── semantic_R_50_FPN_inference_acc_test.yaml
    │   ├── semantic_R_50_FPN_instant_test.yaml
    │   └── semantic_R_50_FPN_training_acc_test.yaml
└── output
    ├── result.jpg
    └── weixin.jpg


/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 | 


--------------------------------------------------------------------------------
/Detectron2/Base.cpp:
--------------------------------------------------------------------------------
1 | #include "Base.h"
2 | 


--------------------------------------------------------------------------------
/Detectron2/Base.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <stdlib.h>
 4 | 
 5 | #include <assert.h>
 6 | #include <malloc.h>
 7 | #include <memory.h>
 8 | 
 9 | #include <array>
10 | #include <deque>
11 | #include <functional>
12 | #include <list>
13 | #include <memory>
14 | #include <set>
15 | #include <unordered_map>
16 | #include <unordered_set>
17 | #include <utility>
18 | #include <vector>
19 | 
20 | #include <opencv2/core/core.hpp>
21 | #include <opencv2/imgcodecs/imgcodecs.hpp>
22 | #include <opencv2/imgproc/imgproc.hpp>
23 | #include <opencv2/videoio.hpp>
24 | #include <opencv2/highgui.hpp>
25 | 
26 | #include <torch/torch.h>
27 | 


--------------------------------------------------------------------------------
/Detectron2/Data/BuiltinMeta.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "MetadataCatalog.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from data/datasets/builtin_meta.py
 9 | 
10 | 	class BuiltinMeta { // declarations only; every member is static
11 | 	public:
12 | 		static Metadata _get_builtin_metadata(const std::string &dataset_name); // the only public entry point
13 | 		// The private helpers below all receive the Metadata by reference (presumably to fill it in -- confirm in BuiltinMeta.cpp).
14 | 	private:
15 | 		static void _get_coco_instances_meta(Metadata &metadata);
16 | 
17 | 		// Metadata for the "separated" version of the panoptic segmentation dataset (handed in by reference).
18 | 		static void _get_coco_panoptic_separated_meta(Metadata &metadata);
19 | 
20 | 		static void _get_coco_person_meta(Metadata &metadata);
21 | 		static void _get_cityscapes_meta(Metadata &metadata);
22 | 	};
23 | }
24 | 


--------------------------------------------------------------------------------
/Detectron2/Data/MetadataCatalog.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "MetadataCatalog.h"
 3 | 
 4 | using namespace std;
 5 | using namespace Detectron2;
 6 | 
 7 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | static unordered_map<string, Metadata> _NAME_TO_META = {}; // file-local registry: name -> Metadata, filled by MetadataCatalog::get()
10 | 
11 | Metadata MetadataCatalog::get(const std::string &name) {
12 | 	// Returns the Metadata registered under `name`; on first use one is created, named and registered.
13 | 	assert(!name.empty());
14 | 	const auto found = _NAME_TO_META.find(name);
15 | 	if (found != _NAME_TO_META.end()) {
16 | 		return found->second;
17 | 	}
18 | 	// Unknown name: build a fresh MetadataImpl that only carries its name and remember it.
19 | 	auto created = make_shared<MetadataImpl>();
20 | 	created->name = name;
21 | 	_NAME_TO_META[name] = created;
22 | 	return created;
23 | }
24 | 
25 | std::vector<std::string> MetadataCatalog::list() {
26 | 	// Returns the names registered so far; unordered_map iteration order is unspecified.
27 | 	std::vector<std::string> names;
28 | 	names.reserve(_NAME_TO_META.size());
29 | 	for (const auto &entry : _NAME_TO_META) // by reference: no per-entry copy of key + Metadata
30 | 		names.push_back(entry.first);
31 | 	return names;
32 | }
33 | 


--------------------------------------------------------------------------------
/Detectron2/Data/ResizeShortestEdge.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ResizeShortestEdge.h"
 3 | 
 4 | #include "ResizeTransform.h"
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | ResizeShortestEdge::ResizeShortestEdge(int short_edge_length, int64_t max_size, const std::string &sample_style,
13 | 	Transform::Interp interp) : // a single length is forwarded to the list overload as { len, len }
14 | 	ResizeShortestEdge({ short_edge_length, short_edge_length }, max_size, sample_style, interp) {
15 | }
16 | 
17 | ResizeShortestEdge::ResizeShortestEdge(const std::vector<int> &short_edge_length, int64_t max_size,
18 | 	const std::string &sample_style, Transform::Interp interp) :
19 | 	m_short_edge_length(short_edge_length), m_max_size(max_size),
20 | 	m_sample_style(sample_style), // declared in the header but previously never initialised
21 | 	m_is_range(sample_style == "range"), m_interp(interp)
22 | {
23 | 	assert(sample_style == "range" || sample_style == "choice");
24 | 	assert(m_is_range ? short_edge_length.size() >= 2 : !short_edge_length.empty()); // elements get_transform() reads
25 | }
26 | 
27 | std::shared_ptr<Transform> ResizeShortestEdge::get_transform(torch::Tensor img) {
28 | 	auto h = img.size(0); // dims 0 and 1 of img are read as height and width
29 | 	auto w = img.size(1);
30 | 	// Target length of the shorter edge: uniform in [min, max] for "range", one random list entry for "choice".
31 | 	int64_t size;
32 | 	if (m_is_range) {
33 | 		size = torch::randint(m_short_edge_length[0], m_short_edge_length[1] + 1, 1).item<int64_t>();
34 | 	}
35 | 	else {
36 | 		size = m_short_edge_length[torch::randint(0, m_short_edge_length.size(), 1).item<int64_t>()];
37 | 	}
38 | 	if (size == 0) {
39 | 		return make_shared<NoOpTransform>();
40 | 	}
41 | 
42 | 	// One common factor for both edges keeps the aspect ratio (the old code squashed every image to
43 | 	// size x size), and the factor is capped so the longer edge stays within max_size (previously ignored).
44 | 	const auto shorter = h < w ? h : w, longer = h < w ? w : h;
45 | 	double scale = (double)size / shorter;
46 | 	if (scale * longer > (double)m_max_size) scale = (double)m_max_size / longer;
47 | 	return make_shared<ResizeTransform>(h, w, int(scale * h + 0.5), int(scale * w + 0.5), m_interp);
48 | }
49 | 


--------------------------------------------------------------------------------
/Detectron2/Data/ResizeShortestEdge.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "TransformGen.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from data/transform/transform_gen.py
 9 | 
10 | 	/**
11 | 		Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge.
12 | 		If `max_size` is reached, then downscale so that the longer edge does not exceed max_size.
13 | 	*/
14 | 	class ResizeShortestEdge : public TransformGen {
15 | 	public:
16 | 		/**
17 | 			short_edge_length (int or list[int]): with sample_style == "range", a [min, max] interval from
18 | 				which the shortest edge length is sampled (a single int is treated as min == max);
19 | 				with sample_style == "choice", a list of shortest edge lengths to sample from.
20 | 			max_size (int): maximum allowed longest edge length.
21 | 			sample_style (str): either "range" or "choice". interp: interpolation passed on to the transform.
22 | 		*/
23 | 		ResizeShortestEdge(int short_edge_length, int64_t max_size = INT64_MAX,
24 | 			const std::string &sample_style = "range", Transform::Interp interp = Transform::kBILINEAR);
25 | 		ResizeShortestEdge(const std::vector<int> &short_edge_length, int64_t max_size = INT64_MAX,
26 | 			const std::string &sample_style = "range", Transform::Interp interp = Transform::kBILINEAR);
27 | 		// Samples a target size for img and returns the matching transform (a NoOpTransform when the size is 0).
28 | 		virtual std::shared_ptr<Transform> get_transform(torch::Tensor img) override;
29 | 
30 | 	private:
31 | 		std::vector<int> m_short_edge_length; // [min, max] or the list of choices, as passed in
32 | 		int64_t m_max_size;
33 | 		std::string m_sample_style;
34 | 		bool m_is_range; // true when sample_style == "range"; this is what get_transform() consults
35 | 		Transform::Interp m_interp; // handed to the ResizeTransform that get_transform() builds
36 | 	};
37 | }
38 | 


--------------------------------------------------------------------------------
/Detectron2/Data/ResizeTransform.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ResizeTransform.h"
 3 | 
 4 | #include <Detectron2/Utils/Utils.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | ResizeTransform::ResizeTransform(int h, int w, int new_h, int new_w, Interp interp) : // stores the arguments, nothing else
13 | 	m_h(h), m_w(w), m_new_h(new_h), m_new_w(new_w), m_interp(interp) {
14 | }
15 | 
16 | torch::Tensor ResizeTransform::apply_image(torch::Tensor img, Interp interp) {
17 | 	// Resizes img (dims 0/1 = height/width) to m_new_h x m_new_w; interp == kNone means "use m_interp".
18 | 	assert(img.size(0) == m_h && img.size(1) == m_w);
19 | 	assert(img.dim() <= 4);
20 | 	if (interp == kNone) {
21 | 		interp = m_interp;
22 | 	}
23 | 	if (img.dtype() == torch::kUInt8) { // 8-bit images are resized through cv::resize
24 | 		cv::Mat mimg = image_to_mat(img);
25 | 		cv::resize(mimg, mimg, { m_new_w, m_new_h }, 0.0, 0.0, interp);
26 | 		return image_to_tensor(mimg);
27 | 	}
28 | 
29 | 	// Every other dtype goes through interpolate(), which wants NCHW. Build the 4-D shape by CONCATENATING
30 | 	// [h, w] + [1] * (4 - dim) + trailing dims; the old code added those pieces element-wise as tensors,
31 | 	// which produced a wrong (or un-broadcastable) shape for view().
32 | 	auto shape = img.sizes().vec();
33 | 	vector<int64_t> shape_4d(shape.begin(), shape.begin() + 2);
34 | 	shape_4d.insert(shape_4d.end(), 4 - shape.size(), (int64_t)1);
35 | 	shape_4d.insert(shape_4d.end(), shape.begin() + 2, shape.end());
36 | 	img = img.view(shape_4d).permute({ 2, 3, 0, 1 }); // hw(c) -> nchw
37 | 
38 | 	auto options = nn::functional::InterpolateFuncOptions().size(vector<int64_t>{ m_new_h, m_new_w });
39 | 	switch (interp) { // align_corners is only legal for interpolating modes; nearest rejects it at runtime
40 | 	case kNEAREST:  options.mode(torch::kNearest);  break;
41 | 	case kBILINEAR: options.mode(torch::kBilinear).align_corners(false); break;
42 | 	case kBICUBIC:  options.mode(torch::kBicubic).align_corners(false);  break;
43 | 	default: assert(false); break;
44 | 	}
45 | 	shape[0] = m_new_h; shape[1] = m_new_w; // same layout as the input, with the new height/width
46 | 	return nn::functional::interpolate(img, options).permute({ 2, 3, 0, 1 }).view(shape); // nchw -> hw(c)
47 | }
48 | 
49 | torch::Tensor ResizeTransform::apply_coords(torch::Tensor coords) {
50 | 	const float factor[2] = { (float)m_new_w / m_w, (float)m_new_h / m_h }; // scale for column 0, column 1
51 | 	for (int col = 0; col < 2; col++) coords.index_put_({ Colon, col }, coords.index({ Colon, col }) * factor[col]);
52 | 	return coords; // same tensor handle: the columns were rewritten in place
53 | }
54 | 
55 | torch::Tensor ResizeTransform::apply_segmentation(torch::Tensor segmentation) {
56 | 	return apply_image(segmentation, kNEAREST); // same resize as images, but always with kNEAREST
57 | }
58 | 
59 | std::shared_ptr<Transform> ResizeTransform::inverse() {
60 | 	return make_shared<ResizeTransform>(m_new_h, m_new_w, m_h, m_w, m_interp); // source and target sizes swapped
61 | }
62 | 


--------------------------------------------------------------------------------
/Detectron2/Data/ResizeTransform.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Transform.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from data/transform/transform.py
 9 | 
10 | 	/**
11 | 		Resize the image to a target size.
12 | 	*/
13 | 	class ResizeTransform : public Transform {
14 | 	public:
15 | 		/**
16 | 			h, w (int): original image size
17 | 			new_h, new_w (int): new image size
18 | 			interp: interpolation used by apply_image() when it is called with kNone; defaults to bilinear.
19 | 		*/
20 | 		ResizeTransform(int h, int w, int new_h, int new_w, Interp interp = kBILINEAR);
21 | 		// apply_image() asserts img.size(0) == h and img.size(1) == w before resizing.
22 | 		virtual torch::Tensor apply_image(torch::Tensor img, Interp interp = kNone) override;
23 | 		virtual torch::Tensor apply_coords(torch::Tensor coords) override; // column 0 *= new_w / w, column 1 *= new_h / h
24 | 		virtual torch::Tensor apply_segmentation(torch::Tensor segmentation) override; // apply_image() with kNEAREST
25 | 		virtual std::shared_ptr<Transform> inverse() override; // ResizeTransform from (new_h, new_w) back to (h, w)
26 | 
27 | 	private:
28 | 		int m_h;
29 | 		int m_w;
30 | 		int m_new_h;
31 | 		int m_new_w;
32 | 		Interp m_interp;
33 | 	};
34 | }
35 | 


--------------------------------------------------------------------------------
/Detectron2/Data/Transform.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "Transform.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | void Transform::_set_attributes(const std::unordered_map<std::string, YAML::Node> &params) {
11 | 	// Copies params into m_attrs, skipping the "self" entry and any key that begins with '_'.
12 | 	for (const auto &entry : params) { // by reference: the by-value loop copied each key/node pair
13 | 		const std::string &key = entry.first;
14 | 		if (key != "self" && (key.empty() || key[0] != '_')) // same test as find('_') != 0, without the scan
15 | 			m_attrs[key] = entry.second;
16 | 	}
17 | }
18 | 
19 | torch::Tensor Transform::apply_box(torch::Tensor box) {
20 | 	// Transforms boxes by transforming their four corners and taking the bounds of the results.
21 | 	// Gather order turning each (x0, y0, x1, y1) row into ([x0, y0], [x1, y0], [x0, y1], [x1, y1]).
22 | 	const vector<int64_t> corner_order{ 0, 1, 2, 1, 0, 3, 2, 3 };
23 | 	Tensor corners = box.reshape({ -1, 4 }).index({ Colon, torch::tensor(corner_order) }).reshape({ -1, 2 });
24 | 	corners = apply_coords(corners).reshape({ -1, 4, 2 });
25 | 	// Per box: minimum over its four points followed by the maximum, concatenated along dim 1.
26 | 	Tensor lower = corners.min_values(1);
27 | 	Tensor upper = corners.max_values(1);
28 | 	return torch::cat({ lower, upper }, 1);
29 | }
30 | 
31 | TensorVec Transform::apply_polygons(const TensorVec &polygons) {
32 | 	// Runs apply_coords on each polygon and returns the results in input order.
33 | 	TensorVec transformed;
34 | 	transformed.reserve(polygons.size());
35 | 	for (auto it = polygons.begin(); it != polygons.end(); ++it)
36 | 		transformed.push_back(apply_coords(*it));
37 | 	return transformed;
38 | }
39 | 
40 | std::shared_ptr<Transform> Transform::inverse() {
41 | 	assert(false); // stub: the base class provides no inverse
42 | 	return nullptr; // only reached when asserts are compiled out
43 | }
44 | 
45 | std::string Transform::repr() const {
46 | 	assert(false); // stub: no textual representation is implemented
47 | 	return ""; // only reached when asserts are compiled out
48 | }
49 | 
50 | 


--------------------------------------------------------------------------------
/Detectron2/Data/TransformGen.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "TransformGen.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | torch::Tensor TransformGen::_rand_range(double low, double *high, IntArrayRef size) {
11 | 	// Uniform samples of shape `size` between low and *high. With high == nullptr the single bound acts as
12 | 	// the upper one (range 0..low); the old code wrote through the null pointer in exactly that case.
13 | 	const double lower = (high != nullptr) ? low : 0.0;
14 | 	const double upper = (high != nullptr) ? *high : low;
15 | 	return torch::rand(size).uniform_(lower, upper);
16 | }
17 | 
18 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
19 | 
20 | TransformGen::TransformGen(const std::unordered_map<std::string, YAML::Node> &params) {
21 | 	// Keeps every entry of params in m_attrs except "self" and keys that begin with '_'.
22 | 	for (const auto &entry : params) { // by reference: the by-value loop copied each key/node pair
23 | 		const std::string &key = entry.first;
24 | 		if (key != "self" && (key.empty() || key[0] != '_')) // same test as find('_') != 0, without the scan
25 | 			m_attrs[key] = entry.second;
26 | 	}
27 | }
28 | 
29 | std::string TransformGen::repr() const {
30 | 	assert(false); // stub: no textual representation is implemented
31 | 	return ""; // only reached when asserts are compiled out
32 | }
33 | 


--------------------------------------------------------------------------------
/Detectron2/Data/TransformGen.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Transform.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from data/transform/transform_gen.py
 9 | 
10 |     /**
11 | 		TransformGen takes an image of type uint8 in range [0, 255], or
12 | 		floating point in range [0, 1] or [0, 255] as input.
13 | 
14 | 		It creates a :class:`Transform` based on the given image, sometimes with randomness.
15 | 		The transform can then be used to transform images
16 | 		or other data (boxes, points, annotations, etc.) associated with it.
17 | 
18 | 		The assumption made in this class
19 | 		is that the image itself is sufficient to instantiate a transform.
20 | 		When this assumption is not true, you need to create the transforms by your own.
21 | 
22 | 		A list of `TransformGen` can be applied with :func:`apply_transform_gens`.
23 | 	*/
24 | 	class TransformGen {
25 | 	public:
26 | 		TransformGen(const std::unordered_map<std::string, YAML::Node> &params = {}); // params are kept in m_attrs
27 | 		virtual ~TransformGen() {}
28 | 		// Pure virtual: each subclass returns the Transform to use for `img`.
29 | 		virtual std::shared_ptr<Transform> get_transform(torch::Tensor img) = 0;
30 | 
31 | 		/**
32 | 			Meant to produce something like (the definition in TransformGen.cpp is an assert(false) stub):
33 | 			"MyTransformGen(field1={self.field1}, field2={self.field2})"
34 | 		*/
35 | 		std::string repr() const;
36 | 		std::string str() const {
37 | 			return repr();
38 | 		}
39 | 
40 | 	protected:
41 | 		std::unordered_map<std::string, YAML::Node> m_attrs; // constructor params minus "self" and '_'-prefixed keys
42 | 
43 | 		// Uniform random tensor of shape `size` with values between low and *high.
44 | 		static torch::Tensor _rand_range(double low = 1.0, double *high = nullptr, torch::IntArrayRef size = {});
45 | 	};
46 | }
47 | 


--------------------------------------------------------------------------------
/Detectron2/Detectron2.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "Detectron2.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | bool Detectron2::cudaEnabled() { // thin wrapper over libtorch's CUDA availability query
11 | 	return torch::cuda::is_available();
12 | }
13 | 
14 | void Detectron2::retry_if_cuda_oom(std::function<void()> func) {
15 | 	//~! no retry logic exists yet: func is simply invoked once
16 | 	func();
17 | }
18 | 
19 | int Detectron2::IntLog2(int exp) {
20 | 	// Exact base-2 logarithm of a power of two, found by counting halvings.
21 | 	// Any other input trips one of the asserts (which only exist in debug builds).
22 | 	int halvings = 0;
23 | 	for (; exp > 1; exp /= 2, halvings++) {
24 | 		assert(exp % 2 == 0);
25 | 	}
26 | 	assert(exp == 1);
27 | 	return halvings;
28 | }
29 | 
30 | int Detectron2::IntExp2(int n) {
31 | 	// 2 to the power n by repeated doubling; returns 1 for n <= 0.
32 | 	int power = 1;
33 | 	for (int remaining = n; remaining > 0; remaining--)
34 | 		power *= 2;
35 | 	return power;
36 | }
37 | 
38 | std::string Detectron2::FormatString(const char *fmt, int d) {
39 | 	char buf[256] = ""; // zero-filled: a failed snprintf now yields "" instead of a read of indeterminate bytes
40 | 	snprintf(buf, sizeof(buf), fmt, d); // fmt must consume exactly one int; output beyond 255 chars is truncated
41 | 	return buf;
42 | }
43 | 
44 | std::string Detectron2::FormatString(const char *fmt, double f) {
45 | 	char buf[256] = ""; // zero-filled: a failed snprintf now yields "" instead of a read of indeterminate bytes
46 | 	snprintf(buf, sizeof(buf), fmt, f); // fmt must consume exactly one double; output beyond 255 chars is truncated
47 | 	return buf;
48 | }
49 | 
50 | torch::Tensor Detectron2::slice_range(int64_t start, int64_t end, int64_t step) {
51 | 	// start, start + step, ... (all < end) as a 1-D tensor; empty when start >= end.
52 | 	assert(step > 0 || start >= end); // a non-positive step with start < end could never reach end
53 | 	vector<int64_t> range;
54 | 	if (start < end && step > 0) range.reserve((end - start + step - 1) / step); // exact count; the old estimate could go negative
55 | 	for (int64_t i = start; i < end; i += step) range.push_back(i);
56 | 	return torch::tensor(range);
57 | }
58 | 
59 | std::vector<int64_t> Detectron2::vectorize(const torch::Tensor &t) {
60 | 	// Copies a 1-D tensor into a std::vector, reading each element through item<int64_t>().
61 | 	assert(t.dim() == 1);
62 | 	vector<int64_t> values;
63 | 	values.reserve(t.numel());
64 | 	for (int i = 0; i < t.numel(); ++i)
65 | 		values.push_back(t[i].item<int64_t>());
66 | 	return values;
67 | }
68 | 
69 | torch::Tensor Detectron2::tapply(const torch::Tensor &src, function<torch::Tensor(torch::Tensor)> fx) {
70 | 	// Calls fx on each slice src[i] along dim 0 and joins the results with torch::cat.
71 | 	const int count = src.size(0);
72 | 	TensorVec pieces;
73 | 	pieces.reserve(count);
74 | 	for (int i = 0; i < count; ++i)
75 | 		pieces.push_back(fx(src[i]));
76 | 	return torch::cat(pieces);
77 | }
78 | 


--------------------------------------------------------------------------------
/Detectron2/Detectron2.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 |   <PropertyGroup />
4 | </Project>


--------------------------------------------------------------------------------
/Detectron2/Detectron2Includes.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Data/ResizeShortestEdge.h>
 4 | #include <Detectron2/MetaArch/GeneralizedRCNN.h>
 5 | #include <Detectron2/Utils/DefaultPredictor.h>
 6 | #include <Detectron2/Utils/File.h>
 7 | #include <Detectron2/Utils/Utils.h>
 8 | #include <Detectron2/Utils/VideoAnalyzer.h>
 9 | #include <Detectron2/Utils/VideoVisualizer.h>
10 | #include <Detectron2/Data/BuiltinDataset.h>
11 | #include <Detectron2/VisualizationDemo.h>
12 | #include <Detectron2/trainDemo.h>


--------------------------------------------------------------------------------
/Detectron2/Import/ImportBaseline.py:
--------------------------------------------------------------------------------
 1 | ﻿import numpy
 2 | import os
 3 | import pickle
 4 | import re
 5 | import sys
 6 | from numpy import array
 7 | 
 8 | # Example: python ImportBaseline.py model_final_997cc7
 9 | # modelName = sys.argv[1]
10 | modelName = 'model_final_f6e8b1'
11 | 
12 | # checkpoints = os.getenv('D2_CHECKPOINTS_DIR') + '\\'
13 | checkpoints = 'D:\\libtorch\\detectron2_project\\Detectron2\\Import\\'
14 | 
15 | fcpp = open(os.getcwd() + '\\Baseline\\' + modelName + '.cpp', 'w')
16 | fdataFileName = checkpoints + modelName + '.data'
17 | fdata = open(fdataFileName, 'wb')
18 | loaded = pickle.load(open(checkpoints + modelName + '.pkl', 'rb'))
19 | model = loaded["model"]
20 | 
21 | fcpp.write('#include "Base.h"\n')
22 | fcpp.write('#include <Detectron2/Import/ModelImporter.h>\n')
23 | fcpp.write('\n')
24 | fcpp.write('using namespace Detectron2;\n')
25 | fcpp.write('\n')
26 | fcpp.write('/' * 119)
27 | fcpp.write('\n')
28 | fcpp.write('\n')
29 | 
30 | fcpp.write('std::string ModelImporter::import_' + modelName + '() {\n')
31 | offset = 0
32 | num = 0
33 | for key in model:
34 |     m_key = key[0:18]
35 |     m_key_1 = key[0:9]
36 |     m_key_box_predictor = key[0:23]
37 | 
38 |     data = model[key]
39 |     shape = data.shape
40 |     numel = data.size
41 |     data = data.reshape([numel])
42 | 
43 | 
44 |     if (m_key_box_predictor == 'roi_heads.box_predictor'):
45 |         continue
46 |     # if (m_key_1 == 'roi_heads'):
47 |     #     continue
48 |     # if (m_key == 'proposal_generator') :
49 |     #     continue
50 |     fcpp.write('\tAdd("' + key + '", ' + str(numel) + '); // ' + str(offset) + '\n')
51 |     fdata.write(data.tobytes())
52 |     offset += numel * 4
53 |     assert fdata.tell() == offset, "{} != {}".format(fdata.tell(), offset)
54 |     num = num+1
55 | 
56 | fcpp.write('\n')
57 | fcpp.write('\treturn DataDir() + "\\\\' + modelName + '.data";\n')
58 | fcpp.write('}\n')
59 | 


--------------------------------------------------------------------------------
/Detectron2/Import/ModelImporter.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Base.h>
 4 | #include <Detectron2/Utils/File.h>
 5 | 
namespace Detectron2
{
	// Source of initial values for module parameters.
	// Callers in this project check HasData(): when it is true they pass named tensors to
	// Initialize()/Import(); otherwise they fill tensors through FillTensor() with a Fill mode.
	class ModelImporter {
	public:
		// Known pre-converted baseline models.
		enum Model {
			kNone,
			kDemo,						// COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml

			// Root: https://github.com/facebookresearch/detectron2/tree/master/configs
			kCOCODetection,				// COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml
			kCOCOKeypoints,				// COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml
			kCOCOInstanceSegmentation,	// COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml
			kCOCOPanopticSegmentation	// COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml
		};
		// Maps a file name to one of the Model values (mapping rules live in the .cpp).
		static Model FilenameToModel(const std::string &filename);

		// Initialization scheme applied by FillTensor; the exact distributions are
		// defined by the implementation, not by this header.
		enum Fill {
			kNoFill,
			kZeroFill,
			kConstantFill,
			kNormalFill2,
			kNormalFill3,
			kXavierNormalFill,
			kCaffe2XavierFill,
			kCaffe2MSRAFill,
			kCaffe2MSRAFillIn
		};

		// Fills tensor `x` according to `fill`.
		static void FillTensor(torch::Tensor x, Fill fill);

		// Directory holding the converted ".data" files; the generated import_* functions
		// return DataDir() + "\\<model>.data".
		static std::string DataDir();

	public:
		ModelImporter(Model model);
		ModelImporter(const std::string &filename);

		// True when a data file is attached (m_fdata is non-null).
		bool HasData() const { return m_fdata.get() != nullptr; }

		// Initialize the parameters of a layer stored under `name`; `fill` selects the
		// fallback initialization. NOTE(review): exact fallback rules are in the .cpp - confirm.
		void Import(const std::string &name, torch::nn::Conv2d &conv, Fill fill) const;
		void Import(const std::string &name, torch::nn::ConvTranspose2d &conv, Fill fill) const;
		void Import(const std::string &name, torch::nn::Linear &fc, Fill fill) const;

		// Initialize a single tensor from the section called `name`.
		void Initialize(const std::string &name, torch::Tensor &tensor) const;

		// Reports sections that were never imported; presumably returns their count and
		// restricts the report to names starting with `prefix` - TODO confirm in the .cpp.
		int ReportUnimported(const std::string &prefix = "") const;

	private:
		// implemented in generated files by ImportBaseline.py
		// Each one calls Add() once per exported tensor and returns the data file path.
		std::string import_model_final_f10217();
		std::string import_model_final_f6e8b1();
		std::string import_model_final_a3ec72();
		std::string import_model_final_997cc7();
		std::string import_model_final_cafdb1();

		// Section table keyed by tensor name.
		// NOTE(review): the pair presumably holds (offset, count) - confirm against Add().
		std::unordered_map<std::string, std::pair<int, int>> m_sections;
		int m_size;
		// Registers a section `name` holding `count` elements (called by the generated code).
		void Add(const char *name, int count);

		std::string m_fullpath;
		std::shared_ptr<File> m_fdata;

		// Names imported so far; mutable so the const import methods can record them.
		mutable std::unordered_set<std::string> m_imported;
	};
}


--------------------------------------------------------------------------------
/Detectron2/MetaArch/ProposalNetwork.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ProposalNetwork.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
// Constructs the proposal-only meta architecture; all setup is delegated to MetaArchImpl(cfg).
ProposalNetworkImpl::ProposalNetworkImpl(CfgNode &cfg) : MetaArchImpl(cfg) {
}
12 | 
13 | std::tuple<InstancesList, TensorMap> ProposalNetworkImpl::forward(
14 | 	const std::vector<DatasetMapperOutput> &batched_inputs) {
15 | 	auto images = preprocess_image(batched_inputs, m_backbone->size_divisibility());
16 | 	auto features = m_backbone(images.tensor());
17 | 
18 | 	InstancesList gt_instances = get_gt_instances(batched_inputs);
19 | 
20 | 	InstancesList proposals; TensorMap proposal_losses;
21 | 	tie(proposals, proposal_losses) = m_proposal_generator(images, features, gt_instances);
22 | 
23 | 	// In training, the proposals are not useful at all but we generate them anyway.
24 | 	// This makes RPN-only models about 5% slower.
25 | 	if (is_training()) {
26 | 		return { InstancesList{}, proposal_losses };
27 | 	}
28 | 
29 | 	return { _postprocess(proposals, batched_inputs, images.image_sizes()), {} };
30 | }


--------------------------------------------------------------------------------
/Detectron2/MetaArch/ProposalNetwork.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "MetaArch.h"
 4 | #include <Detectron2/Modules/FPN/FPN.h>
 5 | #include <Detectron2/Modules/RPN/RPN.h>
 6 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from modeling/meta_arch/rcnn.py

	// A meta architecture that only predicts object proposals.
	class ProposalNetworkImpl : public MetaArchImpl {
	public:
		// Forwards `cfg` to MetaArchImpl; no additional members are set up here.
		ProposalNetworkImpl(CfgNode &cfg);

		/**
			Args:
				Same as in :class:`GeneralizedRCNN.forward`

			Returns:
				A tuple (InstancesList, TensorMap).
					In training mode the list is empty and the map holds the proposal losses.
					Otherwise the list holds the post-processed proposals for each input image
					(in the Python original: :class:`Instances` with keys "proposal_boxes" and
					"objectness_logits") and the map is empty.
		*/
		virtual std::tuple<InstancesList, TensorMap>
			forward(const std::vector<DatasetMapperOutput> &batched_inputs) override;
	};
	TORCH_MODULE(ProposalNetwork);
}


--------------------------------------------------------------------------------
/Detectron2/MetaArch/SemanticSegmentor.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "SemanticSegmentor.h"
 3 | 
 4 | #include <Detectron2/Structures/PostProcessing.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | SemanticSegmentorImpl::SemanticSegmentorImpl(CfgNode &cfg) : MetaArchImpl(cfg) {
13 | 	m_sem_seg_head = make_shared<SemSegFPNHeadImpl>(cfg, m_backbone->output_shapes());
14 | 	register_module("sem_seg_head", m_sem_seg_head);
15 | }
16 | 
// Initializes the base meta architecture with `prefix`, then the segmentation head.
// NOTE(review): the head is always initialized under the fixed name "sem_seg_head" and
// does not incorporate `prefix` - confirm this is intended for non-empty prefixes.
void SemanticSegmentorImpl::initialize(const ModelImporter &importer, const std::string &prefix) {
	MetaArchImpl::initialize(importer, prefix);
	m_sem_seg_head->initialize(importer, "sem_seg_head");
}
21 | 
22 | std::tuple<InstancesList, TensorMap> SemanticSegmentorImpl::forward(
23 | 	const std::vector<DatasetMapperOutput> &batched_inputs) {
24 | 	auto images = preprocess_image(batched_inputs, m_backbone->size_divisibility());
25 | 	auto features = m_backbone(images.tensor());
26 | 
27 | 	auto gt_sem_seg = get_gt_sem_seg(batched_inputs, m_sem_seg_head->ignore_value());
28 | 	Tensor results;
29 | 	TensorMap losses;
30 | 	tie(results, losses) = m_sem_seg_head(features, gt_sem_seg);
31 | 
32 | 	if (is_training()) {
33 | 		return { InstancesList{}, losses };
34 | 	}
35 | 
36 | 	int count = batched_inputs.size();
37 | 	assert(results.size(0) == count);
38 | 	auto &image_sizes = images.image_sizes();
39 | 	assert(image_sizes.size() == count);
40 | 
41 | 	InstancesList processed_results;
42 | 	for (int i = 0; i < count; i++) {
43 | 		auto result = results[i];
44 | 		auto &input_per_image = batched_inputs[i];
45 | 		auto &image_size = image_sizes[i];
46 | 
47 | 		int height = input_per_image.height ? *input_per_image.height : image_size.height;
48 | 		int width = input_per_image.width ? *input_per_image.width : image_size.width;
49 | 
50 | 		auto sem_seg_r = PostProcessing::sem_seg_postprocess(result, image_size, height, width);
51 | 		auto m = make_shared<Instances>(ImageSize{ height, width });
52 | 		m->set("sem_seg", sem_seg_r);
53 | 		processed_results.push_back(m);
54 | 	}
55 | 	return { processed_results, {} };
56 | }
57 | 


--------------------------------------------------------------------------------
/Detectron2/MetaArch/SemanticSegmentor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "MetaArch.h"
 4 | #include <Detectron2/Modules/FPN/SemSegFPNHead.h>
 5 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from modeling/meta_arch/semantic_seg.py

	// Main class for semantic segmentation architectures: a backbone (from MetaArchImpl)
	// followed by a semantic segmentation head that predicts from the feature maps.
	class SemanticSegmentorImpl : public MetaArchImpl {
	public:
		// Builds the head from `cfg` and the backbone's output shapes.
		SemanticSegmentorImpl(CfgNode &cfg);

		// Initializes the base class and then the head (under the name "sem_seg_head").
		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;

		/**
			Args:
				batched_inputs: a list, batched outputs of :class:`DatasetMapper`.
					Each item in the list contains the inputs for one image.

					For now, each item in the list is a dict that contains:

					   * "image": Tensor, image in (C, H, W) format.
					   * "sem_seg": semantic segmentation ground truth
					   * Other information that's included in the original dicts, such as:
						 "height", "width" (int): the output resolution of the model, used in inference.
						 See :meth:`postprocess` for details.

			Returns:
				A tuple (InstancesList, TensorMap).
				  In training mode the list is empty and the map holds the head's losses.
				  Otherwise the list has one entry per input image holding the key "sem_seg",
				  whose value is a Tensor that represents the
				  per-pixel segmentation predicted by the head, and the map is empty.
				  In the Python original the prediction has shape KxHxW that represents the
				  logits of each class for each pixel.
		*/
		virtual std::tuple<InstancesList, TensorMap>
			forward(const std::vector<DatasetMapperOutput> &batched_inputs) override;

	private:
		// Segmentation head; null until the constructor body assigns it.
		SemSegFPNHead m_sem_seg_head{ nullptr };
	};
	TORCH_MODULE(SemanticSegmentor);
}


--------------------------------------------------------------------------------
/Detectron2/Modules/Backbone.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Structures/ShapeSpec.h>
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from modeling/backbone/backbone.py

	// Abstract base class for network backbones.
	class BackboneImpl : public torch::nn::Module {
	public:
		virtual ~BackboneImpl() {}

		// Shapes of the backbone outputs, as stored in m_output_shapes by the subclass.
		const ShapeSpec::Map &output_shapes() const {
			return m_output_shapes;
		}

		// Initializes the backbone's parameters through `importer`; `prefix` is the name
		// prefix supplied by the caller. Must be implemented by every subclass.
		virtual void initialize(const ModelImporter &importer, const std::string &prefix) = 0;

		/**
			Subclasses must override this method, but adhere to the same return type.

			Returns:
				dict[str->Tensor]: mapping from feature name (e.g., "res2") to tensor
		*/
		virtual TensorMap forward(torch::Tensor x) = 0;

		/**
			Some backbones require the input height and width to be divisible by a
			specific integer. This is typically true for encoder / decoder type networks
			with lateral connection (e.g., FPN) for which feature maps need to match
			dimension in the "bottom up" and "top down" paths. Set to 0 if no specific
			input size divisibility is required.
		*/
		virtual int size_divisibility() {
			return 0;
		}

	protected:
		// Returned by output_shapes(); nothing in this class fills it in.
		ShapeSpec::Map m_output_shapes;
	};
	TORCH_MODULE(Backbone);
}


--------------------------------------------------------------------------------
/Detectron2/Modules/BatchNorm/BatchNorm.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "BatchNorm.h"
 3 | 
 4 | #include "BatchNorm2d.h"
 5 | #include "FrozenBatchNorm2d.h"
 6 | #include "GroupNorm.h"
 7 | #include "NaiveSyncBatchNorm.h"
 8 | 
 9 | using namespace std;
10 | using namespace torch;
11 | using namespace Detectron2;
12 | 
13 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
14 | 
15 | BatchNorm::Type BatchNorm::GetType(const std::string &name) {
16 | 	static map<string, Type> lookup_table{
17 | 		{ "",				kNone },
18 | 		{ "BN",				kBN },
19 | 		{ "SyncBN",			kSyncBN },
20 | 		{ "FrozenBN",		kFrozenBN },
21 | 		{ "GN",				kGN },
22 | 		{ "nnSyncBN",		nnSyncBN },
23 | 		{ "naiveSyncBN",	naiveSyncBN }
24 | 	};
25 | 	auto iter = lookup_table.find(name);
26 | 	assert(iter != lookup_table.end());
27 | 	return iter->second;
28 | }
29 | 
30 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
31 | 
32 | BatchNorm::BatchNorm(BatchNorm::Type type, int out_channels) {
33 | 	switch (type) {
34 | 	case kNone:																				break;
35 | 	case kBN:			reset(new BatchNorm2dImpl(out_channels));							break;
36 | 	case kFrozenBN:		reset(new FrozenBatchNorm2dImpl(out_channels));						break;
37 | 	case naiveSyncBN:	reset(new NaiveSyncBatchNormImpl(out_channels));					break;
38 | 	case kGN:			reset(new GroupNormImpl(nn::GroupNormOptions(32, out_channels)));	break;
39 | 
40 | 	case kSyncBN:	//"SyncBN": NaiveSyncBatchNorm if TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm,
41 | 	case nnSyncBN:	// return nn.SyncBatchNorm(out_channels);
42 | 	default:
43 | 		assert(false);
44 | 	}
45 | }
46 | 
47 | void BatchNormImpl::initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill) {
48 | 	if (importer.HasData()) {
49 | 		importer.Initialize(prefix + ".weight", get_weight());
50 | 		importer.Initialize(prefix + ".bias", get_bias());
51 | 		if (get_running_mean()) {
52 | 			importer.Initialize(prefix + ".running_mean", *get_running_mean());
53 | 			importer.Initialize(prefix + ".running_var", *get_running_var());
54 | 		}
55 | 	}
56 | 	else {
57 | 		ModelImporter::FillTensor(get_weight(), fill);
58 | 		ModelImporter::FillTensor(get_bias(), ModelImporter::kZeroFill);
59 | 	}
60 | }
61 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/BatchNorm/BatchNorm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from layers/batch_norm.py

	// Common interface implemented by every normalization layer used in this project.
	class BatchNormImpl {
	public:
		virtual ~BatchNormImpl() {}

		// Accessors to the layer's tensors. The running statistics are returned as pointers
		// because a layer may have none, in which case nullptr is returned.
		virtual torch::Tensor &get_weight() = 0;
		virtual torch::Tensor &get_bias() = 0;
		virtual torch::Tensor *get_running_mean() = 0;
		virtual torch::Tensor *get_running_var() = 0;

		// Loads the tensors named "<prefix>.*" when the importer has data; otherwise fills
		// the weight with `fill` and the bias with zeros.
		virtual void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill);
		virtual torch::Tensor forward(torch::Tensor x) = 0;
	};

	// Holder for a normalization layer; an empty holder means "no normalization".
	class BatchNorm : public std::shared_ptr<BatchNormImpl> {
	public:
		enum Type {
			kNone,

			kBN,		// BatchNorm2d, Fixed in https://github.com/pytorch/pytorch/pull/36382
			kSyncBN,	// NaiveSyncBatchNorm if TORCH_VERSION <= (1, 5) else nn.SyncBatchNorm,
			kFrozenBN,	// FrozenBatchNorm2d,
			kGN,		// lambda channels : nn.GroupNorm(32, channels),

			// for debugging:
			nnSyncBN,	// nn.SyncBatchNorm,
			naiveSyncBN	// NaiveSyncBatchNorm,
		};

		// Converts a configuration name ("", "BN", "SyncBN", "FrozenBN", "GN", ...) to a Type.
		static Type GetType(const std::string &name);

	public:
		/**
			Args:
				norm (str or callable): either one of BN, SyncBN, FrozenBN, GN;
					or a callable that takes a channel number and returns
					the normalization layer as a nn.Module.

			Returns:
				nn.Module or None: the normalization layer
		*/
		// Creates an empty holder.
		BatchNorm(std::nullptr_t) {}
		// Creates the layer selected by `type`; kNone leaves the holder empty.
		BatchNorm(Type type, int out_channels);

		// Dynamic cast of the held implementation to T; null when the cast fails.
		template<typename T>
		std::shared_ptr<T> as() {
			return std::dynamic_pointer_cast<T>(*this);
		}
		// The held layer viewed as a torch::nn::Module, e.g. for register_module().
		ModulePtr asModule() { return as<torch::nn::Module>(); }

		// Forwards `x` through the held layer. The holder is dereferenced without a null
		// check, so callers must test the holder first.
		torch::Tensor operator()(torch::Tensor x) {
			return get()->forward(x);
		}
	};
}
64 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/BatchNorm/BatchNorm2d.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "BatchNorm.h"
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// This is to overwrite torch::nn::BatchNorm2dImpl and torch::nn::BatchNorm2d to have BatchNormImpl interface.

	// torch::nn::BatchNorm2dImpl that additionally exposes its tensors through BatchNormImpl.
	class BatchNorm2dImpl : public torch::nn::BatchNorm2dImpl, public BatchNormImpl {
	public:
		BatchNorm2dImpl(const torch::nn::BatchNorm2dOptions &options) : torch::nn::BatchNorm2dImpl(options) {}

		// implementing BatchNormImpl
		// The getters return the members inherited from torch::nn::BatchNorm2dImpl.
		virtual torch::Tensor &get_weight() override		{ return weight; }
		virtual torch::Tensor &get_bias() override			{ return bias; }
		virtual torch::Tensor *get_running_mean() override	{ return &running_mean; }
		virtual torch::Tensor *get_running_var() override	{ return &running_var; }
		// Delegates to the torch implementation; defined explicitly because both base
		// classes declare forward().
		virtual torch::Tensor forward(torch::Tensor x) override {
			return torch::nn::BatchNorm2dImpl::forward(x);
		}
	};
	TORCH_MODULE(BatchNorm2d);
}
25 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/BatchNorm/FrozenBatchNorm2d.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "BatchNorm.h"
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from layers/batch_norm.py

	/**
		BatchNorm2d where the batch statistics and the affine parameters are fixed.

		It contains non-trainable buffers called
		"weight" and "bias", "running_mean", "running_var",
		initialized to perform identity transformation.

		The pre-trained backbone models from Caffe2 only contain "weight" and "bias",
		which are computed from the original four parameters of BN.
		The affine transform `x * weight + bias` will perform the equivalent
		computation of `(x - running_mean) / sqrt(running_var) * weight + bias`.
		When loading a backbone model from Caffe2, "running_mean" and "running_var"
		will be left unchanged as identity transformation.

		Other pre-trained backbone models may contain all 4 parameters.

		The forward is implemented by `F.batch_norm(..., training=False)`.
	*/
	class FrozenBatchNorm2dImpl : public torch::nn::Module, public BatchNormImpl {
	public:
		/**
			Convert BatchNorm/SyncBatchNorm in module into FrozenBatchNorm.

			Args:
				module (torch.nn.Module):

			Returns:
				If module is BatchNorm/SyncBatchNorm, returns a new module.
				Otherwise, in-place convert module and return it.

			Similar to convert_sync_batchnorm in
			https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/batchnorm.py
		*/
		static ModulePtr convert_frozen_batchnorm(const ModulePtr &mod);

	public:
		// num_features: number of channels; eps: stabilizer, 1e-5 by default.
		FrozenBatchNorm2dImpl(int num_features, double eps = 1e-5);

		// Textual description of the layer (format defined in the .cpp).
		std::string toString() const;

		// implementing BatchNormImpl
		// All four tensors exist for this layer, so the running statistics are never nullptr.
		virtual torch::Tensor &get_weight() override		{ return m_weight; }
		virtual torch::Tensor &get_bias() override			{ return m_bias; }
		virtual torch::Tensor *get_running_mean() override	{ return &m_running_mean; }
		virtual torch::Tensor *get_running_var() override	{ return &m_running_var; }
		virtual torch::Tensor forward(torch::Tensor x) override;

	private:
		int m_num_features;
		double m_eps;

		// Tensors exposed through the BatchNormImpl getters above.
		torch::Tensor m_weight;
		torch::Tensor m_bias;
		torch::Tensor m_running_mean;
		torch::Tensor m_running_var;
	};
	TORCH_MODULE(FrozenBatchNorm2d);
}
68 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/BatchNorm/GroupNorm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "BatchNorm.h"
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// This is to overwrite torch::nn::GroupNormImpl and torch::nn::GroupNorm to have BatchNormImpl interface.

	// torch::nn::GroupNormImpl that additionally exposes its tensors through BatchNormImpl.
	class GroupNormImpl : public torch::nn::GroupNormImpl, public BatchNormImpl {
	public:
		GroupNormImpl(const torch::nn::GroupNormOptions &options) : torch::nn::GroupNormImpl(options) {}

		// implementing BatchNormImpl
		virtual torch::Tensor &get_weight() override		{ return weight; }
		virtual torch::Tensor &get_bias() override			{ return bias; }
		// No running statistics are exposed: nullptr makes BatchNormImpl::initialize skip them.
		virtual torch::Tensor *get_running_mean() override	{ return nullptr; }
		virtual torch::Tensor *get_running_var() override	{ return nullptr; }
		// Delegates to the torch implementation; defined explicitly because both base
		// classes declare forward().
		virtual torch::Tensor forward(torch::Tensor x) override {
			return torch::nn::GroupNormImpl::forward(x);
		}
	};
	TORCH_MODULE(GroupNorm);
}
25 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/BatchNorm/NaiveSyncBatchNorm.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "BatchNorm2d.h"
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from layers/batch_norm.py

	/**
		In PyTorch<=1.5, `nn.SyncBatchNorm` has incorrect gradient
		when the batch size on each worker is different.
		(e.g., when scale augmentation is used, or when it is applied to mask head).

		This is a slower but correct alternative to `nn.SyncBatchNorm`.

		Note:
			There isn't a single definition of Sync BatchNorm.

			When ``stats_mode==""``, this module computes overall statistics by using
			statistics of each worker with equal weight.  The result is true statistics
			of all samples (as if they are all on one worker) only when all workers
			have the same (N, H, W). This mode does not support inputs with zero batch size.

			When ``stats_mode=="N"``, this module computes overall statistics by weighting
			the statistics of each worker by their ``N``. The result is true statistics
			of all samples (as if they are all on one worker) only when all workers
			have the same (H, W). It is slower than ``stats_mode==""``.

			Even though the result of this module may not be the true statistics of all samples,
			it may still be reasonable because it might be preferable to assign equal weights
			to all workers, regardless of their (H, W) dimension, instead of putting larger weight
			on larger images. From preliminary experiments, little difference is found between such
			a simplified implementation and an accurate computation of overall mean & variance.
	*/
	class NaiveSyncBatchNormImpl : public BatchNorm2dImpl {
	public:
		// options: forwarded to the BatchNorm2d base; stats_mode: "" (default) or "N", see above.
		NaiveSyncBatchNormImpl(const torch::nn::BatchNorm2dOptions &options, const std::string &stats_mode = "");

		virtual torch::Tensor forward(torch::Tensor x) override;

	private:
		std::string m_stats_mode;
	};
	TORCH_MODULE(NaiveSyncBatchNorm);
}
47 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/Conv/ConvBn2d.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ConvBn2d.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | ConvBn2dImpl::ConvBn2dImpl(const torch::nn::Conv2dOptions &options, BatchNorm::Type norm, bool activation)
11 | 	: m_activation(activation) {
12 | 	m_conv = torch::nn::Conv2d(options);
13 | 	register_module("conv", m_conv);
14 | 
15 | 	m_bn = BatchNorm(norm, options.out_channels());
16 | 	if (m_bn) {
17 | 		register_module("bn", m_bn.asModule());
18 | 	}
19 | }
20 | 
21 | void ConvBn2dImpl::initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill) {
22 | 	importer.Import(prefix, m_conv, fill);
23 | 	if (m_bn) {
24 | 		m_bn->initialize(importer, prefix + ".norm", fill);
25 | 	}
26 | }
27 | 
28 | torch::Tensor ConvBn2dImpl::forward(torch::Tensor x) {
29 | 	x = m_conv(x);
30 | 	if (m_bn) {
31 | 		x = m_bn(x);
32 | 	}
33 | 	if (m_activation) {
34 | 		x = relu(x);
35 | 	}
36 | 	return x;
37 | }
38 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/Conv/ConvBn2d.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Modules/BatchNorm/BatchNorm.h>
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from layers/wrappers.py

	// A wrapper around :class:`torch.nn.Conv2d` that optionally appends a normalization
	// layer and a relu activation.
	class ConvBn2dImpl : public torch::nn::Module {
	public:
		/**
			Extra keyword arguments supported in addition to those in `torch.nn.Conv2d`:

			Args:
				norm (BatchNorm::Type, optional): which normalization layer to append; kNone for none
				activation (bool): whether relu is applied to the output

			It assumes that norm layer is used before activation.
		*/
		ConvBn2dImpl(const torch::nn::Conv2dOptions &options, BatchNorm::Type norm = BatchNorm::kNone,
			bool activation = false);
		// Initializes the convolution under `prefix` and the norm layer under "<prefix>.norm".
		void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill);

		// conv -> norm (if present) -> relu (if enabled)
		torch::Tensor forward(torch::Tensor x);

	public:
		torch::nn::Conv2d m_conv{ nullptr };
		BatchNorm m_bn{ nullptr };	// empty when no normalization is used
		bool m_activation; // relu
	};
	TORCH_MODULE(ConvBn2d);
}


--------------------------------------------------------------------------------
/Detectron2/Modules/Conv/DeformConv.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Modules/BatchNorm/BatchNorm.h>
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from layers/deform_conv.py

	// Deformable convolution module; forward() takes the input together with an offset tensor.
	class DeformConvImpl : public torch::nn::Module {
	public:
		/**
			Deformable convolution from :paper:`deformconv`.

			Arguments are similar to :class:`Conv2D`. Extra arguments:

			Args:
				deformable_groups (int): number of groups used in deformable convolution.
				norm (BatchNorm::Type): which normalization layer to append
				activation (bool): whether relu is applied (see m_activation)
		*/
		DeformConvImpl(int in_channels, int out_channels, int kernel_size, int stride, int padding,
			int dilation, int groups, int deformable_groups, bool bias, BatchNorm::Type norm, bool activation = false);
		// Initializes the module's tensors through `importer`, using `fill` as the fill mode.
		void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill);

		// x: input tensor; offset: offsets for the deformable convolution.
		torch::Tensor forward(torch::Tensor x, torch::Tensor offset);

		// Textual summary of the configuration (format defined in the .cpp).
		std::string extra_repr() const;

	public:
		int m_in_channels;
		int m_out_channels;
		// Stored as a vector although the constructor takes a single int.
		// NOTE(review): presumably (kernel_size, kernel_size) - confirm in the .cpp.
		std::vector<int> m_kernel_size;
		int m_stride;
		int m_padding;
		int m_dilation;
		int m_groups;
		int m_deformable_groups;
		bool m_bias;
		// Has no default constructor, so it must be set in the constructor's initializer list.
		BatchNorm m_bn;
		bool m_activation; // relu

		torch::Tensor m_weight;
	};
	TORCH_MODULE(DeformConv);
}


--------------------------------------------------------------------------------
/Detectron2/Modules/Conv/ModulatedDeformConv.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Modules/BatchNorm/BatchNorm.h>
 4 | 
namespace Detectron2
{
	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	// converted from layers/deform_conv.py

	// Modulated deformable convolution module; forward() takes the input together with
	// an offset tensor and a mask tensor.
	class ModulatedDeformConvImpl : public torch::nn::Module {
	public:
		/**
			Modulated deformable convolution from :paper:`deformconv2`.

			Arguments are similar to :class:`Conv2D`. Extra arguments:

			Args:
				deformable_groups (int): number of groups used in deformable convolution.
				norm (BatchNorm::Type): which normalization layer to append
				activation (bool): whether relu is applied (see m_activation)
		*/
		ModulatedDeformConvImpl(int in_channels, int out_channels, int kernel_size, int stride, int padding,
			int dilation, int groups, int deformable_groups, bool bias, BatchNorm::Type norm, bool activation = false);
		// Initializes the module's tensors through `importer`, using `fill` as the fill mode.
		void initialize(const ModelImporter &importer, const std::string &prefix, ModelImporter::Fill fill);

		// x: input tensor; offset: offsets for the deformable convolution; mask: modulation mask.
		torch::Tensor forward(torch::Tensor x, torch::Tensor offset, torch::Tensor mask);

		// Textual summary of the configuration (format defined in the .cpp).
		std::string extra_repr() const;

	public:
		int m_in_channels;
		int m_out_channels;
		// Stored as a vector although the constructor takes a single int.
		// NOTE(review): presumably (kernel_size, kernel_size) - confirm in the .cpp.
		std::vector<int> m_kernel_size;
		int m_stride;
		int m_padding;
		int m_dilation;
		int m_groups;
		int m_deformable_groups;
		// Presumably records the constructor's `bias` flag; the bias tensor itself is m_bias.
		bool m_with_bias;
		// Has no default constructor, so it must be set in the constructor's initializer list.
		BatchNorm m_bn;
		bool m_activation; // relu

		torch::Tensor m_weight;
		torch::Tensor m_bias;
	};
	TORCH_MODULE(ModulatedDeformConv);
}


--------------------------------------------------------------------------------
/Detectron2/Modules/FPN/LastLevelMaxPool.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "LastLevelMaxPool.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | LastLevelMaxPoolImpl::LastLevelMaxPoolImpl() {
11 | 	m_in_feature = "p5";	// name of the feature this block reads
12 | 	m_num_levels = 1;		// forward() returns exactly one extra level
13 | }
14 | 
15 | void LastLevelMaxPoolImpl::initialize(const ModelImporter &importer, const std::string &prefix) {
16 | 	// Intentionally empty: this block registers no sub-modules, so there is nothing to import.
17 | }
18 | 
19 | TensorVec LastLevelMaxPoolImpl::forward(torch::Tensor x) {
20 | 	namespace F = torch::nn::functional;	// max pooling with kernel 1, stride 2 and no padding
21 | 	return { F::max_pool2d(x, F::MaxPool2dFuncOptions(1).stride(2).padding(0)) };
22 | }
23 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/FPN/LastLevelMaxPool.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "TopBlock.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/fpn.py
 9 | 
10 | 	// This module is used in the original FPN to generate a downsampled P6 feature from P5.
11 | 	class LastLevelMaxPoolImpl : public TopBlockImpl {
12 | 	public:
13 | 		LastLevelMaxPoolImpl();	// the .cpp sets m_num_levels = 1 and m_in_feature = "p5"
14 | 		// initialize() is a no-op for this block; forward() returns a one-element vector holding the pooled input.
15 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
16 | 		virtual TensorVec forward(torch::Tensor x) override;
17 | 	};
18 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/FPN/LastLevelP6P7.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "LastLevelP6P7.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | LastLevelP6P7Impl::LastLevelP6P7Impl(int64_t in_channels, int64_t out_channels, const char *in_feature) :
11 | 	m_p6(nn::Conv2dOptions(in_channels, out_channels, 3).stride(2).padding(1)),
12 | 	m_p7(nn::Conv2dOptions(out_channels, out_channels, 3).stride(2).padding(1))
13 | {
14 | 	m_in_feature = in_feature;	// name of the feature this block reads
15 | 	m_num_levels = 2;			// forward() returns two tensors
16 | 	register_module("p6", m_p6);
17 | 	register_module("p7", m_p7);
18 | }
19 | 
20 | void LastLevelP6P7Impl::initialize(const ModelImporter &importer, const std::string &prefix) {
21 | 	const auto fill = ModelImporter::kCaffe2XavierFill;	// both convolutions use the same fill
22 | 	importer.Import(prefix + ".p6", m_p6, fill);
23 | 	importer.Import(prefix + ".p7", m_p7, fill);
24 | }
25 | 
26 | TensorVec LastLevelP6P7Impl::forward(torch::Tensor c5) {
27 | 	// The first output is m_p6(c5); the second is m_p7 applied to the ReLU of the first.
28 | 	const torch::Tensor p6_out = m_p6(c5);
29 | 	return { p6_out, m_p7(relu(p6_out)) };
30 | }
31 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/FPN/LastLevelP6P7.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "TopBlock.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/fpn.py
 9 | 
10 | 	// This module is used in RetinaNet to generate extra layers, P6 and P7 from C5 feature.
11 | 	class LastLevelP6P7Impl : public TopBlockImpl {
12 | 	public:
13 | 		LastLevelP6P7Impl(int64_t in_channels, int64_t out_channels, const char *in_feature);	// in_feature: name stored in m_in_feature
14 | 		// initialize() imports "<prefix>.p6" and "<prefix>.p7"; forward() returns { p6, p7 }.
15 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
16 | 		virtual TensorVec forward(torch::Tensor c5) override;
17 | 
18 | 	private:
19 | 		torch::nn::Conv2d m_p6;	// 3x3, stride 2, padding 1: in_channels -> out_channels
20 | 		torch::nn::Conv2d m_p7;	// 3x3, stride 2, padding 1: out_channels -> out_channels
21 | 	};
22 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/FPN/SemSegFPNHead.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Structures/Instances.h>
 4 | #include <Detectron2/Structures/ShapeSpec.h>
 5 | #include <Detectron2/Modules/Conv/ConvBn2d.h>
 6 | 
 7 | namespace Detectron2
 8 | {
 9 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
10 | 	// converted from modeling/meta_arch/semantic_seg.py
11 | 
12 | 	/**
13 | 		A semantic segmentation head described in :paper:`PanopticFPN`.
14 | 		It takes FPN features as input and merges information from all
15 | 		levels of the FPN into single output.
16 | 	*/
17 | 	class SemSegFPNHeadImpl : public torch::nn::Module {
18 | 	public:
19 | 		SemSegFPNHeadImpl(CfgNode &cfg, const ShapeSpec::Map &input_shapes);
20 | 
21 | 		void initialize(const ModelImporter &importer, const std::string &prefix);
22 | 		// Accessor for the ignored ground-truth label (see m_ignore_value).
23 | 		int ignore_value() const { return m_ignore_value; }
24 | 
25 | 		/**
26 | 			Returns:
27 | 				In training, returns (None, dict of losses)
28 | 				In inference, returns (CxHxW logits, {})
29 | 		*/
30 | 		std::tuple<torch::Tensor, TensorMap> forward(const TensorMap &features, const torch::Tensor &targets);
31 | 
32 | 	private:
33 | 		std::vector<std::string> m_in_features;
34 | 		int m_ignore_value;		// Label in the semantic segmentation ground truth that is ignored, i.e., no loss is
35 | 								// calculated for the corresponding pixel.
36 | 		int m_common_stride;	// Outputs from semantic-FPN heads are up-scaled to the COMMON_STRIDE stride.
37 | 		float m_loss_weight;
38 | 		torch::nn::functional::InterpolateFuncOptions m_interpolate_options;
39 | 
40 | 		std::vector<torch::nn::Sequential> m_scale_heads;
41 | 		ConvBn2d m_predictor{ nullptr };	// starts as an empty holder; presumably created in the constructor -- confirm in the .cpp
42 | 
43 | 		torch::Tensor layers(const TensorMap &features);
44 | 		TensorMap losses(torch::Tensor predictions, torch::Tensor targets);
45 | 	};
46 | 	TORCH_MODULE(SemSegFPNHead);
47 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/FPN/TopBlock.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/fpn.py
 9 | 
10 | 	class TopBlockImpl : public torch::nn::Module {	// abstract base of the extra top-level blocks appended to an FPN
11 | 	public:
12 | 		virtual ~TopBlockImpl() {}
13 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) = 0;	// subclasses import their parameters here, or do nothing
14 | 		virtual TensorVec forward(torch::Tensor x) = 0;	// returns the extra feature maps computed from x
15 | 
16 | 		int num_levels() const { return m_num_levels; }
17 | 		std::string in_feature() const { return m_in_feature; }
18 | 
19 | 	protected:
20 | 		int m_num_levels = 0;		// the number of extra FPN levels added by this block; 0 until a subclass sets it
21 | 		std::string m_in_feature;	// a string representing its input feature (e.g., p5).
22 | 	};
23 | 	TORCH_MODULE(TopBlock);
24 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/Opeartors/DeformConvOp.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from layers/deform_conv.py
 9 | 
10 | 	class _DeformConv : public torch::autograd::Function<_DeformConv> {	// NOTE(review): names starting with '_' + uppercase are reserved in C++
11 | 	public:
12 | 		static torch::autograd::variable_list forward(torch::autograd::AutogradContext *ctx,
13 | 			torch::Tensor input, torch::Tensor offset, torch::Tensor weight,
14 | 			int64_t stride = 1, int64_t padding = 0, int64_t dilation = 1, int64_t groups = 1,
15 | 			int64_t deformable_groups = 1, int64_t im2col_step = 64);
16 | 
17 | 		//! @once_differentiable
18 | 		static torch::autograd::variable_list backward(torch::autograd::AutogradContext *ctx,
19 | 			torch::autograd::variable_list grad_output);
20 | 
21 | 	private:
22 | 		static std::vector<int64_t> _output_size(torch::Tensor input, torch::Tensor weight,
23 | 			const std::vector<int64_t> &stride, const std::vector<int64_t> &padding,
24 | 			const std::vector<int64_t> &dilation);
25 | 
26 | 		//! @lru_cache(maxsize=128)
27 | 		/**
28 | 			Calculate proper im2col step size, which should divide input_size and not be larger
29 | 			than default_size. Meanwhile the step size should be as large as possible to be more
30 | 			efficient. So we choose the largest one among all divisors of input_size which are not
31 | 			larger than default_size (NOTE(review): confirm the exact bound against the .cpp).
32 | 			:param input_size: input batch size.
33 | 			:param default_size: default preferred im2col step size.
34 | 			:return: the largest proper step size.
35 | 		*/
36 | 		static int64_t _cal_im2col_step(int64_t input_size, int64_t default_size);
37 | 	};
38 | 	using deform_conv = _DeformConv;
39 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/Opeartors/ModulatedDeformConvOp.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from layers/deform_conv.py
 9 | 
10 | 	class _ModulatedDeformConv : public torch::autograd::Function<_ModulatedDeformConv> {	// custom autograd function
11 | 	public:
12 | 		static torch::autograd::variable_list forward(torch::autograd::AutogradContext *ctx,
13 | 			torch::Tensor input, torch::Tensor offset, torch::Tensor mask, torch::Tensor weight, torch::Tensor bias,
14 | 			int64_t stride = 1, int64_t padding = 0, int64_t dilation = 1, int64_t groups = 1,
15 | 			int64_t deformable_groups = 1);
16 | 		// Gradient counterpart of forward(); the implementation is not part of this header.
17 | 		static torch::autograd::variable_list backward(torch::autograd::AutogradContext *ctx,
18 | 			torch::autograd::variable_list grad_output);
19 | 
20 | 	private:
21 | 		static int _infer_shape(torch::autograd::AutogradContext *ctx, torch::Tensor input, torch::Tensor weight);	// NOTE(review): returns a single int although the name suggests a shape -- confirm in the .cpp
22 | 	};
23 | 	using modulated_deform_conv = _ModulatedDeformConv;
24 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/Opeartors/NewEmptyTensorOp.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "NewEmptyTensorOp.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | torch::autograd::variable_list _NewEmptyTensorOp::forward(torch::autograd::AutogradContext *ctx,
11 | 	const torch::Tensor &x, IntArrayRef new_shape) {
12 | 	torch::Tensor result = x.new_empty(new_shape);
13 | 	ctx->saved_data["shape"] = x.sizes();	// remembered so backward() can size the gradient like the input
14 | 	return { result };
15 | }
16 | 
17 | torch::autograd::variable_list _NewEmptyTensorOp::backward(torch::autograd::AutogradContext *ctx,
18 | 	torch::autograd::variable_list grad) {
19 | 	return { grad[0].new_empty(ctx->saved_data["shape"].toIntVector()), Tensor() };	// no gradient for new_shape
20 | }
21 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/Opeartors/NewEmptyTensorOp.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from layers/wrappers.py
 9 | 
10 | 	class _NewEmptyTensorOp : public torch::autograd::Function<_NewEmptyTensorOp> {	// autograd function wrapping Tensor::new_empty
11 | 	public:
12 | 		static torch::autograd::variable_list forward(torch::autograd::AutogradContext *ctx,
13 | 			const torch::Tensor &x, torch::IntArrayRef new_shape);
14 | 		// backward() returns an empty tensor with the saved input shape plus an undefined tensor for new_shape.
15 | 		static torch::autograd::variable_list backward(torch::autograd::AutogradContext *ctx,
16 | 			torch::autograd::variable_list grad);
17 | 	};
18 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIHeads/FastRCNNConvFCHead.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Modules/Conv/ConvBn2d.h>
 4 | #include <Detectron2/Structures/ShapeSpec.h>
 5 | 
 6 | namespace Detectron2
 7 | {
 8 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 	// converted from modeling/roi_heads/box_head.py
10 | 
11 | 	// FastRCNNConvFCHead: makes box predictions from per-region features.
12 | 	class FastRCNNConvFCHeadImpl : public torch::nn::Module {
13 | 	public:
14 | 		// input_shape: shape of the input feature.
15 | 		FastRCNNConvFCHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape);
16 | 		void initialize(const ModelImporter &importer, const std::string &prefix);
17 | 
18 | 		// ShapeSpec: the output feature shape
19 | 		ShapeSpec output_shape() const {
20 | 			return m_output_size;
21 | 		}
22 | 		// NOTE(review): presumably runs m_conv_norm_relus and then m_fcs; confirm in the .cpp.
23 | 		torch::Tensor forward(torch::Tensor x);
24 | 
25 | 	private:
26 | 		ShapeSpec m_output_size;	// value returned by output_shape()
27 | 		std::vector<ConvBn2d> m_conv_norm_relus;
28 | 		std::vector<torch::nn::Linear> m_fcs;
29 | 	};
30 | 	TORCH_MODULE(FastRCNNConvFCHead);
31 | 	using BoxHead = FastRCNNConvFCHead;	// the only box head type exposed by this header
32 | 
33 | 	//  Build a box head defined by `cfg.MODEL.ROI_BOX_HEAD.NAME`.
34 | 	BoxHead build_box_head(CfgNode &cfg, const ShapeSpec &input_shape);
35 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIHeads/KRCNNConvDeconvUpsampleHead.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "KRCNNConvDeconvUpsampleHead.h"
 3 | 
 4 | #include <Detectron2/Structures/Keypoints.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | KRCNNConvDeconvUpsampleHeadImpl::KRCNNConvDeconvUpsampleHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape) :
13 | 	BaseKeypointRCNNHeadImpl(cfg)
14 | {
15 | 	const auto conv_dims = cfg["MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS"].as<vector<int64_t>>();
16 | 
17 | 	// the up-scale factor is fixed to 2 (this can be made an option)
18 | 	m_up_scale = 2;
19 | 	int in_channels = input_shape.channels;
20 | 
21 | 	int layer_no = 1;	// module names are 1-based: conv_fcn1, conv_fcn2, ...
22 | 	for (const auto &layer_channels : conv_dims) {
23 | 		ConvBn2d conv(nn::Conv2dOptions(in_channels, layer_channels, 3).stride(1).padding(1));
24 | 		register_module(FormatString("conv_fcn%d", layer_no++), conv);
25 | 		m_blocks.push_back(conv);
26 | 		in_channels = layer_channels;	// the next layer consumes what this one produces
27 | 	}
28 | 
29 | 	// transposed convolution with stride 2 that outputs one channel per keypoint
30 | 	const int deconv_kernel = 4;
31 | 	m_score_lowres = nn::ConvTranspose2d(nn::ConvTranspose2dOptions(in_channels, m_num_keypoints, deconv_kernel)
32 | 		.stride(2).padding(deconv_kernel / 2 - 1));
33 | 	register_module("score_lowres", m_score_lowres);
34 | }
35 | 
36 | void KRCNNConvDeconvUpsampleHeadImpl::initialize(const ModelImporter &importer, const std::string &prefix) {
37 | 	int layer_no = 0;	// pre-incremented below so the names are conv_fcn1, conv_fcn2, ...
38 | 	for (auto &block : m_blocks)
39 | 		block->initialize(importer, prefix + FormatString(".conv_fcn%d", ++layer_no), ModelImporter::kCaffe2MSRAFill);
40 | 	importer.Import(prefix + ".score_lowres", m_score_lowres, ModelImporter::kCaffe2MSRAFill);
41 | }
42 | 
43 | torch::Tensor KRCNNConvDeconvUpsampleHeadImpl::layers(torch::Tensor x) {
44 | 	// conv + ReLU for every block, then the transposed convolution, then bilinear up-sampling
45 | 	for (auto &conv : m_blocks) {
46 | 		x = relu(conv(x));
47 | 	}
48 | 	x = m_score_lowres(x);
49 | 	const vector<double> scale(2, (double)m_up_scale);	// the same factor for both entries
50 | 	nn::functional::InterpolateFuncOptions options;
51 | 	options.scale_factor(scale).mode(torch::kBilinear).align_corners(false);
52 | 	return Keypoints::interpolate(x, options);
53 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIHeads/KRCNNConvDeconvUpsampleHead.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "BaseKeypointRCNNHead.h"
 4 | #include <Detectron2/Modules/Conv/ConvBn2d.h>
 5 | 
 6 | namespace Detectron2
 7 | {
 8 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 	// converted from modeling/roi_heads/keypoint_head.py
10 | 
11 | 	/**
12 | 		A standard keypoint head containing a series of 3x3 convs, followed by
13 | 		a transpose convolution and bilinear interpolation for upsampling.
14 | 	*/
15 | 	class KRCNNConvDeconvUpsampleHeadImpl : public BaseKeypointRCNNHeadImpl {
16 | 	public:
17 | 		/**
18 | 			NOTE: this interface is experimental.
19 | 
20 | 			Args:
21 | 				input_shape (ShapeSpec): shape of the input feature
22 | 				conv_dims: read from cfg key MODEL.ROI_KEYPOINT_HEAD.CONV_DIMS; output channel counts for each conv in the head
23 | 							 e.g. (512, 512, 512) for three convs outputting 512 channels.
24 | 		*/
25 | 		KRCNNConvDeconvUpsampleHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape);
26 | 		// initialize() imports "<prefix>.conv_fcnN" and "<prefix>.score_lowres"; layers() runs the whole head on x.
27 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
28 | 		virtual torch::Tensor layers(torch::Tensor x) override;
29 | 
30 | 	private:
31 | 		int m_up_scale;	// scale factor of the final bilinear interpolation; the constructor sets it to 2
32 | 		std::vector<ConvBn2d> m_blocks;	// the 3x3 convolutions, each followed by ReLU in layers()
33 | 		torch::nn::ConvTranspose2d m_score_lowres{ nullptr };	// created in the constructor
34 | 	};
35 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIHeads/MaskRCNNConvUpsampleHead.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "BaseMaskRCNNHead.h"
 4 | #include <Detectron2/Modules/Conv/ConvBn2d.h>
 5 | 
 6 | namespace Detectron2
 7 | {
 8 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 	// converted from modeling/roi_heads/mask_head.py
10 | 
11 | 	/**
12 | 		A mask head with several conv layers, plus an upsample layer (with `ConvTranspose2d`).
13 | 		Predictions are made with a final 1x1 conv layer.
14 | 	*/
15 | 	class MaskRCNNConvUpsampleHeadImpl : public BaseMaskRCNNHeadImpl {
16 | 	public:
17 | 		/**
18 | 			NOTE: this interface is experimental.
19 | 			NOTE(review): the constructor only takes cfg and input_shape; the values below are presumably read from cfg.
20 | 			Args:
21 | 				input_shape (ShapeSpec): shape of the input feature
22 | 				num_classes (int): the number of classes. 1 if using class agnostic prediction.
23 | 				conv_dims (list[int]): a list of N>0 integers representing the output dimensions
24 | 					of N-1 conv layers and the last upsample layer.
25 | 				conv_norm (str or callable): normalization for the conv layers.
26 | 					See :func:`detectron2.layers.get_norm` for supported types.
27 | 		*/
28 | 		MaskRCNNConvUpsampleHeadImpl(CfgNode &cfg, const ShapeSpec &input_shape);
29 | 		// Overrides of the BaseMaskRCNNHeadImpl interface; the definitions are not part of this header.
30 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
31 | 		virtual torch::Tensor layers(torch::Tensor x) override;
32 | 
33 | 	private:
34 | 		int m_num_classes;
35 | 		std::vector<ConvBn2d> m_conv_norm_relus;
36 | 		torch::nn::ConvTranspose2d m_deconv{ nullptr };	// empty holder until assigned
37 | 		ConvBn2d m_predictor{ nullptr };	// empty holder until assigned
38 | 	};
39 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIHeads/RROIHeads.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "StandardROIHeads.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/roi_heads/rotated_fast_rcnn.py
 9 | 
10 | 	/**
11 | 		This class is used by Rotated Fast R-CNN to detect rotated boxes.
12 | 		For now, it only supports box predictions but not mask or keypoints.
13 | 	*/
14 | 	class RROIHeadsImpl : public StandardROIHeadsImpl {
15 | 	public:
16 | 		RROIHeadsImpl(CfgNode &cfg);
17 | 		void Create(CfgNode &cfg, const ShapeSpec::Map &input_shapes);	// NOTE(review): presumably a second construction step that needs the input shapes -- confirm in the .cpp
18 | 
19 | 		/**
20 | 			Prepare some proposals to be used to train the RROI heads.
21 | 			It performs box matching between `proposals` and `targets`, and assigns
22 | 			training labels to the proposals.
23 | 			It returns `self.batch_size_per_image` random samples from proposals and groundtruth boxes,
24 | 			with a fraction of positives that is no larger than `self.positive_sample_fraction`.
25 | 
26 | 			Args:
27 | 				See :meth:`StandardROIHeads.forward`
28 | 
29 | 			Returns:
30 | 				list[Instances]: length `N` list of `Instances`s containing the proposals
31 | 					sampled for training. Each `Instances` has the following fields:
32 | 					- proposal_boxes: the rotated proposal boxes
33 | 					- gt_boxes: the ground-truth rotated boxes that the proposal is assigned to
34 | 					  (this is only meaningful if the proposal has a label > 0; if label = 0
35 | 					   then the ground-truth box is random)
36 | 					- gt_classes: the ground-truth classification label for each proposal
37 | 		*/
38 | 		InstancesList label_and_sample_proposals(InstancesList &proposals, const InstancesList &targets);
39 | 
40 | 	private:
41 | 		virtual void _init_box_head(CfgNode &cfg, const ShapeSpec::Map &input_shapes) override;	// overrides the box head setup of StandardROIHeadsImpl
42 | 	};
43 | }
44 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIHeads/Res5ROIHeads.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Modules/ROIPooler/ROIPooler.h>
 4 | #include "ROIHeads.h"
 5 | #include "FastRCNNOutputLayers.h"
 6 | #include "BaseMaskRCNNHead.h"
 7 | 
 8 | namespace Detectron2
 9 | {
10 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 	// converted from modeling/roi_heads/roi_heads.py
12 | 
13 | 	// The ROIHeads in a typical "C4" R-CNN model, where the box and mask head share the cropping and the per-region
14 | 	// feature computation by a Res5 block.
15 | 	class Res5ROIHeadsImpl : public ROIHeadsImpl {
16 | 	public:
17 | 		Res5ROIHeadsImpl(CfgNode &cfg, const ShapeSpec::Map &input_shapes);
18 | 
19 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
20 | 		// `targets` defaults to an empty list.
21 | 		virtual std::tuple<InstancesList, TensorMap> forward(const ImageList &images, const TensorMap &features,
22 | 			InstancesList &proposals, const InstancesList &targets = {}) override;
23 | 
24 | 		/**
25 | 			Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.
26 | 
27 | 			Args:
28 | 				features: same as in `forward()`
29 | 				instances (list[Instances]): instances to predict other outputs. Expect the keys
30 | 					"pred_boxes" and "pred_classes" to exist.
31 | 
32 | 			Returns:
33 | 				instances (Instances):
34 | 					the same `Instances` object, with extra
35 | 					fields such as `pred_masks` or `pred_keypoints`.
36 | 		*/
37 | 		virtual InstancesList forward_with_given_boxes(const TensorMap &features, InstancesList &instances) override;
38 | 
39 | 	private:
40 | 		std::vector<std::string> m_in_features;
41 | 		bool m_mask_on;
42 | 		ROIPooler m_pooler{ nullptr };
43 | 		FastRCNNOutputLayers m_box_predictor{ nullptr };
44 | 		torch::nn::Sequential m_res5;
45 | 		MaskHead m_mask_head{ nullptr };
46 | 
47 | 		int _build_res5_block(CfgNode &cfg);	// NOTE(review): the meaning of the int result is not visible here (presumably the output channel count) -- confirm
48 | 		torch::Tensor _shared_roi_transform(const TensorVec &features, const BoxesList &boxes);
49 | 		TensorVec select_features(const TensorMap &features);	// presumably picks the m_in_features entries out of `features` -- confirm
50 | 	};
51 | 	TORCH_MODULE(Res5ROIHeads);
52 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIPooler/ROIAlign.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ROIAlign.h"
 3 | 
 4 | #include <Detectron2/detectron2/ROIAlign/ROIAlign.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | ROIAlignImpl::ROIAlignImpl(const Size2D &output_size, float spatial_scale, int sampling_ratio, bool aligned) :
13 | 	m_output_size(output_size),
14 | 	m_spatial_scale(spatial_scale),
15 | 	m_sampling_ratio(sampling_ratio),
16 | 	m_aligned(aligned)
17 | {	// the constructor only stores its arguments; they are used by forward() and toString()
18 | }
19 | 
20 | Tensor ROIAlignImpl::forward(const Tensor &input, const Tensor &rois) {
21 | 	assert(rois.dim() == 2 && rois.size(1) == 5);	// rois must be 2-D with 5 columns
22 | 	auto &out = m_output_size;
23 | 	return detectron2::ROIAlign_forward(input, rois, m_spatial_scale, out.height, out.width, m_sampling_ratio, m_aligned);
24 | }
25 | 
26 | std::string ROIAlignImpl::toString() const {
27 | 	// One variadic torch::str call streams every piece in order, producing the same text as piecewise concatenation.
28 | 	return torch::str("ROIAlign(",
29 | 		"output_size=(", m_output_size.height, ", ", m_output_size.width, ")",
30 | 		", spatial_scale=", m_spatial_scale,
31 | 		", sampling_ratio=", m_sampling_ratio,
32 | 		", aligned=", m_aligned,
33 | 		")");
34 | }
35 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIPooler/ROIAlign.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "ROIPoolerLevel.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from layers/roi_align.py
 9 | 
10 | 	class ROIAlignImpl : public ROIPoolerLevelImpl {
11 | 	public:
12 | 		/**
13 | 			Args:
14 | 				output_size (tuple): h, w
15 | 				spatial_scale (float): scale the input boxes by this number
16 | 				sampling_ratio (int): number of inputs samples to take for each output
17 | 					sample. 0 to take samples densely.
18 | 				aligned (bool): if False, use the legacy implementation in
19 | 					Detectron. If True, align the results more perfectly.
20 | 
21 | 			Note:
22 | 				The meaning of aligned=True:
23 | 
24 | 				Given a continuous coordinate c, its two neighboring pixel indices (in our
25 | 				pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
26 | 				c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
27 | 				from the underlying signal at continuous coordinates 0.5 and 1.5). But the original
28 | 				roi_align (aligned=False) does not subtract the 0.5 when computing neighboring
29 | 				pixel indices and therefore it uses pixels with a slightly incorrect alignment
30 | 				(relative to our pixel model) when performing bilinear interpolation.
31 | 
32 | 				With `aligned=True`,
33 | 				we first appropriately scale the ROI and then shift it by -0.5
34 | 				prior to calling roi_align. This produces the correct neighbors; see
35 | 				detectron2/tests/test_roi_align.py for verification.
36 | 
37 | 				The difference does not make a difference to the model's performance if
38 | 				ROIAlign is used together with conv layers.
39 | 		*/
40 | 		ROIAlignImpl(const Size2D &output_size, float spatial_scale, int sampling_ratio, bool aligned = true);
41 | 
42 | 		// input: NCHW images
43 | 		// rois : Bx5 boxes. First column is the index into N. The other 4 columns are xyxy.
44 | 		virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) override;
45 | 		virtual std::string toString() const override;	// "ROIAlign(output_size=..., spatial_scale=..., sampling_ratio=..., aligned=...)"
46 | 
47 | 	private:
48 | 		Size2D m_output_size;
49 | 		float m_spatial_scale;
50 | 		int m_sampling_ratio;
51 | 		bool m_aligned;
52 | 	};
53 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIPooler/ROIAlignRotated.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ROIAlignRotated.h"
 3 | 
 4 | #include <Detectron2/detectron2/ROIAlignRotated/ROIAlignRotated.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | ROIAlignRotatedImpl::ROIAlignRotatedImpl(const Size2D &output_size, float spatial_scale,
13 | 	int sampling_ratio) :
14 | 	m_output_size(output_size),
15 | 	m_spatial_scale(spatial_scale),
16 | 	m_sampling_ratio(sampling_ratio)
17 | {	// the constructor only stores its arguments; they are used by forward() and toString()
18 | }
19 | 
20 | Tensor ROIAlignRotatedImpl::forward(const Tensor &input, const Tensor &rois) {
21 | 	assert(rois.dim() == 2 && rois.size(1) == 6);	// rois must be 2-D with 6 columns
22 | 	auto &out = m_output_size;
23 | 	return detectron2::ROIAlignRotated_forward(input, rois, m_spatial_scale, out.height, out.width, m_sampling_ratio);
24 | }
25 | 
26 | // Returns "ROIAlignRotated(output_size=(h, w), spatial_scale=..., sampling_ratio=...)" built from the stored settings.
27 | std::string ROIAlignRotatedImpl::toString() const {
28 | 	std::string tmpstr = "ROIAlignRotated(";	// was "ROIAlign(", which made this level indistinguishable from ROIAlignImpl
29 | 	tmpstr += "output_size=(" + torch::str(m_output_size.height) + ", " + torch::str(m_output_size.width) + ")";
30 | 	tmpstr += ", spatial_scale=" + torch::str(m_spatial_scale);
31 | 	tmpstr += ", sampling_ratio=" + torch::str(m_sampling_ratio);
32 | 	return tmpstr + ")";
33 | }
34 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIPooler/ROIAlignRotated.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "ROIPoolerLevel.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from layers/roi_align_rotated.py
 9 | 
10 | 	class ROIAlignRotatedImpl : public ROIPoolerLevelImpl {
11 | 	public:
12 | 		/**
13 | 			Args:
14 | 				output_size (tuple): h, w
15 | 				spatial_scale (float): scale the input boxes by this number
16 | 				sampling_ratio (int): number of inputs samples to take for each output
17 | 					sample. 0 to take samples densely.
18 | 
19 | 			Note:
20 | 				ROIAlignRotated supports continuous coordinate by default:
21 | 				Given a continuous coordinate c, its two neighboring pixel indices (in our
22 | 				pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
23 | 				c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
24 | 				from the underlying signal at continuous coordinates 0.5 and 1.5).
25 | 		*/
26 | 		ROIAlignRotatedImpl(const Size2D &output_size, float spatial_scale, int sampling_ratio);
27 | 
28 | 		// input: NCHW images
29 | 		// rois : Bx6 boxes. First column is the index into N. The other 5 columns are
30 | 		//		(x_ctr, y_ctr, width, height, angle_degrees).
31 | 		virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) override;
32 | 		virtual std::string toString() const override;	// textual summary of the three settings below
33 | 
34 | 	private:
35 | 		Size2D m_output_size;
36 | 		float m_spatial_scale;
37 | 		int m_sampling_ratio;
38 | 	};
39 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIPooler/ROIPool.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "ROIPoolerLevel.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from https://github.com/pytorch/vision torchvision/ops/roi_pool.py
 9 | 
10 |     /**
11 | 		Performs the Region of Interest (RoI) Pool operator described in Fast R-CNN.
12 | 
13 | 		Arguments:
14 | 			input (Tensor[N, C, H, W]): input tensor
15 | 			boxes (Tensor[K, 5] or List[Tensor[L, 4]]): the box coordinates in (x1, y1, x2, y2)
16 | 				format where the regions will be taken from. If a single Tensor is passed,
17 | 				then the first column should contain the batch index. If a list of Tensors
18 | 				is passed, then each Tensor will correspond to the boxes for an element i
19 | 				in a batch.
20 | 			output_size (Size2D): the size of the output after the cropping
21 | 				is performed, as (height, width)
22 | 			spatial_scale (float): a scaling factor that maps the input coordinates to
23 | 				the box coordinates. Default: 1.0
24 | 
25 | 		Returns:
26 | 			output (Tensor[K, C, output_size[0], output_size[1]])
27 | 	*/
28 | 	torch::Tensor roi_pool(const torch::Tensor &input, const torch::Tensor &boxes, const Size2D &output_size, // single-tensor overload
29 | 		float spatial_scale = 1.0);
30 | 	torch::Tensor roi_pool(const torch::Tensor &input, const BoxesList &boxes, const Size2D &output_size, // per-image list overload
31 | 		float spatial_scale = 1.0);
32 | 
33 | 	class RoIPoolImpl : public ROIPoolerLevelImpl { // ROIPoolerLevel implementation for RoI pooling
34 | 	public:
35 | 		RoIPoolImpl(const Size2D &output_size, float spatial_scale); // presumably the same meaning as the roi_pool() arguments - TODO confirm
36 | 
37 | 		virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) override; // pooled result for `rois`
38 | 		virtual std::string toString() const override; // textual description of this layer
39 | 
40 | 	private:
41 | 		Size2D m_output_size; // pooled output size
42 | 		float m_spatial_scale; // scaling factor between input and box coordinates
43 | 	};
44 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ROIPooler/ROIPoolerLevel.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | 	class ROIPoolerLevelImpl : public torch::nn::Module { // abstract interface implemented by the concrete pooling layers
10 | 	public:
11 | 		virtual ~ROIPoolerLevelImpl() {}
12 | 		virtual torch::Tensor forward(const torch::Tensor &input, const torch::Tensor &rois) = 0; // pool `input` for the given `rois`
13 | 		virtual std::string toString() const = 0; // textual description of the concrete layer
14 | 	};
15 | 	TORCH_MODULE(ROIPoolerLevel); // declares the ROIPoolerLevel holder type for ROIPoolerLevelImpl
16 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/RPN/AnchorGenerator.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "AnchorGenerator.h"
 3 | 
 4 | #include "DefaultAnchorGenerator.h"
 5 | #include "RotatedAnchorGenerator.h"
 6 | 
 7 | using namespace std;
 8 | using namespace torch;
 9 | using namespace Detectron2;
10 | 
11 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
12 | 
13 | AnchorGenerator Detectron2::build_anchor_generator(CfgNode &cfg, const ShapeSpec::Vec &input_shapes) {
14 | 	// Dispatch on the generator name stored under MODEL.ANCHOR_GENERATOR.NAME.
15 | 	const string kind = cfg["MODEL.ANCHOR_GENERATOR.NAME"].as<string>();
16 | 	const bool wants_default = (kind == "DefaultAnchorGenerator");
17 | 	const bool wants_rotated = (kind == "RotatedAnchorGenerator");
18 | 	if (wants_default) return shared_ptr<AnchorGeneratorImpl>(new DefaultAnchorGeneratorImpl(cfg, input_shapes));
19 | 	if (wants_rotated) return shared_ptr<AnchorGeneratorImpl>(new RotatedAnchorGeneratorImpl(cfg, input_shapes));
20 | 	// Unknown name: asserts in debug builds, otherwise falls through to a null holder.
21 | 	assert(false);
22 | 	return nullptr;
23 | }
24 | 
25 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
26 | 
27 | TensorVec AnchorGeneratorImpl::_create_grid_offsets(const pair<int, int> &size, int stride, float offset,
28 | 	torch::Device device) {
29 | 	// `size` is read as (height, width); shifts start at offset * stride and advance by stride.
30 | 	const int rows = size.first;
31 | 	const int cols = size.second;
32 | 	const float first_shift = offset * stride;
33 | 	const auto opts = TensorOptions(torch::kFloat32).device(device);
34 | 
35 | 	const auto xs = torch::arange(first_shift, cols * stride, stride, opts);
36 | 	const auto ys = torch::arange(first_shift, rows * stride, stride, opts);
37 | 
38 | 	// meshgrid receives (ys, xs), so grid[0] carries the y shifts and grid[1] the x shifts.
39 | 	const auto grid = torch::meshgrid({ ys, xs });
40 | 
41 | 	return { grid[1].reshape(-1), grid[0].reshape(-1) }; // flattened, returned as (x, y)
42 | }
43 | 
44 | vector<vector<float>> AnchorGeneratorImpl::_broadcast_params(const vector<vector<float>> &params, int num_features) {
45 | 	assert(!params.empty());
46 | 	if (params.size() != 1) { // caller already supplied one entry per feature map
47 | 		assert(params.size() == num_features);
48 | 		return params;
49 | 	}
50 | 	return vector<vector<float>>(num_features, params.front()); // replicate the single entry for every feature map
51 | }
52 | 
53 | vector<vector<float>> AnchorGeneratorImpl::_broadcast_params(const vector<float> &params, int num_features) { // flat-list overload
54 | 	return vector<vector<float>>(num_features, params); // the same list is copied once per feature map
55 | }
56 | 
57 | void AnchorGeneratorImpl::register_cell_anchors(const TensorVec &cell_anchors) {
58 | 	int slot = 0; // each buffer is named after its position: "0", "1", ...
59 | 	for (const auto &anchors : cell_anchors)
60 | 		register_buffer(FormatString("%d", slot++), anchors);
61 | }
62 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/RPN/DefaultAnchorGenerator.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "AnchorGenerator.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/anchor_generator.py
 9 | 
10 | 	// DefaultAnchorGenerator: Computes anchors in the standard ways described in https://arxiv.org/abs/1506.01497
11 | 	class DefaultAnchorGeneratorImpl : public AnchorGeneratorImpl {
12 | 	public:
13 | 		/**
14 | 			sizes: sizes[i] lists the anchor sizes (i.e. sqrt of anchor area) to use for the i-th feature map. Sizes
15 | 				are absolute lengths in units of the input image; they do not scale dynamically when the input
16 | 				image size changes.
17 | 			aspect_ratios: list of aspect ratios (i.e. height / width) to use for anchors. The same "broadcast"
18 | 				rule as for `sizes` applies.
19 | 			strides: stride of each input feature.
20 | 			offset: Relative offset between the center of the first anchor and the top-left corner of the image. Value
21 | 				has to be in [0, 1). Recommend to use 0.5, which means half stride.
22 | 		*/
23 | 		DefaultAnchorGeneratorImpl(CfgNode &cfg, const ShapeSpec::Vec &input_shapes); // settings taken from the config node
24 | 		DefaultAnchorGeneratorImpl(const std::vector<int> &strides, const std::vector<std::vector<float>> &sizes,
25 | 			const std::vector<std::vector<float>> &aspect_ratios, float offset = 0.5);
26 | 
27 | 		virtual std::vector<int> num_anchors() const override; // anchor count per feature map (presumably per location - confirm in the .cpp)
28 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
29 | 		virtual BoxesList forward(const TensorVec &features) override; // anchors for the given feature maps
30 | 
31 | 	private:
32 | 		std::vector<std::vector<float>> m_sizes;
33 | 		std::vector<std::vector<float>> m_aspect_ratios;
34 | 		std::vector<int> m_strides;
35 | 		int m_num_features;
36 | 		float m_offset;
37 | 
38 | 		// One tensor per feature map, each of shape (len(sizes) * len(aspect_ratios), 4), storing anchor boxes in XYXY format.
39 | 		TensorVec m_cell_anchors;
40 | 	};
41 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/RPN/RotatedAnchorGenerator.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "AnchorGenerator.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/anchor_generator.py
 9 | 
10 | 	// RotatedAnchorGenerator: Computes rotated anchors used by Rotated RPN (RRPN),
11 | 	//   described in https://arxiv.org/abs/1703.01086 "Arbitrary-Oriented Scene Text Detection via Rotation Proposals"
12 | 	class RotatedAnchorGeneratorImpl : public AnchorGeneratorImpl {
13 | 	public:
14 | 		// angles: list of angles (in degrees CCW) to use for anchors. The same "broadcast" rule as for `sizes` applies.
15 | 		RotatedAnchorGeneratorImpl(CfgNode &cfg, const ShapeSpec::Vec &input_shapes); // settings taken from the config node
16 | 		RotatedAnchorGeneratorImpl(const std::vector<int> &strides, const std::vector<std::vector<float>> &sizes,
17 | 			const std::vector<std::vector<float>> &aspect_ratios, const std::vector<std::vector<float>> &angles,
18 | 			float offset = 0.5);
19 | 
20 | 		virtual std::vector<int> num_anchors() const override; // anchor count per feature map (presumably per location - confirm in the .cpp)
21 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
22 | 		virtual BoxesList forward(const TensorVec &features) override; // anchors for the given feature maps
23 | 
24 | 	private:
25 | 		std::vector<std::vector<float>> m_sizes;
26 | 		std::vector<std::vector<float>> m_aspect_ratios;
27 | 		std::vector<std::vector<float>> m_angles;
28 | 		std::vector<int> m_strides;
29 | 		int m_num_features;
30 | 		float m_offset;
31 | 
32 | 		// One tensor per feature map, each of shape (len(sizes) * len(aspect_ratios) * len(angles), 5),
33 | 		//   storing anchor boxes in (x_ctr, y_ctr, w, h, angle) format.
34 | 		TensorVec m_cell_anchors;
35 | 	};
36 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/RPN/StandardRPNHead.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "StandardRPNHead.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | RPNHead Detectron2::build_rpn_head(CfgNode &cfg, const ShapeSpec::Vec &input_shapes) { // factory keyed by MODEL.RPN.HEAD_NAME
11 | 	auto name = cfg["MODEL.RPN.HEAD_NAME"].as<string>();
12 | 	if (name == "StandardRPNHead") { // the only head name handled here
13 | 		return make_shared<StandardRPNHeadImpl>(cfg, input_shapes);
14 | 	}
15 | 	assert(false); // unknown head name: fails in debug builds
16 | 	return nullptr; // reached only when asserts are compiled out
17 | }
18 | 
19 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
20 | 
21 | StandardRPNHeadImpl::StandardRPNHeadImpl(CfgNode &cfg, const ShapeSpec::Vec &input_shapes) {
22 | 	auto anchor_generator = build_anchor_generator(cfg, input_shapes); // built only to query box_dim / anchor counts
23 | 	int box_dim = anchor_generator->box_dim();
24 | 	auto anchors = anchor_generator->num_anchors();
25 | 	assert(!anchors.empty()); // anchors[0] below would be undefined behaviour on an empty list
26 | 	int num_anchors = anchors[0];
27 | 	for (size_t i = 1; i < anchors.size(); i++) {
28 | 		assert(anchors[i] == num_anchors); // the head is shared, so every feature map needs the same count
29 | 	}
30 | 	auto in_channels = ShapeSpec::channels_single(input_shapes);
31 | 	m_conv = ConvBn2d(nn::Conv2dOptions(in_channels, in_channels, 3).padding(1)); // 3x3 conv for the hidden representation
32 | 	register_module("conv", m_conv);
33 | 	m_objectness_logits = ConvBn2d(nn::Conv2dOptions(in_channels, num_anchors, 1)); // 1x1 conv: num_anchors channels
34 | 	register_module("objectness_logits", m_objectness_logits);
35 | 	m_anchor_deltas = ConvBn2d(nn::Conv2dOptions(in_channels, num_anchors * box_dim, 1)); // 1x1 conv: box_dim channels per anchor
36 | 	register_module("anchor_deltas", m_anchor_deltas);
37 | }
38 | 
39 | void StandardRPNHeadImpl::initialize(const ModelImporter &importer, const std::string &prefix) { // initializes the three convs under `prefix`
40 | 	m_conv->initialize(importer, prefix + ".conv", ModelImporter::kNormalFill2);
41 | 	m_objectness_logits->initialize(importer, prefix + ".objectness_logits", ModelImporter::kNormalFill2);
42 | 	m_anchor_deltas->initialize(importer, prefix + ".anchor_deltas", ModelImporter::kNormalFill2);
43 | }
44 | 
45 | vector<TensorVec> StandardRPNHeadImpl::forward(const TensorVec &features) {
46 | 	// outputs[0] gathers objectness logits and outputs[1] anchor deltas, one tensor per feature map.
47 | 	vector<TensorVec> outputs(2);
48 | 	for (Tensor hidden : features) {
49 | 		hidden = relu(m_conv(hidden)); // hidden representation shared by both predictors
50 | 		outputs[0].push_back(m_objectness_logits(hidden));
51 | 		outputs[1].push_back(m_anchor_deltas(hidden));
52 | 	}
53 | 	return outputs;
54 | }
55 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/BasicBlock.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "BasicBlock.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | BasicBlockImpl::BasicBlockImpl(int in_channels, int out_channels, int stride, BatchNorm::Type norm) :
11 | 	CNNBlockBaseImpl(in_channels, out_channels, stride),
12 | 	m_convbn1(nn::Conv2dOptions(in_channels, out_channels, 3).stride(stride).padding(1).bias(false), norm), // the block's stride lives in conv1 only
13 | 	m_convbn2(nn::Conv2dOptions(out_channels, out_channels, 3).stride(1).padding(1).bias(false), norm) { // conv2 keeps resolution so it matches the shortcut
14 | 	register_module("conv1", m_convbn1);
15 | 	register_module("conv2", m_convbn2);
16 | 	if (in_channels != out_channels) { // projection shortcut only when the channel count changes
17 | 		m_shortcut = ConvBn2d(nn::Conv2dOptions(in_channels, out_channels, 1).stride(stride).bias(false), norm);
18 | 		register_module("shortcut", m_shortcut);
19 | 	}
20 | }
21 | 
22 | void BasicBlockImpl::initialize(const ModelImporter &importer, const std::string &prefix) { // initializes every conv of the block under `prefix`
23 | 	if (m_shortcut) { // the shortcut conv exists only for some blocks
24 | 		m_shortcut->initialize(importer, prefix + ".shortcut", ModelImporter::kCaffe2MSRAFill);
25 | 	}
26 | 	m_convbn1->initialize(importer, prefix + ".conv1", ModelImporter::kCaffe2MSRAFill);
27 | 	m_convbn2->initialize(importer, prefix + ".conv2", ModelImporter::kCaffe2MSRAFill);
28 | }
29 | 
30 | torch::Tensor BasicBlockImpl::forward(torch::Tensor x) {
31 | 	// Main branch: conv1 -> relu -> conv2.
32 | 	torch::Tensor main_branch = m_convbn1(x);
33 | 	main_branch = relu(main_branch);
34 | 	main_branch = m_convbn2(main_branch);
35 | 
36 | 	// Skip branch: the projection conv when one was created,
37 | 	// otherwise the unmodified input.
38 | 	const bool has_projection = static_cast<bool>(m_shortcut);
39 | 	torch::Tensor skip_branch = has_projection ? m_shortcut(x) : x;
40 | 
41 | 	// In-place residual add followed by the final activation.
42 | 	main_branch += skip_branch;
43 | 	main_branch = relu(main_branch);
44 | 
45 | 	return main_branch;
46 | }
47 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/BasicBlock.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "CNNBlockBase.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/resnet.py
 9 | 
10 | 	/**
11 | 		The basic residual block for ResNet-18 and ResNet-34 defined in :paper:`ResNet`,
12 | 		with two 3x3 conv layers and a projection shortcut if needed.
13 | 	*/
14 | 	class BasicBlockImpl : public CNNBlockBaseImpl {
15 | 	public:
16 | 		/**
17 | 			in_channels (int): Number of input channels.
18 | 			out_channels (int): Number of output channels.
19 | 			stride (int): Stride for the first conv.
20 | 			norm (BatchNorm::Type): normalization for all conv layers.
21 | 				See :func:`layers.get_norm` for supported format.
22 | 		*/
23 | 		BasicBlockImpl(int in_channels, int out_channels, int stride, BatchNorm::Type norm);
24 | 
25 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
26 | 		virtual torch::Tensor forward(torch::Tensor x) override;
27 | 
28 | 	private:
29 | 		ConvBn2d m_shortcut{ nullptr }; // stays null unless a projection shortcut is needed
30 | 		ConvBn2d m_convbn1; // first 3x3 conv
31 | 		ConvBn2d m_convbn2; // second 3x3 conv
32 | 	};
33 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/BasicStem.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "BasicStem.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | BasicStemImpl::BasicStemImpl(int in_channels, int out_channels, BatchNorm::Type norm)
11 | 	: CNNBlockBaseImpl(in_channels, out_channels, 4), m_in_channels(in_channels), // base class is told the stem's stride is 4
12 | 	m_convbn1(nn::Conv2dOptions(in_channels, out_channels, 7).stride(2).padding(3).bias(false), norm) { // 7x7 conv, stride 2, no bias
13 | 	register_module("conv1", m_convbn1);
14 | }
15 | 
16 | void BasicStemImpl::initialize(const ModelImporter &importer, const std::string &prefix) { // initializes conv1 under `prefix`
17 | 	m_convbn1->initialize(importer, prefix + ".conv1", ModelImporter::kCaffe2MSRAFill);
18 | }
19 | 
20 | torch::Tensor BasicStemImpl::forward(torch::Tensor x) { // conv1 -> relu -> max pool
21 | 	x = m_convbn1(x);
22 | 	x = relu_(x); // in-place activation
23 | 	x = max_pool2d(x, 3, 2, 1); // kernel 3, stride 2, padding 1
24 | 	return x;
25 | }
26 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/BasicStem.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "CNNBlockBase.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/resnet.py
 9 | 
10 | 	// The standard ResNet stem (layers before the first residual block).
11 | 	class BasicStemImpl : public CNNBlockBaseImpl {
12 | 	public:
13 | 		// norm (BatchNorm::Type): norm after the first conv layer.
14 | 		//   See :func:`layers.get_norm` for supported format.
15 | 		BasicStemImpl(int in_channels, int out_channels, BatchNorm::Type norm);
16 | 
17 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
18 | 		virtual torch::Tensor forward(torch::Tensor x) override;
19 | 
20 | 	private:
21 | 		int m_in_channels; // NOTE(review): the base class declares a member of the same name - confirm this shadowing is intended
22 | 
23 | 		ConvBn2d m_convbn1; // the stem's only conv layer
24 | 	};
25 | 	TORCH_MODULE(BasicStem); // declares the BasicStem holder type for BasicStemImpl
26 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/BottleneckBlock.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "CNNBlockBase.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/resnet.py
 9 | 
10 | 	/**
11 | 		The standard bottleneck residual block used by ResNet-50, 101 and 152
12 | 		defined in :paper:`ResNet`.  It contains 3 conv layers with kernels
13 | 		1x1, 3x3, 1x1, and a projection shortcut if needed.
14 | 	*/
15 | 	class BottleneckBlockImpl : public CNNBlockBaseImpl {
16 | 	public:
17 | 		/**
18 | 			bottleneck_channels (int): number of output channels for the 3x3
19 | 				"bottleneck" conv layers.
20 | 			num_groups (int): number of groups for the 3x3 conv layer.
21 | 			norm (BatchNorm::Type): normalization for all conv layers.
22 | 				See :func:`layers.get_norm` for supported format.
23 | 			stride_in_1x1 (bool): when stride>1, whether to put stride in the
24 | 				first 1x1 convolution or the bottleneck 3x3 convolution.
25 | 			dilation (int): the dilation rate of the 3x3 conv layer.
26 | 		*/
27 | 		BottleneckBlockImpl(int in_channels, int out_channels, int bottleneck_channels, int stride,
28 | 			int num_groups, BatchNorm::Type norm, bool stride_in_1x1, int dilation = 1);
29 | 
30 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
31 | 		virtual torch::Tensor forward(torch::Tensor x) override;
32 | 
33 | 	private:
34 | 		ConvBn2d m_shortcut{ nullptr }; // null by default; presumably set only when a projection shortcut is needed
35 | 		ConvBn2d m_convbn1; // presumably the first 1x1 conv - confirm in the .cpp
36 | 		ConvBn2d m_convbn2; // presumably the 3x3 bottleneck conv - confirm in the .cpp
37 | 		ConvBn2d m_convbn3; // presumably the last 1x1 conv - confirm in the .cpp
38 | 	};
39 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/CNNBlockBase.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "CNNBlockBase.h"
 3 | 
 4 | #include <Detectron2/Modules/BatchNorm/FrozenBatchNorm2d.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | CNNBlockBaseImpl::CNNBlockBaseImpl(int in_channels, int out_channels, int stride) : // only records the block's specification
13 | 	m_in_channels(in_channels), m_out_channels(out_channels), m_stride(stride) {
14 | }
15 | 
16 | void CNNBlockBaseImpl::freeze() { // make the block non-trainable
17 | 	for (auto p : parameters()) {
18 | 		p.set_requires_grad(false); // stop gradient tracking for every parameter
19 | 	}
20 | 	auto self = shared_from_this(); // named variable: the conversion helper is handed the module pointer itself
21 | 	FrozenBatchNorm2dImpl::convert_frozen_batchnorm(self); // swap BatchNorm layers for frozen ones
22 | }
23 | 


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/CNNBlockBase.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Structures/ShapeSpec.h>
 4 | #include <Detectron2/Modules/Conv/ConvBn2d.h>
 5 | 
 6 | namespace Detectron2
 7 | {
 8 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 	// converted from layers/blocks.py
10 | 
11 | 	/**
12 | 		A CNN block is assumed to have input channels, output channels and a stride.
13 | 		The input and output of `forward()` method must be NCHW tensors.
14 | 		The method can perform arbitrary computation but must match the given
15 | 		channels and stride specification.
16 | 	*/
17 | 	class CNNBlockBaseImpl : public torch::nn::Module {
18 | 	public:
19 | 		/**
20 | 			The constructor of any subclass should also take these arguments.
21 | 
22 | 			Args:
23 | 				in_channels (int): number of input channels
24 | 				out_channels (int): number of output channels
25 | 				stride (int): stride of the block
26 | 		*/
27 | 		CNNBlockBaseImpl(int in_channels, int out_channels, int stride);
28 | 		virtual ~CNNBlockBaseImpl() {}
29 | 
30 | 		int stride() const { return m_stride; }
31 | 		int out_channels() const { return m_out_channels; }
32 | 
33 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) = 0; // set up the block's weights under `prefix`
34 | 		virtual torch::Tensor forward(torch::Tensor x) = 0; // NCHW in, NCHW out
35 | 
36 | 		/**
37 | 			Make this block not trainable.
38 | 			This method sets all parameters to `requires_grad=False`
39 | 			and converts all BatchNorm layers to FrozenBatchNorm.
40 | 		*/
41 | 		void freeze();
42 | 
43 | 	protected:
44 | 		int m_in_channels;
45 | 		int m_out_channels;
46 | 		int m_stride;
47 | 	};
48 | 	TORCH_MODULE(CNNBlockBase); // declares the CNNBlockBase holder type for CNNBlockBaseImpl
49 | }


--------------------------------------------------------------------------------
/Detectron2/Modules/ResNet/DeformBottleneckBlock.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "CNNBlockBase.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/backbone/resnet.py
 9 | 
10 | 	// Similar to :class:`BottleneckBlock`, but with :paper:`deformable conv <deformconv>` in the 3x3 convolution.
11 | 	class DeformBottleneckBlockImpl : public CNNBlockBaseImpl {
12 | 	public:
13 | 		DeformBottleneckBlockImpl(int in_channels, int out_channels, int bottleneck_channels, int stride, // deform_* select the deformable 3x3 conv variant
14 | 			int num_groups, BatchNorm::Type norm, bool stride_in_1x1, int dilation,
15 | 			bool deform_modulated, int deform_num_groups);
16 | 
17 | 		virtual void initialize(const ModelImporter &importer, const std::string &prefix) override;
18 | 		virtual torch::Tensor forward(torch::Tensor x) override;
19 | 
20 | 	private:
21 | 		bool m_deform_modulated; // presumably mirrors the `deform_modulated` constructor argument - TODO confirm
22 | 
23 | 		ConvBn2d m_shortcut{ nullptr }; // null by default; presumably set only when a projection shortcut is needed
24 | 		ConvBn2d m_convbn1;
25 | 		ConvBn2d m_convbn2_offset; // presumably predicts the offsets consumed by m_convbn2 - confirm in the .cpp
26 | 		ModulePtr m_convbn2{ nullptr }; // generic module pointer; presumably one of the deformable conv types - confirm
27 | 		ConvBn2d m_convbn3;
28 | 	};
29 | }


--------------------------------------------------------------------------------
/Detectron2/Structures/BitMasks.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "BitMasks.h"
 3 | 
 4 | #include <Detectron2/Modules/ROIPooler/ROIAlign.h>
 5 | 
 6 | using namespace std;
 7 | using namespace torch;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | BitMasks::BitMasks(const torch::Tensor &tensor) { // wraps a stack of masks
13 | 	m_tensor = tensor.to(torch::kBool); // masks are always stored as bool
14 | 	assert(m_tensor.dim() == 3); // exactly three dimensions are required
15 | 	m_image_size = { (int)m_tensor.size(1), (int)m_tensor.size(2) }; // taken from dims 1 and 2
16 | }
17 | 
18 | BitMasks::BitMasks(const BitMasks &bitmasks) : m_tensor(bitmasks.m_tensor), m_image_size(bitmasks.m_image_size) { // member-wise copy
19 | }
20 | 
21 | BitMasks BitMasks::operator[](int64_t item) const { // the single mask at `item`, still wrapped as a BitMasks
22 | 	return m_tensor[item].unsqueeze(0); // re-add the leading dim: the constructor asserts a 3-D tensor (view({1, -1}) gave 2-D)
23 | }
24 | 
25 | BitMasks BitMasks::operator[](ArrayRef<torch::indexing::TensorIndex> item) const { // general indexing into the mask stack
26 | 	auto m = m_tensor.index(item);
27 | 	assert(m.dim() == 3); // the index must leave a 3-D result
28 | 	return m; // implicitly wrapped into a BitMasks
29 | }
30 | 
31 | std::string BitMasks::toString() const { // yields "BitMasks(num_instances=<size()>)"
32 | 	string s = "BitMasks(";
33 | 	s += FormatString("num_instances=%d)", size());
34 | 	return s;
35 | }
36 | 
37 | SequencePtr BitMasks::slice(int64_t start, int64_t end) const { // new BitMasks over masks [start, end)
38 | 	auto sliced = m_tensor.slice(0, start, end); // slice along dim 0
39 | 	return std::shared_ptr<BitMasks>(new BitMasks(sliced));
40 | }
41 | 
42 | SequencePtr BitMasks::index(torch::Tensor item) const { // new BitMasks holding the masks selected by `item`
43 | 	auto selected = m_tensor.index(item);
44 | 	return std::shared_ptr<BitMasks>(new BitMasks(selected));
45 | }
46 | 
47 | SequencePtr BitMasks::cat(const std::vector<SequencePtr> &seqs, int total) const {
48 | 	TensorVec parts; // mask tensors of every input sequence, in order
49 | 	parts.reserve(seqs.size());
50 | 	for (const auto &entry : seqs) {
51 | 		const auto masks = dynamic_pointer_cast<BitMasks>(entry); // each entry is expected to be a BitMasks
52 | 		parts.push_back(masks->m_tensor);
53 | 	}
54 | 	const Tensor merged = torch::cat(parts); // joined along the leading dimension
55 | 	assert(merged.size(0) == total);
56 | 	return std::shared_ptr<BitMasks>(new BitMasks(merged));
57 | }
58 | 
59 | torch::Tensor BitMasks::crop_and_resize(torch::Tensor boxes, int mask_size) {
60 | 	assert(boxes.size(0) == size()); // one box per mask
61 | 	const auto target_device = m_tensor.device();
62 | 
63 | 	// Prefix each box with the index of the mask it applies to, giving the Nx5 roi layout.
64 | 	auto mask_ids = torch::arange(size(), target_device).to(boxes.dtype());
65 | 	mask_ids = mask_ids.index({ Colon, None });
66 | 	const auto rois = torch::cat({ mask_ids, boxes }, 1).to(target_device);
67 | 
68 | 	// Masks are converted to float and indexed with an extra axis at position 1 before sampling.
69 | 	const auto mask_images = m_tensor.to(torch::kFloat32).index({ Colon, None, Colon, Colon });
70 | 
71 | 	ROIAlignImpl aligner({ mask_size, mask_size }, 1.0, 0, true);
72 | 	const auto sampled = aligner.forward(mask_images, rois).squeeze(1);
73 | 
74 | 	return sampled >= 0.5; // threshold the sampled values into a boolean result
75 | }
76 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/GenericMask.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "PolygonMasks.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from utils/visualizer.py
 9 | 
10 | 	/**
11 | 		Attribute:
12 | 			polygons (list[ndarray]): list[ndarray]: polygons for this mask.
13 | 				Each ndarray has format [x, y, x, y, ...]
14 | 			mask (ndarray): a binary mask
15 | 	*/
16 | 	class GenericMask { // one mask that can be held as a binary mask and/or as polygons
17 | 	public:
18 | 		static torch::Tensor toCocoMask(const std::vector<std::shared_ptr<GenericMask>> &masks);
19 | 
20 | 		static std::tuple<TensorVec, int> mask_to_polygons(const torch::Tensor &mask); // second element presumably reports holes - confirm in the .cpp
21 | 
22 | 		static std::vector<std::shared_ptr<GenericMask>>
23 | 			_convert_masks(const BitMasks &m, int height, int width); // one GenericMask per entry of `m` (presumably)
24 | 		static std::vector<std::shared_ptr<GenericMask>>
25 | 			_convert_masks(const PolygonMasks &m, int height, int width); // polygon-based overload
26 | 
27 | 	public:
28 | 		GenericMask(const torch::Tensor &mask, int height, int width); // from a binary mask
29 | 		GenericMask(const TensorVec &polygons, int height, int width); // from a list of polygons
30 | 		GenericMask(const mask_util::MaskObject &obj, int height, int width); // from a mask_util object
31 | 
32 | 		torch::Tensor polygons_to_mask(const TensorVec &polygons);
33 | 
34 | 		torch::Tensor mask();
35 | 		TensorVec polygons();
36 | 		bool has_holes();
37 | 
38 | 		float area() const { // sum of all elements of the stored mask
39 | 			return m_mask.sum().item<float>();
40 | 		}
41 | 
42 | 		torch::Tensor bbox() const;
43 | 
44 | 	private:
45 | 		int m_height;
46 | 		int m_width;
47 | 		torch::Tensor m_mask;
48 | 		TensorVec m_polygons;
49 | 		bool m_has_mask; // presumably: m_mask holds a valid value - TODO confirm
50 | 		bool m_has_polygons; // presumably: m_polygons holds a valid value - TODO confirm
51 | 		int m_has_holes; // NOTE(review): int rather than bool - possibly a tri-state cache (unknown/no/yes); confirm
52 | 	};
53 | }
54 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/ImageList.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from structures/image_list.py
 9 | 
10 | 	/**
11 | 		Structure that holds a list of images (of possibly
12 | 		varying sizes) as a single tensor.
13 | 		This works by padding the images to the same size,
14 | 		and storing in a field the original sizes of each image
15 | 
16 | 		Attributes:
17 | 			image_sizes (list[tuple[int, int]]): each tuple is (h, w)
18 | 	*/
19 | 	class ImageList {
20 | 	public:
21 | 		/**
22 | 			Args:
23 | 				tensors: a vector of tensors, each of shape (Hi, Wi) or
24 | 					(C_1, ..., C_K, Hi, Wi) where K >= 1. The tensors will be padded
25 | 					to the same shape with `pad_value`.
26 | 				size_divisibility (int): If `size_divisibility > 0`, add padding to ensure
27 | 					the common height and width are divisible by `size_divisibility`.
28 | 					This depends on the model and many models need a divisibility of 32.
29 | 				pad_value (double): value to pad with
30 | 
31 | 			Returns:
32 | 				an `ImageList`.
33 | 		*/
34 | 		static ImageList from_tensors(const TensorVec &tensors, int size_divisibility = 0, double pad_value = 0.0);
35 | 
36 | 	public:
37 | 		/**
38 | 			tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1
39 | 			image_sizes (vector<ImageSize>): Each entry is (h, w). It can
40 | 				be smaller than (H, W) due to padding.
41 | 		*/
42 | 		ImageList(torch::Tensor tensor, std::vector<ImageSize> image_sizes) :
43 | 			m_tensor(tensor), m_image_sizes(std::move(image_sizes)) {
44 | 		}
45 | 
46 | 		int length() const { // number of images, i.e. number of recorded sizes
47 | 			return m_image_sizes.size();
48 | 		}
49 | 		const std::vector<ImageSize> &image_sizes() const {
50 | 			return m_image_sizes;
51 | 		}
52 | 
53 | 		torch::Tensor tensor() const { // the padded batch tensor
54 | 			return m_tensor;
55 | 		}
56 | 		torch::Device device() const {
57 | 			return m_tensor.device();
58 | 		}
59 | 
60 | 		ImageList to(torch::Device device) { // new ImageList on `device`; the sizes are copied unchanged
61 | 			std::vector<ImageSize> image_sizes = m_image_sizes;
62 | 			return ImageList(m_tensor.to(device), std::move(image_sizes));
63 | 		}
64 | 
65 | 		/**
66 | 			Access an individual image in its original size.
67 | 
68 | 			Returns:
69 | 				Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1
70 | 		*/
71 | 		torch::Tensor get(int64_t idx);
72 | 
73 | 	private:
74 | 		torch::Tensor m_tensor;
75 | 		std::vector<ImageSize> m_image_sizes;
76 | 	};
77 | }
78 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/Masks.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Sequence.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	
 9 | 	// Base class for BitMasks or PolygonMasks, so we can store in Instances with polymorphism.
10 | 	class Masks : public Sequence {
11 | 	public:
12 | 		virtual ~Masks() {}
13 | 
14 | 		virtual torch::Tensor crop_and_resize(torch::Tensor boxes, int mask_size) = 0;
15 | 	};
16 | }


--------------------------------------------------------------------------------
/Detectron2/Structures/PanopticSegment.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "PanopticSegment.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | int PanopticSegment::size() const {
11 | 	return infos.size();
12 | }
13 | 
14 | std::string PanopticSegment::toString() const {
15 | 	// TODO: infos
16 | 	return seg.toString();
17 | }
18 | 
19 | SequencePtr PanopticSegment::slice(int64_t start, int64_t end) const {
20 | 	assert(false);
21 | 	return nullptr;
22 | }
23 | 
24 | SequencePtr PanopticSegment::index(torch::Tensor item) const {
25 | 	assert(false);
26 | 	return nullptr;
27 | }
28 | 
29 | SequencePtr PanopticSegment::cat(const std::vector<SequencePtr> &seqs, int total) const {
30 | 	assert(false);
31 | 	return nullptr;
32 | }
33 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/PanopticSegment.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Sequence.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 
	/**
		Per-segment metadata stored alongside a panoptic segmentation tensor.

		Every field has a default so that a default-constructed SegmentInfo is fully
		initialized even when the producer only fills in some of the fields.
		Aggregate initialization (`SegmentInfo{ ... }`) keeps working.
	*/
	struct SegmentInfo {
		int id = 0;				// segment id
		bool isthing = false;	// presumably true for "thing" (instance) segments -- confirm with producers
		float score = 0.0f;		// score of the segment
		int category_id = 0;	// category of the segment
		int instance_id = 0;	// instance index of the segment
		float area = 0.0f;		// area of the segment
	};
17 | 
18 | 	class PanopticSegment : public Sequence {
19 | 	public:
20 | 		torch::Tensor seg;
21 | 		std::vector<SegmentInfo> infos;
22 | 
23 | 		// implementing Sequence
24 | 		virtual int size() const override;
25 | 		virtual std::string toString() const override;
26 | 		virtual SequencePtr slice(int64_t start, int64_t end) const override;
27 | 		virtual SequencePtr index(torch::Tensor item) const override;
28 | 		virtual SequencePtr cat(const std::vector<SequencePtr> &seqs, int total) const override;
29 | 	};
30 | }


--------------------------------------------------------------------------------
/Detectron2/Structures/PostProcessing.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "PostProcessing.h"
 3 | 
 4 | #include <Detectron2/Structures/Boxes.h>
 5 | #include <Detectron2/Structures/MaskOps.h>
 6 | 
 7 | using namespace std;
 8 | using namespace torch;
 9 | using namespace Detectron2;
10 | 
11 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
12 | 
13 | InstancesPtr PostProcessing::detector_postprocess(const InstancesPtr &results_,
14 | 	int output_height, int output_width, float mask_threshold) {
15 | 	auto scale_x = (float)output_width / results_->image_size().width;
16 | 	auto scale_y = (float)output_height / results_->image_size().height;
17 | 	InstancesPtr results(new Instances({ output_height, output_width }, results_->move_fields()));
18 | 
19 | 	Tensor toutput_boxes;
20 | 	if (results->has("pred_boxes")) {
21 | 		toutput_boxes = results->getTensor("pred_boxes");
22 | 	}
23 | 	else if (results->has("proposal_boxes")) {
24 | 		toutput_boxes = results->getTensor("proposal_boxes");
25 | 	}
26 | 	auto output_boxes = Boxes::boxes(toutput_boxes);
27 | 	output_boxes->scale(scale_x, scale_y);
28 | 	output_boxes->clip(results->image_size());
29 | 
30 | 	results = (*results)[output_boxes->nonempty()];
31 | 
32 | 	if (results->has("pred_masks")) {
33 | 		retry_if_cuda_oom([&]() {
34 | 			results->set("pred_masks",
35 | 				MaskOps::paste_masks_in_image(
36 | 					results->getTensor("pred_masks").index({ Colon, 0, Colon, Colon }), // N, 1, M, M
37 | 					results->getTensor("pred_boxes"),
38 | 					results->image_size(),
39 | 					mask_threshold));
40 | 		});
41 | 	}
42 | 	if (results->has("pred_keypoints")) {
43 | 		Tensor t = results->getTensor("pred_keypoints");
44 | 		t.index_put_({ Colon, Colon, 0 }, t.index({ Colon, Colon, 0 }) * scale_x);
45 | 		t.index_put_({ Colon, Colon, 1 }, t.index({ Colon, Colon, 1 }) * scale_y);
46 | 		results->set("pred_keypoints", t); // this isn't necessary in theory
47 | 	}
48 | 	return results;
49 | }
50 | 
51 | torch::Tensor PostProcessing::sem_seg_postprocess(torch::Tensor result, const ImageSize &img_size,
52 | 	int output_height, int output_width) {
53 | 	auto sliceImageSizes = vector<torch::indexing::TensorIndex>{
54 | 		Colon,
55 | 		Slice(None, img_size.height),
56 | 		Slice(None, img_size.width)
57 | 	};
58 | 	result = result.index(sliceImageSizes).expand({ 1, -1, -1, -1 });
59 | 	auto options = nn::functional::InterpolateFuncOptions()
60 | 		.size(vector<int64_t>{ output_height, output_width })
61 | 		.mode(torch::kBilinear)
62 | 		.align_corners(false);
63 | 	return nn::functional::interpolate(result, options)[0];;
64 | }
65 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/PostProcessing.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Instances.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/postprocessing.py
 9 | 
10 | 	class PostProcessing {
11 | 	public:
12 | 		/**
13 | 			Resize the output instances.
14 | 			The input images are often resized when entering an object detector.
15 | 			As a result, we often need the outputs of the detector in a different
16 | 			resolution from its inputs.
17 | 
18 | 			This function will resize the raw outputs of an R-CNN detector
19 | 			to produce outputs according to the desired output resolution.
20 | 
21 | 			Args:
22 | 				results (Instances): the raw outputs from the detector.
23 | 					`results.image_size` contains the input image resolution the detector sees.
24 | 					This object might be modified in-place.
25 | 				output_height, output_width: the desired output resolution.
26 | 
27 | 			Returns:
28 | 				Instances: the resized output from the model, based on the output resolution
29 | 		*/
30 | 		static InstancesPtr detector_postprocess(const InstancesPtr &results,
31 | 			int output_height, int output_width, float mask_threshold = 0.5);
32 | 
33 | 		/**
34 | 			Return semantic segmentation predictions in the original resolution.
35 | 
36 | 			The input images are often resized when entering semantic segmentor. Moreover, in same
37 | 			cases, they also padded inside segmentor to be divisible by maximum network stride.
38 | 			As a result, we often need the predictions of the segmentor in a different
39 | 			resolution from its inputs.
40 | 
41 | 			Args:
42 | 				result (Tensor): semantic segmentation prediction logits. A tensor of shape (C, H, W),
43 | 					where C is the number of classes, and H, W are the height and width of the prediction.
44 | 				img_size (tuple): image size that segmentor is taking as input.
45 | 				output_height, output_width: the desired output resolution.
46 | 
47 | 			Returns:
48 | 				semantic segmentation prediction (Tensor): A tensor of the shape
49 | 					(C, output_height, output_width) that contains per-pixel soft predictions.
50 | 		*/
51 | 		static torch::Tensor sem_seg_postprocess(torch::Tensor result, const ImageSize &img_size,
52 | 			int output_height, int output_width);
53 | 	};
54 | }


--------------------------------------------------------------------------------
/Detectron2/Structures/Sampling.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "Utils/Utils.h"
 3 | #include "Sampling.h"
 4 | 
 5 | using namespace std;
 6 | using namespace torch;
 7 | using namespace Detectron2;
 8 | 
 9 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
10 | 
11 | std::tuple<torch::Tensor, torch::Tensor>
12 | Detectron2::subsample_labels(const torch::Tensor &labels, int num_samples, float positive_fraction, int bg_label) {
13 | 	auto positive = torch::nonzero((labels != -1).bitwise_and(labels != bg_label)).index({ Colon, 0 });
14 | 	auto negative = torch::nonzero(labels == bg_label).index({ Colon, 0 });
15 | 
16 | 	auto num_pos = int64_t(num_samples * positive_fraction);
17 | 	// protect against not enough positive examples
18 | 	num_pos = min(positive.numel(), num_pos);
19 | 	auto num_neg = num_samples - num_pos;
20 | 	// protect against not enough negative examples
21 | 	num_neg = min(negative.numel(), num_neg);
22 | 
23 | 	// randomly select positive and negative examples
24 | 	auto perm1 = torch::randperm(positive.numel(), positive.device()).index({ Slice(None, num_pos) }).toType(torch::kLong);
25 | 	auto perm2 = torch::randperm(negative.numel(), negative.device()).index({ Slice(None, num_neg) }).toType(torch::kLong);
26 | 
27 | 	auto pos_idx = positive.index(perm1);
28 | 	auto neg_idx = negative.index(perm2);
29 | 	return { pos_idx, neg_idx };
30 | }
31 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/Sampling.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from modeling/sampling.py
 9 | 
10 |     /**
11 | 		Return `num_samples` (or fewer, if not enough found)
12 | 		random samples from `labels` which is a mixture of positives & negatives.
13 | 		It will try to return as many positives as possible without
14 | 		exceeding `positive_fraction * num_samples`, and then try to
15 | 		fill the remaining slots with negatives.
16 | 
17 | 		Args:
18 | 			labels (Tensor): (N, ) label vector with values:
19 | 				* -1: ignore
20 | 				* bg_label: background ("negative") class
21 | 				* otherwise: one or more foreground ("positive") classes
22 | 			num_samples (int): The total number of labels with value >= 0 to return.
23 | 				Values that are not sampled will be filled with -1 (ignore).
24 | 			positive_fraction (float): The number of subsampled labels with values > 0
25 | 				is `min(num_positives, int(positive_fraction * num_samples))`. The number
26 | 				of negatives sampled is `min(num_negatives, num_samples - num_positives_sampled)`.
27 | 				In order words, if there are not enough positives, the sample is filled with
28 | 				negatives. If there are also not enough negatives, then as many elements are
29 | 				sampled as is possible.
30 | 			bg_label (int): label index of background ("negative") class.
31 | 
32 | 		Returns:
33 | 			pos_idx, neg_idx (Tensor):
34 | 				1D vector of indices. The total length of both is `num_samples` or fewer.
35 | 	*/
36 | 	std::tuple<torch::Tensor, torch::Tensor>
37 | 		subsample_labels(const torch::Tensor &labels, int num_samples, float positive_fraction, int bg_label);
38 | }


--------------------------------------------------------------------------------
/Detectron2/Structures/Sequence.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "Sequence.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | SequencePtr SequenceTensor::slice(int64_t start, int64_t end) const {
11 | 	auto sliced = m_data.slice(0, start, end);
12 | 	return std::shared_ptr<SequenceTensor>(new SequenceTensor(sliced));
13 | }
14 | 
15 | SequencePtr SequenceTensor::index(torch::Tensor item) const {
16 | 	auto selected = m_data.index(item);
17 | 	return std::shared_ptr<SequenceTensor>(new SequenceTensor(selected));
18 | }
19 | 
20 | SequencePtr SequenceTensor::cat(const std::vector<SequencePtr> &seqs, int total) const {
21 | 	TensorVec tensors;
22 | 	tensors.reserve(seqs.size());
23 | 	for (auto &seq : seqs) {
24 | 		Tensor t = dynamic_pointer_cast<SequenceTensor>(seq)->m_data;
25 | 		tensors.push_back(t);
26 | 	}
27 | 	auto aggregated = torch::cat(tensors);
28 | 	assert(aggregated.size(0) == total);
29 | 	return std::shared_ptr<SequenceTensor>(new SequenceTensor(aggregated));
30 | }
31 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/ShapeSpec.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "ShapeSpec.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | ShapeSpec::Vec ShapeSpec::filter(const Map &shapes, const std::vector<std::string> &names) {
11 | 	Vec filtered;
12 | 	filtered.reserve(names.size());
13 | 	for (auto name : names) {
14 | 		auto iter = shapes.find(name);
15 | 		assert(iter != shapes.end());
16 | 		auto &shape = iter->second;
17 | 		filtered.push_back(shape);
18 | 	}
19 | 	return filtered;
20 | }
21 | 
22 | int ShapeSpec::channels_single(const Vec &shapes) {
23 | 	assert(!shapes.empty());
24 | 	int ret = shapes[0].channels;
25 | 	for (int i = 1; i < shapes.size(); i++) {
26 | 		assert(shapes[i].channels == ret);
27 | 	}
28 | 	return ret;
29 | }
30 | 
31 | std::vector<int> ShapeSpec::channels_vec(const Vec &shapes) {
32 | 	vector<int> ret;
33 | 	ret.reserve(shapes.size());
34 | 	for (auto shape : shapes) {
35 | 		ret.push_back(shape.channels);
36 | 	}
37 | 	return ret;
38 | }
39 | 
40 | std::vector<int> ShapeSpec::strides_vec(const ShapeSpec::Vec &shapes) {
41 | 	vector<int> ret;
42 | 	ret.reserve(shapes.size());
43 | 	for (auto shape : shapes) {
44 | 		ret.push_back(shape.stride);
45 | 	}
46 | 	return ret;
47 | }
48 | 


--------------------------------------------------------------------------------
/Detectron2/Structures/ShapeSpec.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Boxes.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from layers/shape_spec.py
 9 | 
10 | 	/**
11 | 		A simple structure that contains basic shape specification about a tensor.
12 | 		It is often used as the auxiliary inputs/outputs of models,
13 | 		to obtain the shape inference ability among pytorch modules.
14 | 
15 | 		Attributes:
16 | 			channels:
17 | 			height:
18 | 			width:
19 | 			stride:
20 | 	*/
21 | 	struct ShapeSpec {
22 | 		int channels;
23 | 		int height;
24 | 		int width;
25 | 		int	stride;
26 | 		int index;
27 | 
28 | 		using Map = std::unordered_map<std::string, ShapeSpec>;
29 | 		using Vec = std::vector<ShapeSpec>;
30 | 
31 | 		int64_t prod() const { return channels * height * width; }
32 | 		static Vec filter(const Map &shapes, const std::vector<std::string> &names);
33 | 		static std::vector<int> channels_vec(const Vec &shapes);
34 | 		static int channels_single(const Vec &shapes);
35 | 		static std::vector<int> strides_vec(const Vec &shapes);
36 | 	};
37 | }
38 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/AsyncPredictor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Predictor.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from demo/predictor.py
 9 | 
10 |     /**
11 | 		A predictor that runs the model asynchronously, possibly on >1 GPUs.
12 | 		Because rendering the visualization takes considerably amount of time,
13 | 		this helps improve throughput when rendering videos.
14 | 	*/
15 | 	class AsyncPredictor : public Predictor {
16 | 	public:
17 | 		/**
18 | 			cfg (CfgNode):
19 | 			num_gpus (int): if 0, will run on CPU
20 | 		*/
21 | 		AsyncPredictor(const CfgNode &cfg, int num_gpus = 1);
22 | 
23 | 		int64_t len() const { return m_put_idx - m_get_idx; }
24 | 		int default_buffer_size() const { return m_procs.size() * 5; }
25 | 
26 | 		void put(torch::Tensor image);
27 | 		InstancesPtr get();
28 | 		InstancesPtr operator()(torch::Tensor image) { return predict(image); }
29 | 		virtual InstancesPtr predict(torch::Tensor original_image) override {
30 | 			put(original_image);
31 | 			return get();
32 | 		}
33 | 
34 | 		void shutdown();
35 | 
36 | 	private:
37 | 		std::mutex m_task_queue_mutex;
38 | 		std::condition_variable m_task_queue_ready;
39 | 		std::list<std::tuple<int, torch::Tensor>> m_task_queue;
40 | 		std::mutex m_result_queue_mutex;
41 | 		std::list<std::tuple<int, InstancesPtr>> m_result_queue;
42 | 		std::vector<std::shared_ptr<std::thread>> m_procs;
43 | 
44 | 		int m_put_idx;
45 | 		int m_get_idx;
46 | 		std::mutex m_result_rank_mutex;
47 | 		std::list<std::tuple<int, InstancesPtr>> m_result_rank;
48 | 	};
49 | }
50 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/Canvas.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	class Canvas {
 8 | 	public:
 9 | 		enum Alignment {
10 | 			kLeft,
11 | 			kCenter,
12 | 			kRight,
13 | 
14 | 			kTop,
15 | 			kMiddle,
16 | 			kBottom,
17 | 		};
18 | 
19 | 		// https://matplotlib.org/3.1.0/gallery/lines_bars_and_markers/linestyles.html
20 | 		enum LineStyle {
21 | 			kSolid,		// '-'
22 | 			kDotted,	// '.'
23 | 			kDashed,	// '--'
24 | 			kDashDot,	// '-.'
25 | 		};
26 | 
27 | 		// RGB or RGBA
28 | 		using Color3 = std::vector<float>;
29 | 		using Color4 = std::vector<float>;
30 | 
31 | 	public:
32 | 		virtual ~Canvas() {}
33 | 
34 | 		// buffer, width, height, alpha
35 | 		virtual std::tuple<torch::Tensor, int, int> SaveToTensor() = 0;
36 | 
37 | 		struct DrawLine2DOptions {
38 | 			float line_width;
39 | 			Color3 color;
40 | 			LineStyle line_style;
41 | 		};
42 | 		virtual void DrawLine2D(const std::vector<int> &x_data, const std::vector<int> &y_data,
43 | 			const DrawLine2DOptions &options) = 0;
44 | 
45 | 		struct DrawRectangleOptions {
46 | 			bool fill = false;
47 | 			Color3 edge_color;
48 | 			float line_width;
49 | 			float alpha;
50 | 			LineStyle line_style;
51 | 		};
52 | 		virtual void DrawRectangle(int x, int y, int width, int height, const DrawRectangleOptions &options) = 0;
53 | 
54 | 		struct DrawPolygonOptions {
55 | 			bool fill = false;
56 | 			Color4 face_color;
57 | 			Color4 edge_color;
58 | 			float line_width;
59 | 		};
60 | 		virtual void DrawPolygon(const torch::Tensor &segment, const DrawPolygonOptions &options) = 0;
61 | 
62 | 		struct DrawCircleOptions {
63 | 			bool fill = false;
64 | 			Color3 color;
65 | 		};
66 | 		virtual void DrawCircle(int x, int y, int radius, const DrawCircleOptions &options) = 0;
67 | 
68 | 		struct DrawTextOptions {
69 | 			float font_size = 8;
70 | 			Color3 font_color;
71 | 			const char *font_family = "arial";
72 | 			Color3 bbox_color;
73 | 			float bbox_alpha = 1.0;
74 | 			float bbox_padding = 0.0;
75 | 			Color3 edge_color;
76 | 			Alignment vertical_alignment = kTop;
77 | 			Alignment horizontal_alignment = kLeft;
78 | 			int zorder = 0;
79 | 			float rotation = 0.0f;
80 | 		};
81 | 		virtual void DrawText(int x, int y, const std::string &text, const DrawTextOptions &options) = 0;
82 | 
83 | 		// img in torch::kFloat32 with alpha
84 | 		virtual void DrawImage(const torch::Tensor &img) = 0;
85 | 	};
86 | }
87 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/CfgNode.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "CfgNode.h"
 3 | #include <direct.h>
 4 | #include <io.h>  
 5 | #include "File.h"
 6 | 
 7 | using namespace std;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | int s_latest_ver = 0;
13 | CfgNode CfgNode::get_cfg() {
14 | 	static CfgNode _C;
15 | 	if (s_latest_ver == 0) {
16 | 		//auto defaultConfigDir = getenv("D2_CONFIGS_DEFAULT_DIR");
17 | 		char buf[256];
18 | 		getcwd(buf, sizeof(buf));
19 | 		auto defaultConfigDir = File::ComposeFilename(buf, "\\Debug\\");
20 | 		cout << "defaultConfigDir:" << defaultConfigDir << endl;
21 | 		if (_access(defaultConfigDir.c_str(), 0) == -1)
22 | 		{
23 | 			cout << "defaultConfigDir no exist" << endl;
24 | 		}
25 | 		assert(defaultConfigDir.c_str());
26 | 		// This yaml was created by dumping _C into yaml from config/defaults.py.
27 | 		_C = load_cfg_from_yaml_file(File::ComposeFilename(defaultConfigDir, "CfgDefaults.yaml"));
28 | 		s_latest_ver = _C["VERSION"].as<int>();
29 | 	}
30 | 	return _C.clone();
31 | }
32 | 
33 | static CfgNode s_global_cfg;
34 | void CfgNode::set_global_cfg(const CfgNode &cfg) {
35 | 	s_global_cfg = cfg;
36 | }
37 | 
38 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
39 | 
40 | CfgNode::CfgNode(YAML::Node init_dict) : fvcore::CfgNode(init_dict) {
41 | }
42 | 
43 | void CfgNode::merge_from_file(const std::string &cfg_filename, bool allow_unsafe) {
44 | 	CfgNode loaded_cfg = fvcore::CfgNode::load_yaml_with_base(cfg_filename, allow_unsafe);
45 | 
46 | 	auto ver = m_dict["VERSION"].as<int>();
47 | 
48 | 	// CfgNode.merge_from_file is only allowed on a config object of latest version!
49 | 	assert(s_latest_ver == ver);
50 | 
51 | 	auto loaded_ver = loaded_cfg["VERSION"].as<int>();
52 | 	/*~!
53 | 	if loaded_ver is None:
54 | 	    from .compat import guess_version
55 | 	    loaded_ver = guess_version(loaded_cfg, cfg_filename)
56 | 	*/
57 | 	assert(loaded_ver <= ver); // Cannot merge a v{loaded_ver} config into a v{self.VERSION} config.
58 | 
59 | 	if (loaded_ver == ver) {
60 | 		merge_from_other_cfg(loaded_cfg);
61 | 	}
62 | 	else {
63 | 		assert(false);
64 | 		//~! didn't convert config upgrade/downgrade
65 | 	}
66 | }
67 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/CfgNode.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/fvcore/config.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from config/config.py
 9 | 	
10 |     /**
11 | 		The same as `fvcore.common.config.CfgNode`, but different in:
12 | 
13 | 		1. Use unsafe yaml loading by default.
14 | 		   Note that this may lead to arbitrary code execution: you must not
15 | 		   load a config file from untrusted sources before manually inspecting
16 | 		   the content of the file.
17 | 		2. Support config versioning.
18 | 		   When attempting to merge an old config, it will convert the old config automatically.
19 | 	*/
20 | 	class CfgNode : public fvcore::CfgNode {
21 | 	public:
22 | 		/**
23 | 			Get a copy of the default config.
24 | 
25 | 			Returns:
26 | 				a detectron2 CfgNode instance.
27 | 		*/
28 | 		static CfgNode get_cfg();
29 | 
30 | 		/**
31 | 			Let the global config point to the given cfg.
32 | 
33 | 			Assume that the given "cfg" has the key "KEY", after calling
34 | 			`set_global_cfg(cfg)`, the key can be accessed by:
35 | 
36 | 			.. code-block:: python
37 | 
38 | 				from detectron2.config import global_cfg
39 | 				print(global_cfg.KEY)
40 | 
41 | 			By using a hacky global config, you can access these configs anywhere,
42 | 			without having to pass the config object or the values deep into the code.
43 | 			This is a hacky feature introduced for quick prototyping / research exploration.
44 | 		*/
45 | 		static void set_global_cfg(const CfgNode &cfg);
46 | 
47 | 	public:
48 | 		CfgNode(YAML::Node init_dict = {});
49 | 
50 | 		// Note that the default value of allow_unsafe is changed to True
51 | 		void merge_from_file(const std::string &cfg_filename, bool allow_unsafe = true);
52 | 	};
53 | }
54 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/DefaultPredictor.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "DefaultPredictor.h"
 3 | #include <Detectron2/Utils/Timer.h>
 4 | #include <Detectron2/Data/ResizeShortestEdge.h>
 5 | 
 6 | #include <coco/data.hpp>
 7 | 
 8 | using namespace std;
 9 | using namespace torch;
10 | using namespace Detectron2;
11 | 
12 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
13 | 
14 | DefaultPredictor::DefaultPredictor(const CfgNode &cfg) : m_model(nullptr) {
15 | 	m_cfg = cfg.clone();  // cfg can be modified by model
16 | 	{
17 | 		Timer timer("build_model");
18 | 		m_model = build_model(m_cfg);
19 | 	}
20 | 	m_model->eval();
21 | 
22 | 	std::string m_weight = cfg["MODEL.WEIGHTS"].as<string>("");
23 | 	//torch::load(m_model, m_weight);
24 | 	auto name = CfgNode::parseTuple<string>(cfg["DATASETS.TEST"], { "" })[0];
25 | 	m_metadata = MetadataCatalog::get(name);
26 | 	{
27 | 		Timer timer("load_checkpoint");
28 | 		m_model->load_checkpoint(m_weight, true);
29 | 		//m_model->load_checkpoint(cfg["MODEL.WEIGHTS"].as<string>(""), false);
30 | 	}
31 | 	m_transform_gen = shared_ptr<TransformGen>(new ResizeShortestEdge(
32 | 		{ cfg["INPUT.MIN_SIZE_TEST"].as<int>(), cfg["INPUT.MIN_SIZE_TEST"].as<int>() },
33 | 		cfg["INPUT.MAX_SIZE_TEST"].as<int>()
34 | 	));
35 | 
36 | 	m_input_format = cfg["INPUT.FORMAT"].as<string>();
37 | 	assert(m_input_format == "RGB" || m_input_format == "BGR");
38 | }
39 | 
40 | InstancesPtr DefaultPredictor::predict(torch::Tensor original_image) {
41 | 
42 | 	torch::NoGradGuard guard; // https://github.com/sphinx-doc/sphinx/issues/4258
43 | 
44 | 	// Apply pre-processing to image.
45 | 	if (m_input_format == "RGB") {
46 | 		// whether the model expects BGR inputs or RGB
47 | 		original_image = torch::flip(original_image, { -1 });
48 | 	}
49 | 	auto height = original_image.size(0);
50 | 	auto width = original_image.size(1);
51 | 	auto image = m_transform_gen->get_transform(original_image)->apply_image(original_image);
52 | 	image = image.to(torch::kFloat32).permute({ 2, 0, 1 });
53 | 	torch::Device _device = torch::Device(DeviceType::CUDA, 0);
54 | 	std::vector<DatasetMapperOutput> inputs(1);
55 | 	inputs[0].image = image.to(_device);
56 | 	inputs[0].height = make_shared<int>(height);
57 | 	inputs[0].width = make_shared<int>(width);
58 | 	InstancesPtr predictions;
59 | 	{
60 | 		Timer timer("forward");
61 | 		predictions = get<0>(m_model->forward(inputs))[0];
62 | 	}
63 | 	return predictions;
64 | }


--------------------------------------------------------------------------------
/Detectron2/Utils/DefaultPredictor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Predictor.h"
 4 | #include "VisImage.h"
 5 | #include <Detectron2/Data/MetadataCatalog.h>
 6 | #include <Detectron2/Data/TransformGen.h>
 7 | #include <Detectron2/MetaArch/MetaArch.h>
 8 | 
 9 | namespace Detectron2
10 | {
11 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
12 | 	// converted from engine/defaults.py
13 | 
14 |     /**
15 | 		Create a simple end-to-end predictor with the given config that runs on
16 | 		single device for a single input image.
17 | 
18 | 		Compared to using the model directly, this class does the following additions:
19 | 
20 | 		1. Load checkpoint from `cfg.MODEL.WEIGHTS`.
21 | 		2. Always take BGR image as the input and apply conversion defined by `cfg.INPUT.FORMAT`.
22 | 		3. Apply resizing defined by `cfg.INPUT.{MIN,MAX}_SIZE_TEST`.
23 | 		4. Take one input image and produce a single output, instead of a batch.
24 | 
25 | 		If you'd like to do anything more fancy, please refer to its source code
26 | 		as examples to build and use the model manually.
27 | 
28 | 		Attributes:
29 | 			metadata (Metadata): the metadata of the underlying dataset, obtained from
30 | 				cfg.DATASETS.TEST.
31 | 
32 | 		Examples:
33 | 
34 | 		.. code-block:: python
35 | 
36 | 			pred = DefaultPredictor(cfg)
37 | 			inputs = cv2.imread("input.jpg")
38 | 			outputs = pred(inputs)
39 | 	*/
40 | 	class DefaultPredictor : public Predictor {
41 | 	public:
42 | 		DefaultPredictor(const CfgNode &cfg);
43 | 
44 | 		/**
45 | 		Args:
46 | 			original_image (np.ndarray): an image of shape (H, W, C) (in BGR order).
47 | 
48 | 		Returns:
49 | 			predictions (dict):
50 | 				the output of the model for one image only.
51 | 				See :doc:`/tutorials/models` for details about the format.
52 | 		*/
53 | 		virtual InstancesPtr predict(torch::Tensor original_image) override;
54 | 		InstancesPtr predict_1(torch::Tensor original_image);
55 | 	protected:
56 | 		CfgNode m_cfg;
57 | 		MetaArch m_model;
58 | 		Metadata m_metadata;
59 | 		std::shared_ptr<TransformGen> m_transform_gen;
60 | 		std::string m_input_format;
61 | 	};
62 | }
63 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/DefaultTrainer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <coco/data.hpp>
 3 | #include "TrainerBase.h"
 4 | #include "utils_train.hpp"
 5 | #include <Detectron2/MetaArch/MetaArch.h>
 6 | #include <Detectron2/Data/TransformGen.h>
 7 | #include <Detectron2/Data/MetadataCatalog.h>
 8 | namespace Detectron2
 9 | {
10 | 	class DefaultTrainer : public TrainerBase { // TrainerBase implementation constructed from a CfgNode
11 | 	public:
12 | 		DefaultTrainer(const CfgNode& cfg);
13 | 		virtual void train() override; // implements the pure-virtual TrainerBase::train()
14 | 		void LoadData(std::vector<Detectron2::ImgData>& img_datas, 
15 | 			std::vector<DatasetMapperOutput>& inputs, int& img_data_i); // all three arguments are non-const references; which of them are outputs is decided in the .cpp — TODO confirm
16 | 		float get_lr(); // learning-rate helpers; presumably they act on _optimizer — TODO confirm
17 | 		void warmup_lr();
18 | 		void set_lr(float lr);
19 | 		torch::Tensor sum_loss(TensorMap& loss_map); // presumably adds up the tensors stored in loss_map — TODO confirm
20 | 	protected:
21 | 		CfgNode m_cfg;
22 | 		MetaArch m_model;
23 | 		Metadata m_metadata;
24 | 		std::shared_ptr<TransformGen> m_transform_gen;
25 | 		std::string m_input_format;
26 | 		// NOTE(review): most private members below have no in-class initializer; presumably the constructor fills them from cfg
27 | 	private:
28 | 		int batch_size;
29 | 		int max_iter;
30 | 
31 | 		float base_lr;
32 | 		float base_momentum;
33 | 		float base_weight_decay;
34 | 		std::shared_ptr<torch::optim::SGD> _optimizer{ nullptr }; // null until assigned
35 | 		std::shared_ptr<CocoDataset> _dataset{ nullptr }; // null until assigned
36 | 		ProgressTracker _pg_tracker;
37 | 		int _warmup_steps;
38 | 		float _warmup_start;
39 | 		int total_epochs;
40 | 		std::set<int> decay_epochs;
41 | 		torch::Device _device = torch::Device(torch::kCPU); // starts out as the CPU device
42 | 		int train_path_size;
43 | 		std::vector<int> decay_step;
44 | 	};
45 | }


--------------------------------------------------------------------------------
/Detectron2/Utils/File.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Base.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	class File { // static path helpers plus a wrapper around a C FILE* handle
 8 | 	public:
 9 | 		static std::string GetCwd(); // presumably get / set the process working directory — TODO confirm
10 | 		static void SetCwd(const std::string &cwd);
11 | 
12 | 		static bool IsAbsolutePath(const std::string &pathname); // path predicates
13 | 		static bool IsDir(const std::string &pathname);
14 | 		static bool IsFile(const std::string &pathname);
15 | 
16 | 		static std::string Dirname(const std::string &pathname); // path composition / decomposition helpers
17 | 		static std::string Basename(const std::string &pathname);
18 | 		static std::string ComposeFilename(const std::string &dirname, const std::string &basename);
19 | 		static std::string ReplaceExtension(const std::string &pathname, const std::string &new_extension);
20 | 
21 | 	public:
22 | 		File(const std::string &fullpath, bool read = true); // read defaults to true; presumably selects read vs. write mode — TODO confirm in File.cpp
23 | 		void Close();
24 | 
25 | 		std::string Read(); // string-based I/O overloads
26 | 		void Write(const std::string &content);
27 | 
28 | 		void Seek(int offset); // positioned / raw-buffer I/O overloads
29 | 		int ReadInt();
30 | 		void Read(char *buf, size_t total);
31 | 		void Write(const char *buf, size_t total);
32 | 
33 | 	private:
34 | 		std::string m_filename;
35 | 		FILE *m_file; // NOTE(review): no destructor is declared here, so closing appears to rely on an explicit Close() call — confirm
36 | 
37 | 		void Verify(bool expr);
38 | 	};
39 | }
40 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/Predictor.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Structures/Instances.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | 	class Predictor { // abstract interface: one image in, one set of predictions out
10 | 	public:
11 | 		virtual ~Predictor() {}
12 | 
13 | 		/**
14 | 		Args:
15 | 			original_image (torch::Tensor): an image of shape (H, W, C) (in BGR order).
16 | 
17 | 		Returns:
18 | 			predictions (InstancesPtr):
19 | 				the output of the model for one image only.
20 | 				See :doc:`/tutorials/models` for details about the format.
21 | 		*/
22 | 		virtual InstancesPtr predict(torch::Tensor original_image) = 0;
23 | 	};
24 | }
25 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/Timer.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "Timer.h"
 3 | 
 4 | #include <windows.h>
 5 | #include <profileapi.h>
 6 | 
 7 | using namespace std;
 8 | using namespace Detectron2;
 9 | 
10 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
11 | 
12 | // Millisecond stopwatch built on the Win32 performance counter; time is measured from construction.
13 | class QueryPerformanceTimer {
14 | public:
15 | 	QueryPerformanceTimer() {
16 | 		LARGE_INTEGER value;
17 | 		const auto ok = QueryPerformanceFrequency(&value);
18 | 		assert(ok);
19 | 		m_freq = static_cast<double>(value.QuadPart) / 1000.0; // counts per millisecond
20 | 		QueryPerformanceCounter(&value);
21 | 		m_counter0 = value.QuadPart; // origin that get_counter() measures from
22 | 	}
23 | 	// Milliseconds elapsed since this object was constructed.
24 | 	double get_counter() {
25 | 		LARGE_INTEGER now;
26 | 		QueryPerformanceCounter(&now);
27 | 		return static_cast<double>(now.QuadPart - m_counter0) / m_freq;
28 | 	}
29 | private:
30 | 	double m_freq = 0.0;
31 | 	__int64 m_counter0 = 0;
32 | };
33 | static QueryPerformanceTimer s_hp_timer;
34 | 
35 | Timer::Timer(const std::string &name)
36 | 	: m_name(name), m_t0(static_cast<int>(s_hp_timer.get_counter())) { // start time, truncated to whole ms
37 | }
38 | 
39 | Timer::~Timer() {
40 | 	char report[256]; // fixed-size line; snprintf truncates anything that does not fit
41 | 	snprintf(report, sizeof report, ">>>>>>> %s: %dms\n", m_name.c_str(), static_cast<int>(s_hp_timer.get_counter()) - m_t0);
42 | 	cout << report;
43 | }
44 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/Timer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Base.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	/**
 8 | 	 * ms-level timing of blocks. In Python, this is roughly equivalent to:
 9 | 	 *
10 | 	 *   import time
11 | 	 *   t0 = time.time()
12 | 	 *   ...
13 | 	 *   print(">>>>>>> {}: {}ms".format("some name", int((time.time() - t0) * 1000)))
14 | 	 */
15 | 	class Timer {
16 | 	public:
17 | 		Timer(const std::string &name); // records the start time
18 | 		~Timer(); // prints the name and the elapsed whole milliseconds to stdout
19 | 
20 | 	private:
21 | 		std::string m_name;
22 | 		int m_t0; // start time in whole milliseconds
23 | 	};
24 | }
25 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/TrainerBase.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Structures/Instances.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	class TrainerBase { // abstract trainer interface
 8 | 	public:
 9 | 		virtual ~TrainerBase() {} // virtual so a concrete trainer can be destroyed through a TrainerBase pointer
10 | 
11 | 		virtual void train() = 0; // must be implemented by concrete trainers
12 | 	};
13 | }


--------------------------------------------------------------------------------
/Detectron2/Utils/Utils.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | 	// assert in debug build; throw in release build
10 | 	void verify(bool expr);
11 | 
12 | 	// string functions (definitions are not in this header; semantics below are inferred from the names — TODO confirm)
13 | 	std::vector<std::string> tokenize(const std::string &input, char delimiter); // presumably splits input at each delimiter
14 | 	std::string lower(const std::string &s);
15 | 	bool endswith(const std::string &s, const std::string &ending);
16 | 	std::string replace_all(const std::string &s, const std::string &src, const std::string &target);
17 | 
18 | 	// image functions: conversions between cv::Mat and torch::Tensor, plus image loading
19 | 	torch::Tensor mat_to_tensor(const cv::Mat &mat);
20 | 	cv::Mat image_to_mat(const torch::Tensor &t);
21 | 	torch::Tensor image_to_tensor(const cv::Mat &mat);
22 | 	torch::Tensor read_image(const std::string &pathname, const std::string &format = ""); // format defaults to the empty string
23 | }
24 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/VideoAnalyzer.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "VideoAnalyzer.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | VideoAnalyzer::VideoAnalyzer() {
11 | }
12 | 
13 | VideoAnalyzer::~VideoAnalyzer() {
14 | }
15 | // The three callbacks below have empty bodies in this class; VideoAnalyzer.h declares them virtual.
16 | void VideoAnalyzer::on_instance_predictions(cv::Mat frame, const InstancesPtr &predictions,
17 | 	const std::vector<std::string> &keypoint_names) {
18 | }
19 | 
20 | void VideoAnalyzer::on_sem_seg(cv::Mat frame, const torch::Tensor &sem_seg) {
21 | }
22 | 
23 | void VideoAnalyzer::on_panoptic_seg_predictions(cv::Mat frame, const torch::Tensor &panoptic_seg,
24 | 	const std::vector<SegmentInfo> &segments_info) {
25 | }
26 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/VideoAnalyzer.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Structures/Instances.h>
 4 | #include <Detectron2/Structures/PanopticSegment.h>
 5 | 
 6 | namespace Detectron2
 7 | {
 8 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 	// converted from utils/video_visualizer.py
10 | 
11 | 	class VideoAnalyzer { // per-frame callback interface; one virtual hook per prediction kind
12 | 	public:
13 | 		VideoAnalyzer();
14 | 		virtual ~VideoAnalyzer();
15 | 		// Each hook receives the frame together with one kind of model output; VideoAnalyzer.cpp gives them empty bodies.
16 | 		virtual void on_instance_predictions(cv::Mat frame, const InstancesPtr &predictions,
17 | 			const std::vector<std::string> &keypoint_names);
18 | 		virtual void on_sem_seg(cv::Mat frame, const torch::Tensor &sem_seg);
19 | 		virtual void on_panoptic_seg_predictions(cv::Mat frame, const torch::Tensor &panoptic_seg,
20 | 			const std::vector<SegmentInfo> &segments_info);
21 | 	};
22 | }
23 | 


--------------------------------------------------------------------------------
/Detectron2/Utils/cvCanvas.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "Canvas.h"
 4 | 
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 
 9 | 	class cvCanvas : public Canvas { // Canvas implementation that holds a cv::Mat
10 | 	public:
11 | 		cvCanvas(int height, int width);
12 | 
13 | 		// implementing Canvas
14 | 		virtual std::tuple<torch::Tensor, int, int> SaveToTensor() override;
15 | 		virtual void DrawLine2D(const std::vector<int> &x_data, const std::vector<int> &y_data,
16 | 			const DrawLine2DOptions &options) override;
17 | 		virtual void DrawRectangle(int x, int y, int width, int height, const DrawRectangleOptions &options) override;
18 | 		virtual void DrawPolygon(const torch::Tensor &segment, const DrawPolygonOptions &options) override;
19 | 		virtual void DrawCircle(int x, int y, int radius, const DrawCircleOptions &options) override;
20 | 		virtual void DrawText(int x, int y, const std::string &text, const DrawTextOptions &options) override;
21 | 		virtual void DrawImage(const torch::Tensor &img) override;
22 | 
23 | 	protected:
24 | 		cv::Mat m_canvas;
25 | 		// static helpers; presumably they translate Canvas option values into OpenCV arguments — TODO confirm
26 | 		static cv::Scalar cvColor(const std::vector<float> &c);
27 | 		static cv::Scalar cvColor(const Color3 &c, float alpha);
28 | 		static int cvLineWidth(float line_width);
29 | 		static int cvLineType(LineStyle line_style);
30 | 		static double cvFontScale(float font_size);
31 | 	};
32 | }


--------------------------------------------------------------------------------
/Detectron2/coco/maskApi.h:
--------------------------------------------------------------------------------
 1 | /**************************************************************************
 2 | * Microsoft COCO Toolbox.      version 2.0
 3 | * Data, paper, and tutorials available at:  http://mscoco.org/
 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
 5 | * Licensed under the Simplified BSD License [see coco/license.txt]
 6 | **************************************************************************/
 7 | #pragma once
 8 | 
 9 | typedef unsigned int uint;
10 | typedef unsigned long siz; /* NOTE(review): the width of unsigned long differs between platforms (e.g. 32-bit under MSVC) - confirm this is intended */
11 | typedef unsigned char byte;
12 | typedef double* BB; /* pointer to bounding-box coordinates; the per-box layout is defined by the implementation - TODO confirm */
13 | typedef struct { siz h, w, m; uint *cnts; } RLE; /* presumably h/w = mask height/width and m = number of entries in cnts - TODO confirm */
14 | 
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 | 
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 | 
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 | 
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 | 
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 | 
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 | 
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 | 
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 | 
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 | 
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 | 
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 | 
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 | 
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 | 
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 | 
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 | 


--------------------------------------------------------------------------------
/Detectron2/detectron2/box_iou_rotated/box_iou_rotated.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #pragma once
 3 | #include <torch/types.h>
 4 | 
 5 | namespace detectron2 {
 6 | 
 7 | at::Tensor box_iou_rotated_cpu( // CPU implementation; declared unconditionally
 8 |     const at::Tensor& boxes1,
 9 |     const at::Tensor& boxes2);
10 | 
11 | #ifdef WITH_CUDA
12 | at::Tensor box_iou_rotated_cuda( // CUDA implementation; only declared when WITH_CUDA is defined
13 |     const at::Tensor& boxes1,
14 |     const at::Tensor& boxes2);
15 | #endif
16 | 
17 | // Interface for Python
18 | // inline is needed to prevent multiple function definitions when this header is
19 | // included by different cpps
20 | inline at::Tensor box_iou_rotated(
21 |     const at::Tensor& boxes1,
22 |     const at::Tensor& boxes2) {
23 |   const bool on_gpu = boxes1.device().is_cuda();
24 |   assert(on_gpu == boxes2.device().is_cuda());
25 |   if (!on_gpu) {
26 |     return box_iou_rotated_cpu(boxes1.contiguous(), boxes2.contiguous());
27 |   }
28 | #ifdef WITH_CUDA
29 |   return box_iou_rotated_cuda(boxes1.contiguous(), boxes2.contiguous());
30 | #else
31 |   AT_ERROR("Not compiled with GPU support");
32 | #endif
33 | }
34 | 
35 | } // namespace detectron2
36 | 


--------------------------------------------------------------------------------
/Detectron2/detectron2/box_iou_rotated/box_iou_rotated_cpu.cpp:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #include "box_iou_rotated.h"
 3 | #include "box_iou_rotated_utils.h"
 4 | 
 5 | namespace detectron2 {
 6 | 
 7 | template <typename T>
 8 | void box_iou_rotated_cpu_kernel(
 9 |     const at::Tensor& boxes1,
10 |     const at::Tensor& boxes2,
11 |     at::Tensor& ious) {
12 |   const auto rows = boxes1.size(0);
13 |   const auto cols = boxes2.size(0);
14 |   // ious is filled as a flat buffer: the pair (r, c) goes to slot r * cols + c
15 |   for (int r = 0; r < rows; ++r) {
16 |     for (int c = 0; c < cols; ++c) {
17 |       const auto flat = r * cols + c;
18 |       ious[flat] = single_box_iou_rotated<T>(boxes1[r].data_ptr<T>(), boxes2[c].data_ptr<T>());
19 |     }
20 |   }
21 | }
22 | 
23 | at::Tensor box_iou_rotated_cpu(
24 |     // input must be contiguous:
25 |     const at::Tensor& boxes1,
26 |     const at::Tensor& boxes2) {
27 |   const int64_t rows = boxes1.size(0);
28 |   const int64_t cols = boxes2.size(0);
29 | 
30 |   // flat float buffer with one slot per (boxes1[i], boxes2[j]) pair
31 |   at::Tensor flat_ious =
32 |       at::empty({rows * cols}, boxes1.options().dtype(at::kFloat));
33 |   box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, flat_ious);
34 | 
35 |   // hand the flat buffer back as a rows x cols matrix
36 |   return flat_ious.reshape({rows, cols});
37 | }
38 | 
39 | } // namespace detectron2
40 | 


--------------------------------------------------------------------------------
/Detectron2/detectron2/cuda_version.cu:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 2 | 
 3 | #include <cuda_runtime_api.h>
 4 | 
 5 | namespace detectron2 {
 6 | int get_cudart_version() { // returns the CUDART_VERSION macro value this file was compiled with
 7 |   return CUDART_VERSION;
 8 | }
 9 | } // namespace detectron2
10 | 


--------------------------------------------------------------------------------
/Detectron2/detectron2/nms/cuda_helpers.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | // Grid-stride loop: i starts at the thread's global x index and advances by the total x thread count while i < n.
 3 | #define CUDA_1D_KERNEL_LOOP(i, n)                                \
 4 |   for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \
 5 |        i += (blockDim.x * gridDim.x))
 6 | // For non-negative n and positive m this is n / m rounded up; note that n + m - 1 can overflow near the type's maximum.
 7 | template <typename integer>
 8 | constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) {
 9 |   return (n + m - 1) / m;
10 | }
11 | 


--------------------------------------------------------------------------------
/Detectron2/detectron2/nms/nms.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <torch/types.h>
 3 | 
 4 | namespace detectron2 {
 5 | 
 6 | at::Tensor nms_cpu( // CPU implementation; declared unconditionally
 7 | 	const at::Tensor& dets,
 8 | 	const at::Tensor& scores,
 9 | 	const double iou_threshold); // NOTE(review): double here, while the nms() wrapper in this header takes float
10 | 
11 | #ifdef WITH_CUDA
12 | at::Tensor nms_cuda( // CUDA implementation; only declared when WITH_CUDA is defined
13 | 	const at::Tensor& dets,
14 | 	const at::Tensor& scores,
15 | 	const double iou_threshold);
16 | #endif
17 | 
18 | // Interface for Python
19 | // inline is needed to prevent multiple function definitions when this header is
20 | // included by different cpps
21 | inline at::Tensor nms(
22 |     const at::Tensor& dets,
23 |     const at::Tensor& scores,
24 |     const float iou_threshold) {
25 |   const bool on_gpu = dets.device().is_cuda();
26 |   assert(on_gpu == scores.device().is_cuda());
27 |   if (!on_gpu) {
28 |     return nms_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
29 |   }
30 | #ifdef WITH_CUDA
31 |   return nms_cuda(
32 |       dets.contiguous(), scores.contiguous(), iou_threshold);
33 | #else
34 |   AT_ERROR("Not compiled with GPU support");
35 | #endif
36 | }
37 | 
38 | } // namespace detectron2
39 | 


--------------------------------------------------------------------------------
/Detectron2/detectron2/nms_rotated/nms_rotated.h:
--------------------------------------------------------------------------------
 1 | // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
 2 | #pragma once
 3 | #include <torch/types.h>
 4 | 
 5 | namespace detectron2 {
 6 | 
 7 | at::Tensor nms_rotated_cpu( // CPU implementation; declared unconditionally
 8 |     const at::Tensor& dets,
 9 |     const at::Tensor& scores,
10 |     const float iou_threshold);
11 | 
12 | #ifdef WITH_CUDA
13 | at::Tensor nms_rotated_cuda( // CUDA implementation; only declared when WITH_CUDA is defined
14 |     const at::Tensor& dets,
15 |     const at::Tensor& scores,
16 |     const float iou_threshold);
17 | #endif
18 | 
19 | // Interface for Python
20 | // inline is needed to prevent multiple function definitions when this header is
21 | // included by different cpps
22 | inline at::Tensor nms_rotated(
23 |     const at::Tensor& dets,
24 |     const at::Tensor& scores,
25 |     const float iou_threshold) {
26 |   const bool on_gpu = dets.device().is_cuda();
27 |   assert(on_gpu == scores.device().is_cuda());
28 |   if (!on_gpu) {
29 |     return nms_rotated_cpu(dets.contiguous(), scores.contiguous(), iou_threshold);
30 |   }
31 | #ifdef WITH_CUDA
32 |   return nms_rotated_cuda(
33 |       dets.contiguous(), scores.contiguous(), iou_threshold);
34 | #else
35 |   AT_ERROR("Not compiled with GPU support");
36 | #endif
37 | }
38 | 
39 | } // namespace detectron2
40 | 


--------------------------------------------------------------------------------
/Detectron2/fvcore/config.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include "yacs.h"
 4 | 
 5 | namespace Detectron2 { namespace fvcore
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from fvcore/common/config.py
 9 | 	
10 |     /**
11 | 		Our own extended version of :class:`yacs.config.CfgNode`.
12 | 		It contains the following extra features:
13 | 
14 | 		1. The :meth:`merge_from_file` method supports the "_BASE_" key,
15 | 		   which allows the new CfgNode to inherit all the attributes from the
16 | 		   base configuration file.
17 | 		2. Keys that start with "COMPUTED_" are treated as insertion-only
18 | 		   "computed" attributes. They can be inserted regardless of whether
19 | 		   the CfgNode is frozen or not.
20 | 		3. With "allow_unsafe=True", it supports pyyaml tags that evaluate
21 | 		   expressions in config. See examples in
22 | 		   https://pyyaml.org/wiki/PyYAMLDocumentation#yaml-tags-and-python-types
23 | 		   Note that this may lead to arbitrary code execution: you must not
24 | 		   load a config file from untrusted sources before manually inspecting
25 | 		   the content of the file.
26 | 	*/
27 | 	class CfgNode : public yacs::CfgNode {
28 | 	public:
29 | 		/**
30 | 			Just like `yaml.load(open(filename))`, but inherit attributes from its
31 | 				`_BASE_`.
32 | 
33 | 			Args:
34 | 				filename (std::string): the file name of the current config. Will be used to
35 | 					find the base config file.
36 | 				allow_unsafe (bool): whether to allow loading the config file with
37 | 					`yaml.unsafe_load`. Defaults to false.
38 | 
39 | 			Returns:
40 | 				(YAML::Node): the loaded yaml
41 | 		*/
42 | 		static YAML::Node load_yaml_with_base(const std::string &filename, bool allow_unsafe = false);
43 | 
44 | 	public:
45 | 		CfgNode(YAML::Node init_dict = {}); // init_dict defaults to a default-constructed YAML::Node
46 | 
47 | 		virtual void set(const std::string &name, YAML::Node val) override; // overrides a virtual of the yacs::CfgNode base
48 | 
49 | 		/**
50 | 			Merge configs from a given yaml file.
51 | 
52 | 			Args:
53 | 				cfg_filename: the file name of the yaml config.
54 | 				allow_unsafe: whether to allow loading the config file with
55 | 					`yaml.unsafe_load`. Defaults to false.
56 | 		*/
57 | 		void merge_from_file(const std::string &cfg_filename, bool allow_unsafe = false);
58 | 
59 | 		// Forward the following calls to base, but with a check on the BASE_KEY.
60 | 		/**
61 | 			Args:
62 | 				cfg_other (CfgNode): configs to merge from.
63 | 		*/
64 | 		void merge_from_other_cfg(const CfgNode &cfg_other);
65 | 
66 | 		/**
67 | 			Args:
68 | 				cfg_list (OptionList): list of configs to merge from.
69 | 		*/
70 | 		void merge_from_list(const OptionList &cfg_list);
71 | 	};
72 | }}
73 | 


--------------------------------------------------------------------------------
/Detectron2/fvcore/fvcore.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "fvcore.h"
 3 | 
 4 | using namespace std;
 5 | using namespace torch;
 6 | using namespace Detectron2;
 7 | 
 8 | ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 9 | 
10 | torch::Tensor fvcore::smooth_l1_loss(const torch::Tensor &input, const torch::Tensor &target,
11 | 	float beta, torch::Reduction::Reduction reduction) {
12 | 	// element-wise |input - target|; both the L1 and the smooth form start from it
13 | 	const Tensor abs_diff = torch::abs(input - target);
14 | 	// Small beta is defined to be exactly L1 loss: with beta == 0 the torch::where
15 | 	// form would give nan gradients once the chain rule is applied, because its
16 | 	// untaken branch "0.5 * n ** 2 / 0" still receives an incoming gradient of
17 | 	// zeros rather than "no gradient".
18 | 	const bool plain_l1 = beta < 1e-5;
19 | 	Tensor loss = abs_diff;
20 | 	if (!plain_l1) {
21 | 		const auto quadratic = 0.5 * (abs_diff * abs_diff) / beta;
22 | 		const auto linear = abs_diff - 0.5 * beta;
23 | 		loss = torch::where(abs_diff < beta, quadratic, linear);
24 | 	}
25 | 	// any reduction other than Mean or Sum leaves the element-wise loss as is
26 | 	switch (reduction) {
27 | 	case Reduction::Mean:
28 | 		return loss.mean();
29 | 	case Reduction::Sum:
30 | 		return loss.sum();
31 | 	default:
32 | 		return loss;
33 | 	}
34 | }
35 | 


--------------------------------------------------------------------------------
/Detectron2/fvcore/fvcore.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | 
 3 | #include <Detectron2/Detectron2.h>
 4 | 
 5 | namespace Detectron2 { namespace fvcore
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from fvcore/nn/smooth_l1_loss.py
 9 | 	
10 |     /**
11 | 		Smooth L1 loss defined in the Fast R-CNN paper as:
12 | 
13 | 					  | 0.5 * x ** 2 / beta   if abs(x) < beta
14 | 		smoothl1(x) = |
15 | 					  | abs(x) - 0.5 * beta   otherwise,
16 | 
17 | 		where x = input - target.
18 | 
19 | 		Smooth L1 loss is related to Huber loss, which is defined as:
20 | 
21 | 					| 0.5 * x ** 2                  if abs(x) < beta
22 | 		 huber(x) = |
23 | 					| beta * (abs(x) - 0.5 * beta)  otherwise
24 | 
25 | 		Smooth L1 loss is equal to huber(x) / beta. This leads to the following
26 | 		differences:
27 | 
28 | 		 - As beta -> 0, Smooth L1 loss converges to L1 loss, while Huber loss
29 | 		   converges to a constant 0 loss.
30 | 		 - As beta -> +inf, Smooth L1 converges to a constant 0 loss, while Huber loss
31 | 		   converges to L2 loss.
32 | 		 - For Smooth L1 loss, as beta varies, the L1 segment of the loss has a constant
33 | 		   slope of 1. For Huber loss, the slope of the L1 segment is beta.
34 | 
35 | 		Smooth L1 loss can be seen as exactly L1 loss, but with the abs(x) < beta
36 | 		portion replaced with a quadratic function such that at abs(x) = beta, its
37 | 		slope is 1. The quadratic segment smooths the L1 loss near x = 0.
38 | 
39 | 		Args:
40 | 			input (Tensor): input tensor of any shape
41 | 			target (Tensor): target value tensor with the same shape as input
42 | 			beta (float): L1 to L2 change point.
43 | 				For beta values < 1e-5, L1 loss is computed.
44 | 			reduction: a torch::Reduction value
45 | 					 Mean: The output will be averaged.
46 | 					 Sum: The output will be summed.
47 | 					 any other value: No reduction will be applied to the output.
48 | 
49 | 		Returns:
50 | 			The loss with the reduction option applied.
51 | 
52 | 		Note:
53 | 			PyTorch's builtin "Smooth L1 loss" implementation does not actually
54 | 			implement Smooth L1 loss, nor does it implement Huber loss. It implements
55 | 			the special case of both in which they are equal (beta=1).
56 | 			See: https://pytorch.org/docs/stable/nn.html#torch.nn.SmoothL1Loss.
57 | 	*/
58 | 	torch::Tensor smooth_l1_loss(const torch::Tensor &input, const torch::Tensor &target,
59 | 		float beta, torch::Reduction::Reduction reduction); // defined in fvcore.cpp
60 | }}
61 | 


--------------------------------------------------------------------------------
/Detectron2/trainDemo.cpp:
--------------------------------------------------------------------------------
 1 | #include "Base.h"
 2 | #include "trainDemo.h"
 3 | #include <Detectron2/Utils/DefaultTrainer.h>
 4 | #include <Detectron2/Data/BuiltinDataset.h>
 5 | using namespace Detectron2;
 6 | 
 7 | CfgNode Trainer::setup_cfg(const std::string& config_file, const CfgNode::OptionList& opts,
 8 | 	float confidence_threshold) {
 9 | 	// defaults first, then the config file, then the command-line overrides
10 | 	auto merged = CfgNode::get_cfg();
11 | 	merged.merge_from_file(config_file);
12 | 	merged.merge_from_list(opts);
13 | 	for (const char* key : { "MODEL.RETINANET.SCORE_THRESH_TEST", "MODEL.ROI_HEADS.SCORE_THRESH_TEST",
14 | 		"MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH" }) {
15 | 		merged[key] = confidence_threshold; // same score threshold for every builtin model family
16 | 	}
17 | 	merged.freeze();
18 | 	return merged;
19 | }
20 | 
21 | void Trainer::start(const Options& options) {
22 | 	// build the frozen config from the file, the overrides and the score threshold
23 | 	auto frozen_cfg = setup_cfg(options.config_file, options.opts, options.confidence_threshold);
24 | 	BuiltinDataset::register_all(); // runs before the Trainer is built; its constructor queries MetadataCatalog
25 | 	Trainer trainer(frozen_cfg);
26 | 	trainer.run_train();
27 | }
28 | 
29 | Trainer::Trainer(const CfgNode& cfg, ColorMode instance_mode, bool parallel) :
30 | 	m_cpu_device(torch::kCPU), m_instance_mode(instance_mode), m_parallel(parallel)
31 | {
32 | 	// metadata is looked up by the first DATASETS.TEST entry ({ "__unused" } is presumably parseTuple's fallback — TODO confirm)
33 | 	std::string dataset_name = CfgNode::parseTuple<std::string>(cfg["DATASETS.TEST"], { "__unused" })[0];
34 | 	m_metadata = MetadataCatalog::get(dataset_name);
35 | 	if (!parallel) {
36 | 		m_TrainerBase = std::make_shared<DefaultTrainer>(cfg);
37 | 	}
38 | 	// parallel == true: no trainer is created and m_TrainerBase stays null; the unported sketch was:
39 | 	//int num_gpu = torch::cuda::device_count();
40 | 	//m_TrainerBase = make_shared<TrainerBase>(cfg, num_gpu);
41 | }
42 | 
43 | void Trainer::run_train() {
44 | 	// Runs the wrapped trainer. The constructor leaves m_TrainerBase null when parallel == true, so fail with a clear error instead of dereferencing null.
45 | 	if (!m_TrainerBase) AT_ERROR("Trainer::run_train: no trainer was created (parallel training is not implemented)");
46 | 	m_TrainerBase->train();
47 | }


--------------------------------------------------------------------------------
/Detectron2/trainDemo.h:
--------------------------------------------------------------------------------
 1 | #pragma once
 2 | #include <Detectron2/Utils/TrainerBase.h>
 3 | #include <Detectron2/Structures/Instances.h>
 4 | #include <Detectron2/Data/MetadataCatalog.h>
 5 | namespace Detectron2
 6 | {
 7 | 	///////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 8 | 	// converted from tools/train_net.py
 9 | 
10 | 	class Trainer { // front end that builds a config and drives a TrainerBase
11 | 	public:
12 | 		struct Options {
13 | 			std::string config_file					// path to config file
14 | 				= "configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml";
15 | 			bool webcam = false;					// Take inputs from webcam
16 | 			std::vector<std::string> input;			// A list of space separated input images
17 | 													// or a single glob pattern such as 'directory/*.jpg'
18 | 			CfgNode::OptionList opts;				// Modify config options using the command-line 'KEY VALUE' pairs
19 | 			float confidence_threshold = 0.5; 		// Minimum score for instance predictions to be shown
20 | 		};
21 | 		static void start(const Options& options); // builds the config, registers the builtin datasets, then constructs a Trainer and runs it
22 | 
23 | 		static CfgNode setup_cfg(const std::string& config_file, const CfgNode::OptionList& opts,
24 | 			float confidence_threshold); // returns the merged config after freeze()
25 | 
26 | 		void run_train(); // calls train() on the wrapped TrainerBase
27 | 	public:
28 | 		Trainer(const CfgNode& cfg, ColorMode instance_mode = ColorMode::kIMAGE, bool parallel = false);
29 | 	private:
30 | 		Metadata m_metadata;
31 | 		torch::Device m_cpu_device;
32 | 		ColorMode m_instance_mode;
33 | 		bool m_parallel;
34 | 		std::shared_ptr<TrainerBase> m_TrainerBase; // left null when constructed with parallel == true (see trainDemo.cpp)
35 | 	};
36 | }


--------------------------------------------------------------------------------
/Detectron2_Project.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="源文件">
 5 |       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
 6 |       <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
 7 |     </Filter>
 8 |     <Filter Include="头文件">
 9 |       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
10 |       <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
11 |     </Filter>
12 |     <Filter Include="资源文件">
13 |       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
14 |       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
15 |     </Filter>
16 |   </ItemGroup>
17 |   <ItemGroup>
18 |     <ClCompile Include="Detectron2_train.cpp">
19 |       <Filter>源文件</Filter>
20 |     </ClCompile>
21 |   </ItemGroup>
22 | </Project>


--------------------------------------------------------------------------------
/Detectron2_Project.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="Current" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 |   <PropertyGroup />
4 | </Project>


--------------------------------------------------------------------------------
/Detectron2_test.cpp:
--------------------------------------------------------------------------------
 1 | #include <Detectron2/Detectron2Includes.h>
 2 | #include <string>
 3 | #include "assert.h" 
 4 | using namespace Detectron2;
 5 | using namespace std;
 6 | 
 7 | // Feeds every file matched by cv::glob in a hard-coded folder to
 8 | // VisualizationDemo::start, using the config derived from the selected model path.
 9 | void demo() {
10 | 	int selected = 0; // <-- index into models[]; only uncommented entries count
11 | 
12 | 	// Every entry ends with a comma so that enabling more of them can never
13 | 	// merge two adjacent string literals into a single bogus path.
14 | 	static const char* models[] = {
15 | 		//"COCO-Detection/faster_rcnn_R_50_FPN_3x/137851257/model_final_f6e8b1.pkl",
16 | 		//"COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl",
17 | 		"COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl",
18 | 		//"COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl",
19 | 		//"COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl",
20 | 		//"COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl",
21 | 	};
22 | 	// Guard the lookup: a stale index would otherwise read past the array.
23 | 	assert(selected >= 0 && selected < static_cast<int>(sizeof(models) / sizeof(models[0])));
24 | 	string model = models[selected];
25 | 	auto tokens = tokenize(model, '/'); // tokens[0] = task folder, tokens[1] = config name
26 | 
27 | 	string configDir = "D:\\libtorch\\detectron2_project\\configs\\";
28 | 	VisualizationDemo::Options options;
29 | 	options.config_file = File::ComposeFilename(configDir, tokens[0] + "\\" + tokens[1] + ".yaml");
30 | 	vector<cv::String> m_file;
31 | 	cv::glob("F:\\data\\faster_rcnn\\images\\train\\",m_file);
32 | 	for (const auto& path : m_file) // range-for avoids the signed/unsigned index compare
33 | 		options.input.push_back(path);
34 | 	options.output = "D:\\libtorch\\detectron2_project\\output\\";
35 | 	//options.output = "predict";
36 | 	//options.opts = { {"MODEL.WEIGHTS", YAML::Node("detectron2://" + model) } };
37 | 	// Exceptions from start() are not caught here; wrap the call in try/catch
38 | 	// and print e.what() if friendlier diagnostics are wanted.
39 | 	VisualizationDemo::start(options);
40 | }
41 | 
42 | // Entry point: delegates all work to demo().
43 | int main() {
44 | 	demo();
45 | 	return 0;
46 | }


--------------------------------------------------------------------------------
/Detectron2_train.cpp:
--------------------------------------------------------------------------------
 1 | #include <Detectron2/Detectron2Includes.h>
 2 | #include <string>
 3 | #include "assert.h" 
 4 | using namespace Detectron2;
 5 | using namespace std;
 6 | 
 7 | // Calls Trainer::start with the config derived from the selected model path
 8 | // and a MODEL.WEIGHTS option pointing at "detectron2://<model>".
 9 | void demo() {
10 | 	int selected = 0; // <-- index into models[]; only uncommented entries count
11 | 
12 | 	// Every entry ends with a comma so that enabling more of them can never
13 | 	// merge two adjacent string literals into a single bogus path.
14 | 	static const char* models[] = {
15 | 		//"COCO-Detection/faster_rcnn_R_50_FPN_3x/137851257/model_final_f6e8b1.pkl",
16 | 		//"COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl",
17 | 		"COCO-Detection/faster_rcnn_R_101_FPN_3x/137851257/model_final_f6e8b1.pkl",
18 | 		//"COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl",
19 | 		//"COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x/138363331/model_final_997cc7.pkl",
20 | 		//"COCO-PanopticSegmentation/panoptic_fpn_R_101_3x/139514519/model_final_cafdb1.pkl",
21 | 	};
22 | 	// Guard the lookup: a stale index would otherwise read past the array.
23 | 	assert(selected >= 0 && selected < static_cast<int>(sizeof(models) / sizeof(models[0])));
24 | 	string model = models[selected];
25 | 	auto tokens = tokenize(model, '/'); // tokens[0] = task folder, tokens[1] = config name
26 | 
27 | 	string configDir = "D:\\libtorch\\detectron2_project\\configs\\";
28 | 	Trainer::Options options;
29 | 	options.config_file = File::ComposeFilename(configDir, tokens[0] + "\\" + tokens[1] + ".yaml");
30 | 	//options.output = "predict";
31 | 	options.opts = { {"MODEL.WEIGHTS", YAML::Node("detectron2://" + model) } };
32 | 	// Exceptions from start() are not caught here; they propagate to the caller.
33 | 	Trainer::start(options);
34 | }
35 | 
36 | // Entry point: delegates all work to demo().
37 | int main() {
38 | 	demo();
39 | 	return 0;
40 | }


--------------------------------------------------------------------------------
/NetLib2/NetLib2.vcxproj.filters:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
 3 |   <ItemGroup>
 4 |     <Filter Include="detectron2">
 5 |       <UniqueIdentifier>{c57bc6e1-7389-4540-b7b0-083317f9900d}</UniqueIdentifier>
 6 |     </Filter>
 7 |   </ItemGroup>
 8 |   <ItemGroup>
 9 |     <CudaCompile Include="..\Detectron2\detectron2\box_iou_rotated\box_iou_rotated_cuda.cu">
10 |       <Filter>detectron2</Filter>
11 |     </CudaCompile>
12 |     <CudaCompile Include="..\Detectron2\detectron2\deformable\deform_conv_cuda.cu">
13 |       <Filter>detectron2</Filter>
14 |     </CudaCompile>
15 |     <CudaCompile Include="..\Detectron2\detectron2\deformable\deform_conv_cuda_kernel.cu">
16 |       <Filter>detectron2</Filter>
17 |     </CudaCompile>
18 |     <CudaCompile Include="..\Detectron2\detectron2\nms_rotated\nms_rotated_cuda.cu">
19 |       <Filter>detectron2</Filter>
20 |     </CudaCompile>
21 |     <CudaCompile Include="..\Detectron2\detectron2\ROIAlign\ROIAlign_cuda.cu">
22 |       <Filter>detectron2</Filter>
23 |     </CudaCompile>
24 |     <CudaCompile Include="..\Detectron2\detectron2\ROIAlignRotated\ROIAlignRotated_cuda.cu">
25 |       <Filter>detectron2</Filter>
26 |     </CudaCompile>
27 |     <CudaCompile Include="..\Detectron2\detectron2\cuda_version.cu">
28 |       <Filter>detectron2</Filter>
29 |     </CudaCompile>
30 |     <CudaCompile Include="..\Detectron2\detectron2\nms\nms_cuda.cu">
31 |       <Filter>detectron2</Filter>
32 |     </CudaCompile>
33 |     <CudaCompile Include="..\Detectron2\detectron2\ROIPool\ROIPool_cuda.cu">
34 |       <Filter>detectron2</Filter>
35 |     </CudaCompile>
36 |   </ItemGroup>
37 | </Project>


--------------------------------------------------------------------------------
/NetLib2/NetLib2.vcxproj.user:
--------------------------------------------------------------------------------
1 | ﻿<?xml version="1.0" encoding="utf-8"?>
2 | <Project ToolsVersion="15.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3 |   <PropertyGroup />
4 | </Project>


--------------------------------------------------------------------------------
/configs/Base-RCNN-C4.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RPN:
 4 |     PRE_NMS_TOPK_TEST: 6000
 5 |     POST_NMS_TOPK_TEST: 1000
 6 |   ROI_HEADS:
 7 |     NAME: "Res5ROIHeads"
 8 | DATASETS:
 9 |   TRAIN: ("coco_2017_train",)
10 |   TEST: ("coco_2017_val",)
11 | SOLVER:
12 |   IMS_PER_BATCH: 16
13 |   BASE_LR: 0.02
14 |   STEPS: (60000, 80000)
15 |   MAX_ITER: 90000
16 | INPUT:
17 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
18 | VERSION: 2
19 | 


--------------------------------------------------------------------------------
/configs/Base-RCNN-DilatedC5.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   RESNETS:
 4 |     OUT_FEATURES: ["res5"]
 5 |     RES5_DILATION: 2
 6 |   RPN:
 7 |     IN_FEATURES: ["res5"]
 8 |     PRE_NMS_TOPK_TEST: 6000
 9 |     POST_NMS_TOPK_TEST: 1000
10 |   ROI_HEADS:
11 |     NAME: "StandardROIHeads"
12 |     IN_FEATURES: ["res5"]
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_FC: 2
16 |     POOLER_RESOLUTION: 7
17 |   ROI_MASK_HEAD:
18 |     NAME: "MaskRCNNConvUpsampleHead"
19 |     NUM_CONV: 4
20 |     POOLER_RESOLUTION: 14
21 | DATASETS:
22 |   TRAIN: ("coco_2017_train",)
23 |   TEST: ("coco_2017_val",)
24 | SOLVER:
25 |   IMS_PER_BATCH: 16
26 |   BASE_LR: 0.02
27 |   STEPS: (60000, 80000)
28 |   MAX_ITER: 90000
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
31 | VERSION: 2
32 | 


--------------------------------------------------------------------------------
/configs/Base-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "GeneralizedRCNN"
 3 |   BACKBONE:
 4 |     NAME: "build_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res2", "res3", "res4", "res5"]
 7 |   FPN:
 8 |     IN_FEATURES: ["res2", "res3", "res4", "res5"]
 9 |   ANCHOR_GENERATOR:
10 |     SIZES: [[32], [64], [128], [256], [512]]  # One anchor size per input feature map
11 |     ASPECT_RATIOS: [[0.5, 1.0, 2.0]]  # Three aspect ratios (shared by all input feature maps)
12 |   RPN:
13 |     IN_FEATURES: ["p2", "p3", "p4", "p5", "p6"]
14 |     PRE_NMS_TOPK_TRAIN: 2000  # Per FPN level
15 |     PRE_NMS_TOPK_TEST: 1000  # Per FPN level
16 |     # Detectron1 uses 2000 proposals per-batch,
17 |     # (See "modeling/rpn/rpn_outputs.py" for details of this legacy issue)
18 |     # which is approximately 1000 proposals per-image since the default batch size for FPN is 2.
19 |     POST_NMS_TOPK_TRAIN: 1000
20 |     POST_NMS_TOPK_TEST: 1000
21 |   ROI_HEADS:
22 |     NAME: "StandardROIHeads"
23 |     IN_FEATURES: ["p2", "p3", "p4", "p5"]
24 |     NUM_CLASSES: 1
25 |   ROI_BOX_HEAD:
26 |     NAME: "FastRCNNConvFCHead"
27 |     NUM_FC: 2
28 |     POOLER_RESOLUTION: 7
29 |     CLS_AGNOSTIC_BBOX_REG: True
30 |   ROI_MASK_HEAD:
31 |     NAME: "MaskRCNNConvUpsampleHead"
32 |     NUM_CONV: 4
33 |     POOLER_RESOLUTION: 14
34 | DATASETS:
35 |   TRAIN: ("coco_2017_train",)
36 |   TEST: ("coco_2017_val",)
37 | SOLVER:
38 |   IMS_PER_BATCH: 4
39 |   BASE_LR: 0.002
40 |   STEPS: (60000, 80000)
41 |   MAX_ITER: 90000
42 | INPUT:
43 |   MAX_SIZE_TRAIN: 608
44 |   MAX_SIZE_TEST: 608
45 |   MIN_SIZE_TRAIN: (608, 608)
46 |   MIN_SIZE_TEST: 608
47 | 


--------------------------------------------------------------------------------
/configs/Base-RetinaNet.yaml:
--------------------------------------------------------------------------------
 1 | MODEL:
 2 |   META_ARCHITECTURE: "RetinaNet"
 3 |   BACKBONE:
 4 |     NAME: "build_retinanet_resnet_fpn_backbone"
 5 |   RESNETS:
 6 |     OUT_FEATURES: ["res3", "res4", "res5"]
 7 |   ANCHOR_GENERATOR:
 8 |     SIZES: !!python/object/apply:eval ["[[x, x * 2**(1.0/3), x * 2**(2.0/3) ] for x in [32, 64, 128, 256, 512 ]]"]
 9 |   FPN:
10 |     IN_FEATURES: ["res3", "res4", "res5"]
11 |   RETINANET:
12 |     IOU_THRESHOLDS: [0.4, 0.5]
13 |     IOU_LABELS: [0, -1, 1]
14 |     SMOOTH_L1_LOSS_BETA: 0.0
15 | DATASETS:
16 |   TRAIN: ("coco_2017_train",)
17 |   TEST: ("coco_2017_val",)
18 | SOLVER:
19 |   IMS_PER_BATCH: 16
20 |   BASE_LR: 0.01  # Note that RetinaNet uses a different default learning rate
21 |   STEPS: (60000, 80000)
22 |   MAX_ITER: 90000
23 | INPUT:
24 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
25 | VERSION: 2
26 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   LOAD_PROPOSALS: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   PROPOSAL_GENERATOR:
 9 |     NAME: "PrecomputedProposals"
10 | DATASETS:
11 |   TRAIN: ("coco_2017_train",)
12 |   PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_train_box_proposals_21bc3a.pkl", )
13 |   TEST: ("coco_2017_val",)
14 |   PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
15 | DATALOADER:
16 |   # proposals are part of the dataset_dicts, and take a lot of RAM
17 |   NUM_WORKERS: 2
18 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "F:\\data\\centernetv2_data\\weight\\epoch_300.pt"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | DATASETS:
 8 |   TRAIN: ("F:\\data\\centernetv2_data\\annotations\\train.json","F:\\data\\centernetv2_data\\images\\train\\",)
 9 |   TEST: ("coco_2017_val",)
10 | SOLVER:
11 |   STEPS: (10000,14000,18000,)
12 |   MAX_ITER: 20000
13 |   WARMUP_ITERS: 4000
14 |   WARMUP_FACTOR: 0.001
15 | VERSION: 2
16 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: False
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: False
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 5 |   PIXEL_STD: [57.375, 57.120, 58.395]
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 | SOLVER:
12 |   STEPS: (210000, 250000)
13 |   MAX_ITER: 270000
14 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/retinanet_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RetinaNet.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/rpn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "ProposalNetwork"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   RPN:
 9 |     PRE_NMS_TOPK_TEST: 12000
10 |     POST_NMS_TOPK_TEST: 2000
11 | 


--------------------------------------------------------------------------------
/configs/COCO-Detection/rpn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "ProposalNetwork"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: False
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   RPN:
 9 |     POST_NMS_TOPK_TEST: 2000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-C4.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-DilatedC5.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_1x_giou.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   RPN:
 8 |     BBOX_REG_LOSS_TYPE: "giou"
 9 |     BBOX_REG_LOSS_WEIGHT: 2.0
10 |   ROI_BOX_HEAD:
11 |     BBOX_REG_LOSS_TYPE: "giou"
12 |     BBOX_REG_LOSS_WEIGHT: 10.0
13 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | SOLVER:
 8 |   STEPS: (210000, 250000)
 9 |   MAX_ITER: 270000
10 | 


--------------------------------------------------------------------------------
/configs/COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: True
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 5 |   PIXEL_STD: [57.375, 57.120, 58.395]
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 | SOLVER:
12 |   STEPS: (210000, 250000)
13 |   MAX_ITER: 270000
14 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/Base-Keypoint-RCNN-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   KEYPOINT_ON: True
 4 |   ROI_HEADS:
 5 |     NUM_CLASSES: 1
 6 |   ROI_BOX_HEAD:
 7 |     SMOOTH_L1_BETA: 0.5  # Keypoint AP degrades (though box AP improves) when using plain L1 loss
 8 |   RPN:
 9 |     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
10 |     # 1000 proposals per-image is found to hurt box AP.
11 |     # Therefore we increase it to 1500 per-image.
12 |     POST_NMS_TOPK_TRAIN: 1500
13 | DATASETS:
14 |   TRAIN: ("keypoints_coco_2017_train",)
15 |   TEST: ("keypoints_coco_2017_val",)
16 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_101_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "Base-Keypoint-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 101
10 | SOLVER:
11 |   STEPS: (210000, 250000)
12 |   MAX_ITER: 270000
13 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   MASK_ON: True
 5 |   SEM_SEG_HEAD:
 6 |     LOSS_WEIGHT: 0.5
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_separated",)
 9 |   TEST: ("coco_2017_val_panoptic_separated",)
10 | DATALOADER:
11 |   FILTER_EMPTY_ANNOTATIONS: False
12 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
4 |   RESNETS:
5 |     DEPTH: 101
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_1x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | 


--------------------------------------------------------------------------------
/configs/COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "Base-Panoptic-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   RESNETS:
5 |     DEPTH: 50
6 | SOLVER:
7 |   STEPS: (210000, 250000)
8 |   MAX_ITER: 270000
9 | 


--------------------------------------------------------------------------------
/configs/Cityscapes/mask_rcnn_R_50_FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   # WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   # For better, more stable performance initialize from COCO
 5 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
 6 |   MASK_ON: True
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 8
 9 | # This is similar to the setting used in Mask R-CNN paper, Appendix A
10 | # But there are some differences, e.g., we did not initialize the output
11 | # layer using the corresponding classes from COCO
12 | INPUT:
13 |   MIN_SIZE_TRAIN: (800, 832, 864, 896, 928, 960, 992, 1024)
14 |   MIN_SIZE_TRAIN_SAMPLING: "choice"
15 |   MIN_SIZE_TEST: 1024
16 |   MAX_SIZE_TRAIN: 2048
17 |   MAX_SIZE_TEST: 2048
18 | DATASETS:
19 |   TRAIN: ("cityscapes_fine_instance_seg_train",)
20 |   TEST: ("cityscapes_fine_instance_seg_val",)
21 | SOLVER:
22 |   BASE_LR: 0.01
23 |   STEPS: (18000,)
24 |   MAX_ITER: 24000
25 |   IMS_PER_BATCH: 8
26 | TEST:
27 |   EVAL_PERIOD: 8000
28 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/faster_rcnn_R_50_FPN_noaug_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   # Detectron1 uses smooth L1 loss with some magic beta values.
 8 |   # The defaults are changed to L1 loss in Detectron2.
 9 |   RPN:
10 |     SMOOTH_L1_BETA: 0.1111
11 |   ROI_BOX_HEAD:
12 |     SMOOTH_L1_BETA: 1.0
13 |     POOLER_SAMPLING_RATIO: 2
14 |     POOLER_TYPE: "ROIAlign"
15 | INPUT:
16 |   # no scale augmentation
17 |   MIN_SIZE_TRAIN: (800, )
18 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/keypoint_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1
 9 |   ROI_KEYPOINT_HEAD:
10 |     POOLER_RESOLUTION: 14
11 |     POOLER_SAMPLING_RATIO: 2
12 |     POOLER_TYPE: "ROIAlign"
13 |   # Detectron1 uses smooth L1 loss with some magic beta values.
14 |   # The defaults are changed to L1 loss in Detectron2.
15 |   ROI_BOX_HEAD:
16 |     SMOOTH_L1_BETA: 1.0
17 |     POOLER_SAMPLING_RATIO: 2
18 |     POOLER_TYPE: "ROIAlign"
19 |   RPN:
20 |     SMOOTH_L1_BETA: 0.1111
21 |     # Detectron1 uses 2000 proposals per-batch, but this option is per-image in detectron2.
22 |     # 1000 proposals per-image is found to hurt box AP.
23 |     # Therefore we increase it to 1500 per-image.
24 |     POST_NMS_TOPK_TRAIN: 1500
25 | DATASETS:
26 |   TRAIN: ("keypoints_coco_2017_train",)
27 |   TEST: ("keypoints_coco_2017_val",)
28 | 


--------------------------------------------------------------------------------
/configs/Detectron1-Comparisons/mask_rcnn_R_50_FPN_noaug_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   # Detectron1 uses smooth L1 loss with some magic beta values.
 8 |   # The defaults are changed to L1 loss in Detectron2.
 9 |   RPN:
10 |     SMOOTH_L1_BETA: 0.1111
11 |   ROI_BOX_HEAD:
12 |     SMOOTH_L1_BETA: 1.0
13 |     POOLER_SAMPLING_RATIO: 2
14 |     POOLER_TYPE: "ROIAlign"
15 |   ROI_MASK_HEAD:
16 |     POOLER_SAMPLING_RATIO: 2
17 |     POOLER_TYPE: "ROIAlign"
18 | INPUT:
19 |   # no scale augmentation
20 |   MIN_SIZE_TRAIN: (800, )
21 | 


--------------------------------------------------------------------------------
/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1230
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v0.5_train",)
14 |   TEST: ("lvis_v0.5_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | DATALOADER:
18 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
19 |   REPEAT_THRESHOLD: 0.001
20 | 


--------------------------------------------------------------------------------
/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1230
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v0.5_train",)
14 |   TEST: ("lvis_v0.5_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | DATALOADER:
18 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
19 |   REPEAT_THRESHOLD: 0.001
20 | 


--------------------------------------------------------------------------------
/configs/LVISv0.5-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 |   ROI_HEADS:
12 |     NUM_CLASSES: 1230
13 |     SCORE_THRESH_TEST: 0.0001
14 | INPUT:
15 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
16 | DATASETS:
17 |   TRAIN: ("lvis_v0.5_train",)
18 |   TEST: ("lvis_v0.5_val",)
19 | TEST:
20 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
21 | DATALOADER:
22 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
23 |   REPEAT_THRESHOLD: 0.001
24 | 


--------------------------------------------------------------------------------
/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_101_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-101.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 101
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1203
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v1_train",)
14 |   TEST: ("lvis_v1_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | SOLVER:
18 |   STEPS: (120000, 160000)
19 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
20 | DATALOADER:
21 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
22 |   REPEAT_THRESHOLD: 0.001
23 | 


--------------------------------------------------------------------------------
/configs/LVISv1-InstanceSegmentation/mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 1203
 9 |     SCORE_THRESH_TEST: 0.0001
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | DATASETS:
13 |   TRAIN: ("lvis_v1_train",)
14 |   TEST: ("lvis_v1_val",)
15 | TEST:
16 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
17 | SOLVER:
18 |   STEPS: (120000, 160000)
19 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
20 | DATALOADER:
21 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
22 |   REPEAT_THRESHOLD: 0.001
23 | 


--------------------------------------------------------------------------------
/configs/LVISv1-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/FAIR/X-101-32x8d.pkl"
 4 |   PIXEL_STD: [57.375, 57.120, 58.395]
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False  # this is a C2 model
 8 |     NUM_GROUPS: 32
 9 |     WIDTH_PER_GROUP: 8
10 |     DEPTH: 101
11 |   ROI_HEADS:
12 |     NUM_CLASSES: 1203
13 |     SCORE_THRESH_TEST: 0.0001
14 | INPUT:
15 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
16 | DATASETS:
17 |   TRAIN: ("lvis_v1_train",)
18 |   TEST: ("lvis_v1_val",)
19 | SOLVER:
20 |   STEPS: (120000, 160000)
21 |   MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
22 | TEST:
23 |   DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
24 | DATALOADER:
25 |   SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
26 |   REPEAT_THRESHOLD: 0.001
27 | 


--------------------------------------------------------------------------------
/configs/Misc/cascade_mask_rcnn_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NAME: CascadeROIHeads
 9 |   ROI_BOX_HEAD:
10 |     CLS_AGNOSTIC_BBOX_REG: True
11 |   RPN:
12 |     POST_NMS_TOPK_TRAIN: 2000
13 | 


--------------------------------------------------------------------------------
/configs/Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NAME: CascadeROIHeads
 9 |   ROI_BOX_HEAD:
10 |     CLS_AGNOSTIC_BBOX_REG: True
11 |   RPN:
12 |     POST_NMS_TOPK_TRAIN: 2000
13 | SOLVER:
14 |   STEPS: (210000, 250000)
15 |   MAX_ITER: 270000
16 | 


--------------------------------------------------------------------------------
/configs/Misc/cascade_mask_rcnn_X_152_32x8d_FPN_IN5k_gn_dconv.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   MASK_ON: True
 4 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/X-152-32x8d-IN5k"
 5 |   RESNETS:
 6 |     STRIDE_IN_1X1: False  # this is a C2 model
 7 |     NUM_GROUPS: 32
 8 |     WIDTH_PER_GROUP: 8
 9 |     DEPTH: 152
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |   ROI_HEADS:
12 |     NAME: "CascadeROIHeads"
13 |   ROI_BOX_HEAD:
14 |     NAME: "FastRCNNConvFCHead"
15 |     NUM_CONV: 4
16 |     NUM_FC: 1
17 |     NORM: "GN"
18 |     CLS_AGNOSTIC_BBOX_REG: True
19 |   ROI_MASK_HEAD:
20 |     NUM_CONV: 8
21 |     NORM: "GN"
22 |   RPN:
23 |     POST_NMS_TOPK_TRAIN: 2000
24 | SOLVER:
25 |   IMS_PER_BATCH: 128
26 |   STEPS: (35000, 45000)
27 |   MAX_ITER: 50000
28 |   BASE_LR: 0.16
29 | INPUT:
30 |   MIN_SIZE_TRAIN: (640, 864)
31 |   MIN_SIZE_TRAIN_SAMPLING: "range"
32 |   MAX_SIZE_TRAIN: 1440
33 |   CROP:
34 |     ENABLED: True
35 | TEST:
36 |   EVAL_PERIOD: 2500
37 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_1x_cls_agnostic.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_BOX_HEAD:
 8 |     CLS_AGNOSTIC_BBOX_REG: True
 9 |   ROI_MASK_HEAD:
10 |     CLS_AGNOSTIC_MASK: True
11 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_1x_dconv_c3-c5.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Base-RCNN-FPN.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
4 |   MASK_ON: True
5 |   RESNETS:
6 |     DEPTH: 50
7 |     DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
8 |     DEFORM_MODULATED: False
9 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_3x_dconv_c3-c5.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     DEFORM_ON_PER_STAGE: [False, True, True, True] # on Res3,Res4,Res5
 8 |     DEFORM_MODULATED: False
 9 | SOLVER:
10 |   STEPS: (210000, 250000)
11 |   MAX_ITER: 270000
12 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-50-GN"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     NORM: "GN"
 8 |     STRIDE_IN_1X1: False
 9 |   FPN:
10 |     NORM: "GN"
11 |   ROI_BOX_HEAD:
12 |     NAME: "FastRCNNConvFCHead"
13 |     NUM_CONV: 4
14 |     NUM_FC: 1
15 |     NORM: "GN"
16 |   ROI_MASK_HEAD:
17 |     NORM: "GN"
18 | SOLVER:
19 |   # 3x schedule
20 |   STEPS: (210000, 250000)
21 |   MAX_ITER: 270000
22 | 


--------------------------------------------------------------------------------
/configs/Misc/mask_rcnn_R_50_FPN_3x_syncbn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |     NORM: "SyncBN"
 8 |     STRIDE_IN_1X1: True
 9 |   FPN:
10 |     NORM: "SyncBN"
11 |   ROI_BOX_HEAD:
12 |     NAME: "FastRCNNConvFCHead"
13 |     NUM_CONV: 4
14 |     NUM_FC: 1
15 |     NORM: "SyncBN"
16 |   ROI_MASK_HEAD:
17 |     NORM: "SyncBN"
18 | SOLVER:
19 |   # 3x schedule
20 |   STEPS: (210000, 250000)
21 |   MAX_ITER: 270000
22 | TEST:
23 |   PRECISE_BN:
24 |     ENABLED: True
25 | 


--------------------------------------------------------------------------------
/configs/Misc/panoptic_fpn_R_101_dconv_cascade_gn_3x.yaml:
--------------------------------------------------------------------------------
 1 | # A large PanopticFPN for demo purposes.
 2 | # Use GN on backbone to support semantic seg.
 3 | # Use Cascade + Deform Conv to improve localization.
 4 | _BASE_: "../COCO-PanopticSegmentation/Base-Panoptic-FPN.yaml"
 5 | MODEL:
 6 |   WEIGHTS: "catalog://ImageNetPretrained/FAIR/R-101-GN"
 7 |   RESNETS:
 8 |     DEPTH: 101
 9 |     NORM: "GN"
10 |     DEFORM_ON_PER_STAGE: [False, True, True, True]
11 |     STRIDE_IN_1X1: False
12 |   FPN:
13 |     NORM: "GN"
14 |   ROI_HEADS:
15 |     NAME: CascadeROIHeads
16 |   ROI_BOX_HEAD:
17 |     CLS_AGNOSTIC_BBOX_REG: True
18 |   ROI_MASK_HEAD:
19 |     NORM: "GN"
20 |   RPN:
21 |     POST_NMS_TOPK_TRAIN: 2000
22 | SOLVER:
23 |   STEPS: (105000, 125000)
24 |   MAX_ITER: 135000
25 |   IMS_PER_BATCH: 32
26 |   BASE_LR: 0.04
27 | 


--------------------------------------------------------------------------------
/configs/Misc/scratch_mask_rcnn_R_50_FPN_3x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
 2 | MODEL:
 3 |   # Train from random initialization.
 4 |   WEIGHTS: ""
 5 |   # It makes sense to divide by STD when training from scratch,
 6 |   # but it seems to make no difference to the results, and C2's models didn't do this,
 7 |   # so we keep things consistent with C2.
 8 |   # PIXEL_STD: [57.375, 57.12, 58.395]
 9 |   MASK_ON: True
10 |   BACKBONE:
11 |     FREEZE_AT: 0
12 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
13 | # to learn what you need for training from scratch.
14 | 


--------------------------------------------------------------------------------
/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_gn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_gn.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.12, 58.395]
 4 |   WEIGHTS: ""
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False
 8 |   BACKBONE:
 9 |     FREEZE_AT: 0
10 | SOLVER:
11 |   # 9x schedule
12 |   IMS_PER_BATCH: 64  # 4x the standard
13 |   STEPS: (187500, 197500)  # 60k/4 == 15k and 20k/4 == 5k iterations before MAX_ITER
14 |   MAX_ITER: 202500   # 90k * 9 / 4
15 |   BASE_LR: 0.08
16 | TEST:
17 |   EVAL_PERIOD: 2500
18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
19 | # to learn what you need for training from scratch.
20 | 


--------------------------------------------------------------------------------
/configs/Misc/scratch_mask_rcnn_R_50_FPN_9x_syncbn.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "mask_rcnn_R_50_FPN_3x_syncbn.yaml"
 2 | MODEL:
 3 |   PIXEL_STD: [57.375, 57.12, 58.395]
 4 |   WEIGHTS: ""
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     STRIDE_IN_1X1: False
 8 |   BACKBONE:
 9 |     FREEZE_AT: 0
10 | SOLVER:
11 |   # 9x schedule
12 |   IMS_PER_BATCH: 64  # 4x the standard
13 |   STEPS: (187500, 197500)  # 60k/4 == 15k and 20k/4 == 5k iterations before MAX_ITER
14 |   MAX_ITER: 202500   # 90k * 9 / 4
15 |   BASE_LR: 0.08
16 | TEST:
17 |   EVAL_PERIOD: 2500
18 | # NOTE: Please refer to Rethinking ImageNet Pre-training https://arxiv.org/abs/1811.08883
19 | # to learn what you need for training from scratch.
20 | 


--------------------------------------------------------------------------------
/configs/Misc/semantic_R_50_FPN_1x.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_train_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_panoptic_stuffonly",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | 


--------------------------------------------------------------------------------
/configs/PascalVOC-Detection/faster_rcnn_R_50_C4.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 20
 9 | INPUT:
10 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
11 |   MIN_SIZE_TEST: 800
12 | DATASETS:
13 |   TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
14 |   TEST: ('voc_2007_test',)
15 | SOLVER:
16 |   STEPS: (12000, 16000)
17 |   MAX_ITER: 18000  # 17.4 epochs
18 |   WARMUP_ITERS: 100
19 | 


--------------------------------------------------------------------------------
/configs/PascalVOC-Detection/faster_rcnn_R_50_FPN.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: False
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     NUM_CLASSES: 20
 9 | INPUT:
10 |   MIN_SIZE_TRAIN: (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)
11 |   MIN_SIZE_TEST: 800
12 | DATASETS:
13 |   TRAIN: ('voc_2007_trainval', 'voc_2012_trainval')
14 |   TEST: ('voc_2007_test',)
15 | SOLVER:
16 |   STEPS: (12000, 16000)
17 |   MAX_ITER: 18000  # 17.4 epochs
18 |   WARMUP_ITERS: 100
19 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://Misc/cascade_mask_rcnn_R_50_FPN_3x/144998488/model_final_480dd8.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 50.18, 0.02], ["segm", "AP",  43.87, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/cascade_mask_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Misc/cascade_mask_rcnn_R_50_FPN_3x.yaml"
 2 | DATASETS:
 3 |   TRAIN: ("coco_2017_val_100",)
 4 |   TEST: ("coco_2017_val_100",)
 5 | SOLVER:
 6 |   BASE_LR: 0.005
 7 |   STEPS: (30,)
 8 |   MAX_ITER: 40
 9 |   IMS_PER_BATCH: 4
10 | DATALOADER:
11 |   NUM_WORKERS: 2
12 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/fast_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/fast_rcnn_R_50_FPN_1x/137635226/model_final_e5f7ce.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 45.70, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/fast_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/fast_rcnn_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   PROPOSAL_FILES_TRAIN: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
 7 |   TEST: ("coco_2017_val_100",)
 8 |   PROPOSAL_FILES_TEST: ("detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/coco_2017_val_box_proposals_ee0dad.pkl", )
 9 | SOLVER:
10 |   BASE_LR: 0.005
11 |   STEPS: (30,)
12 |   MAX_ITER: 40
13 |   IMS_PER_BATCH: 4
14 | DATALOADER:
15 |   NUM_WORKERS: 2
16 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x/137849621/model_final_a6e10b.pkl"
4 | DATASETS:
5 |   TEST: ("keypoints_coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 52.47, 0.02], ["keypoints", "AP", 67.36, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   ROI_HEADS:
 6 |     NUM_CLASSES: 1
 7 | DATASETS:
 8 |   TRAIN: ("keypoints_coco_2017_val_100",)
 9 |   TEST: ("keypoints_coco_2017_val_100",)
10 | SOLVER:
11 |   BASE_LR: 0.005
12 |   STEPS: (30,)
13 |   MAX_ITER: 40
14 |   IMS_PER_BATCH: 4
15 | DATALOADER:
16 |   NUM_WORKERS: 2
17 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_normalized_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |     NUM_CLASSES: 1
10 |   ROI_KEYPOINT_HEAD:
11 |     POOLER_RESOLUTION: 14
12 |     POOLER_SAMPLING_RATIO: 2
13 |     NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: False
14 |     LOSS_WEIGHT: 4.0
15 |   ROI_BOX_HEAD:
16 |     SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
17 |   RPN:
18 |     SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
19 | DATASETS:
20 |   TRAIN: ("keypoints_coco_2017_val",)
21 |   TEST: ("keypoints_coco_2017_val",)
22 | INPUT:
23 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
24 | SOLVER:
25 |   WARMUP_FACTOR: 0.33333333
26 |   WARMUP_ITERS: 100
27 |   STEPS: (5500, 5800)
28 |   MAX_ITER: 6000
29 | TEST:
30 |   EXPECTED_RESULTS: [["bbox", "AP", 55.35, 1.0], ["keypoints", "AP", 76.91, 1.0]]
31 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/keypoint_rcnn_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   KEYPOINT_ON: True
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 |   ROI_HEADS:
 8 |     BATCH_SIZE_PER_IMAGE: 256
 9 |     NUM_CLASSES: 1
10 |   ROI_KEYPOINT_HEAD:
11 |     POOLER_RESOLUTION: 14
12 |     POOLER_SAMPLING_RATIO: 2
13 |   ROI_BOX_HEAD:
14 |     SMOOTH_L1_BETA: 1.0  # Keypoint AP degrades when using plain L1 loss
15 |   RPN:
16 |     SMOOTH_L1_BETA: 0.2  # Keypoint AP degrades when using plain L1 loss
17 | DATASETS:
18 |   TRAIN: ("keypoints_coco_2017_val",)
19 |   TEST: ("keypoints_coco_2017_val",)
20 | INPUT:
21 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
22 | SOLVER:
23 |   WARMUP_FACTOR: 0.33333333
24 |   WARMUP_ITERS: 100
25 |   STEPS: (5500, 5800)
26 |   MAX_ITER: 6000
27 | TEST:
28 |   EXPECTED_RESULTS: [["bbox", "AP", 53.5, 1.0], ["keypoints", "AP", 72.4, 1.0]]
29 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_GCV_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.001
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 |   CLIP_GRADIENTS:
14 |     ENABLED: True
15 |     CLIP_TYPE: "value"
16 |     CLIP_VALUE: 1.0
17 | DATALOADER:
18 |   NUM_WORKERS: 2
19 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_C4_3x/137849525/model_final_4ce675.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.37, 0.02], ["segm", "AP", 40.99, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.001
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_C4_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-C4.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   ROI_HEADS:
 5 |     BATCH_SIZE_PER_IMAGE: 256
 6 |   MASK_ON: True
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val",)
 9 |   TEST: ("coco_2017_val",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (600,)
12 |   MAX_SIZE_TRAIN: 1000
13 |   MIN_SIZE_TEST: 800
14 |   MAX_SIZE_TEST: 1000
15 | SOLVER:
16 |   IMS_PER_BATCH: 8  # base uses 16
17 |   WARMUP_FACTOR: 0.33333
18 |   WARMUP_ITERS: 100
19 |   STEPS: (11000, 11600)
20 |   MAX_ITER: 12000
21 | TEST:
22 |   EXPECTED_RESULTS: [["bbox", "AP", 41.88, 0.7], ["segm", "AP", 33.79, 0.5]]
23 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_DC5_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_DC5_3x/137849551/model_final_84107b.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.44, 0.02], ["segm", "AP", 42.94, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"
 4 | DATASETS:
 5 |   TEST: ("coco_2017_val_100",)
 6 | TEST:
 7 |   EXPECTED_RESULTS: [["bbox", "AP", 47.34, 0.02], ["segm", "AP",  42.67, 0.02], ["bbox_TTA", "AP", 49.11, 0.02], ["segm_TTA", "AP", 45.04, 0.02]]
 8 |   AUG:
 9 |     ENABLED: True
10 |     MIN_SIZES: (700, 800)  # to save some time
11 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   MASK_ON: True
 5 | DATASETS:
 6 |   TRAIN: ("coco_2017_val_100",)
 7 |   TEST: ("coco_2017_val_100",)
 8 | SOLVER:
 9 |   BASE_LR: 0.005
10 |   STEPS: (30,)
11 |   MAX_ITER: 40
12 |   IMS_PER_BATCH: 4
13 | DATALOADER:
14 |   NUM_WORKERS: 2
15 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_pred_boxes_training_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "./mask_rcnn_R_50_FPN_training_acc_test.yaml"
2 | MODEL:
3 |   ROI_BOX_HEAD:
4 |     TRAIN_ON_PRED_BOXES: True
5 | TEST:
6 |   EXPECTED_RESULTS: [["bbox", "AP", 42.6, 1.0], ["segm", "AP", 35.8, 0.8]]
7 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/mask_rcnn_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 |   ROI_HEADS:
 5 |     BATCH_SIZE_PER_IMAGE: 256
 6 |   MASK_ON: True
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val",)
 9 |   TEST: ("coco_2017_val",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (600,)
12 |   MAX_SIZE_TRAIN: 1000
13 |   MIN_SIZE_TEST: 800
14 |   MAX_SIZE_TEST: 1000
15 | SOLVER:
16 |   WARMUP_FACTOR: 0.3333333
17 |   WARMUP_ITERS: 100
18 |   STEPS: (5500, 5800)
19 |   MAX_ITER: 6000
20 | TEST:
21 |   EXPECTED_RESULTS: [["bbox", "AP", 42.5, 1.0], ["segm", "AP", 35.8, 0.8]]
22 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/panoptic_fpn_R_50_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-PanopticSegmentation/panoptic_fpn_R_50_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-PanopticSegmentation/panoptic_fpn_R_50_3x/139514569/model_final_c10459.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100_panoptic_separated",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 46.47, 0.02], ["segm", "AP", 43.39, 0.02], ["sem_seg", "mIoU", 42.55, 0.02], ["panoptic_seg", "PQ", 38.99, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/panoptic_fpn_R_50_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   SEM_SEG_HEAD:
 9 |     LOSS_WEIGHT: 0.5
10 | DATASETS:
11 |   TRAIN: ("coco_2017_val_100_panoptic_separated",)
12 |   TEST: ("coco_2017_val_100_panoptic_separated",)
13 | SOLVER:
14 |   BASE_LR: 0.005
15 |   STEPS: (30,)
16 |   MAX_ITER: 40
17 |   IMS_PER_BATCH: 4
18 | DATALOADER:
19 |   NUM_WORKERS: 1
20 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/panoptic_fpn_R_50_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "PanopticFPN"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   MASK_ON: True
 6 |   RESNETS:
 7 |     DEPTH: 50
 8 |   SEM_SEG_HEAD:
 9 |     LOSS_WEIGHT: 0.5
10 | DATASETS:
11 |   TRAIN: ("coco_2017_val_panoptic_separated",)
12 |   TEST: ("coco_2017_val_panoptic_separated",)
13 | SOLVER:
14 |   BASE_LR: 0.01
15 |   WARMUP_FACTOR: 0.001
16 |   WARMUP_ITERS: 500
17 |   STEPS: (5500,)
18 |   MAX_ITER: 7000
19 | TEST:
20 |   EXPECTED_RESULTS: [["bbox", "AP", 46.70, 1.1], ["segm", "AP", 39.0, 0.7], ["sem_seg", "mIoU", 64.73, 1.3], ["panoptic_seg", "PQ", 48.13, 0.8]]
21 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/retinanet_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_3x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/retinanet_R_50_FPN_3x/190397829/model_final_5bd44e.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["bbox", "AP", 44.45, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/retinanet_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/retinanet_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   TEST: ("coco_2017_val_100",)
 7 | SOLVER:
 8 |   BASE_LR: 0.005
 9 |   STEPS: (30,)
10 |   MAX_ITER: 40
11 |   IMS_PER_BATCH: 4
12 | DATALOADER:
13 |   NUM_WORKERS: 2
14 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
2 | MODEL:
3 |   WEIGHTS: "detectron2://COCO-Detection/rpn_R_50_FPN_1x/137258492/model_final_02ce48.pkl"
4 | DATASETS:
5 |   TEST: ("coco_2017_val_100",)
6 | TEST:
7 |   EXPECTED_RESULTS: [["box_proposals", "AR@1000", 58.16, 0.02]]
8 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/rpn_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../COCO-Detection/rpn_R_50_FPN_1x.yaml"
 2 | MODEL:
 3 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 4 | DATASETS:
 5 |   TRAIN: ("coco_2017_val_100",)
 6 |   TEST: ("coco_2017_val_100",)
 7 | SOLVER:
 8 |   STEPS: (30,)
 9 |   MAX_ITER: 40
10 |   BASE_LR: 0.005
11 |   IMS_PER_BATCH: 4
12 | DATALOADER:
13 |   NUM_WORKERS: 2
14 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/semantic_R_50_FPN_inference_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://semantic_R_50_FPN_1x/111802073/model_final_c18079783c55a94968edc28b7101c5f0.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TEST: ("coco_2017_val_100_panoptic_stuffonly",)
 9 | TEST:
10 |   EXPECTED_RESULTS: [["sem_seg", "mIoU", 39.53, 0.02], ["sem_seg", "mACC", 51.50, 0.02]]
11 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/semantic_R_50_FPN_instant_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val_100_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_100_panoptic_stuffonly",)
10 | INPUT:
11 |   MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
12 | SOLVER:
13 |   BASE_LR: 0.005
14 |   STEPS: (30,)
15 |   MAX_ITER: 40
16 |   IMS_PER_BATCH: 4
17 | DATALOADER:
18 |   NUM_WORKERS: 2
19 | 


--------------------------------------------------------------------------------
/configs/quick_schedules/semantic_R_50_FPN_training_acc_test.yaml:
--------------------------------------------------------------------------------
 1 | _BASE_: "../Base-RCNN-FPN.yaml"
 2 | MODEL:
 3 |   META_ARCHITECTURE: "SemanticSegmentor"
 4 |   WEIGHTS: "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
 5 |   RESNETS:
 6 |     DEPTH: 50
 7 | DATASETS:
 8 |   TRAIN: ("coco_2017_val_panoptic_stuffonly",)
 9 |   TEST: ("coco_2017_val_panoptic_stuffonly",)
10 | SOLVER:
11 |   BASE_LR: 0.01
12 |   WARMUP_FACTOR: 0.001
13 |   WARMUP_ITERS: 300
14 |   STEPS: (5500,)
15 |   MAX_ITER: 7000
16 | TEST:
17 |   EXPECTED_RESULTS: [["sem_seg", "mIoU", 76.51, 1.0], ["sem_seg", "mACC", 83.25, 1.0]]
18 | INPUT:
19 |   # no scale augmentation: train at a single MIN_SIZE_TRAIN value instead of a list of sizes
20 |   MIN_SIZE_TRAIN: (800, )
21 | 


--------------------------------------------------------------------------------
/output/result.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ttanzhiqiang/Detectron2_Project/91bf05ab3b0be0a8e20a244b2729f576160d6953/output/result.jpg


--------------------------------------------------------------------------------
/output/weixin.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ttanzhiqiang/Detectron2_Project/91bf05ab3b0be0a8e20a244b2729f576160d6953/output/weixin.jpg


--------------------------------------------------------------------------------