├── .gitignore ├── .idea ├── .gitignore ├── .name ├── Swin-Transformer-Object-Detection-PaddlePaddle.iml ├── deployment.xml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml └── modules.xml ├── LICENSE ├── README.md ├── configs ├── cascade_rcnn │ ├── README.md │ ├── _base_ │ │ ├── cascade_fpn_reader.yml │ │ ├── cascade_mask_fpn_reader.yml │ │ ├── cascade_mask_rcnn_r50_fpn.yml │ │ ├── cascade_rcnn_r50_fpn.yml │ │ └── optimizer_1x.yml │ ├── cascade_mask_rcnn_r50_fpn_1x_coco.yml │ ├── cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml │ ├── cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml │ ├── cascade_rcnn_r50_fpn_1x_coco.yml │ ├── cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml │ └── cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml ├── datasets │ ├── coco_detection.yml │ ├── coco_instance.yml │ ├── dota.yml │ ├── roadsign_voc.yml │ ├── voc.yml │ └── wider_face.yml ├── dcn │ ├── README.md │ ├── cascade_rcnn_dcn_r50_fpn_1x_coco.yml │ ├── cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml │ ├── faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml │ ├── faster_rcnn_dcn_r50_fpn_1x_coco.yml │ ├── faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml │ ├── faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml │ ├── faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml │ ├── mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml │ ├── mask_rcnn_dcn_r50_fpn_1x_coco.yml │ ├── mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml │ └── mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml ├── dota │ ├── README.md │ ├── _base_ │ │ ├── s2anet.yml │ │ ├── s2anet_optimizer_1x.yml │ │ └── s2anet_reader.yml │ ├── s2anet_1x_dota.yml │ └── s2anet_conv_1x_dota.yml ├── face_detection │ ├── README.md │ ├── _base_ │ │ ├── blazeface.yml │ │ ├── face_reader.yml │ │ └── optimizer_1000e.yml │ └── blazeface_1000e.yml ├── faster_rcnn │ ├── README.md │ ├── _base_ │ │ ├── faster_fpn_reader.yml │ │ ├── faster_rcnn_r50.yml │ │ ├── faster_rcnn_r50_fpn.yml │ │ ├── faster_reader.yml │ │ └── optimizer_1x.yml │ ├── faster_rcnn_r101_1x_coco.yml │ ├── faster_rcnn_r101_fpn_1x_coco.yml │ 
├── faster_rcnn_r101_fpn_2x_coco.yml │ ├── faster_rcnn_r101_vd_fpn_1x_coco.yml │ ├── faster_rcnn_r101_vd_fpn_2x_coco.yml │ ├── faster_rcnn_r34_fpn_1x_coco.yml │ ├── faster_rcnn_r34_vd_fpn_1x_coco.yml │ ├── faster_rcnn_r50_1x_coco.yml │ ├── faster_rcnn_r50_fpn_1x_coco.yml │ ├── faster_rcnn_r50_fpn_2x_coco.yml │ ├── faster_rcnn_r50_vd_1x_coco.yml │ ├── faster_rcnn_r50_vd_fpn_1x_coco.yml │ ├── faster_rcnn_r50_vd_fpn_2x_coco.yml │ ├── faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml │ ├── faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml │ ├── faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml │ └── faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml ├── fcos │ ├── README.md │ ├── _base_ │ │ ├── fcos_r50_fpn.yml │ │ ├── fcos_reader.yml │ │ └── optimizer_1x.yml │ ├── fcos_dcn_r50_fpn_1x_coco.yml │ ├── fcos_r50_fpn_1x_coco.yml │ └── fcos_r50_fpn_multiscale_2x_coco.yml ├── gn │ ├── README.md │ ├── cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml │ ├── cascade_rcnn_r50_fpn_gn_2x_coco.yml │ ├── faster_rcnn_r50_fpn_gn_2x_coco.yml │ └── mask_rcnn_r50_fpn_gn_2x_coco.yml ├── hrnet │ ├── README.md │ ├── _base_ │ │ └── faster_rcnn_hrnetv2p_w18.yml │ ├── faster_rcnn_hrnetv2p_w18_1x_coco.yml │ └── faster_rcnn_hrnetv2p_w18_2x_coco.yml ├── mask_rcnn │ ├── README.md │ ├── _base_ │ │ ├── mask_fpn_reader.yml │ │ ├── mask_rcnn_r50.yml │ │ ├── mask_rcnn_r50_fpn.yml │ │ ├── mask_reader.yml │ │ └── optimizer_1x.yml │ ├── mask_rcnn_r101_fpn_1x_coco.yml │ ├── mask_rcnn_r101_vd_fpn_1x_coco.yml │ ├── mask_rcnn_r50_1x_coco.yml │ ├── mask_rcnn_r50_2x_coco.yml │ ├── mask_rcnn_r50_fpn_1x_coco.yml │ ├── mask_rcnn_r50_fpn_2x_coco.yml │ ├── mask_rcnn_r50_vd_fpn_1x_coco.yml │ ├── mask_rcnn_r50_vd_fpn_2x_coco.yml │ ├── mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml │ ├── mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml │ ├── mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml │ └── mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml ├── pedestrian │ ├── README.md │ ├── README_cn.md │ ├── demo │ │ ├── 001.png │ │ ├── 002.png │ │ ├── 003.png │ │ └── 004.png │ ├── pedestrian.json │ └── 
pedestrian_yolov3_darknet.yml ├── ppyolo │ ├── README.md │ ├── README_cn.md │ ├── _base_ │ │ ├── optimizer_1x.yml │ │ ├── optimizer_2x.yml │ │ ├── optimizer_365e.yml │ │ ├── optimizer_650e.yml │ │ ├── ppyolo_mbv3_large.yml │ │ ├── ppyolo_mbv3_small.yml │ │ ├── ppyolo_r18vd.yml │ │ ├── ppyolo_r50vd_dcn.yml │ │ ├── ppyolo_reader.yml │ │ ├── ppyolo_tiny.yml │ │ ├── ppyolo_tiny_reader.yml │ │ ├── ppyolov2_r50vd_dcn.yml │ │ └── ppyolov2_reader.yml │ ├── ppyolo_mbv3_large_coco.yml │ ├── ppyolo_mbv3_small_coco.yml │ ├── ppyolo_r18vd_coco.yml │ ├── ppyolo_r50vd_dcn_1x_coco.yml │ ├── ppyolo_r50vd_dcn_1x_minicoco.yml │ ├── ppyolo_r50vd_dcn_2x_coco.yml │ ├── ppyolo_r50vd_dcn_voc.yml │ ├── ppyolo_test.yml │ ├── ppyolo_tiny_650e_coco.yml │ ├── ppyolov2_r101vd_dcn_365e_coco.yml │ └── ppyolov2_r50vd_dcn_365e_coco.yml ├── rcnn_enhance │ ├── README.md │ ├── _base_ │ │ ├── faster_rcnn_enhance.yml │ │ ├── faster_rcnn_enhance_reader.yml │ │ └── optimizer_3x.yml │ └── faster_rcnn_enhance_3x_coco.yml ├── runtime.yml ├── slim │ ├── README.md │ ├── distill │ │ ├── README.md │ │ └── yolov3_mobilenet_v1_coco_distill.yml │ ├── extensions │ │ └── yolov3_mobilenet_v1_coco_distill_prune.yml │ ├── prune │ │ ├── yolov3_prune_fpgm.yml │ │ └── yolov3_prune_l1_norm.yml │ └── quant │ │ ├── mask_rcnn_r50_fpn_1x_qat.yml │ │ ├── ssd_mobilenet_v1_qat.yml │ │ ├── yolov3_darknet_qat.yml │ │ ├── yolov3_mobilenet_v1_qat.yml │ │ └── yolov3_mobilenet_v3_qat.yml ├── solov2 │ ├── README.md │ ├── _base_ │ │ ├── optimizer_1x.yml │ │ ├── solov2_r50_fpn.yml │ │ └── solov2_reader.yml │ ├── solov2_r50_fpn_1x_coco.yml │ └── solov2_r50_fpn_3x_coco.yml ├── ssd │ ├── README.md │ ├── _base_ │ │ ├── optimizer_120e.yml │ │ ├── optimizer_1700e.yml │ │ ├── optimizer_240e.yml │ │ ├── ssd_mobilenet_reader.yml │ │ ├── ssd_mobilenet_v1_300.yml │ │ ├── ssd_reader.yml │ │ ├── ssd_vgg16_300.yml │ │ ├── ssdlite300_reader.yml │ │ ├── ssdlite320_reader.yml │ │ ├── ssdlite_ghostnet_320.yml │ │ ├── ssdlite_mobilenet_v1_300.yml │ │ ├── 
ssdlite_mobilenet_v3_large_320.yml │ │ └── ssdlite_mobilenet_v3_small_320.yml │ ├── ssd_mobilenet_v1_300_120e_voc.yml │ ├── ssd_vgg16_300_240e_voc.yml │ ├── ssdlite_ghostnet_320_coco.yml │ ├── ssdlite_mobilenet_v1_300_coco.yml │ ├── ssdlite_mobilenet_v3_large_320_coco.yml │ └── ssdlite_mobilenet_v3_small_320_coco.yml ├── ttfnet │ ├── README.md │ ├── _base_ │ │ ├── optimizer_10x.yml │ │ ├── optimizer_1x.yml │ │ ├── optimizer_20x.yml │ │ ├── pafnet.yml │ │ ├── pafnet_lite.yml │ │ ├── pafnet_lite_reader.yml │ │ ├── pafnet_reader.yml │ │ ├── ttfnet_darknet53.yml │ │ └── ttfnet_reader.yml │ ├── pafnet_10x_coco.yml │ ├── pafnet_lite_mobilenet_v3_20x_coco.yml │ └── ttfnet_darknet53_1x_coco.yml ├── vehicle │ ├── README.md │ ├── README_cn.md │ ├── demo │ │ ├── 001.jpeg │ │ ├── 003.png │ │ ├── 004.png │ │ └── 005.png │ ├── vehicle.json │ └── vehicle_yolov3_darknet.yml └── yolov3 │ ├── README.md │ ├── _base_ │ ├── optimizer_270e.yml │ ├── yolov3_darknet53.yml │ ├── yolov3_mobilenet_v1.yml │ ├── yolov3_mobilenet_v3_large.yml │ ├── yolov3_mobilenet_v3_small.yml │ ├── yolov3_r34.yml │ ├── yolov3_r50vd_dcn.yml │ └── yolov3_reader.yml │ ├── yolov3_darknet53_270e_coco.yml │ ├── yolov3_darknet53_270e_voc.yml │ ├── yolov3_mobilenet_v1_270e_coco.yml │ ├── yolov3_mobilenet_v1_270e_voc.yml │ ├── yolov3_mobilenet_v1_roadsign.yml │ ├── yolov3_mobilenet_v1_ssld_270e_coco.yml │ ├── yolov3_mobilenet_v1_ssld_270e_voc.yml │ ├── yolov3_mobilenet_v3_large_270e_coco.yml │ ├── yolov3_mobilenet_v3_large_270e_voc.yml │ ├── yolov3_mobilenet_v3_large_ssld_270e_voc.yml │ ├── yolov3_r34_270e_coco.yml │ └── yolov3_r50vd_dcn_270e_coco.yml ├── dataset └── voc │ ├── create_list.py │ ├── download_voc.py │ ├── label_list.txt │ ├── test.txt │ └── trainval.txt ├── demo.py ├── deploy ├── BENCHMARK_INFER.md ├── EXPORT_MODEL.md ├── README.md ├── TENSOR_RT.md ├── cpp │ ├── CMakeLists.txt │ ├── README.md │ ├── cmake │ │ └── yaml-cpp.cmake │ ├── docs │ │ ├── Jetson_build.md │ │ ├── linux_build.md │ │ └── 
windows_vs2019_build.md │ ├── include │ │ ├── config_parser.h │ │ ├── object_detector.h │ │ └── preprocess_op.h │ ├── scripts │ │ └── build.sh │ └── src │ │ ├── main.cc │ │ ├── object_detector.cc │ │ └── preprocess_op.cc ├── imgs │ └── input_shape.png ├── python │ ├── README.md │ ├── infer.py │ ├── preprocess.py │ └── visualize.py └── serving │ ├── README.md │ └── test_client.py ├── docs ├── CHANGELOG.md ├── MODEL_ZOO_cn.md ├── advanced_tutorials │ ├── MODEL_TECHNICAL.md │ └── READER.md ├── feature_models │ ├── SSLD_PRETRAINED_MODEL.md │ └── SSLD_PRETRAINED_MODEL_en.md ├── images │ ├── 000000014439.jpg │ ├── 12_Group_Group_12_Group_Group_12_935.jpg │ ├── fps_map.png │ ├── model_figure.png │ ├── reader_figure.png │ ├── road554.png │ └── ssld_model.png └── tutorials │ ├── GETTING_STARTED.md │ ├── GETTING_STARTED_cn.md │ ├── INSTALL.md │ ├── INSTALL_cn.md │ ├── PrepareDataSet.md │ ├── QUICK_STARTED.md │ ├── QUICK_STARTED_cn.md │ └── config_annotation │ ├── faster_rcnn_r50_fpn_1x_coco_annotation.md │ └── ppyolo_r50vd_dcn_1x_coco_annotation.md ├── faster_rcnn_swin_ti.yaml ├── ppdet ├── __init__.py ├── core │ ├── __init__.py │ ├── config │ │ ├── __init__.py │ │ ├── schema.py │ │ └── yaml_helpers.py │ └── workspace.py ├── data │ ├── __init__.py │ ├── reader.py │ ├── shm_utils.py │ ├── source │ │ ├── __init__.py │ │ ├── category.py │ │ ├── coco.py │ │ ├── dataset.py │ │ ├── voc.py │ │ └── widerface.py │ └── transform │ │ ├── __init__.py │ │ ├── autoaugment_utils.py │ │ ├── batch_operators.py │ │ ├── gridmask_utils.py │ │ ├── op_helper.py │ │ └── operators.py ├── engine │ ├── __init__.py │ ├── callbacks.py │ ├── env.py │ ├── export_utils.py │ └── trainer.py ├── ext_op │ ├── README.md │ ├── rbox_iou_op.cc │ ├── rbox_iou_op.cu │ ├── setup.py │ └── test.py ├── metrics │ ├── __init__.py │ ├── coco_utils.py │ ├── json_results.py │ ├── map_utils.py │ ├── metrics.py │ └── widerface_utils.py ├── model_zoo │ ├── __init__.py │ └── model_zoo.py ├── modeling │ ├── __init__.py │ ├── 
architectures │ │ ├── __init__.py │ │ ├── cascade_rcnn.py │ │ ├── faster_rcnn.py │ │ ├── fcos.py │ │ ├── mask_rcnn.py │ │ ├── meta_arch.py │ │ ├── s2anet.py │ │ ├── solov2.py │ │ ├── ssd.py │ │ ├── ttfnet.py │ │ └── yolo.py │ ├── backbones │ │ ├── __init__.py │ │ ├── blazenet.py │ │ ├── darknet.py │ │ ├── ghostnet.py │ │ ├── hrnet.py │ │ ├── mobilenet_v1.py │ │ ├── mobilenet_v3.py │ │ ├── name_adapter.py │ │ ├── resnet.py │ │ ├── senet.py │ │ ├── swin_transformer.py │ │ └── vgg.py │ ├── bbox_utils.py │ ├── heads │ │ ├── __init__.py │ │ ├── bbox_head.py │ │ ├── cascade_head.py │ │ ├── face_head.py │ │ ├── fcos_head.py │ │ ├── mask_head.py │ │ ├── roi_extractor.py │ │ ├── s2anet_head.py │ │ ├── solov2_head.py │ │ ├── ssd_head.py │ │ ├── ttf_head.py │ │ └── yolo_head.py │ ├── layers.py │ ├── losses │ │ ├── __init__.py │ │ ├── ctfocal_loss.py │ │ ├── fcos_loss.py │ │ ├── iou_aware_loss.py │ │ ├── iou_loss.py │ │ ├── solov2_loss.py │ │ ├── ssd_loss.py │ │ └── yolo_loss.py │ ├── necks │ │ ├── __init__.py │ │ ├── fpn.py │ │ ├── hrfpn.py │ │ ├── ttf_fpn.py │ │ └── yolo_fpn.py │ ├── ops.py │ ├── post_process.py │ ├── proposal_generator │ │ ├── __init__.py │ │ ├── anchor_generator.py │ │ ├── proposal_generator.py │ │ ├── rpn_head.py │ │ ├── target.py │ │ └── target_layer.py │ ├── shape_spec.py │ └── tests │ │ ├── __init__.py │ │ ├── test_architectures.py │ │ ├── test_base.py │ │ ├── test_ops.py │ │ ├── test_transfrom.py │ │ └── test_yolov3_loss.py ├── optimizer.py ├── slim │ ├── __init__.py │ ├── distill.py │ ├── prune.py │ └── quant.py └── utils │ ├── __init__.py │ ├── check.py │ ├── checkpoint.py │ ├── cli.py │ ├── colormap.py │ ├── download.py │ ├── logger.py │ ├── stats.py │ ├── visualizer.py │ └── voc_utils.py ├── requirements.txt ├── setup.py ├── sort ├── Detector_ppyolo.py ├── Final_test │ ├── 0.txt │ ├── 1.txt │ ├── 2.txt │ └── 3.txt ├── deep_sort │ ├── __init__.py │ ├── __init__.pyc │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── detection.cpython-37.pyc │ 
│ ├── iou_matching.cpython-37.pyc │ │ ├── kalman_filter.cpython-37.pyc │ │ ├── linear_assignment.cpython-37.pyc │ │ ├── nn_matching.cpython-37.pyc │ │ ├── preprocessing.cpython-37.pyc │ │ ├── track.cpython-37.pyc │ │ └── tracker.cpython-37.pyc │ ├── detection.py │ ├── detection.pyc │ ├── iou_matching.py │ ├── iou_matching.pyc │ ├── kalman_filter.py │ ├── kalman_filter.pyc │ ├── linear_assignment.py │ ├── linear_assignment.pyc │ ├── nn_matching.py │ ├── nn_matching.pyc │ ├── preprocessing.py │ ├── preprocessing.pyc │ ├── track.py │ ├── track.pyc │ ├── tracker.py │ └── tracker.pyc ├── detector_new.py ├── extractor_new.py └── generate_img_path.py ├── swin_transformer_paddle.py ├── swin_transformer_pytorch.py ├── tools ├── anchor_cluster.py ├── eval.py ├── export_model.py ├── infer.py ├── train.py └── x2coco.py └── yolov3_swin_ti.yaml /.idea/.gitignore: -------------------------------------------------------------------------------- 1 | # Default ignored files 2 | /shelf/ 3 | /workspace.xml 4 | # Datasource local storage ignored files 5 | /dataSources/ 6 | /dataSources.local.xml 7 | # Editor-based HTTP Client requests 8 | /httpRequests/ 9 | -------------------------------------------------------------------------------- /.idea/.name: -------------------------------------------------------------------------------- 1 | swin_transformer_paddle.py -------------------------------------------------------------------------------- /.idea/Swin-Transformer-Object-Detection-PaddlePaddle.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- 
/.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 14 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/_base_/cascade_fpn_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: 32, pad_gt: true} 11 | batch_size: 1 12 | shuffle: true 13 | drop_last: true 14 | 15 | 16 | EvalReader: 17 | sample_transforms: 18 | - Decode: {} 19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 21 | - Permute: {} 22 | batch_transforms: 23 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 24 | batch_size: 1 25 | shuffle: false 26 | drop_last: false 27 | drop_empty: false 28 | 29 | 30 | TestReader: 31 | sample_transforms: 32 | - Decode: {} 33 | - Resize: {interp: 2, 
target_size: [800, 1333], keep_ratio: True} 34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 35 | - Permute: {} 36 | batch_transforms: 37 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 38 | batch_size: 1 39 | shuffle: false 40 | drop_last: false 41 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: 32, pad_gt: true} 11 | batch_size: 1 12 | shuffle: true 13 | drop_last: true 14 | 15 | 16 | EvalReader: 17 | sample_transforms: 18 | - Decode: {} 19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 21 | - Permute: {} 22 | batch_transforms: 23 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 24 | batch_size: 1 25 | shuffle: false 26 | drop_last: false 27 | drop_empty: false 28 | 29 | 30 | TestReader: 31 | sample_transforms: 32 | - Decode: {} 33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 35 | - Permute: {} 36 | batch_transforms: 37 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 38 | batch_size: 1 39 | shuffle: false 40 | drop_last: false 41 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml: 
-------------------------------------------------------------------------------- 1 | architecture: CascadeRCNN 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams 3 | 4 | 5 | CascadeRCNN: 6 | backbone: ResNet 7 | neck: FPN 8 | rpn_head: RPNHead 9 | bbox_head: CascadeHead 10 | # post process 11 | bbox_post_process: BBoxPostProcess 12 | 13 | ResNet: 14 | # index 0 stands for res2 15 | depth: 50 16 | norm_type: bn 17 | freeze_at: 0 18 | return_idx: [0,1,2,3] 19 | num_stages: 4 20 | 21 | FPN: 22 | out_channel: 256 23 | 24 | RPNHead: 25 | anchor_generator: 26 | aspect_ratios: [0.5, 1.0, 2.0] 27 | anchor_sizes: [[32], [64], [128], [256], [512]] 28 | strides: [4, 8, 16, 32, 64] 29 | rpn_target_assign: 30 | batch_size_per_im: 256 31 | fg_fraction: 0.5 32 | negative_overlap: 0.3 33 | positive_overlap: 0.7 34 | use_random: True 35 | train_proposal: 36 | min_size: 0.0 37 | nms_thresh: 0.7 38 | pre_nms_top_n: 2000 39 | post_nms_top_n: 2000 40 | topk_after_collect: True 41 | test_proposal: 42 | min_size: 0.0 43 | nms_thresh: 0.7 44 | pre_nms_top_n: 1000 45 | post_nms_top_n: 1000 46 | 47 | 48 | CascadeHead: 49 | head: CascadeTwoFCHead 50 | roi_extractor: 51 | resolution: 7 52 | sampling_ratio: 0 53 | aligned: True 54 | bbox_assigner: BBoxAssigner 55 | 56 | BBoxAssigner: 57 | batch_size_per_im: 512 58 | bg_thresh: 0.5 59 | fg_thresh: 0.5 60 | fg_fraction: 0.25 61 | cascade_iou: [0.5, 0.6, 0.7] 62 | use_random: True 63 | 64 | CascadeTwoFCHead: 65 | out_channel: 1024 66 | 67 | BBoxPostProcess: 68 | decode: 69 | name: RCNNBox 70 | prior_box_var: [30.0, 30.0, 15.0, 15.0] 71 | nms: 72 | name: MultiClassNMS 73 | keep_top_k: 100 74 | score_threshold: 0.05 75 | nms_threshold: 0.5 76 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | 
base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [8, 11] 9 | - !LinearWarmup 10 | start_factor: 0.001 11 | steps: 1000 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/cascade_mask_rcnn_r50_fpn.yml', 6 | '_base_/cascade_mask_fpn_reader.yml', 7 | ] 8 | weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/cascade_mask_rcnn_r50_fpn.yml', 6 | '_base_/cascade_mask_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/cascade_mask_rcnn_r50_fpn.yml', 6 | '_base_/cascade_mask_fpn_reader.yml', 7 | 
] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | 20 | epoch: 24 21 | LearningRate: 22 | base_lr: 0.01 23 | schedulers: 24 | - !PiecewiseDecay 25 | gamma: 0.1 26 | milestones: [12, 22] 27 | - !LinearWarmup 28 | start_factor: 0.1 29 | steps: 1000 30 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/cascade_rcnn_r50_fpn.yml', 6 | '_base_/cascade_fpn_reader.yml', 7 | ] 8 | weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/cascade_rcnn_r50_fpn.yml', 6 | '_base_/cascade_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | -------------------------------------------------------------------------------- /configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml: 
-------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/cascade_rcnn_r50_fpn.yml', 6 | '_base_/cascade_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | 20 | epoch: 24 21 | LearningRate: 22 | base_lr: 0.01 23 | schedulers: 24 | - !PiecewiseDecay 25 | gamma: 0.1 26 | milestones: [12, 22] 27 | - !LinearWarmup 28 | start_factor: 0.1 29 | steps: 1000 30 | -------------------------------------------------------------------------------- /configs/datasets/coco_detection.yml: -------------------------------------------------------------------------------- 1 | metric: COCO 2 | num_classes: 80 3 | 4 | TrainDataset: 5 | !COCODataSet 6 | image_dir: train2017 7 | anno_path: annotations/instances_train2017.json 8 | dataset_dir: dataset/coco 9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] 10 | 11 | EvalDataset: 12 | !COCODataSet 13 | image_dir: val2017 14 | anno_path: annotations/instances_val2017.json 15 | dataset_dir: dataset/coco 16 | 17 | TestDataset: 18 | !ImageFolder 19 | anno_path: annotations/instances_val2017.json 20 | -------------------------------------------------------------------------------- /configs/datasets/coco_instance.yml: -------------------------------------------------------------------------------- 1 | metric: COCO 2 | num_classes: 80 3 | 4 | TrainDataset: 5 | !COCODataSet 6 | image_dir: train2017 7 | anno_path: annotations/instances_train2017.json 8 | dataset_dir: dataset/coco 9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd'] 10 | 11 | EvalDataset: 12 
| !COCODataSet 13 | image_dir: val2017 14 | anno_path: annotations/instances_val2017.json 15 | dataset_dir: dataset/coco 16 | 17 | TestDataset: 18 | !ImageFolder 19 | anno_path: annotations/instances_val2017.json 20 | -------------------------------------------------------------------------------- /configs/datasets/dota.yml: -------------------------------------------------------------------------------- 1 | metric: COCO 2 | num_classes: 15 3 | 4 | TrainDataset: 5 | !COCODataSet 6 | image_dir: trainval_split/images 7 | anno_path: trainval_split/s2anet_trainval_paddle_coco.json 8 | dataset_dir: dataset/DOTA_1024_s2anet 9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox'] 10 | 11 | EvalDataset: 12 | !COCODataSet 13 | image_dir: trainval_split/images 14 | anno_path: trainval_split/s2anet_trainval_paddle_coco.json 15 | dataset_dir: dataset/DOTA_1024_s2anet/ 16 | 17 | TestDataset: 18 | !ImageFolder 19 | anno_path: trainval_split/s2anet_trainval_paddle_coco.json 20 | dataset_dir: dataset/DOTA_1024_s2anet/ 21 | -------------------------------------------------------------------------------- /configs/datasets/roadsign_voc.yml: -------------------------------------------------------------------------------- 1 | metric: VOC 2 | map_type: integral 3 | num_classes: 4 4 | 5 | TrainDataset: 6 | !VOCDataSet 7 | dataset_dir: dataset/roadsign_voc 8 | anno_path: train.txt 9 | label_list: label_list.txt 10 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 11 | 12 | EvalDataset: 13 | !VOCDataSet 14 | dataset_dir: dataset/roadsign_voc 15 | anno_path: valid.txt 16 | label_list: label_list.txt 17 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 18 | 19 | TestDataset: 20 | !ImageFolder 21 | anno_path: dataset/roadsign_voc/label_list.txt 22 | -------------------------------------------------------------------------------- /configs/datasets/voc.yml: -------------------------------------------------------------------------------- 1 | metric: VOC 2 | 
map_type: 11point 3 | num_classes: 20 4 | 5 | TrainDataset: 6 | !VOCDataSet 7 | dataset_dir: dataset/voc 8 | anno_path: trainval.txt 9 | label_list: label_list.txt 10 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 11 | 12 | EvalDataset: 13 | !VOCDataSet 14 | dataset_dir: dataset/voc 15 | anno_path: test.txt 16 | label_list: label_list.txt 17 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 18 | 19 | TestDataset: 20 | !ImageFolder 21 | anno_path: dataset/voc/label_list.txt 22 | -------------------------------------------------------------------------------- /configs/datasets/wider_face.yml: -------------------------------------------------------------------------------- 1 | metric: WiderFace 2 | num_classes: 1 3 | 4 | TrainDataset: 5 | !WIDERFaceDataSet 6 | dataset_dir: dataset/wider_face 7 | anno_path: wider_face_split/wider_face_train_bbx_gt.txt 8 | image_dir: WIDER_train/images 9 | data_fields: ['image', 'gt_bbox', 'gt_class'] 10 | 11 | EvalDataset: 12 | !WIDERFaceDataSet 13 | dataset_dir: dataset/wider_face 14 | anno_path: wider_face_split/wider_face_val_bbx_gt.txt 15 | image_dir: WIDER_val/images 16 | data_fields: ['image'] 17 | 18 | TestDataset: 19 | !ImageFolder 20 | use_default_label: true 21 | -------------------------------------------------------------------------------- /configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '../cascade_rcnn/_base_/optimizer_1x.yml', 5 | '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml', 6 | '../cascade_rcnn/_base_/cascade_fpn_reader.yml', 7 | ] 8 | weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final 9 | 10 | ResNet: 11 | depth: 50 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | dcn_v2_stages: [1,2,3] 17 | -------------------------------------------------------------------------------- 
/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'cascade_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 5 | weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | depth: 101 9 | groups: 64 10 | base_width: 4 11 | variant: d 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | dcn_v2_stages: [1,2,3] 17 | -------------------------------------------------------------------------------- /configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 101 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | dcn_v2_stages: [1,2,3] 16 | -------------------------------------------------------------------------------- /configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '../faster_rcnn/_base_/optimizer_1x.yml', 5 | '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml', 6 | '../faster_rcnn/_base_/faster_fpn_reader.yml', 7 | ] 8 | weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final 9 | 10 | ResNet: 11 | depth: 50 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | dcn_v2_stages: [1,2,3] 17 | -------------------------------------------------------------------------------- 
/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 50 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | dcn_v2_stages: [1,2,3] 16 | -------------------------------------------------------------------------------- /configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 50 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | dcn_v2_stages: [1,2,3] 16 | 17 | epoch: 24 18 | LearningRate: 19 | base_lr: 0.01 20 | schedulers: 21 | - !PiecewiseDecay 22 | gamma: 0.1 23 | milestones: [16, 22] 24 | - !LinearWarmup 25 | start_factor: 0.1 26 | steps: 1000 27 | -------------------------------------------------------------------------------- /configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 5 | weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # for ResNeXt: groups, base_width, base_channels 9 | depth: 101 10 | groups: 64 11 | base_width: 4 12 | 
variant: d 13 | norm_type: bn 14 | freeze_at: 0 15 | return_idx: [0,1,2,3] 16 | num_stages: 4 17 | dcn_v2_stages: [1,2,3] 18 | -------------------------------------------------------------------------------- /configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams 5 | weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 101 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | dcn_v2_stages: [1,2,3] 16 | -------------------------------------------------------------------------------- /configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '../mask_rcnn/_base_/optimizer_1x.yml', 5 | '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml', 6 | '../mask_rcnn/_base_/mask_fpn_reader.yml', 7 | ] 8 | weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final 9 | 10 | ResNet: 11 | depth: 50 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | dcn_v2_stages: [1,2,3] 17 | -------------------------------------------------------------------------------- /configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 5 | weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 50 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | 
num_stages: 4 15 | dcn_v2_stages: [1,2,3] 16 | 17 | epoch: 24 18 | LearningRate: 19 | base_lr: 0.01 20 | schedulers: 21 | - !PiecewiseDecay 22 | gamma: 0.1 23 | milestones: [16, 22] 24 | - !LinearWarmup 25 | start_factor: 0.1 26 | steps: 1000 27 | -------------------------------------------------------------------------------- /configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_dcn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 5 | weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # for ResNeXt: groups, base_width, base_channels 9 | depth: 101 10 | variant: d 11 | groups: 64 12 | base_width: 4 13 | norm_type: bn 14 | freeze_at: 0 15 | return_idx: [0,1,2,3] 16 | num_stages: 4 17 | dcn_v2_stages: [1,2,3] 18 | -------------------------------------------------------------------------------- /configs/dota/_base_/s2anet.yml: -------------------------------------------------------------------------------- 1 | architecture: S2ANet 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams 3 | weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams 4 | 5 | 6 | # Model Architecture 7 | S2ANet: 8 | backbone: ResNet 9 | neck: FPN 10 | s2anet_head: S2ANetHead 11 | s2anet_bbox_post_process: S2ANetBBoxPostProcess 12 | 13 | ResNet: 14 | depth: 50 15 | norm_type: bn 16 | return_idx: [1,2,3] 17 | num_stages: 4 18 | 19 | FPN: 20 | in_channels: [256, 512, 1024] 21 | out_channel: 256 22 | spatial_scales: [0.25, 0.125, 0.0625] 23 | has_extra_convs: True 24 | extra_stage: 2 25 | relu_before_extra_convs: False 26 | 27 | S2ANetHead: 28 | anchor_strides: [8, 16, 32, 64, 128] 29 | anchor_scales: [4] 30 | anchor_ratios: [1.0] 31 | anchor_assign: RBoxAssigner 32 | stacked_convs: 2 33 | feat_in: 256 34 | 
feat_out: 256 35 | num_classes: 15 36 | align_conv_type: 'Conv' # AlignConv Conv 37 | align_conv_size: 3 38 | use_sigmoid_cls: True 39 | 40 | RBoxAssigner: 41 | pos_iou_thr: 0.5 42 | neg_iou_thr: 0.4 43 | min_iou_thr: 0.0 44 | ignore_iof_thr: -2 45 | 46 | S2ANetBBoxPostProcess: 47 | nms_pre: 2000 48 | min_bbox_size: 0.0 49 | nms: 50 | name: MultiClassNMS 51 | keep_top_k: -1 52 | score_threshold: 0.05 53 | nms_threshold: 0.1 54 | normalized: False 55 | #background_label: -1 56 | -------------------------------------------------------------------------------- /configs/dota/_base_/s2anet_optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [7, 10] 9 | - !LinearWarmup 10 | start_factor: 0.3333333333333333 11 | steps: 500 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | clip_grad_by_norm: 35 21 | -------------------------------------------------------------------------------- /configs/dota/_base_/s2anet_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 0 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - Rbox2Poly: {} 6 | # Resize can process rbox 7 | - Resize: {target_size: [1024, 1024], interp: 2, keep_ratio: False} 8 | - RandomFlip: {prob: 0.5} 9 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 10 | - Permute: {} 11 | batch_transforms: 12 | - RboxPadBatch: {pad_to_stride: 32, pad_gt: true} 13 | batch_size: 1 14 | shuffle: true 15 | drop_last: true 16 | 17 | 18 | EvalReader: 19 | sample_transforms: 20 | - Decode: {} 21 | - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True} 22 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 23 | - Permute: {} 24 | 
batch_transforms: 25 | - RboxPadBatch: {pad_to_stride: 32, pad_gt: false} 26 | batch_size: 1 27 | shuffle: false 28 | drop_last: false 29 | drop_empty: false 30 | 31 | 32 | TestReader: 33 | sample_transforms: 34 | - Decode: {} 35 | - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True} 36 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 37 | - Permute: {} 38 | batch_transforms: 39 | - RboxPadBatch: {pad_to_stride: 32, pad_gt: false} 40 | batch_size: 1 41 | shuffle: false 42 | drop_last: false 43 | -------------------------------------------------------------------------------- /configs/dota/s2anet_1x_dota.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/dota.yml', 3 | '../runtime.yml', 4 | '_base_/s2anet_optimizer_1x.yml', 5 | '_base_/s2anet.yml', 6 | '_base_/s2anet_reader.yml', 7 | ] 8 | weights: output/s2anet_1x_dota/model_final 9 | -------------------------------------------------------------------------------- /configs/dota/s2anet_conv_1x_dota.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/dota.yml', 3 | '../runtime.yml', 4 | '_base_/s2anet_optimizer_1x.yml', 5 | '_base_/s2anet.yml', 6 | '_base_/s2anet_reader.yml', 7 | ] 8 | weights: output/s2anet_1x_dota/model_final 9 | 10 | S2ANetHead: 11 | anchor_strides: [8, 16, 32, 64, 128] 12 | anchor_scales: [4] 13 | anchor_ratios: [1.0] 14 | anchor_assign: RBoxAssigner 15 | stacked_convs: 2 16 | feat_in: 256 17 | feat_out: 256 18 | num_classes: 15 19 | align_conv_type: 'Conv' # AlignConv Conv 20 | align_conv_size: 3 21 | use_sigmoid_cls: True 22 | -------------------------------------------------------------------------------- /configs/face_detection/_base_/blazeface.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | 3 | SSD: 4 | backbone: BlazeNet 5 | ssd_head: FaceHead 
6 | post_process: BBoxPostProcess 7 | 8 | BlazeNet: 9 | blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]] 10 | double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96], 11 | [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]] 12 | 13 | FaceHead: 14 | in_channels: [96, 96] 15 | anchor_generator: AnchorGeneratorSSD 16 | loss: SSDLoss 17 | 18 | SSDLoss: 19 | overlap_threshold: 0.35 20 | 21 | AnchorGeneratorSSD: 22 | steps: [8., 16.] 23 | aspect_ratios: [[1.], [1.]] 24 | min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]] 25 | max_sizes: [[], []] 26 | offset: 0.5 27 | flip: False 28 | min_max_aspect_ratios_order: false 29 | 30 | BBoxPostProcess: 31 | decode: 32 | name: SSDBox 33 | nms: 34 | name: MultiClassNMS 35 | keep_top_k: 750 36 | score_threshold: 0.01 37 | nms_threshold: 0.3 38 | nms_top_k: 5000 39 | nms_eta: 1.0 40 | -------------------------------------------------------------------------------- /configs/face_detection/_base_/face_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 90 5 | sample_transforms: 6 | - Decode: {} 7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False} 8 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 9 | - RandomFlip: {} 10 | - CropWithDataAchorSampling: { 11 | anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]], 12 | batch_sampler: [ 13 | [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], 14 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], 15 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], 16 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], 17 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0], 18 | ], 19 | target_size: 640} 20 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 1} 21 | - NormalizeBox: {} 22 | - PadBox: {num_max_boxes: 90} 23 | batch_transforms: 24 | - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 
127.502231], is_scale: false} 25 | - Permute: {} 26 | batch_size: 8 27 | shuffle: true 28 | drop_last: true 29 | 30 | 31 | EvalReader: 32 | sample_transforms: 33 | - Decode: {} 34 | - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false} 35 | - Permute: {} 36 | batch_size: 1 37 | drop_empty: false 38 | 39 | 40 | TestReader: 41 | sample_transforms: 42 | - Decode: {} 43 | - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false} 44 | - Permute: {} 45 | batch_size: 1 46 | -------------------------------------------------------------------------------- /configs/face_detection/_base_/optimizer_1000e.yml: -------------------------------------------------------------------------------- 1 | epoch: 1000 2 | 3 | LearningRate: 4 | base_lr: 0.001 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 333 10 | - 800 11 | - !LinearWarmup 12 | start_factor: 0.3333333333333333 13 | steps: 500 14 | 15 | OptimizerBuilder: 16 | optimizer: 17 | momentum: 0.0 18 | type: RMSProp 19 | regularizer: 20 | factor: 0.0005 21 | type: L2 22 | -------------------------------------------------------------------------------- /configs/face_detection/blazeface_1000e.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/wider_face.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1000e.yml', 5 | '_base_/blazeface.yml', 6 | '_base_/face_reader.yml', 7 | ] 8 | weights: output/blazeface_1000e/model_final 9 | multi_scale_eval: True 10 | -------------------------------------------------------------------------------- /configs/faster_rcnn/_base_/faster_fpn_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], 
interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: 32, pad_gt: true} 11 | batch_size: 1 12 | shuffle: true 13 | drop_last: true 14 | 15 | 16 | EvalReader: 17 | sample_transforms: 18 | - Decode: {} 19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 21 | - Permute: {} 22 | batch_transforms: 23 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 24 | batch_size: 1 25 | shuffle: false 26 | drop_last: false 27 | drop_empty: false 28 | 29 | 30 | TestReader: 31 | sample_transforms: 32 | - Decode: {} 33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 35 | - Permute: {} 36 | batch_transforms: 37 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 38 | batch_size: 1 39 | shuffle: false 40 | drop_last: false 41 | -------------------------------------------------------------------------------- /configs/faster_rcnn/_base_/faster_rcnn_r50.yml: -------------------------------------------------------------------------------- 1 | architecture: FasterRCNN 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams 3 | 4 | FasterRCNN: 5 | backbone: ResNet 6 | rpn_head: RPNHead 7 | bbox_head: BBoxHead 8 | # post process 9 | bbox_post_process: BBoxPostProcess 10 | 11 | 12 | ResNet: 13 | # index 0 stands for res2 14 | depth: 50 15 | norm_type: bn 16 | freeze_at: 0 17 | return_idx: [2] 18 | num_stages: 3 19 | 20 | RPNHead: 21 | anchor_generator: 22 | aspect_ratios: [0.5, 1.0, 2.0] 23 | anchor_sizes: [32, 64, 128, 256, 512] 24 | strides: [16] 25 | rpn_target_assign: 26 | batch_size_per_im: 256 27 | fg_fraction: 0.5 28 | negative_overlap: 0.3 29 | 
positive_overlap: 0.7 30 | use_random: True 31 | train_proposal: 32 | min_size: 0.0 33 | nms_thresh: 0.7 34 | pre_nms_top_n: 12000 35 | post_nms_top_n: 2000 36 | topk_after_collect: False 37 | test_proposal: 38 | min_size: 0.0 39 | nms_thresh: 0.7 40 | pre_nms_top_n: 6000 41 | post_nms_top_n: 1000 42 | 43 | 44 | BBoxHead: 45 | head: Res5Head 46 | roi_extractor: 47 | resolution: 14 48 | sampling_ratio: 0 49 | aligned: True 50 | bbox_assigner: BBoxAssigner 51 | with_pool: true 52 | 53 | BBoxAssigner: 54 | batch_size_per_im: 512 55 | bg_thresh: 0.5 56 | fg_thresh: 0.5 57 | fg_fraction: 0.25 58 | use_random: True 59 | 60 | BBoxPostProcess: 61 | decode: RCNNBox 62 | nms: 63 | name: MultiClassNMS 64 | keep_top_k: 100 65 | score_threshold: 0.05 66 | nms_threshold: 0.5 67 | -------------------------------------------------------------------------------- /configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml: -------------------------------------------------------------------------------- 1 | architecture: FasterRCNN 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams 3 | 4 | FasterRCNN: 5 | backbone: ResNet 6 | neck: FPN 7 | rpn_head: RPNHead 8 | bbox_head: BBoxHead 9 | # post process 10 | bbox_post_process: BBoxPostProcess 11 | 12 | 13 | ResNet: 14 | # index 0 stands for res2 15 | depth: 50 16 | norm_type: bn 17 | freeze_at: 0 18 | return_idx: [0,1,2,3] 19 | num_stages: 4 20 | 21 | FPN: 22 | out_channel: 256 23 | 24 | RPNHead: 25 | anchor_generator: 26 | aspect_ratios: [0.5, 1.0, 2.0] 27 | anchor_sizes: [[32], [64], [128], [256], [512]] 28 | strides: [4, 8, 16, 32, 64] 29 | rpn_target_assign: 30 | batch_size_per_im: 256 31 | fg_fraction: 0.5 32 | negative_overlap: 0.3 33 | positive_overlap: 0.7 34 | use_random: True 35 | train_proposal: 36 | min_size: 0.0 37 | nms_thresh: 0.7 38 | pre_nms_top_n: 2000 39 | post_nms_top_n: 1000 40 | topk_after_collect: True 41 | test_proposal: 42 | min_size: 0.0 43 | nms_thresh: 0.7 44 | 
pre_nms_top_n: 1000 45 | post_nms_top_n: 1000 46 | 47 | 48 | BBoxHead: 49 | head: TwoFCHead 50 | roi_extractor: 51 | resolution: 7 52 | sampling_ratio: 0 53 | aligned: True 54 | bbox_assigner: BBoxAssigner 55 | 56 | BBoxAssigner: 57 | batch_size_per_im: 512 58 | bg_thresh: 0.5 59 | fg_thresh: 0.5 60 | fg_fraction: 0.25 61 | use_random: True 62 | 63 | TwoFCHead: 64 | out_channel: 1024 65 | 66 | 67 | BBoxPostProcess: 68 | decode: RCNNBox 69 | nms: 70 | name: MultiClassNMS 71 | keep_top_k: 100 72 | score_threshold: 0.05 73 | nms_threshold: 0.5 74 | -------------------------------------------------------------------------------- /configs/faster_rcnn/_base_/faster_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: -1, pad_gt: true} 11 | batch_size: 1 12 | shuffle: true 13 | drop_last: true 14 | 15 | 16 | EvalReader: 17 | sample_transforms: 18 | - Decode: {} 19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 21 | - Permute: {} 22 | batch_transforms: 23 | - PadBatch: {pad_to_stride: -1, pad_gt: false} 24 | batch_size: 1 25 | shuffle: false 26 | drop_last: false 27 | drop_empty: false 28 | 29 | 30 | TestReader: 31 | sample_transforms: 32 | - Decode: {} 33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 35 | - Permute: {} 36 | batch_transforms: 37 | - PadBatch: {pad_to_stride: -1} 38 | batch_size: 1 39 
| shuffle: false 40 | drop_last: false 41 | -------------------------------------------------------------------------------- /configs/faster_rcnn/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [8, 11] 9 | - !LinearWarmup 10 | start_factor: 0.1 11 | steps: 1000 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams 6 | weights: output/faster_rcnn_r101_1x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 101 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [2] 14 | num_stages: 3 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams 6 | weights: output/faster_rcnn_r101_fpn_1x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 101 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 
'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams 6 | weights: output/faster_rcnn_r101_fpn_2x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 101 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | 16 | epoch: 24 17 | LearningRate: 18 | base_lr: 0.01 19 | schedulers: 20 | - !PiecewiseDecay 21 | gamma: 0.1 22 | milestones: [16, 22] 23 | - !LinearWarmup 24 | start_factor: 0.1 25 | steps: 1000 26 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_r101_vd_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 101 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_r101_vd_fpn_2x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 101 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | 16 | epoch: 24 17 | LearningRate: 18 | base_lr: 0.01 19 | schedulers: 20 | - !PiecewiseDecay 21 | gamma: 0.1 22 | milestones: [16, 22] 23 | - !LinearWarmup 24 | start_factor: 0.1 25 | steps: 1000 26 | 
-------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams 6 | weights: output/faster_rcnn_r34_fpn_1x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 34 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_vd_pretrained.pdparams 6 | weights: output/faster_rcnn_r34_vd_fpn_1x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 34 11 | variant: d 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/faster_rcnn_r50.yml', 6 | '_base_/faster_reader.yml', 7 | ] 8 | weights: output/faster_rcnn_r50_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | 
'_base_/faster_rcnn_r50_fpn.yml', 6 | '_base_/faster_fpn_reader.yml', 7 | ] 8 | weights: output/faster_rcnn_r50_fpn_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | weights: output/faster_rcnn_r50_fpn_2x_coco/model_final 5 | 6 | epoch: 24 7 | LearningRate: 8 | base_lr: 0.01 9 | schedulers: 10 | - !PiecewiseDecay 11 | gamma: 0.1 12 | milestones: [16, 22] 13 | - !LinearWarmup 14 | start_factor: 0.1 15 | steps: 1000 16 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_r50_vd_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 50 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [2] 14 | num_stages: 3 15 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_r50_vd_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 50 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | -------------------------------------------------------------------------------- 
/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 5 | weights: output/faster_rcnn_r50_vd_fpn_2x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 50 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | 16 | epoch: 24 17 | LearningRate: 18 | base_lr: 0.01 19 | schedulers: 20 | - !PiecewiseDecay 21 | gamma: 0.1 22 | milestones: [16, 22] 23 | - !LinearWarmup 24 | start_factor: 0.1 25 | steps: 1000 26 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/faster_rcnn_r50_fpn.yml', 6 | '_base_/faster_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/faster_rcnn_r50_vd_fpn_ssld_1x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | 20 | epoch: 12 21 | LearningRate: 22 | base_lr: 0.01 23 | schedulers: 24 | - !PiecewiseDecay 25 | gamma: 0.1 26 | milestones: [8, 11] 27 | - !LinearWarmup 28 | start_factor: 0.1 29 | steps: 1000 30 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | 
'_base_/optimizer_1x.yml', 5 | '_base_/faster_rcnn_r50_fpn.yml', 6 | '_base_/faster_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/faster_rcnn_r50_vd_fpn_ssld_2x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | 20 | epoch: 24 21 | LearningRate: 22 | base_lr: 0.01 23 | schedulers: 24 | - !PiecewiseDecay 25 | gamma: 0.1 26 | milestones: [12, 22] 27 | - !LinearWarmup 28 | start_factor: 0.1 29 | steps: 1000 30 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 6 | weights: output/faster_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final 7 | 8 | ResNet: 9 | # for ResNeXt: groups, base_width, base_channels 10 | depth: 101 11 | groups: 64 12 | base_width: 4 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | -------------------------------------------------------------------------------- /configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'faster_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 6 | weights: output/faster_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final 7 | 8 | ResNet: 9 | # for ResNeXt: groups, base_width, base_channels 10 | depth: 101 11 | groups: 64 12 | base_width: 4 13 | variant: d 14 | norm_type: bn 15 | 
freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | 19 | epoch: 24 20 | LearningRate: 21 | base_lr: 0.01 22 | schedulers: 23 | - !PiecewiseDecay 24 | gamma: 0.1 25 | milestones: [16, 22] 26 | - !LinearWarmup 27 | start_factor: 0.1 28 | steps: 1000 29 | -------------------------------------------------------------------------------- /configs/fcos/_base_/fcos_r50_fpn.yml: -------------------------------------------------------------------------------- 1 | architecture: FCOS 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams 3 | 4 | FCOS: 5 | backbone: ResNet 6 | neck: FPN 7 | fcos_head: FCOSHead 8 | fcos_post_process: FCOSPostProcess 9 | 10 | ResNet: 11 | # index 0 stands for res2 12 | depth: 50 13 | norm_type: bn 14 | freeze_at: 0 15 | return_idx: [1,2,3] 16 | num_stages: 4 17 | 18 | FPN: 19 | out_channel: 256 20 | spatial_scales: [0.125, 0.0625, 0.03125] 21 | extra_stage: 2 22 | has_extra_convs: true 23 | use_c5: false 24 | 25 | FCOSHead: 26 | fcos_feat: 27 | name: FCOSFeat 28 | feat_in: 256 29 | feat_out: 256 30 | num_convs: 4 31 | norm_type: "gn" 32 | use_dcn: false 33 | num_classes: 80 34 | fpn_stride: [8, 16, 32, 64, 128] 35 | prior_prob: 0.01 36 | fcos_loss: FCOSLoss 37 | norm_reg_targets: true 38 | centerness_on_reg: true 39 | 40 | FCOSLoss: 41 | loss_alpha: 0.25 42 | loss_gamma: 2.0 43 | iou_loss_type: "giou" 44 | reg_weights: 1.0 45 | 46 | FCOSPostProcess: 47 | decode: 48 | name: FCOSBox 49 | num_classes: 80 50 | nms: 51 | name: MultiClassNMS 52 | nms_top_k: 1000 53 | keep_top_k: 100 54 | score_threshold: 0.025 55 | nms_threshold: 0.6 56 | -------------------------------------------------------------------------------- /configs/fcos/_base_/fcos_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomFlip: {prob: 0.5} 6 | - NormalizeImage: {is_scale: true, mean: 
[0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 7 | - Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: 128} 11 | - Gt2FCOSTarget: 12 | object_sizes_boundary: [64, 128, 256, 512] 13 | center_sampling_radius: 1.5 14 | downsample_ratios: [8, 16, 32, 64, 128] 15 | norm_reg_targets: True 16 | batch_size: 2 17 | shuffle: true 18 | drop_last: true 19 | 20 | 21 | EvalReader: 22 | sample_transforms: 23 | - Decode: {} 24 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 25 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True} 26 | - Permute: {} 27 | batch_transforms: 28 | - PadBatch: {pad_to_stride: 128} 29 | batch_size: 1 30 | shuffle: false 31 | 32 | 33 | TestReader: 34 | sample_transforms: 35 | - Decode: {} 36 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 37 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True} 38 | - Permute: {} 39 | batch_transforms: 40 | - PadBatch: {pad_to_stride: 128} 41 | batch_size: 1 42 | shuffle: false 43 | -------------------------------------------------------------------------------- /configs/fcos/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [8, 11] 9 | - !LinearWarmup 10 | start_factor: 0.3333333333333333 11 | steps: 500 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/fcos_r50_fpn.yml', 
5 | '_base_/optimizer_1x.yml', 6 | '_base_/fcos_reader.yml', 7 | ] 8 | 9 | weights: output/fcos_dcn_r50_fpn_1x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | norm_type: bn 14 | freeze_at: 0 15 | return_idx: [1,2,3] 16 | num_stages: 4 17 | dcn_v2_stages: [1,2,3] 18 | 19 | FCOSHead: 20 | fcos_feat: 21 | name: FCOSFeat 22 | feat_in: 256 23 | feat_out: 256 24 | num_convs: 4 25 | norm_type: "gn" 26 | use_dcn: true 27 | num_classes: 80 28 | fpn_stride: [8, 16, 32, 64, 128] 29 | prior_prob: 0.01 30 | fcos_loss: FCOSLoss 31 | norm_reg_targets: true 32 | centerness_on_reg: true 33 | -------------------------------------------------------------------------------- /configs/fcos/fcos_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/fcos_r50_fpn.yml', 5 | '_base_/optimizer_1x.yml', 6 | '_base_/fcos_reader.yml', 7 | ] 8 | 9 | weights: output/fcos_r50_fpn_1x_coco/model_final 10 | -------------------------------------------------------------------------------- /configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/fcos_r50_fpn.yml', 5 | '_base_/optimizer_1x.yml', 6 | '_base_/fcos_reader.yml', 7 | ] 8 | 9 | weights: output/fcos_r50_fpn_multiscale_2x_coco/model_final 10 | 11 | TrainReader: 12 | sample_transforms: 13 | - Decode: {} 14 | - RandomFlip: {prob: 0.5} 15 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 16 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1} 17 | - Permute: {} 18 | batch_transforms: 19 | - PadBatch: {pad_to_stride: 128} 20 | - Gt2FCOSTarget: 21 | object_sizes_boundary: [64, 128, 256, 512] 22 | center_sampling_radius: 1.5 23 | 
downsample_ratios: [8, 16, 32, 64, 128] 24 | norm_reg_targets: True 25 | batch_size: 2 26 | shuffle: true 27 | drop_last: true 28 | 29 | epoch: 24 30 | 31 | LearningRate: 32 | base_lr: 0.01 33 | schedulers: 34 | - !PiecewiseDecay 35 | gamma: 0.1 36 | milestones: [16, 22] 37 | - !LinearWarmup 38 | start_factor: 0.3333333333333333 39 | steps: 500 40 | -------------------------------------------------------------------------------- /configs/gn/README.md: -------------------------------------------------------------------------------- 1 | # Group Normalization 2 | 3 | ## Model Zoo 4 | 5 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 | 6 | | :------------- | :------------- | :-----------: | :------: | :--------: |:-----: | :-----: | :----: | :----: | 7 | | ResNet50-FPN | Faster | 1 | 2x | - | 41.9 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml) | 8 | | ResNet50-FPN | Mask | 1 | 2x | - | 42.3 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml) | 9 | | ResNet50-FPN | Cascade Faster | 1 | 2x | - | 44.6 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml) | 10 | | ResNet50-FPN | Cascade Mask | 1 | 2x | - | 45.0 | 39.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml) | 11 | 12 | 13 | **注意:** Faster R-CNN baseline仅使用 `2fc` head,而此处使用[`4conv1fc` 
head](https://arxiv.org/abs/1803.08494)(4层conv之间使用GN),并且FPN也使用GN,而对于Mask R-CNN是在mask head的4层conv之间也使用GN。 14 | 15 | ## Citations 16 | ``` 17 | @inproceedings{wu2018group, 18 | title={Group Normalization}, 19 | author={Wu, Yuxin and He, Kaiming}, 20 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)}, 21 | year={2018} 22 | } 23 | ``` 24 | -------------------------------------------------------------------------------- /configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '../cascade_rcnn/_base_/optimizer_1x.yml', 5 | '../cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml', 6 | '../cascade_rcnn/_base_/cascade_mask_fpn_reader.yml', 7 | ] 8 | weights: output/cascade_mask_rcnn_r50_fpn_gn_2x_coco/model_final 9 | 10 | CascadeRCNN: 11 | backbone: ResNet 12 | neck: FPN 13 | rpn_head: RPNHead 14 | bbox_head: CascadeHead 15 | mask_head: MaskHead 16 | # post process 17 | bbox_post_process: BBoxPostProcess 18 | mask_post_process: MaskPostProcess 19 | 20 | FPN: 21 | out_channel: 256 22 | norm_type: gn 23 | 24 | CascadeHead: 25 | head: CascadeXConvNormHead 26 | roi_extractor: 27 | resolution: 7 28 | sampling_ratio: 0 29 | aligned: True 30 | bbox_assigner: BBoxAssigner 31 | 32 | CascadeXConvNormHead: 33 | num_convs: 4 34 | out_channel: 1024 35 | norm_type: gn 36 | 37 | MaskHead: 38 | head: MaskFeat 39 | roi_extractor: 40 | resolution: 14 41 | sampling_ratio: 0 42 | aligned: True 43 | mask_assigner: MaskAssigner 44 | share_bbox_feat: False 45 | 46 | MaskFeat: 47 | num_convs: 4 48 | out_channel: 256 49 | norm_type: gn 50 | 51 | 52 | epoch: 24 53 | LearningRate: 54 | base_lr: 0.01 55 | schedulers: 56 | - !PiecewiseDecay 57 | gamma: 0.1 58 | milestones: [16, 22] 59 | - !LinearWarmup 60 | start_factor: 0.1 61 | steps: 1000 62 | 
-------------------------------------------------------------------------------- /configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '../cascade_rcnn/_base_/optimizer_1x.yml', 5 | '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml', 6 | '../cascade_rcnn/_base_/cascade_fpn_reader.yml', 7 | ] 8 | weights: output/cascade_rcnn_r50_fpn_gn_2x_coco/model_final 9 | 10 | FPN: 11 | out_channel: 256 12 | norm_type: gn 13 | 14 | CascadeHead: 15 | head: CascadeXConvNormHead 16 | roi_extractor: 17 | resolution: 7 18 | sampling_ratio: 0 19 | aligned: True 20 | bbox_assigner: BBoxAssigner 21 | 22 | CascadeXConvNormHead: 23 | num_convs: 4 24 | out_channel: 1024 25 | norm_type: gn 26 | 27 | 28 | epoch: 24 29 | LearningRate: 30 | base_lr: 0.01 31 | schedulers: 32 | - !PiecewiseDecay 33 | gamma: 0.1 34 | milestones: [16, 22] 35 | - !LinearWarmup 36 | start_factor: 0.1 37 | steps: 1000 38 | -------------------------------------------------------------------------------- /configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '../faster_rcnn/_base_/optimizer_1x.yml', 5 | '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml', 6 | '../faster_rcnn/_base_/faster_fpn_reader.yml', 7 | ] 8 | weights: output/faster_rcnn_r50_fpn_gn_2x_coco/model_final 9 | 10 | FasterRCNN: 11 | backbone: ResNet 12 | neck: FPN 13 | rpn_head: RPNHead 14 | bbox_head: BBoxHead 15 | # post process 16 | bbox_post_process: BBoxPostProcess 17 | 18 | FPN: 19 | out_channel: 256 20 | norm_type: gn 21 | 22 | BBoxHead: 23 | head: XConvNormHead 24 | roi_extractor: 25 | resolution: 7 26 | sampling_ratio: 0 27 | aligned: True 28 | bbox_assigner: BBoxAssigner 29 | 30 | XConvNormHead: 31 | num_convs: 4 32 | out_channel: 1024 33 | 
norm_type: gn 34 | 35 | 36 | epoch: 24 37 | LearningRate: 38 | base_lr: 0.01 39 | schedulers: 40 | - !PiecewiseDecay 41 | gamma: 0.1 42 | milestones: [16, 22] 43 | - !LinearWarmup 44 | start_factor: 0.1 45 | steps: 1000 46 | -------------------------------------------------------------------------------- /configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '../mask_rcnn/_base_/optimizer_1x.yml', 5 | '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml', 6 | '../mask_rcnn/_base_/mask_fpn_reader.yml', 7 | ] 8 | weights: output/mask_rcnn_r50_fpn_gn_2x_coco/model_final 9 | 10 | MaskRCNN: 11 | backbone: ResNet 12 | neck: FPN 13 | rpn_head: RPNHead 14 | bbox_head: BBoxHead 15 | mask_head: MaskHead 16 | # post process 17 | bbox_post_process: BBoxPostProcess 18 | mask_post_process: MaskPostProcess 19 | 20 | FPN: 21 | out_channel: 256 22 | norm_type: gn 23 | 24 | BBoxHead: 25 | head: XConvNormHead 26 | roi_extractor: 27 | resolution: 7 28 | sampling_ratio: 0 29 | aligned: True 30 | bbox_assigner: BBoxAssigner 31 | 32 | XConvNormHead: 33 | num_convs: 4 34 | out_channel: 1024 35 | norm_type: gn 36 | 37 | MaskHead: 38 | head: MaskFeat 39 | roi_extractor: 40 | resolution: 14 41 | sampling_ratio: 0 42 | aligned: True 43 | mask_assigner: MaskAssigner 44 | share_bbox_feat: False 45 | 46 | MaskFeat: 47 | num_convs: 4 48 | out_channel: 256 49 | norm_type: gn 50 | 51 | 52 | epoch: 24 53 | LearningRate: 54 | base_lr: 0.01 55 | schedulers: 56 | - !PiecewiseDecay 57 | gamma: 0.1 58 | milestones: [16, 22] 59 | - !LinearWarmup 60 | start_factor: 0.1 61 | steps: 1000 62 | -------------------------------------------------------------------------------- /configs/hrnet/_base_/faster_rcnn_hrnetv2p_w18.yml: -------------------------------------------------------------------------------- 1 | architecture: FasterRCNN 2 | pretrain_weights: 
https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams 3 | 4 | FasterRCNN: 5 | backbone: HRNet 6 | neck: HRFPN 7 | rpn_head: RPNHead 8 | bbox_head: BBoxHead 9 | # post process 10 | bbox_post_process: BBoxPostProcess 11 | 12 | HRNet: 13 | width: 18 14 | freeze_at: 0 15 | return_idx: [0, 1, 2, 3] 16 | 17 | HRFPN: 18 | out_channel: 256 19 | share_conv: false 20 | 21 | RPNHead: 22 | anchor_generator: 23 | aspect_ratios: [0.5, 1.0, 2.0] 24 | anchor_sizes: [[32], [64], [128], [256], [512]] 25 | strides: [4, 8, 16, 32, 64] 26 | rpn_target_assign: 27 | batch_size_per_im: 256 28 | fg_fraction: 0.5 29 | negative_overlap: 0.3 30 | positive_overlap: 0.7 31 | use_random: True 32 | train_proposal: 33 | min_size: 0.0 34 | nms_thresh: 0.7 35 | pre_nms_top_n: 2000 36 | post_nms_top_n: 2000 37 | topk_after_collect: True 38 | test_proposal: 39 | min_size: 0.0 40 | nms_thresh: 0.7 41 | pre_nms_top_n: 1000 42 | post_nms_top_n: 1000 43 | 44 | BBoxHead: 45 | head: TwoFCHead 46 | roi_extractor: 47 | resolution: 7 48 | sampling_ratio: 0 49 | aligned: True 50 | bbox_assigner: BBoxAssigner 51 | 52 | BBoxAssigner: 53 | batch_size_per_im: 512 54 | bg_thresh: 0.5 55 | fg_thresh: 0.5 56 | fg_fraction: 0.25 57 | use_random: True 58 | 59 | TwoFCHead: 60 | out_channel: 1024 61 | 62 | BBoxPostProcess: 63 | decode: RCNNBox 64 | nms: 65 | name: MultiClassNMS 66 | keep_top_k: 100 67 | score_threshold: 0.05 68 | nms_threshold: 0.5 69 | -------------------------------------------------------------------------------- /configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | './_base_/faster_rcnn_hrnetv2p_w18.yml', 4 | '../faster_rcnn/_base_/optimizer_1x.yml', 5 | '../faster_rcnn/_base_/faster_fpn_reader.yml', 6 | '../runtime.yml', 7 | ] 8 | 9 | weights: output/faster_rcnn_hrnetv2p_w18_1x_coco/model_final 10 | epoch: 12 11 | 12 | LearningRate: 13 | 
base_lr: 0.02 14 | schedulers: 15 | - !PiecewiseDecay 16 | gamma: 0.1 17 | milestones: [8, 11] 18 | - !LinearWarmup 19 | start_factor: 0.1 20 | steps: 1000 21 | 22 | TrainReader: 23 | batch_size: 2 24 | -------------------------------------------------------------------------------- /configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | './_base_/faster_rcnn_hrnetv2p_w18.yml', 4 | '../faster_rcnn/_base_/optimizer_1x.yml', 5 | '../faster_rcnn/_base_/faster_fpn_reader.yml', 6 | '../runtime.yml', 7 | ] 8 | 9 | weights: output/faster_rcnn_hrnetv2p_w18_2x_coco/model_final 10 | epoch: 24 11 | 12 | LearningRate: 13 | base_lr: 0.02 14 | schedulers: 15 | - !PiecewiseDecay 16 | gamma: 0.1 17 | milestones: [16, 22] 18 | - !LinearWarmup 19 | start_factor: 0.1 20 | steps: 1000 21 | 22 | TrainReader: 23 | batch_size: 2 24 | -------------------------------------------------------------------------------- /configs/mask_rcnn/_base_/mask_fpn_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: 32, pad_gt: true} 11 | batch_size: 1 12 | shuffle: true 13 | drop_last: true 14 | 15 | EvalReader: 16 | sample_transforms: 17 | - Decode: {} 18 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 19 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 20 | - Permute: {} 21 | batch_transforms: 22 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 23 | batch_size: 1 
24 | shuffle: false 25 | drop_last: false 26 | drop_empty: false 27 | 28 | 29 | TestReader: 30 | sample_transforms: 31 | - Decode: {} 32 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 33 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 34 | - Permute: {} 35 | batch_transforms: 36 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 37 | batch_size: 1 38 | shuffle: false 39 | drop_last: false 40 | -------------------------------------------------------------------------------- /configs/mask_rcnn/_base_/mask_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 8 | - Permute: {} 9 | batch_transforms: 10 | - PadBatch: {pad_to_stride: -1, pad_gt: true} 11 | batch_size: 1 12 | shuffle: true 13 | drop_last: true 14 | 15 | 16 | EvalReader: 17 | sample_transforms: 18 | - Decode: {} 19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 21 | - Permute: {} 22 | batch_transforms: 23 | - PadBatch: {pad_to_stride: -1} 24 | batch_size: 1 25 | shuffle: false 26 | drop_last: false 27 | drop_empty: false 28 | 29 | 30 | TestReader: 31 | sample_transforms: 32 | - Decode: {} 33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True} 34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 35 | - Permute: {} 36 | batch_transforms: 37 | - PadBatch: {pad_to_stride: -1} 38 | batch_size: 1 39 | shuffle: false 40 | drop_last: false 41 | drop_empty: false 42 | 
-------------------------------------------------------------------------------- /configs/mask_rcnn/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [8, 11] 9 | - !LinearWarmup 10 | start_factor: 0.001 11 | steps: 1000 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams 5 | weights: output/mask_rcnn_r101_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 101 10 | norm_type: bn 11 | freeze_at: 0 12 | return_idx: [0,1,2,3] 13 | num_stages: 4 14 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams 5 | weights: output/mask_rcnn_r101_vd_fpn_1x_coco/model_final 6 | 7 | ResNet: 8 | # index 0 stands for res2 9 | depth: 101 10 | variant: d 11 | norm_type: bn 12 | freeze_at: 0 13 | return_idx: [0,1,2,3] 14 | num_stages: 4 15 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | 
'../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/mask_rcnn_r50.yml', 6 | '_base_/mask_reader.yml', 7 | ] 8 | weights: output/mask_rcnn_r50_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_1x_coco.yml', 3 | ] 4 | weights: output/mask_rcnn_r50_2x_coco/model_final 5 | 6 | epoch: 24 7 | LearningRate: 8 | base_lr: 0.01 9 | schedulers: 10 | - !PiecewiseDecay 11 | gamma: 0.1 12 | milestones: [16, 22] 13 | - !LinearWarmup 14 | start_factor: 0.3333333333333333 15 | steps: 500 16 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/mask_rcnn_r50_fpn.yml', 6 | '_base_/mask_fpn_reader.yml', 7 | ] 8 | weights: output/mask_rcnn_r50_fpn_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | weights: output/mask_rcnn_r50_fpn_2x_coco/model_final 5 | 6 | epoch: 24 7 | LearningRate: 8 | base_lr: 0.01 9 | schedulers: 10 | - !PiecewiseDecay 11 | gamma: 0.1 12 | milestones: [16, 22] 13 | - !LinearWarmup 14 | start_factor: 0.3333333333333333 15 | steps: 500 16 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: 
https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 6 | weights: output/mask_rcnn_r50_vd_fpn_1x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 50 11 | variant: d 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams 6 | weights: output/mask_rcnn_r50_vd_fpn_2x_coco/model_final 7 | 8 | ResNet: 9 | # index 0 stands for res2 10 | depth: 50 11 | variant: d 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | 17 | epoch: 24 18 | LearningRate: 19 | base_lr: 0.01 20 | schedulers: 21 | - !PiecewiseDecay 22 | gamma: 0.1 23 | milestones: [16, 22] 24 | - !LinearWarmup 25 | start_factor: 0.3333333333333333 26 | steps: 500 27 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/mask_rcnn_r50_fpn.yml', 6 | '_base_/mask_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | 20 | epoch: 12 21 | LearningRate: 22 | base_lr: 0.01 23 | schedulers: 24 | - !PiecewiseDecay 25 | gamma: 0.1 26 
| milestones: [8, 11] 27 | - !LinearWarmup 28 | start_factor: 0.1 29 | steps: 1000 30 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/mask_rcnn_r50_fpn.yml', 6 | '_base_/mask_fpn_reader.yml', 7 | ] 8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 9 | weights: output/mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 19 | 20 | epoch: 24 21 | LearningRate: 22 | base_lr: 0.01 23 | schedulers: 24 | - !PiecewiseDecay 25 | gamma: 0.1 26 | milestones: [12, 22] 27 | - !LinearWarmup 28 | start_factor: 0.1 29 | steps: 1000 30 | -------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 6 | weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final 7 | 8 | ResNet: 9 | # for ResNeXt: groups, base_width, base_channels 10 | depth: 101 11 | variant: d 12 | groups: 64 13 | base_width: 4 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | 19 | epoch: 12 20 | LearningRate: 21 | base_lr: 0.01 22 | schedulers: 23 | - !PiecewiseDecay 24 | gamma: 0.1 25 | milestones: [8, 11] 26 | - !LinearWarmup 27 | start_factor: 0.1 28 | steps: 1000 29 | 
-------------------------------------------------------------------------------- /configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | 'mask_rcnn_r50_fpn_1x_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams 6 | weights: output/mask_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final 7 | 8 | ResNet: 9 | # for ResNeXt: groups, base_width, base_channels 10 | depth: 101 11 | variant: d 12 | groups: 64 13 | base_width: 4 14 | norm_type: bn 15 | freeze_at: 0 16 | return_idx: [0,1,2,3] 17 | num_stages: 4 18 | 19 | epoch: 24 20 | LearningRate: 21 | base_lr: 0.01 22 | schedulers: 23 | - !PiecewiseDecay 24 | gamma: 0.1 25 | milestones: [16, 22] 26 | - !LinearWarmup 27 | start_factor: 0.1 28 | steps: 1000 29 | -------------------------------------------------------------------------------- /configs/pedestrian/README_cn.md: -------------------------------------------------------------------------------- 1 | [English](README.md) | 简体中文 2 | # 特色垂类检测模型 3 | 4 | 我们提供了针对不同场景的基于PaddlePaddle的检测模型,用户可以下载模型进行使用。 5 | 6 | | 任务 | 算法 | 精度(Box AP) | 下载 | 配置文件 | 7 | |:---------------------|:---------:|:------:| :---------------------------------------------------------------------------------: | :------:| 8 | | 行人检测 | YOLOv3 | 51.8 | [下载链接](https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/pedestrian/pedestrian_yolov3_darknet.yml) | 9 | 10 | ## 行人检测(Pedestrian Detection) 11 | 12 | 行人检测的主要应用有智能监控。在监控场景中,大多是从公共区域的监控摄像头视角拍摄行人,获取图像后再进行行人检测。 13 | 14 | ### 1. 模型结构 15 | 16 | Backbone为Darknet53的YOLOv3。 17 | 18 | 19 | ### 2. 
训练参数配置 20 | 21 | PaddleDetection提供了使用COCO数据集对YOLOv3进行训练的参数配置文件[yolov3_darknet53_270e_coco.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml),与之相比,在进行行人检测的模型训练时,我们对以下参数进行了修改: 22 | 23 | * num_classes: 1 24 | * dataset_dir: dataset/pedestrian 25 | 26 | ### 3. 精度指标 27 | 28 | 模型在我们针对监控场景的内部数据上精度指标为: 29 | 30 | IOU=.5时的AP为 0.792。 31 | 32 | IOU=.5-.95时的AP为 0.518。 33 | 34 | ### 4. 预测 35 | 36 | 用户可以使用我们训练好的模型进行行人检测: 37 | 38 | ``` 39 | export CUDA_VISIBLE_DEVICES=0 40 | python -u tools/infer.py -c configs/pedestrian/pedestrian_yolov3_darknet.yml \ 41 | -o weights=https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams \ 42 | --infer_dir configs/pedestrian/demo \ 43 | --draw_threshold 0.3 \ 44 | --output_dir configs/pedestrian/demo/output 45 | ``` 46 | 47 | 预测结果示例: 48 | 49 | ![](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/static/docs/images/PedestrianDetection_001.png) 50 | 51 | ![](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/static/docs/images/PedestrianDetection_004.png) 52 | -------------------------------------------------------------------------------- /configs/pedestrian/demo/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/001.png -------------------------------------------------------------------------------- /configs/pedestrian/demo/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/002.png -------------------------------------------------------------------------------- /configs/pedestrian/demo/003.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/003.png -------------------------------------------------------------------------------- /configs/pedestrian/demo/004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/004.png -------------------------------------------------------------------------------- /configs/pedestrian/pedestrian.json: -------------------------------------------------------------------------------- 1 | { 2 | "images": [], 3 | "annotations": [], 4 | "categories": [ 5 | { 6 | "supercategory": "component", 7 | "id": 1, 8 | "name": "pedestrian" 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /configs/pedestrian/pedestrian_yolov3_darknet.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '../yolov3/_base_/optimizer_270e.yml', 5 | '../yolov3/_base_/yolov3_darknet53.yml', 6 | '../yolov3/_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams 11 | 12 | num_classes: 1 13 | 14 | TrainDataset: 15 | !COCODataSet 16 | dataset_dir: dataset/pedestrian 17 | anno_path: annotations/instances_train2017.json 18 | image_dir: train2017 19 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] 20 | 21 | EvalDataset: 22 | !COCODataSet 23 | dataset_dir: dataset/pedestrian 24 | anno_path: annotations/instances_val2017.json 25 | image_dir: val2017 26 | 27 | TestDataset: 28 | !ImageFolder 29 | anno_path: configs/pedestrian/pedestrian.json 30 
| -------------------------------------------------------------------------------- /configs/ppyolo/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 405 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 243 10 | - 324 11 | - !LinearWarmup 12 | start_factor: 0. 13 | steps: 4000 14 | 15 | OptimizerBuilder: 16 | optimizer: 17 | momentum: 0.9 18 | type: Momentum 19 | regularizer: 20 | factor: 0.0005 21 | type: L2 22 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/optimizer_2x.yml: -------------------------------------------------------------------------------- 1 | epoch: 811 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 649 10 | - 730 11 | - !LinearWarmup 12 | start_factor: 0. 13 | steps: 4000 14 | 15 | OptimizerBuilder: 16 | optimizer: 17 | momentum: 0.9 18 | type: Momentum 19 | regularizer: 20 | factor: 0.0005 21 | type: L2 22 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/optimizer_365e.yml: -------------------------------------------------------------------------------- 1 | epoch: 365 2 | 3 | LearningRate: 4 | base_lr: 0.005 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 243 10 | - !LinearWarmup 11 | start_factor: 0. 12 | steps: 4000 13 | 14 | OptimizerBuilder: 15 | clip_grad_by_norm: 35. 
16 | optimizer: 17 | momentum: 0.9 18 | type: Momentum 19 | regularizer: 20 | factor: 0.0005 21 | type: L2 22 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/optimizer_650e.yml: -------------------------------------------------------------------------------- 1 | epoch: 650 2 | 3 | LearningRate: 4 | base_lr: 0.005 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 430 10 | - 540 11 | - 610 12 | - !LinearWarmup 13 | start_factor: 0. 14 | steps: 4000 15 | 16 | OptimizerBuilder: 17 | optimizer: 18 | momentum: 0.9 19 | type: Momentum 20 | regularizer: 21 | factor: 0.0005 22 | type: L2 23 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_mbv3_large.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | YOLOv3: 8 | backbone: MobileNetV3 9 | neck: PPYOLOFPN 10 | yolo_head: YOLOv3Head 11 | post_process: BBoxPostProcess 12 | 13 | MobileNetV3: 14 | model_name: large 15 | scale: 1. 
16 | with_extra_blocks: false 17 | extra_block_filters: [] 18 | feature_maps: [13, 16] 19 | 20 | PPYOLOFPN: 21 | in_channels: [160, 368] 22 | coord_conv: true 23 | conv_block_num: 0 24 | spp: true 25 | drop_block: true 26 | 27 | YOLOv3Head: 28 | anchors: [[11, 18], [34, 47], [51, 126], 29 | [115, 71], [120, 195], [254, 235]] 30 | anchor_masks: [[3, 4, 5], [0, 1, 2]] 31 | loss: YOLOv3Loss 32 | 33 | YOLOv3Loss: 34 | ignore_thresh: 0.5 35 | downsample: [32, 16] 36 | label_smooth: false 37 | scale_x_y: 1.05 38 | iou_loss: IouLoss 39 | 40 | IouLoss: 41 | loss_weight: 2.5 42 | loss_square: true 43 | 44 | BBoxPostProcess: 45 | decode: 46 | name: YOLOBox 47 | conf_thresh: 0.005 48 | downsample_ratio: 32 49 | clip_bbox: true 50 | scale_x_y: 1.05 51 | nms: 52 | name: MultiClassNMS 53 | keep_top_k: 100 54 | nms_threshold: 0.45 55 | nms_top_k: 1000 56 | score_threshold: 0.005 57 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_mbv3_small.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | YOLOv3: 8 | backbone: MobileNetV3 9 | neck: PPYOLOFPN 10 | yolo_head: YOLOv3Head 11 | post_process: BBoxPostProcess 12 | 13 | MobileNetV3: 14 | model_name: small 15 | scale: 1. 
16 | with_extra_blocks: false 17 | extra_block_filters: [] 18 | feature_maps: [9, 12] 19 | 20 | PPYOLOFPN: 21 | in_channels: [96, 304] 22 | coord_conv: true 23 | conv_block_num: 0 24 | spp: true 25 | drop_block: true 26 | 27 | YOLOv3Head: 28 | anchors: [[11, 18], [34, 47], [51, 126], 29 | [115, 71], [120, 195], [254, 235]] 30 | anchor_masks: [[3, 4, 5], [0, 1, 2]] 31 | loss: YOLOv3Loss 32 | 33 | YOLOv3Loss: 34 | ignore_thresh: 0.5 35 | downsample: [32, 16] 36 | label_smooth: false 37 | scale_x_y: 1.05 38 | iou_loss: IouLoss 39 | 40 | IouLoss: 41 | loss_weight: 2.5 42 | loss_square: true 43 | 44 | BBoxPostProcess: 45 | decode: 46 | name: YOLOBox 47 | conf_thresh: 0.005 48 | downsample_ratio: 32 49 | clip_bbox: true 50 | scale_x_y: 1.05 51 | nms: 52 | name: MultiClassNMS 53 | keep_top_k: 100 54 | nms_threshold: 0.45 55 | nms_top_k: 1000 56 | score_threshold: 0.005 57 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_r18vd.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | YOLOv3: 8 | backbone: ResNet 9 | neck: PPYOLOFPN 10 | yolo_head: YOLOv3Head 11 | post_process: BBoxPostProcess 12 | 13 | ResNet: 14 | depth: 18 15 | variant: d 16 | return_idx: [2, 3] 17 | freeze_at: -1 18 | freeze_norm: false 19 | norm_decay: 0. 
20 | 21 | PPYOLOFPN: 22 | drop_block: true 23 | block_size: 3 24 | keep_prob: 0.9 25 | conv_block_num: 0 26 | 27 | YOLOv3Head: 28 | anchor_masks: [[3, 4, 5], [0, 1, 2]] 29 | anchors: [[10, 14], [23, 27], [37, 58], 30 | [81, 82], [135, 169], [344, 319]] 31 | loss: YOLOv3Loss 32 | 33 | YOLOv3Loss: 34 | ignore_thresh: 0.7 35 | downsample: [32, 16] 36 | label_smooth: false 37 | scale_x_y: 1.05 38 | iou_loss: IouLoss 39 | 40 | IouLoss: 41 | loss_weight: 2.5 42 | loss_square: true 43 | 44 | BBoxPostProcess: 45 | decode: 46 | name: YOLOBox 47 | conf_thresh: 0.01 48 | downsample_ratio: 32 49 | clip_bbox: true 50 | scale_x_y: 1.05 51 | nms: 52 | name: MatrixNMS 53 | keep_top_k: 100 54 | score_threshold: 0.01 55 | post_threshold: 0.01 56 | nms_top_k: -1 57 | background_label: -1 58 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | YOLOv3: 8 | backbone: ResNet 9 | neck: PPYOLOFPN 10 | yolo_head: YOLOv3Head 11 | post_process: BBoxPostProcess 12 | 13 | ResNet: 14 | depth: 50 15 | variant: d 16 | return_idx: [1, 2, 3] 17 | dcn_v2_stages: [3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 
21 | 22 | PPYOLOFPN: 23 | coord_conv: true 24 | drop_block: true 25 | block_size: 3 26 | keep_prob: 0.9 27 | spp: true 28 | 29 | YOLOv3Head: 30 | anchors: [[10, 13], [16, 30], [33, 23], 31 | [30, 61], [62, 45], [59, 119], 32 | [116, 90], [156, 198], [373, 326]] 33 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 34 | loss: YOLOv3Loss 35 | iou_aware: true 36 | iou_aware_factor: 0.4 37 | 38 | YOLOv3Loss: 39 | ignore_thresh: 0.7 40 | downsample: [32, 16, 8] 41 | label_smooth: false 42 | scale_x_y: 1.05 43 | iou_loss: IouLoss 44 | iou_aware_loss: IouAwareLoss 45 | 46 | IouLoss: 47 | loss_weight: 2.5 48 | loss_square: true 49 | 50 | IouAwareLoss: 51 | loss_weight: 1.0 52 | 53 | BBoxPostProcess: 54 | decode: 55 | name: YOLOBox 56 | conf_thresh: 0.01 57 | downsample_ratio: 32 58 | clip_bbox: true 59 | scale_x_y: 1.05 60 | nms: 61 | name: MatrixNMS 62 | keep_top_k: 100 63 | score_threshold: 0.01 64 | post_threshold: 0.01 65 | nms_top_k: -1 66 | background_label: -1 67 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 50 5 | sample_transforms: 6 | - Decode: {} 7 | - Mixup: {alpha: 1.5, beta: 1.5} 8 | - RandomDistort: {} 9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 10 | - RandomCrop: {} 11 | - RandomFlip: {} 12 | batch_transforms: 13 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False} 14 | - NormalizeBox: {} 15 | - PadBox: {num_max_boxes: 50} 16 | - BboxXYXY2XYWH: {} 17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 18 | - Permute: {} 19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], 
[373, 326]], downsample_ratios: [32, 16, 8]} 20 | batch_size: 24 21 | shuffle: true 22 | drop_last: true 23 | mixup_epoch: 25000 24 | use_shared_memory: true 25 | 26 | EvalReader: 27 | sample_transforms: 28 | - Decode: {} 29 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2} 30 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 31 | - Permute: {} 32 | batch_size: 8 33 | drop_empty: false 34 | 35 | TestReader: 36 | inputs_def: 37 | image_shape: [3, 608, 608] 38 | sample_transforms: 39 | - Decode: {} 40 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2} 41 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 42 | - Permute: {} 43 | batch_size: 1 44 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_tiny.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | YOLOv3: 8 | backbone: MobileNetV3 9 | neck: PPYOLOTinyFPN 10 | yolo_head: YOLOv3Head 11 | post_process: BBoxPostProcess 12 | 13 | MobileNetV3: 14 | model_name: large 15 | scale: .5 16 | with_extra_blocks: false 17 | extra_block_filters: [] 18 | feature_maps: [7, 13, 16] 19 | 20 | PPYOLOTinyFPN: 21 | detection_block_channels: [160, 128, 96] 22 | spp: true 23 | drop_block: true 24 | 25 | YOLOv3Head: 26 | anchors: [[10, 15], [24, 36], [72, 42], 27 | [35, 87], [102, 96], [60, 170], 28 | [220, 125], [128, 222], [264, 266]] 29 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 30 | loss: YOLOv3Loss 31 | 32 | YOLOv3Loss: 33 | ignore_thresh: 0.5 34 | downsample: [32, 16, 8] 35 | label_smooth: false 36 | scale_x_y: 1.05 37 | iou_loss: IouLoss 38 | 39 | IouLoss: 40 | loss_weight: 2.5 41 | loss_square: true 42 | 
43 | BBoxPostProcess: 44 | decode: 45 | name: YOLOBox 46 | conf_thresh: 0.005 47 | downsample_ratio: 32 48 | clip_bbox: true 49 | scale_x_y: 1.05 50 | nms: 51 | name: MultiClassNMS 52 | keep_top_k: 100 53 | nms_threshold: 0.45 54 | nms_top_k: 1000 55 | score_threshold: 0.005 56 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolo_tiny_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 4 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 100 5 | sample_transforms: 6 | - Decode: {} 7 | - Mixup: {alpha: 1.5, beta: 1.5} 8 | - RandomDistort: {} 9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 10 | - RandomCrop: {} 11 | - RandomFlip: {} 12 | batch_transforms: 13 | - BatchRandomResize: {target_size: [192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512], random_size: True, random_interp: True, keep_ratio: False} 14 | - NormalizeBox: {} 15 | - PadBox: {num_max_boxes: 100} 16 | - BboxXYXY2XYWH: {} 17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 18 | - Permute: {} 19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 15], [24, 36], [72, 42], [35, 87], [102, 96], [60, 170], [220, 125], [128, 222], [264, 266]], downsample_ratios: [32, 16, 8]} 20 | batch_size: 32 21 | shuffle: true 22 | drop_last: true 23 | mixup_epoch: 500 24 | use_shared_memory: true 25 | 26 | EvalReader: 27 | sample_transforms: 28 | - Decode: {} 29 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2} 30 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 31 | - Permute: {} 32 | batch_size: 8 33 | drop_empty: false 34 | 35 | TestReader: 36 | inputs_def: 37 | image_shape: [3, 320, 320] 38 | sample_transforms: 39 | - Decode: {} 40 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2} 41 | - NormalizeImage: {mean: [0.485, 0.456, 
0.406], std: [0.229, 0.224, 0.225], is_scale: True} 42 | - Permute: {} 43 | batch_size: 1 44 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | YOLOv3: 8 | backbone: ResNet 9 | neck: PPYOLOPAN 10 | yolo_head: YOLOv3Head 11 | post_process: BBoxPostProcess 12 | 13 | ResNet: 14 | depth: 50 15 | variant: d 16 | return_idx: [1, 2, 3] 17 | dcn_v2_stages: [3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 21 | 22 | PPYOLOPAN: 23 | drop_block: true 24 | block_size: 3 25 | keep_prob: 0.9 26 | spp: true 27 | 28 | YOLOv3Head: 29 | anchors: [[10, 13], [16, 30], [33, 23], 30 | [30, 61], [62, 45], [59, 119], 31 | [116, 90], [156, 198], [373, 326]] 32 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 33 | loss: YOLOv3Loss 34 | iou_aware: true 35 | iou_aware_factor: 0.5 36 | 37 | YOLOv3Loss: 38 | ignore_thresh: 0.7 39 | downsample: [32, 16, 8] 40 | label_smooth: false 41 | scale_x_y: 1.05 42 | iou_loss: IouLoss 43 | iou_aware_loss: IouAwareLoss 44 | 45 | IouLoss: 46 | loss_weight: 2.5 47 | loss_square: true 48 | 49 | IouAwareLoss: 50 | loss_weight: 1.0 51 | 52 | BBoxPostProcess: 53 | decode: 54 | name: YOLOBox 55 | conf_thresh: 0.01 56 | downsample_ratio: 32 57 | clip_bbox: true 58 | scale_x_y: 1.05 59 | nms: 60 | name: MatrixNMS 61 | keep_top_k: 100 62 | score_threshold: 0.01 63 | post_threshold: 0.01 64 | nms_top_k: -1 65 | background_label: -1 66 | -------------------------------------------------------------------------------- /configs/ppyolo/_base_/ppyolov2_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 8 2 | TrainReader: 3 | inputs_def: 4 | 
num_max_boxes: 100 5 | sample_transforms: 6 | - Decode: {} 7 | - Mixup: {alpha: 1.5, beta: 1.5} 8 | - RandomDistort: {} 9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 10 | - RandomCrop: {} 11 | - RandomFlip: {} 12 | batch_transforms: 13 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False} 14 | - NormalizeBox: {} 15 | - PadBox: {num_max_boxes: 100} 16 | - BboxXYXY2XYWH: {} 17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 18 | - Permute: {} 19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]} 20 | batch_size: 12 21 | shuffle: true 22 | drop_last: true 23 | mixup_epoch: 25000 24 | use_shared_memory: true 25 | 26 | EvalReader: 27 | sample_transforms: 28 | - Decode: {} 29 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} 30 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 31 | - Permute: {} 32 | batch_size: 8 33 | drop_empty: false 34 | 35 | TestReader: 36 | inputs_def: 37 | image_shape: [3, 640, 640] 38 | sample_transforms: 39 | - Decode: {} 40 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} 41 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 42 | - Permute: {} 43 | batch_size: 1 44 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolo_r50vd_dcn.yml', 5 | './_base_/optimizer_1x.yml', 6 | './_base_/ppyolo_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 16 10 | weights: 
output/ppyolo_r50vd_dcn_1x_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolo_r50vd_dcn.yml', 5 | './_base_/optimizer_1x.yml', 6 | './_base_/ppyolo_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 8 10 | use_ema: true 11 | weights: output/ppyolo_r50vd_dcn_1x_minicoco/model_final 12 | 13 | TrainReader: 14 | batch_size: 12 15 | 16 | TrainDataset: 17 | !COCODataSet 18 | image_dir: train2017 19 | # refer to https://github.com/giddyyupp/coco-minitrain 20 | anno_path: annotations/instances_minitrain2017.json 21 | dataset_dir: dataset/coco 22 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] 23 | 24 | epoch: 192 25 | 26 | LearningRate: 27 | base_lr: 0.005 28 | schedulers: 29 | - !PiecewiseDecay 30 | gamma: 0.1 31 | milestones: 32 | - 153 33 | - 173 34 | - !LinearWarmup 35 | start_factor: 0. 
36 | steps: 4000 37 | 38 | OptimizerBuilder: 39 | optimizer: 40 | momentum: 0.9 41 | type: Momentum 42 | regularizer: 43 | factor: 0.0005 44 | type: L2 45 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolo_r50vd_dcn.yml', 5 | './_base_/optimizer_2x.yml', 6 | './_base_/ppyolo_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 16 10 | weights: output/ppyolo_r50vd_dcn_2x_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolo_r50vd_dcn_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolo_r50vd_dcn.yml', 5 | './_base_/optimizer_1x.yml', 6 | './_base_/ppyolo_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 83 10 | weights: output/ppyolo_r50vd_dcn_voc/model_final 11 | 12 | TrainReader: 13 | mixup_epoch: 350 14 | batch_size: 12 15 | 16 | EvalReader: 17 | batch_transforms: 18 | - PadBatch: {pad_gt: True} 19 | 20 | epoch: 583 21 | 22 | LearningRate: 23 | base_lr: 0.00333 24 | schedulers: 25 | - !PiecewiseDecay 26 | gamma: 0.1 27 | milestones: 28 | - 466 29 | - 516 30 | - !LinearWarmup 31 | start_factor: 0. 
32 | steps: 4000 33 | 34 | OptimizerBuilder: 35 | optimizer: 36 | momentum: 0.9 37 | type: Momentum 38 | regularizer: 39 | factor: 0.0005 40 | type: L2 41 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolo_test.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolo_r50vd_dcn.yml', 5 | './_base_/optimizer_1x.yml', 6 | './_base_/ppyolo_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 16 10 | 11 | EvalDataset: 12 | !COCODataSet 13 | image_dir: test2017 14 | anno_path: annotations/image_info_test-dev2017.json 15 | dataset_dir: dataset/coco 16 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolo_tiny_650e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolo_tiny.yml', 5 | './_base_/optimizer_650e.yml', 6 | './_base_/ppyolo_tiny_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 1 10 | weights: output/ppyolo_tiny_650e_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolov2_r50vd_dcn.yml', 5 | './_base_/optimizer_365e.yml', 6 | './_base_/ppyolov2_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 8 10 | weights: output/ppyolov2_r101vd_dcn_365e_coco/model_final 11 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams 12 | 13 | ResNet: 14 | depth: 101 15 | variant: d 16 | return_idx: [1, 2, 3] 17 | dcn_v2_stages: [3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 
21 | -------------------------------------------------------------------------------- /configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | './_base_/ppyolov2_r50vd_dcn.yml', 5 | './_base_/optimizer_365e.yml', 6 | './_base_/ppyolov2_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 8 10 | weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/rcnn_enhance/README.md: -------------------------------------------------------------------------------- 1 | ## 服务器端实用目标检测方案 2 | 3 | ### 简介 4 | 5 | * 近年来,学术界和工业界广泛关注图像中目标检测任务。基于[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)中SSLD蒸馏方案训练得到的ResNet50_vd预训练模型(ImageNet1k验证集上Top1 Acc为82.39%),结合PaddleDetection中的丰富算子,飞桨提供了一种面向服务器端实用的目标检测方案PSS-DET(Practical Server Side Detection)。基于COCO2017目标检测数据集,V100单卡预测速度为61FPS时,COCO mAP可达41.2%。 6 | 7 | 8 | ### 模型库 9 | 10 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 | 11 | | :---------------------- | :-------------: | :-------: | :-----: | :------------: | :----: | :-----: | :-------------: | :-----: | 12 | | ResNet50-vd-FPN-Dcnv2 | Faster | 2 | 3x | 61.425 | 41.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_enhance_3x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml) | 13 | -------------------------------------------------------------------------------- /configs/rcnn_enhance/_base_/faster_rcnn_enhance.yml: -------------------------------------------------------------------------------- 1 | architecture: FasterRCNN 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 3 | 4 | FasterRCNN: 5 | backbone: ResNet 6 | neck: FPN 7 | rpn_head: RPNHead 8 | 
bbox_head: BBoxHead 9 | # post process 10 | bbox_post_process: BBoxPostProcess 11 | 12 | 13 | ResNet: 14 | # index 0 stands for res2 15 | depth: 50 16 | norm_type: bn 17 | variant: d 18 | freeze_at: 0 19 | return_idx: [0,1,2,3] 20 | num_stages: 4 21 | dcn_v2_stages: [1,2,3] 22 | lr_mult_list: [0.05, 0.05, 0.1, 0.15] 23 | 24 | FPN: 25 | in_channels: [256, 512, 1024, 2048] 26 | out_channel: 64 27 | 28 | RPNHead: 29 | anchor_generator: 30 | aspect_ratios: [0.5, 1.0, 2.0] 31 | anchor_sizes: [[32], [64], [128], [256], [512]] 32 | strides: [4, 8, 16, 32, 64] 33 | rpn_target_assign: 34 | batch_size_per_im: 256 35 | fg_fraction: 0.5 36 | negative_overlap: 0.3 37 | positive_overlap: 0.7 38 | use_random: True 39 | train_proposal: 40 | min_size: 0.0 41 | nms_thresh: 0.7 42 | pre_nms_top_n: 2000 43 | post_nms_top_n: 2000 44 | topk_after_collect: True 45 | test_proposal: 46 | min_size: 0.0 47 | nms_thresh: 0.7 48 | pre_nms_top_n: 500 49 | post_nms_top_n: 300 50 | 51 | 52 | BBoxHead: 53 | head: TwoFCHead 54 | roi_extractor: 55 | resolution: 7 56 | sampling_ratio: 0 57 | aligned: True 58 | bbox_assigner: BBoxLibraAssigner 59 | bbox_loss: DIouLoss 60 | 61 | TwoFCHead: 62 | out_channel: 1024 63 | 64 | BBoxLibraAssigner: 65 | batch_size_per_im: 512 66 | bg_thresh: 0.5 67 | fg_thresh: 0.5 68 | fg_fraction: 0.25 69 | use_random: True 70 | 71 | DIouLoss: 72 | loss_weight: 10.0 73 | use_complete_iou_loss: true 74 | 75 | BBoxPostProcess: 76 | decode: RCNNBox 77 | nms: 78 | name: MultiClassNMS 79 | keep_top_k: 100 80 | score_threshold: 0.05 81 | nms_threshold: 0.5 82 | -------------------------------------------------------------------------------- /configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomResize: {target_size: [[384,1000], [416,1000], [448,1000], [480,1000], [512,1000], [544,1000], [576,1000], 
[608,1000], [640,1000], [672,1000]], interp: 2, keep_ratio: True} 6 | - RandomFlip: {prob: 0.5} 7 | - AutoAugment: {autoaug_type: v1} 8 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 9 | - Permute: {} 10 | batch_transforms: 11 | - PadBatch: {pad_to_stride: 32, pad_gt: true} 12 | batch_size: 2 13 | shuffle: true 14 | drop_last: true 15 | 16 | 17 | EvalReader: 18 | sample_transforms: 19 | - Decode: {} 20 | - Resize: {interp: 2, target_size: [640, 640], keep_ratio: True} 21 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 22 | - Permute: {} 23 | batch_transforms: 24 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 25 | batch_size: 1 26 | shuffle: false 27 | drop_last: false 28 | drop_empty: false 29 | 30 | 31 | TestReader: 32 | sample_transforms: 33 | - Decode: {} 34 | - Resize: {interp: 2, target_size: [640, 640], keep_ratio: True} 35 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 36 | - Permute: {} 37 | batch_transforms: 38 | - PadBatch: {pad_to_stride: 32, pad_gt: false} 39 | batch_size: 1 40 | shuffle: false 41 | drop_last: false 42 | -------------------------------------------------------------------------------- /configs/rcnn_enhance/_base_/optimizer_3x.yml: -------------------------------------------------------------------------------- 1 | epoch: 36 2 | 3 | LearningRate: 4 | base_lr: 0.02 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [24, 33] 9 | - !LinearWarmup 10 | start_factor: 0. 
11 | steps: 1000 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_3x.yml', 5 | '_base_/faster_rcnn_enhance.yml', 6 | '_base_/faster_rcnn_enhance_reader.yml', 7 | ] 8 | weights: output/faster_rcnn_enhance_r50_3x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/runtime.yml: -------------------------------------------------------------------------------- 1 | use_gpu: true 2 | log_iter: 20 3 | save_dir: output 4 | snapshot_epoch: 1 5 | -------------------------------------------------------------------------------- /configs/slim/distill/README.md: -------------------------------------------------------------------------------- 1 | # Distillation(蒸馏) 2 | 3 | ## YOLOv3模型蒸馏 4 | 以YOLOv3-MobileNetV1为例,使用YOLOv3-ResNet34作为蒸馏训练的teacher网络, 对YOLOv3-MobileNetV1结构的student网络进行蒸馏。 5 | COCO数据集作为目标检测任务的训练目标难度更大,意味着teacher网络会预测出更多的背景bbox,如果直接用teacher的预测输出作为student学习的`soft label`会有严重的类别不均衡问题。解决这个问题需要引入新的方法,详细背景请参考论文:[Object detection at 200 Frames Per Second](https://arxiv.org/abs/1805.06361)。 6 | 为了确定蒸馏的对象,我们首先需要找到student和teacher网络得到的`x,y,w,h,cls,objness`等Tensor,用teacher得到的结果指导student训练。具体实现可参考[代码](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/ppdet/slim/distill.py) 7 | 8 | ## Citations 9 | ``` 10 | @article{mehta2018object, 11 | title={Object detection at 200 Frames Per Second}, 12 | author={Rakesh Mehta and Cemalettin Ozturk}, 13 | year={2018}, 14 | eprint={1805.06361}, 15 | archivePrefix={arXiv}, 16 | primaryClass={cs.CV} 17 | } 18 | ``` 19 | 
-------------------------------------------------------------------------------- /configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../../yolov3/yolov3_r34_270e_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams 6 | 7 | 8 | slim: Distill 9 | distill_loss: DistillYOLOv3Loss 10 | 11 | DistillYOLOv3Loss: 12 | weight: 1000 13 | -------------------------------------------------------------------------------- /configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../../yolov3/yolov3_r34_270e_coco.yml', 3 | ] 4 | 5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams 6 | 7 | slim: DistillPrune 8 | 9 | distill_loss: DistillYOLOv3Loss 10 | 11 | DistillYOLOv3Loss: 12 | weight: 1000 13 | 14 | pruner: Pruner 15 | 16 | Pruner: 17 | criterion: l1_norm 18 | pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0', 19 | 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0', 20 | 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0', 21 | 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0', 22 | 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0', 23 | 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0'] 24 | pruned_ratios: [0.5,0.5,0.5,0.5,0.5,0.5,0.7,0.7,0.7,0.7,0.7,0.7,0.8,0.8,0.8,0.8,0.8,0.8] 25 | -------------------------------------------------------------------------------- /configs/slim/prune/yolov3_prune_fpgm.yml: -------------------------------------------------------------------------------- 1 | # Weights of yolov3_mobilenet_v1_voc 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams 3 | slim: Pruner 4 | 5 | Pruner: 6 | criterion: fpgm 7 | pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0', 8 | 
'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0', 9 | 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0', 10 | 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0', 11 | 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0', 12 | 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0'] 13 | pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3] 14 | print_params: False 15 | -------------------------------------------------------------------------------- /configs/slim/prune/yolov3_prune_l1_norm.yml: -------------------------------------------------------------------------------- 1 | # Weights of yolov3_mobilenet_v1_voc 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams 3 | slim: Pruner 4 | 5 | Pruner: 6 | criterion: l1_norm 7 | pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0', 8 | 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0', 9 | 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0', 10 | 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0', 11 | 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0', 12 | 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0'] 13 | pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3] 14 | print_params: False 15 | -------------------------------------------------------------------------------- /configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml: -------------------------------------------------------------------------------- 1 | pretrain_weights: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams 2 | slim: QAT 3 | 4 | QAT: 5 | quant_config: { 6 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max', 7 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9, 8 | 'quantizable_layer_type': ['Conv2D', 'Linear']} 9 | print_model: True 10 | 11 | 12 | epoch: 5 13 | 14 | LearningRate: 15 | base_lr: 0.001 16 | schedulers: 
17 | - !PiecewiseDecay 18 | gamma: 0.1 19 | milestones: [3, 4] 20 | - !LinearWarmup 21 | start_factor: 0.001 22 | steps: 100 23 | -------------------------------------------------------------------------------- /configs/slim/quant/ssd_mobilenet_v1_qat.yml: -------------------------------------------------------------------------------- 1 | pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ssd_mobilenet_v1_300_120e_voc.pdparams 2 | slim: QAT 3 | 4 | QAT: 5 | quant_config: { 6 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max', 7 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9, 8 | 'quantizable_layer_type': ['Conv2D', 'Linear']} 9 | print_model: True 10 | -------------------------------------------------------------------------------- /configs/slim/quant/yolov3_darknet_qat.yml: -------------------------------------------------------------------------------- 1 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams 2 | slim: QAT 3 | 4 | QAT: 5 | quant_config: { 6 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max', 7 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9, 8 | 'quantizable_layer_type': ['Conv2D', 'Linear']} 9 | print_model: True 10 | 11 | epoch: 50 12 | 13 | LearningRate: 14 | base_lr: 0.0001 15 | schedulers: 16 | - !PiecewiseDecay 17 | gamma: 0.1 18 | milestones: 19 | - 30 20 | - 45 21 | - !LinearWarmup 22 | start_factor: 0. 
23 | steps: 1000 24 | 25 | OptimizerBuilder: 26 | optimizer: 27 | momentum: 0.9 28 | type: Momentum 29 | regularizer: 30 | factor: 0.0005 31 | type: L2 32 | -------------------------------------------------------------------------------- /configs/slim/quant/yolov3_mobilenet_v1_qat.yml: -------------------------------------------------------------------------------- 1 | # Weights of yolov3_mobilenet_v1_coco 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams 3 | slim: QAT 4 | 5 | QAT: 6 | quant_config: { 7 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max', 8 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9, 9 | 'quantizable_layer_type': ['Conv2D', 'Linear']} 10 | print_model: True 11 | -------------------------------------------------------------------------------- /configs/slim/quant/yolov3_mobilenet_v3_qat.yml: -------------------------------------------------------------------------------- 1 | # Weights of yolov3_mobilenet_v3_coco 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams 3 | slim: QAT 4 | 5 | QAT: 6 | quant_config: { 7 | 'weight_preprocess_type': 'PACT', 8 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max', 9 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9, 10 | 'quantizable_layer_type': ['Conv2D', 'Linear']} 11 | print_model: True 12 | 13 | epoch: 30 14 | LearningRate: 15 | base_lr: 0.0001 16 | schedulers: 17 | - !PiecewiseDecay 18 | gamma: 0.1 19 | milestones: 20 | - 25 21 | - 28 22 | - !LinearWarmup 23 | start_factor: 0. 
24 | steps: 2000 25 | -------------------------------------------------------------------------------- /configs/solov2/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | base_lr: 0.01 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [8, 11] 9 | - !LinearWarmup 10 | start_factor: 0. 11 | steps: 1000 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0001 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/solov2/_base_/solov2_r50_fpn.yml: -------------------------------------------------------------------------------- 1 | architecture: SOLOv2 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams 3 | 4 | SOLOv2: 5 | backbone: ResNet 6 | neck: FPN 7 | solov2_head: SOLOv2Head 8 | mask_head: SOLOv2MaskHead 9 | 10 | ResNet: 11 | depth: 50 12 | norm_type: bn 13 | freeze_at: 0 14 | return_idx: [0,1,2,3] 15 | num_stages: 4 16 | 17 | FPN: 18 | out_channel: 256 19 | 20 | SOLOv2Head: 21 | seg_feat_channels: 512 22 | stacked_convs: 4 23 | num_grids: [40, 36, 24, 16, 12] 24 | kernel_out_channels: 256 25 | solov2_loss: SOLOv2Loss 26 | mask_nms: MaskMatrixNMS 27 | 28 | SOLOv2MaskHead: 29 | mid_channels: 128 30 | out_channels: 256 31 | start_level: 0 32 | end_level: 3 33 | 34 | SOLOv2Loss: 35 | ins_loss_weight: 3.0 36 | focal_loss_gamma: 2.0 37 | focal_loss_alpha: 0.25 38 | 39 | MaskMatrixNMS: 40 | pre_nms_top_n: 500 41 | post_nms_top_n: 100 42 | -------------------------------------------------------------------------------- /configs/solov2/_base_/solov2_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - Poly2Mask: {} 6 | - Resize: {interp: 1, target_size: [800, 1333], 
keep_ratio: True} 7 | - RandomFlip: {} 8 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 9 | - Permute: {} 10 | batch_transforms: 11 | - PadBatch: {pad_to_stride: 32} 12 | - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12], 13 | scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]], 14 | coord_sigma: 0.2} 15 | batch_size: 2 16 | shuffle: true 17 | drop_last: true 18 | 19 | 20 | EvalReader: 21 | sample_transforms: 22 | - Decode: {} 23 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 24 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True} 25 | - Permute: {} 26 | batch_transforms: 27 | - PadBatch: {pad_to_stride: 32} 28 | batch_size: 1 29 | shuffle: false 30 | drop_last: false 31 | drop_empty: false 32 | 33 | 34 | TestReader: 35 | sample_transforms: 36 | - Decode: {} 37 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 38 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True} 39 | - Permute: {} 40 | batch_transforms: 41 | - PadBatch: {pad_to_stride: 32} 42 | batch_size: 1 43 | shuffle: false 44 | drop_last: false 45 | -------------------------------------------------------------------------------- /configs/solov2/solov2_r50_fpn_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/solov2_r50_fpn.yml', 5 | '_base_/optimizer_1x.yml', 6 | '_base_/solov2_reader.yml', 7 | ] 8 | weights: output/solov2_r50_fpn_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/solov2/solov2_r50_fpn_3x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_instance.yml', 3 | '../runtime.yml', 4 | '_base_/solov2_r50_fpn.yml', 5 | '_base_/optimizer_1x.yml', 6 | 
'_base_/solov2_reader.yml', 7 | ] 8 | weights: output/solov2_r50_fpn_3x_coco/model_final 9 | epoch: 36 10 | 11 | LearningRate: 12 | base_lr: 0.01 13 | schedulers: 14 | - !PiecewiseDecay 15 | gamma: 0.1 16 | milestones: [24, 33] 17 | - !LinearWarmup 18 | start_factor: 0. 19 | steps: 1000 20 | 21 | TrainReader: 22 | sample_transforms: 23 | - Decode: {} 24 | - Poly2Mask: {} 25 | - RandomResize: {interp: 1, 26 | target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], 27 | keep_ratio: True} 28 | - RandomFlip: {} 29 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} 30 | - Permute: {} 31 | batch_transforms: 32 | - PadBatch: {pad_to_stride: 32} 33 | - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12], 34 | scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]], 35 | coord_sigma: 0.2} 36 | batch_size: 2 37 | shuffle: true 38 | drop_last: true 39 | -------------------------------------------------------------------------------- /configs/ssd/README.md: -------------------------------------------------------------------------------- 1 | # SSD: Single Shot MultiBox Detector 2 | 3 | ## Model Zoo 4 | 5 | ### SSD on Pascal VOC 6 | 7 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 | 8 | | :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: | 9 | | VGG | SSD | 8 | 240e | ---- | 77.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_vgg16_300_240e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_vgg16_300_240e_voc.yml) | 10 | | MobileNet v1 | SSD | 32 | 120e | ---- | 73.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_mobilenet_v1_300_120e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) | 11 | 12 | **注意:** SSD-VGG使用4GPU在总batch 
size为32下训练240个epoch。SSD-MobileNetv1使用2GPU在总batch size为64下训练120周期。 13 | 14 | ## Citations 15 | ``` 16 | @article{Liu_2016, 17 | title={SSD: Single Shot MultiBox Detector}, 18 | journal={ECCV}, 19 | author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.}, 20 | year={2016}, 21 | } 22 | ``` 23 | -------------------------------------------------------------------------------- /configs/ssd/_base_/optimizer_120e.yml: -------------------------------------------------------------------------------- 1 | epoch: 120 2 | 3 | LearningRate: 4 | base_lr: 0.001 5 | schedulers: 6 | - !PiecewiseDecay 7 | milestones: [40, 60, 80, 100] 8 | values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001] 9 | use_warmup: false 10 | 11 | OptimizerBuilder: 12 | optimizer: 13 | momentum: 0.0 14 | type: RMSProp 15 | regularizer: 16 | factor: 0.00005 17 | type: L2 18 | -------------------------------------------------------------------------------- /configs/ssd/_base_/optimizer_1700e.yml: -------------------------------------------------------------------------------- 1 | epoch: 1700 2 | 3 | LearningRate: 4 | base_lr: 0.4 5 | schedulers: 6 | - !CosineDecay 7 | max_epochs: 1700 8 | - !LinearWarmup 9 | start_factor: 0.3333333333333333 10 | steps: 2000 11 | 12 | OptimizerBuilder: 13 | optimizer: 14 | momentum: 0.9 15 | type: Momentum 16 | regularizer: 17 | factor: 0.0005 18 | type: L2 19 | -------------------------------------------------------------------------------- /configs/ssd/_base_/optimizer_240e.yml: -------------------------------------------------------------------------------- 1 | epoch: 240 2 | 3 | LearningRate: 4 | base_lr: 0.001 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 160 10 | - 200 11 | - !LinearWarmup 12 | start_factor: 0.3333333333333333 13 | steps: 500 14 | 15 | OptimizerBuilder: 16 | optimizer: 17 | momentum: 0.9 18 | type: Momentum 19 | regularizer: 20 | factor: 0.0005 21 
| type: L2 22 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssd_mobilenet_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 8 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 90 5 | sample_transforms: 6 | - Decode: {} 7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False} 8 | - RandomExpand: {fill_value: [127.5, 127.5, 127.5]} 9 | - RandomCrop: {allow_no_crop: False} 10 | - RandomFlip: {} 11 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 12 | - NormalizeBox: {} 13 | - PadBox: {num_max_boxes: 90} 14 | batch_transforms: 15 | - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false} 16 | - Permute: {} 17 | batch_size: 32 18 | shuffle: true 19 | drop_last: true 20 | 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 26 | - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false} 27 | - Permute: {} 28 | batch_size: 1 29 | drop_empty: false 30 | 31 | 32 | TestReader: 33 | inputs_def: 34 | image_shape: [3, 300, 300] 35 | sample_transforms: 36 | - Decode: {} 37 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 38 | - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false} 39 | - Permute: {} 40 | batch_size: 1 41 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssd_mobilenet_v1_300.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ssd_mobilenet_v1_coco_pretrained.pdparams 3 | 4 | SSD: 5 | backbone: MobileNet 6 | ssd_head: SSDHead 7 | post_process: BBoxPostProcess 8 | 9 | MobileNet: 10 | 
norm_decay: 0. 11 | scale: 1 12 | conv_learning_rate: 0.1 13 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] 14 | with_extra_blocks: true 15 | feature_maps: [11, 13, 14, 15, 16, 17] 16 | 17 | SSDHead: 18 | kernel_size: 1 19 | padding: 0 20 | anchor_generator: 21 | steps: [0, 0, 0, 0, 0, 0] 22 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] 23 | min_ratio: 20 24 | max_ratio: 90 25 | base_size: 300 26 | min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0] 27 | max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0] 28 | offset: 0.5 29 | flip: true 30 | min_max_aspect_ratios_order: false 31 | 32 | BBoxPostProcess: 33 | decode: 34 | name: SSDBox 35 | nms: 36 | name: MultiClassNMS 37 | keep_top_k: 200 38 | score_threshold: 0.01 39 | nms_threshold: 0.45 40 | nms_top_k: 400 41 | nms_eta: 1.0 42 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssd_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 90 5 | 6 | sample_transforms: 7 | - Decode: {} 8 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False} 9 | - RandomExpand: {fill_value: [104., 117., 123.]} 10 | - RandomCrop: {allow_no_crop: true} 11 | - RandomFlip: {} 12 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 13 | - NormalizeBox: {} 14 | - PadBox: {num_max_boxes: 90} 15 | 16 | batch_transforms: 17 | - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false} 18 | - Permute: {} 19 | 20 | batch_size: 8 21 | shuffle: true 22 | drop_last: true 23 | 24 | 25 | EvalReader: 26 | sample_transforms: 27 | - Decode: {} 28 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 29 | - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false} 30 | - Permute: {} 31 | batch_size: 1 32 | drop_empty: false 33 | 34 | TestReader: 35 | 
inputs_def: 36 | image_shape: [3, 300, 300] 37 | sample_transforms: 38 | - Decode: {} 39 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 40 | - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false} 41 | - Permute: {} 42 | batch_size: 1 43 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssd_vgg16_300.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/VGG16_caffe_pretrained.pdparams 3 | 4 | # Model Architecture 5 | SSD: 6 | # model feat info flow 7 | backbone: VGG 8 | ssd_head: SSDHead 9 | # post process 10 | post_process: BBoxPostProcess 11 | 12 | VGG: 13 | depth: 16 14 | normalizations: [20., -1, -1, -1, -1, -1] 15 | 16 | SSDHead: 17 | anchor_generator: 18 | steps: [8, 16, 32, 64, 100, 300] 19 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]] 20 | min_ratio: 20 21 | max_ratio: 90 22 | min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0] 23 | max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0] 24 | offset: 0.5 25 | flip: true 26 | min_max_aspect_ratios_order: true 27 | 28 | BBoxPostProcess: 29 | decode: 30 | name: SSDBox 31 | nms: 32 | name: MultiClassNMS 33 | keep_top_k: 200 34 | score_threshold: 0.01 35 | nms_threshold: 0.45 36 | nms_top_k: 400 37 | nms_eta: 1.0 38 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssdlite300_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 8 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 90 5 | sample_transforms: 6 | - Decode: {} 7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False} 8 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 9 | - RandomCrop: {allow_no_crop: False} 10 | - RandomFlip: {} 11 | - Resize: {target_size: [300, 300], 
keep_ratio: False, interp: 1} 12 | - NormalizeBox: {} 13 | - PadBox: {num_max_boxes: 90} 14 | batch_transforms: 15 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} 16 | - Permute: {} 17 | batch_size: 64 18 | shuffle: true 19 | drop_last: true 20 | 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 26 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} 27 | - Permute: {} 28 | batch_size: 1 29 | drop_empty: false 30 | 31 | 32 | TestReader: 33 | inputs_def: 34 | image_shape: [3, 300, 300] 35 | sample_transforms: 36 | - Decode: {} 37 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1} 38 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} 39 | - Permute: {} 40 | batch_size: 1 41 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssdlite320_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 8 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 90 5 | sample_transforms: 6 | - Decode: {} 7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False} 8 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 9 | - RandomCrop: {allow_no_crop: False} 10 | - RandomFlip: {} 11 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1} 12 | - NormalizeBox: {} 13 | - PadBox: {num_max_boxes: 90} 14 | batch_transforms: 15 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} 16 | - Permute: {} 17 | batch_size: 64 18 | shuffle: true 19 | drop_last: true 20 | 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1} 26 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} 
27 | - Permute: {} 28 | batch_size: 1 29 | drop_empty: false 30 | 31 | 32 | TestReader: 33 | inputs_def: 34 | image_shape: [3, 320, 320] 35 | sample_transforms: 36 | - Decode: {} 37 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1} 38 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true} 39 | - Permute: {} 40 | batch_size: 1 41 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssdlite_ghostnet_320.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/GhostNet_x1_3_ssld_pretrained.pdparams 3 | 4 | SSD: 5 | backbone: GhostNet 6 | ssd_head: SSDHead 7 | post_process: BBoxPostProcess 8 | 9 | GhostNet: 10 | scale: 1.3 11 | conv_decay: 0.00004 12 | with_extra_blocks: true 13 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] 14 | feature_maps: [13, 18, 19, 20, 21, 22] 15 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] 16 | 17 | SSDHead: 18 | use_sepconv: True 19 | conv_decay: 0.00004 20 | anchor_generator: 21 | steps: [16, 32, 64, 107, 160, 320] 22 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] 23 | min_ratio: 20 24 | max_ratio: 95 25 | base_size: 320 26 | min_sizes: [] 27 | max_sizes: [] 28 | offset: 0.5 29 | flip: true 30 | clip: true 31 | min_max_aspect_ratios_order: false 32 | 33 | BBoxPostProcess: 34 | decode: 35 | name: SSDBox 36 | nms: 37 | name: MultiClassNMS 38 | keep_top_k: 200 39 | score_threshold: 0.01 40 | nms_threshold: 0.45 41 | nms_top_k: 400 42 | nms_eta: 1.0 43 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssdlite_mobilenet_v1_300.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | pretrain_weights: 
https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams 3 | 4 | SSD: 5 | backbone: MobileNet 6 | ssd_head: SSDHead 7 | post_process: BBoxPostProcess 8 | 9 | MobileNet: 10 | conv_decay: 0.00004 11 | scale: 1 12 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] 13 | with_extra_blocks: true 14 | feature_maps: [11, 13, 14, 15, 16, 17] 15 | 16 | SSDHead: 17 | use_sepconv: True 18 | conv_decay: 0.00004 19 | anchor_generator: 20 | steps: [16, 32, 64, 100, 150, 300] 21 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] 22 | min_ratio: 20 23 | max_ratio: 95 24 | base_size: 300 25 | min_sizes: [] 26 | max_sizes: [] 27 | offset: 0.5 28 | flip: true 29 | clip: true 30 | min_max_aspect_ratios_order: False 31 | 32 | BBoxPostProcess: 33 | decode: 34 | name: SSDBox 35 | nms: 36 | name: MultiClassNMS 37 | keep_top_k: 200 38 | score_threshold: 0.01 39 | nms_threshold: 0.45 40 | nms_top_k: 400 41 | nms_eta: 1.0 42 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssdlite_mobilenet_v3_large_320.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams 3 | 4 | SSD: 5 | backbone: MobileNetV3 6 | ssd_head: SSDHead 7 | post_process: BBoxPostProcess 8 | 9 | MobileNetV3: 10 | scale: 1.0 11 | model_name: large 12 | conv_decay: 0.00004 13 | with_extra_blocks: true 14 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] 15 | feature_maps: [14, 17, 18, 19, 20, 21] 16 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] 17 | multiplier: 0.5 18 | 19 | SSDHead: 20 | use_sepconv: True 21 | conv_decay: 0.00004 22 | anchor_generator: 23 | steps: [16, 32, 64, 107, 160, 320] 24 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] 25 | min_ratio: 20 26 | max_ratio: 95 27 | 
base_size: 320 28 | min_sizes: [] 29 | max_sizes: [] 30 | offset: 0.5 31 | flip: true 32 | clip: true 33 | min_max_aspect_ratios_order: false 34 | 35 | BBoxPostProcess: 36 | decode: 37 | name: SSDBox 38 | nms: 39 | name: MultiClassNMS 40 | keep_top_k: 200 41 | score_threshold: 0.01 42 | nms_threshold: 0.45 43 | nms_top_k: 400 44 | nms_eta: 1.0 45 | -------------------------------------------------------------------------------- /configs/ssd/_base_/ssdlite_mobilenet_v3_small_320.yml: -------------------------------------------------------------------------------- 1 | architecture: SSD 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams 3 | 4 | SSD: 5 | backbone: MobileNetV3 6 | ssd_head: SSDHead 7 | post_process: BBoxPostProcess 8 | 9 | MobileNetV3: 10 | scale: 1.0 11 | model_name: small 12 | conv_decay: 0.00004 13 | with_extra_blocks: true 14 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]] 15 | feature_maps: [10, 13, 14, 15, 16, 17] 16 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] 17 | multiplier: 0.5 18 | 19 | SSDHead: 20 | use_sepconv: True 21 | conv_decay: 0.00004 22 | anchor_generator: 23 | steps: [16, 32, 64, 107, 160, 320] 24 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]] 25 | min_ratio: 20 26 | max_ratio: 95 27 | base_size: 320 28 | min_sizes: [] 29 | max_sizes: [] 30 | offset: 0.5 31 | flip: true 32 | clip: true 33 | min_max_aspect_ratios_order: false 34 | 35 | BBoxPostProcess: 36 | decode: 37 | name: SSDBox 38 | nms: 39 | name: MultiClassNMS 40 | keep_top_k: 200 41 | score_threshold: 0.01 42 | nms_threshold: 0.45 43 | nms_top_k: 400 44 | nms_eta: 1.0 45 | -------------------------------------------------------------------------------- /configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | 
'_base_/optimizer_120e.yml', 5 | '_base_/ssd_mobilenet_v1_300.yml', 6 | '_base_/ssd_mobilenet_reader.yml', 7 | ] 8 | weights: output/ssd_mobilenet_v1_300_120e_voc/model_final 9 | 10 | EvalReader: 11 | batch_transforms: 12 | - PadBatch: {pad_gt: True} 13 | -------------------------------------------------------------------------------- /configs/ssd/ssd_vgg16_300_240e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_240e.yml', 5 | '_base_/ssd_vgg16_300.yml', 6 | '_base_/ssd_reader.yml', 7 | ] 8 | weights: output/ssd_vgg16_300_240e_voc/model_final 9 | 10 | EvalReader: 11 | batch_transforms: 12 | - PadBatch: {pad_gt: True} 13 | -------------------------------------------------------------------------------- /configs/ssd/ssdlite_ghostnet_320_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1700e.yml', 5 | '_base_/ssdlite_ghostnet_320.yml', 6 | '_base_/ssdlite320_reader.yml', 7 | ] 8 | weights: output/ssdlite_ghostnet_320_coco/model_final 9 | 10 | epoch: 1700 11 | 12 | LearningRate: 13 | base_lr: 0.2 14 | schedulers: 15 | - !CosineDecay 16 | max_epochs: 1700 17 | - !LinearWarmup 18 | start_factor: 0.33333 19 | steps: 2000 20 | 21 | OptimizerBuilder: 22 | optimizer: 23 | momentum: 0.9 24 | type: Momentum 25 | regularizer: 26 | factor: 0.0005 27 | type: L2 28 | -------------------------------------------------------------------------------- /configs/ssd/ssdlite_mobilenet_v1_300_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1700e.yml', 5 | '_base_/ssdlite_mobilenet_v1_300.yml', 6 | '_base_/ssdlite300_reader.yml', 7 | ] 8 | weights: 
output/ssdlite_mobilenet_v1_300_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/ssd/ssdlite_mobilenet_v3_large_320_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1700e.yml', 5 | '_base_/ssdlite_mobilenet_v3_large_320.yml', 6 | '_base_/ssdlite320_reader.yml', 7 | ] 8 | weights: output/ssdlite_mobilenet_v3_large_320_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/ssd/ssdlite_mobilenet_v3_small_320_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1700e.yml', 5 | '_base_/ssdlite_mobilenet_v3_small_320.yml', 6 | '_base_/ssdlite320_reader.yml', 7 | ] 8 | weights: output/ssdlite_mobilenet_v3_small_320_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/optimizer_10x.yml: -------------------------------------------------------------------------------- 1 | epoch: 120 2 | 3 | LearningRate: 4 | base_lr: 0.015 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [80, 110] 9 | - !LinearWarmup 10 | start_factor: 0.2 11 | steps: 500 12 | 13 | OptimizerBuilder: 14 | optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0004 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/optimizer_1x.yml: -------------------------------------------------------------------------------- 1 | epoch: 12 2 | 3 | LearningRate: 4 | base_lr: 0.015 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [8, 11] 9 | - !LinearWarmup 10 | start_factor: 0.2 11 | steps: 500 12 | 13 | OptimizerBuilder: 14 | 
optimizer: 15 | momentum: 0.9 16 | type: Momentum 17 | regularizer: 18 | factor: 0.0004 19 | type: L2 20 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/optimizer_20x.yml: -------------------------------------------------------------------------------- 1 | epoch: 240 2 | 3 | LearningRate: 4 | base_lr: 0.015 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: [160, 220] 9 | - !LinearWarmup 10 | start_factor: 0.2 11 | steps: 1000 12 | 13 | OptimizerBuilder: 14 | clip_grad_by_norm: 35 15 | optimizer: 16 | momentum: 0.9 17 | type: Momentum 18 | regularizer: 19 | factor: 0.0004 20 | type: L2 21 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/pafnet.yml: -------------------------------------------------------------------------------- 1 | architecture: TTFNet 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: true 5 | ema_decay: 0.9998 6 | 7 | TTFNet: 8 | backbone: ResNet 9 | neck: TTFFPN 10 | ttf_head: TTFHead 11 | post_process: BBoxPostProcess 12 | 13 | ResNet: 14 | depth: 50 15 | variant: d 16 | return_idx: [0, 1, 2, 3] 17 | freeze_at: -1 18 | norm_decay: 0. 19 | variant: d 20 | dcn_v2_stages: [1, 2, 3] 21 | 22 | TTFFPN: 23 | planes: [256, 128, 64] 24 | shortcut_num: [3, 2, 1] 25 | 26 | TTFHead: 27 | dcn_head: true 28 | hm_loss: 29 | name: CTFocalLoss 30 | loss_weight: 1. 31 | wh_loss: 32 | name: GIoULoss 33 | loss_weight: 5. 
34 | reduction: sum 35 | 36 | BBoxPostProcess: 37 | decode: 38 | name: TTFBox 39 | max_per_img: 100 40 | score_thresh: 0.01 41 | down_ratio: 4 42 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/pafnet_lite.yml: -------------------------------------------------------------------------------- 1 | architecture: TTFNet 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | TTFNet: 6 | backbone: MobileNetV3 7 | neck: TTFFPN 8 | ttf_head: TTFHead 9 | post_process: BBoxPostProcess 10 | 11 | MobileNetV3: 12 | scale: 1.0 13 | model_name: large 14 | feature_maps: [5, 8, 14, 17] 15 | with_extra_blocks: true 16 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75] 17 | conv_decay: 0.00001 18 | norm_decay: 0.0 19 | extra_block_filters: [] 20 | 21 | TTFFPN: 22 | planes: [96, 48, 24] 23 | shortcut_num: [2, 2, 1] 24 | lite_neck: true 25 | fusion_method: concat 26 | 27 | TTFHead: 28 | hm_head_planes: 48 29 | wh_head_planes: 24 30 | lite_head: true 31 | hm_loss: 32 | name: CTFocalLoss 33 | loss_weight: 1. 34 | wh_loss: 35 | name: GIoULoss 36 | loss_weight: 5. 
37 | reduction: sum 38 | 39 | BBoxPostProcess: 40 | decode: 41 | name: TTFBox 42 | max_per_img: 100 43 | score_thresh: 0.01 44 | down_ratio: 4 45 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/pafnet_lite_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - Cutmix: {alpha: 1.5, beta: 1.5} 6 | - RandomDistort: {} 7 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 8 | - RandomCrop: {aspect_ratio: NULL, cover_all_box: True} 9 | - RandomFlip: {} 10 | - GridMask: {upper_iter: 300000} 11 | batch_transforms: 12 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512], random_interp: True, keep_ratio: False} 13 | - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false} 14 | - Permute: {} 15 | - Gt2TTFTarget: {down_ratio: 4} 16 | - PadBatch: {pad_to_stride: 32} 17 | batch_size: 12 18 | shuffle: true 19 | drop_last: true 20 | use_shared_memory: true 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {interp: 1, target_size: [320, 320], keep_ratio: False} 26 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]} 27 | - Permute: {} 28 | batch_size: 1 29 | drop_last: false 30 | drop_empty: false 31 | 32 | TestReader: 33 | sample_transforms: 34 | - Decode: {} 35 | - Resize: {interp: 1, target_size: [320, 320], keep_ratio: False} 36 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]} 37 | - Permute: {} 38 | batch_size: 1 39 | drop_last: false 40 | drop_empty: false 41 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/pafnet_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | 
sample_transforms: 4 | - Decode: {} 5 | - Cutmix: {alpha: 1.5, beta: 1.5} 6 | - RandomDistort: {random_apply: false, random_channel: true} 7 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 8 | - RandomCrop: {aspect_ratio: NULL, cover_all_box: True} 9 | - RandomFlip: {prob: 0.5} 10 | batch_transforms: 11 | - BatchRandomResize: {target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672], keep_ratio: false} 12 | - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false} 13 | - Permute: {} 14 | - Gt2TTFTarget: {down_ratio: 4} 15 | - PadBatch: {pad_to_stride: 32} 16 | batch_size: 18 17 | shuffle: true 18 | drop_last: true 19 | use_shared_memory: true 20 | mixup_epoch: 100 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False} 26 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]} 27 | - Permute: {} 28 | batch_size: 1 29 | drop_last: false 30 | drop_empty: false 31 | 32 | TestReader: 33 | sample_transforms: 34 | - Decode: {} 35 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False} 36 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]} 37 | - Permute: {} 38 | batch_size: 1 39 | drop_last: false 40 | drop_empty: false 41 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/ttfnet_darknet53.yml: -------------------------------------------------------------------------------- 1 | architecture: TTFNet 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams 3 | 4 | TTFNet: 5 | backbone: DarkNet 6 | neck: TTFFPN 7 | ttf_head: TTFHead 8 | post_process: BBoxPostProcess 9 | 10 | DarkNet: 11 | depth: 53 12 | freeze_at: 0 13 | return_idx: [1, 2, 3, 4] 14 | norm_type: bn 15 | norm_decay: 0.0004 16 | 17 | TTFFPN: 18 | planes: [256, 128, 64] 19 | 
shortcut_num: [3, 2, 1] 20 | 21 | TTFHead: 22 | hm_loss: 23 | name: CTFocalLoss 24 | loss_weight: 1. 25 | wh_loss: 26 | name: GIoULoss 27 | loss_weight: 5. 28 | reduction: sum 29 | 30 | BBoxPostProcess: 31 | decode: 32 | name: TTFBox 33 | max_per_img: 100 34 | score_thresh: 0.01 35 | down_ratio: 4 36 | -------------------------------------------------------------------------------- /configs/ttfnet/_base_/ttfnet_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomFlip: {prob: 0.5} 6 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False} 7 | - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false} 8 | - Permute: {} 9 | batch_transforms: 10 | - Gt2TTFTarget: {down_ratio: 4} 11 | - PadBatch: {pad_to_stride: 32} 12 | batch_size: 12 13 | shuffle: true 14 | drop_last: true 15 | use_shared_memory: true 16 | 17 | EvalReader: 18 | sample_transforms: 19 | - Decode: {} 20 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False} 21 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]} 22 | - Permute: {} 23 | batch_size: 1 24 | drop_last: false 25 | drop_empty: false 26 | 27 | TestReader: 28 | sample_transforms: 29 | - Decode: {} 30 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False} 31 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]} 32 | - Permute: {} 33 | batch_size: 1 34 | drop_last: false 35 | drop_empty: false 36 | -------------------------------------------------------------------------------- /configs/ttfnet/pafnet_10x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_10x.yml', 5 | '_base_/pafnet.yml', 6 | '_base_/pafnet_reader.yml', 
7 | ] 8 | weights: output/pafnet_10x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/ttfnet/pafnet_lite_mobilenet_v3_20x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_20x.yml', 5 | '_base_/pafnet_lite.yml', 6 | '_base_/pafnet_lite_reader.yml', 7 | ] 8 | weights: output/pafnet_lite_mobilenet_v3_20x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/ttfnet/ttfnet_darknet53_1x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_1x.yml', 5 | '_base_/ttfnet_darknet53.yml', 6 | '_base_/ttfnet_reader.yml', 7 | ] 8 | weights: output/ttfnet_darknet53_1x_coco/model_final 9 | -------------------------------------------------------------------------------- /configs/vehicle/demo/001.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/001.jpeg -------------------------------------------------------------------------------- /configs/vehicle/demo/003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/003.png -------------------------------------------------------------------------------- /configs/vehicle/demo/004.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/004.png -------------------------------------------------------------------------------- /configs/vehicle/demo/005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/005.png -------------------------------------------------------------------------------- /configs/vehicle/vehicle.json: -------------------------------------------------------------------------------- 1 | { 2 | "images": [], 3 | "annotations": [], 4 | "categories": [ 5 | { 6 | "supercategory": "component", 7 | "id": 1, 8 | "name": "car" 9 | }, 10 | { 11 | "supercategory": "component", 12 | "id": 2, 13 | "name": "truck" 14 | }, 15 | { 16 | "supercategory": "component", 17 | "id": 3, 18 | "name": "bus" 19 | }, 20 | { 21 | "supercategory": "component", 22 | "id": 4, 23 | "name": "motorbike" 24 | }, 25 | { 26 | "supercategory": "component", 27 | "id": 5, 28 | "name": "tricycle" 29 | }, 30 | { 31 | "supercategory": "component", 32 | "id": 6, 33 | "name": "carplate" 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /configs/vehicle/vehicle_yolov3_darknet.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '../yolov3/_base_/optimizer_270e.yml', 5 | '../yolov3/_base_/yolov3_darknet53.yml', 6 | '../yolov3/_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams 11 | 12 | YOLOv3Head: 13 | anchors: [[8, 9], [10, 23], [19, 15], 14 | [23, 33], [40, 25], [54, 50], 15 | [101, 80], [139, 145], [253, 224]] 16 | 17 | BBoxPostProcess: 18 | 
nms: 19 | name: MultiClassNMS 20 | keep_top_k: 100 21 | score_threshold: 0.005 22 | nms_threshold: 0.45 23 | nms_top_k: 400 24 | 25 | num_classes: 6 26 | 27 | TrainDataset: 28 | !COCODataSet 29 | dataset_dir: dataset/vehicle 30 | anno_path: annotations/instances_train2017.json 31 | image_dir: train2017 32 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] 33 | 34 | EvalDataset: 35 | !COCODataSet 36 | dataset_dir: dataset/vehicle 37 | anno_path: annotations/instances_val2017.json 38 | image_dir: val2017 39 | 40 | TestDataset: 41 | !ImageFolder 42 | anno_path: configs/vehicle/vehicle.json 43 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/optimizer_270e.yml: -------------------------------------------------------------------------------- 1 | epoch: 270 2 | 3 | LearningRate: 4 | base_lr: 0.001 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 0.1 8 | milestones: 9 | - 216 10 | - 243 11 | - !LinearWarmup 12 | start_factor: 0. 
13 | steps: 4000 14 | 15 | OptimizerBuilder: 16 | optimizer: 17 | momentum: 0.9 18 | type: Momentum 19 | regularizer: 20 | factor: 0.0005 21 | type: L2 22 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_darknet53.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | YOLOv3: 6 | backbone: DarkNet 7 | neck: YOLOv3FPN 8 | yolo_head: YOLOv3Head 9 | post_process: BBoxPostProcess 10 | 11 | DarkNet: 12 | depth: 53 13 | return_idx: [2, 3, 4] 14 | 15 | # use default config 16 | # YOLOv3FPN: 17 | 18 | YOLOv3Head: 19 | anchors: [[10, 13], [16, 30], [33, 23], 20 | [30, 61], [62, 45], [59, 119], 21 | [116, 90], [156, 198], [373, 326]] 22 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 23 | loss: YOLOv3Loss 24 | 25 | YOLOv3Loss: 26 | ignore_thresh: 0.7 27 | downsample: [32, 16, 8] 28 | label_smooth: false 29 | 30 | BBoxPostProcess: 31 | decode: 32 | name: YOLOBox 33 | conf_thresh: 0.005 34 | downsample_ratio: 32 35 | clip_bbox: true 36 | nms: 37 | name: MultiClassNMS 38 | keep_top_k: 100 39 | score_threshold: 0.01 40 | nms_threshold: 0.45 41 | nms_top_k: 1000 42 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_mobilenet_v1.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | YOLOv3: 6 | backbone: MobileNet 7 | neck: YOLOv3FPN 8 | yolo_head: YOLOv3Head 9 | post_process: BBoxPostProcess 10 | 11 | MobileNet: 12 | scale: 1 13 | feature_maps: [4, 6, 13] 14 | with_extra_blocks: false 15 | extra_block_filters: [] 16 | 17 | # use default config 18 | # YOLOv3FPN: 19 | 20 | 
YOLOv3Head: 21 | anchors: [[10, 13], [16, 30], [33, 23], 22 | [30, 61], [62, 45], [59, 119], 23 | [116, 90], [156, 198], [373, 326]] 24 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 25 | loss: YOLOv3Loss 26 | 27 | YOLOv3Loss: 28 | ignore_thresh: 0.7 29 | downsample: [32, 16, 8] 30 | label_smooth: false 31 | 32 | BBoxPostProcess: 33 | decode: 34 | name: YOLOBox 35 | conf_thresh: 0.005 36 | downsample_ratio: 32 37 | clip_bbox: true 38 | nms: 39 | name: MultiClassNMS 40 | keep_top_k: 100 41 | score_threshold: 0.01 42 | nms_threshold: 0.45 43 | nms_top_k: 1000 44 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_mobilenet_v3_large.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | YOLOv3: 6 | backbone: MobileNetV3 7 | neck: YOLOv3FPN 8 | yolo_head: YOLOv3Head 9 | post_process: BBoxPostProcess 10 | 11 | MobileNetV3: 12 | model_name: large 13 | scale: 1. 
14 | with_extra_blocks: false 15 | extra_block_filters: [] 16 | feature_maps: [7, 13, 16] 17 | 18 | # use default config 19 | # YOLOv3FPN: 20 | 21 | YOLOv3Head: 22 | anchors: [[10, 13], [16, 30], [33, 23], 23 | [30, 61], [62, 45], [59, 119], 24 | [116, 90], [156, 198], [373, 326]] 25 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 26 | loss: YOLOv3Loss 27 | 28 | YOLOv3Loss: 29 | ignore_thresh: 0.7 30 | downsample: [32, 16, 8] 31 | label_smooth: false 32 | 33 | BBoxPostProcess: 34 | decode: 35 | name: YOLOBox 36 | conf_thresh: 0.005 37 | downsample_ratio: 32 38 | clip_bbox: true 39 | nms: 40 | name: MultiClassNMS 41 | keep_top_k: 100 42 | score_threshold: 0.01 43 | nms_threshold: 0.45 44 | nms_top_k: 1000 45 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_mobilenet_v3_small.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | YOLOv3: 6 | backbone: MobileNetV3 7 | neck: YOLOv3FPN 8 | yolo_head: YOLOv3Head 9 | post_process: BBoxPostProcess 10 | 11 | MobileNetV3: 12 | model_name: small 13 | scale: 1. 
14 | with_extra_blocks: false 15 | extra_block_filters: [] 16 | feature_maps: [4, 9, 12] 17 | 18 | # use default config 19 | # YOLOv3FPN: 20 | 21 | YOLOv3Head: 22 | anchors: [[10, 13], [16, 30], [33, 23], 23 | [30, 61], [62, 45], [59, 119], 24 | [116, 90], [156, 198], [373, 326]] 25 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 26 | loss: YOLOv3Loss 27 | 28 | YOLOv3Loss: 29 | ignore_thresh: 0.7 30 | downsample: [32, 16, 8] 31 | label_smooth: false 32 | 33 | BBoxPostProcess: 34 | decode: 35 | name: YOLOBox 36 | conf_thresh: 0.005 37 | downsample_ratio: 32 38 | clip_bbox: true 39 | nms: 40 | name: MultiClassNMS 41 | keep_top_k: 100 42 | score_threshold: 0.01 43 | nms_threshold: 0.45 44 | nms_top_k: 1000 45 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_r34.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | YOLOv3: 6 | backbone: ResNet 7 | neck: YOLOv3FPN 8 | yolo_head: YOLOv3Head 9 | post_process: BBoxPostProcess 10 | 11 | ResNet: 12 | depth: 34 13 | return_idx: [1, 2, 3] 14 | freeze_at: -1 15 | freeze_norm: false 16 | norm_decay: 0. 
17 | 18 | YOLOv3Head: 19 | anchors: [[10, 13], [16, 30], [33, 23], 20 | [30, 61], [62, 45], [59, 119], 21 | [116, 90], [156, 198], [373, 326]] 22 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 23 | loss: YOLOv3Loss 24 | 25 | YOLOv3Loss: 26 | ignore_thresh: 0.7 27 | downsample: [32, 16, 8] 28 | label_smooth: false 29 | 30 | BBoxPostProcess: 31 | decode: 32 | name: YOLOBox 33 | conf_thresh: 0.005 34 | downsample_ratio: 32 35 | clip_bbox: true 36 | nms: 37 | name: MultiClassNMS 38 | keep_top_k: 100 39 | score_threshold: 0.01 40 | nms_threshold: 0.45 41 | nms_top_k: 1000 42 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_r50vd_dcn.yml: -------------------------------------------------------------------------------- 1 | architecture: YOLOv3 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams 3 | norm_type: sync_bn 4 | 5 | YOLOv3: 6 | backbone: ResNet 7 | neck: YOLOv3FPN 8 | yolo_head: YOLOv3Head 9 | post_process: BBoxPostProcess 10 | 11 | ResNet: 12 | depth: 50 13 | variant: d 14 | return_idx: [1, 2, 3] 15 | dcn_v2_stages: [3] 16 | freeze_at: -1 17 | freeze_norm: false 18 | norm_decay: 0. 
19 | 20 | # YOLOv3FPN: 21 | 22 | YOLOv3Head: 23 | anchors: [[10, 13], [16, 30], [33, 23], 24 | [30, 61], [62, 45], [59, 119], 25 | [116, 90], [156, 198], [373, 326]] 26 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]] 27 | loss: YOLOv3Loss 28 | 29 | YOLOv3Loss: 30 | ignore_thresh: 0.7 31 | downsample: [32, 16, 8] 32 | label_smooth: false 33 | 34 | BBoxPostProcess: 35 | decode: 36 | name: YOLOBox 37 | conf_thresh: 0.005 38 | downsample_ratio: 32 39 | clip_bbox: true 40 | nms: 41 | name: MultiClassNMS 42 | keep_top_k: 100 43 | score_threshold: 0.01 44 | nms_threshold: 0.45 45 | nms_top_k: 1000 46 | -------------------------------------------------------------------------------- /configs/yolov3/_base_/yolov3_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 2 2 | TrainReader: 3 | inputs_def: 4 | num_max_boxes: 50 5 | sample_transforms: 6 | - Decode: {} 7 | - Mixup: {alpha: 1.5, beta: 1.5} 8 | - RandomDistort: {} 9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 10 | - RandomCrop: {} 11 | - RandomFlip: {} 12 | batch_transforms: 13 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False} 14 | - NormalizeBox: {} 15 | - PadBox: {num_max_boxes: 50} 16 | - BboxXYXY2XYWH: {} 17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 18 | - Permute: {} 19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]} 20 | batch_size: 8 21 | shuffle: true 22 | drop_last: true 23 | mixup_epoch: 250 24 | use_shared_memory: true 25 | 26 | EvalReader: 27 | inputs_def: 28 | num_max_boxes: 50 29 | sample_transforms: 30 | - Decode: {} 31 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2} 32 | - NormalizeImage: {mean: [0.485, 
0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 33 | - Permute: {} 34 | batch_size: 1 35 | drop_empty: false 36 | 37 | TestReader: 38 | inputs_def: 39 | image_shape: [3, 608, 608] 40 | sample_transforms: 41 | - Decode: {} 42 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2} 43 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True} 44 | - Permute: {} 45 | batch_size: 1 46 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_darknet53_270e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_darknet53.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_darknet53_270e_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_darknet53_270e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_darknet53.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_darknet53_270e_voc/model_final 11 | 12 | EvalReader: 13 | batch_transforms: 14 | - PadBatch: {pad_gt: True} 15 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_mobilenet_v1.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_mobilenet_v1_270e_coco/model_final 11 | 
-------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_mobilenet_v1.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_mobilenet_v1_270e_voc/model_final 11 | 12 | LearningRate: 13 | base_lr: 0.001 14 | schedulers: 15 | - !PiecewiseDecay 16 | gamma: 0.1 17 | milestones: 18 | - 216 19 | - 243 20 | - !LinearWarmup 21 | start_factor: 0. 22 | steps: 1000 23 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v1_roadsign.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/roadsign_voc.yml', 3 | '../runtime.yml', 4 | '_base_/yolov3_mobilenet_v1.yml', 5 | '_base_/yolov3_reader.yml', 6 | ] 7 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams 8 | weights: output/yolov3_mobilenet_v1_roadsign/model_final 9 | 10 | YOLOv3Loss: 11 | ignore_thresh: 0.7 12 | label_smooth: true 13 | 14 | snapshot_epoch: 2 15 | epoch: 40 16 | 17 | LearningRate: 18 | base_lr: 0.0001 19 | schedulers: 20 | - !PiecewiseDecay 21 | gamma: 0.1 22 | milestones: [32, 36] 23 | - !LinearWarmup 24 | start_factor: 0.3333333333333333 25 | steps: 100 26 | 27 | OptimizerBuilder: 28 | optimizer: 29 | momentum: 0.9 30 | type: Momentum 31 | regularizer: 32 | factor: 0.0005 33 | type: L2 34 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | 
'_base_/yolov3_mobilenet_v1.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams 11 | weights: output/yolov3_mobilenet_v1_ssld_270e_coco/model_final 12 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_mobilenet_v1.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams 11 | weights: output/yolov3_mobilenet_v1_ssld_270e_voc/model_final 12 | 13 | LearningRate: 14 | base_lr: 0.001 15 | schedulers: 16 | - !PiecewiseDecay 17 | gamma: 0.1 18 | milestones: 19 | - 216 20 | - 243 21 | - !LinearWarmup 22 | start_factor: 0. 
23 | steps: 1000 24 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_mobilenet_v3_large.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_mobilenet_v3_large_270e_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_mobilenet_v3_large.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_mobilenet_v3_large_270e_voc/model_final 11 | 12 | LearningRate: 13 | base_lr: 0.001 14 | schedulers: 15 | - !PiecewiseDecay 16 | gamma: 0.1 17 | milestones: 18 | - 216 19 | - 243 20 | - !LinearWarmup 21 | start_factor: 0. 
22 | steps: 1000 23 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/voc.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_mobilenet_v3_large.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams 11 | weights: output/yolov3_mobilenet_v3_large_ssld_270e_voc/model_final 12 | 13 | LearningRate: 14 | base_lr: 0.001 15 | schedulers: 16 | - !PiecewiseDecay 17 | gamma: 0.1 18 | milestones: 19 | - 216 20 | - 243 21 | - !LinearWarmup 22 | start_factor: 0. 23 | steps: 1000 24 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_r34_270e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_r34.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_r34_270e_coco/model_final 11 | -------------------------------------------------------------------------------- /configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_270e.yml', 5 | '_base_/yolov3_r50vd_dcn.yml', 6 | '_base_/yolov3_reader.yml', 7 | ] 8 | 9 | snapshot_epoch: 5 10 | weights: output/yolov3_r50vd_dcn_270e_coco/model_final 11 | -------------------------------------------------------------------------------- /dataset/voc/create_list.py: -------------------------------------------------------------------------------- 1 | # Copyright 
(c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import create_voc_list 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | create_voc_list(voc_path) 29 | -------------------------------------------------------------------------------- /dataset/voc/download_voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import download_dataset 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | download_dataset(download_path, 'voc') -------------------------------------------------------------------------------- /dataset/voc/label_list.txt: -------------------------------------------------------------------------------- 1 | person 2 | -------------------------------------------------------------------------------- /dataset/voc/test.txt: -------------------------------------------------------------------------------- 1 | images/05000143.jpg annotation/05000143.xml 2 | -------------------------------------------------------------------------------- /dataset/voc/trainval.txt: -------------------------------------------------------------------------------- 1 | images/05000143.jpg annotation/05000143.xml 2 | -------------------------------------------------------------------------------- /deploy/cpp/README.md: -------------------------------------------------------------------------------- 1 | # C++端预测部署 2 | 3 | ## 本教程结构 4 | 5 | [1.说明](#1说明) 6 | 7 | [2.主要目录和文件](#2主要目录和文件) 8 | 9 | [3.编译部署](#3编译部署) 10 | 11 | 12 | 13 | ## 1.说明 14 | 15 | 本目录为用户提供一个跨平台的`C++`部署方案,让用户通过`PaddleDetection`训练的模型导出后,即可基于本项目快速运行,也可以快速集成代码结合到自己的项目实际应用中去。 16 | 17 | 主要设计的目标包括以下四点: 18 | - 跨平台,支持在 `Windows` 和 `Linux` 完成编译、二次开发集成和部署运行 19 | - 可扩展性,支持用户针对新模型开发自己特殊的数据预处理等逻辑 20 | - 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像检测的特点对关键步骤进行了性能优化 21 | - 支持各种不同检测模型结构,包括`Yolov3`/`Faster_RCNN`/`SSD`等 22 | 23 | ## 2.主要目录和文件 24 | 25 | ```bash 26 | deploy/cpp 27 | | 28 | ├── src 29 | │ ├── main.cc # 集成代码示例, 程序入口 30 | │ ├── object_detector.cc # 模型加载和预测主要逻辑封装类实现 31 | │ └── preprocess_op.cc 
# 预处理相关主要逻辑封装实现 32 | | 33 | ├── include 34 | │ ├── config_parser.h # 导出模型配置yaml文件解析 35 | │ ├── object_detector.h # 模型加载和预测主要逻辑封装类 36 | │ └── preprocess_op.h # 预处理相关主要逻辑类封装 37 | | 38 | ├── docs 39 | │ ├── linux_build.md # Linux 编译指南 40 | │ └── windows_vs2019_build.md # Windows VS2019编译指南 41 | │ 42 | ├── build.sh # 编译命令脚本 43 | │ 44 | ├── CMakeLists.txt # cmake编译入口文件 45 | | 46 | ├── CMakeSettings.json # Visual Studio 2019 CMake项目编译设置 47 | │ 48 | └── cmake # 依赖的外部项目cmake(目前仅有yaml-cpp) 49 | 50 | ``` 51 | 52 | ## 3.编译部署 53 | 54 | ### 3.1 导出模型 55 | 请确认您已经基于`PaddleDetection`的[export_model.py](https://github.com/PaddlePaddle/PaddleDetection/blob/dygraph/tools/export_model.py)导出您的模型,并妥善保存到合适的位置。导出模型细节请参考 [导出模型教程](https://github.com/PaddlePaddle/PaddleDetection/tree/dygraph/deploy/EXPORT_MODEL.md)。 56 | 57 | 模型导出后, 目录结构如下(以`yolov3_darknet`为例): 58 | ``` 59 | yolov3_darknet # 模型目录 60 | ├── infer_cfg.yml # 模型配置信息 61 | ├── model.pdmodel # 模型文件 62 | ├── model.pdiparams.info #模型公用信息 63 | └── model.pdiparams # 参数文件 64 | ``` 65 | 66 | 预测时,该目录所在的路径会作为程序的输入参数。 67 | 68 | ### 3.2 编译 69 | 70 | 仅支持在`Windows`和`Linux`平台编译和使用 71 | - [Linux 编译指南](docs/linux_build.md) 72 | - [Windows编译指南(使用Visual Studio 2019)](docs/windows_vs2019_build.md) 73 | -------------------------------------------------------------------------------- /deploy/cpp/cmake/yaml-cpp.cmake: -------------------------------------------------------------------------------- 1 | 2 | find_package(Git REQUIRED) 3 | 4 | include(ExternalProject) 5 | 6 | message("${CMAKE_BUILD_TYPE}") 7 | 8 | ExternalProject_Add( 9 | ext-yaml-cpp 10 | URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip 11 | URL_MD5 9542d6de397d1fbd649ed468cb5850e6 12 | CMAKE_ARGS 13 | -DYAML_CPP_BUILD_TESTS=OFF 14 | -DYAML_CPP_BUILD_TOOLS=OFF 15 | -DYAML_CPP_INSTALL=OFF 16 | -DYAML_CPP_BUILD_CONTRIB=OFF 17 | -DMSVC_SHARED_RT=OFF 18 | -DBUILD_SHARED_LIBS=OFF 19 | -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} 20 | -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} 21 | 
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} 22 | -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} 23 | -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib 24 | -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib 25 | PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp" 26 | # Disable install step 27 | INSTALL_COMMAND "" 28 | LOG_DOWNLOAD ON 29 | LOG_BUILD 1 30 | ) 31 | -------------------------------------------------------------------------------- /deploy/imgs/input_shape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/deploy/imgs/input_shape.png -------------------------------------------------------------------------------- /deploy/serving/test_client.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
"""Minimal Paddle Serving client: run one image through a detection service.

Usage:
    python test_client.py <image_path>
"""
import sys

import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import (BGR2RGB, Div, File2Image,
                                       RCNNPostprocess, Resize, Sequential,
                                       Transpose)
import cv2


def main(argv=None):
    """Preprocess the image named on the command line and request a prediction.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. ``argv[1]`` must be
            the path of the image to send.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit("Usage: python test_client.py <image_path>")
    image_path = argv[1]

    # Read file -> RGB -> 608x608 -> scale to [0, 1] -> HWC to CHW.
    preprocess = Sequential([
        File2Image(), BGR2RGB(), Resize(
            (608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
                (2, 0, 1))
    ])
    postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])

    client = Client()
    client.load_client_config("serving_client/serving_client_conf.prototxt")
    client.connect(['127.0.0.1:9393'])

    im = preprocess(image_path)
    fetch_map = client.predict(
        feed={
            "image": im,
            # Spatial size of the preprocessed CHW image (drops the channel axis).
            "im_size": np.array(list(im.shape[1:])),
        },
        fetch=["multiclass_nms_0.tmp_0"])
    # The postprocessor needs the source image path alongside the fetched result.
    fetch_map["image"] = image_path
    postprocess(fetch_map)


if __name__ == "__main__":
    main()
/docs/images/model_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/model_figure.png -------------------------------------------------------------------------------- /docs/images/reader_figure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/reader_figure.png -------------------------------------------------------------------------------- /docs/images/road554.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/road554.png -------------------------------------------------------------------------------- /docs/images/ssld_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/ssld_model.png -------------------------------------------------------------------------------- /ppdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import (core, data, engine, modeling, model_zoo, optimizer, metrics, 16 | utils, slim) 17 | -------------------------------------------------------------------------------- /ppdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import config 16 | -------------------------------------------------------------------------------- /ppdet/core/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /ppdet/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import source 16 | from . import transform 17 | from . import reader 18 | 19 | from .source import * 20 | from .transform import * 21 | from .reader import * 22 | -------------------------------------------------------------------------------- /ppdet/data/source/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import coco 16 | from . import voc 17 | from . import widerface 18 | from . import category 19 | 20 | from .coco import * 21 | from .voc import * 22 | from .widerface import * 23 | from .category import * 24 | -------------------------------------------------------------------------------- /ppdet/data/transform/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import operators 16 | from . import batch_operators 17 | 18 | from .operators import * 19 | from .batch_operators import * 20 | 21 | __all__ = [] 22 | __all__ += registered_ops 23 | -------------------------------------------------------------------------------- /ppdet/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import trainer 16 | from .trainer import * 17 | 18 | from . import callbacks 19 | from .callbacks import * 20 | 21 | from . import env 22 | from .env import * 23 | 24 | __all__ = trainer.__all__ \ 25 | + callbacks.__all__ \ 26 | + env.__all__ 27 | -------------------------------------------------------------------------------- /ppdet/engine/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
def init_fleet_env():
    """Initialise Paddle fleet in collective mode."""
    fleet.init(is_collective=True)


def init_parallel_env():
    """Initialise Paddle's parallel environment.

    When both ``PADDLE_TRAINER_ID`` and ``PADDLE_TRAINERS_NUM`` are set in the
    environment, the Python and NumPy generators are first seeded with
    ``99 + trainer_id`` so that each trainer gets its own random stream.
    """
    env = os.environ
    dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
    if dist:
        trainer_id = int(env['PADDLE_TRAINER_ID'])
        local_seed = 99 + trainer_id
        random.seed(local_seed)
        np.random.seed(local_seed)

    paddle.distributed.init_parallel_env()


def set_random_seed(seed):
    """Seed every random generator used during training.

    Args:
        seed (int): Seed applied to Paddle, Python's ``random`` and NumPy.
    """
    # Paddle keeps its own generator; without this call parameter
    # initialisation and dropout stay non-deterministic even though the
    # Python and NumPy generators are seeded.
    paddle.seed(seed)
    random.seed(seed)
    np.random.seed(seed)
单元测试 32 | 单元测试`test.py`文件中,通过对比python实现的结果和测试自定义op结果。 33 | 34 | 由于python计算细节与cpp计算细节略有区别,误差区间设置为0.02。 35 | ``` 36 | python3.7 test.py 37 | ``` 38 | 提示`rbox_iou OP compute right!`说明OP测试通过。 39 | -------------------------------------------------------------------------------- /ppdet/ext_op/rbox_iou_op.cc: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | Licensed under the Apache License, Version 2.0 (the "License"); 3 | you may not use this file except in compliance with the License. 4 | You may obtain a copy of the License at 5 | 6 | http://www.apache.org/licenses/LICENSE-2.0 7 | 8 | Unless required by applicable law or agreed to in writing, software 9 | distributed under the License is distributed on an "AS IS" BASIS, 10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | See the License for the specific language governing permissions and 12 | limitations under the License. 
*/ 13 | 14 | #include "paddle/extension.h" 15 | 16 | #include 17 | 18 | std::vector RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2); 19 | std::vector RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2); 20 | 21 | 22 | #define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.") 23 | std::vector RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) { 24 | CHECK_INPUT_SAME(rbox1, rbox2); 25 | if (rbox1.place() == paddle::PlaceType::kCPU) { 26 | return RboxIouCPUForward(rbox1, rbox2); 27 | } 28 | else if (rbox1.place() == paddle::PlaceType::kGPU) { 29 | return RboxIouCUDAForward(rbox1, rbox2); 30 | } 31 | } 32 | 33 | std::vector> InferShape(std::vector rbox1_shape, std::vector rbox2_shape) { 34 | return {{rbox1_shape[0], rbox2_shape[0]}}; 35 | } 36 | 37 | std::vector InferDtype(paddle::DataType t1, paddle::DataType t2) { 38 | return {t1}; 39 | } 40 | 41 | PD_BUILD_OP(rbox_iou) 42 | .Inputs({"RBOX1", "RBOX2"}) 43 | .Outputs({"Output"}) 44 | .SetKernelFn(PD_KERNEL(RboxIouForward)) 45 | .SetInferShapeFn(PD_INFER_SHAPE(InferShape)) 46 | .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype)); 47 | -------------------------------------------------------------------------------- /ppdet/ext_op/setup.py: -------------------------------------------------------------------------------- 1 | from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup 2 | 3 | if __name__ == "__main__": 4 | setup( 5 | name='rbox_iou_ops', 6 | ext_modules=CUDAExtension(sources=['rbox_iou_op.cc', 'rbox_iou_op.cu'])) 7 | -------------------------------------------------------------------------------- /ppdet/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import metrics 16 | from .metrics import * 17 | 18 | __all__ = metrics.__all__ 19 | -------------------------------------------------------------------------------- /ppdet/model_zoo/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . 
import model_zoo 16 | from .model_zoo import * 17 | 18 | __all__ = model_zoo.__all__ 19 | -------------------------------------------------------------------------------- /ppdet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # OP docs may contains math formula which may cause 2 | # DeprecationWarning in string parsing 3 | import warnings 4 | warnings.filterwarnings( 5 | action='ignore', category=DeprecationWarning, module='ops') 6 | 7 | from . import ops 8 | from . import backbones 9 | from . import necks 10 | from . import proposal_generator 11 | from . import heads 12 | from . import losses 13 | from . import architectures 14 | from . import post_process 15 | from . import layers 16 | 17 | from .ops import * 18 | from .backbones import * 19 | from .necks import * 20 | from .proposal_generator import * 21 | from .heads import * 22 | from .losses import * 23 | from .architectures import * 24 | from .post_process import * 25 | from .layers import * 26 | -------------------------------------------------------------------------------- /ppdet/modeling/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | from . import meta_arch 9 | from . import faster_rcnn 10 | from . import mask_rcnn 11 | from . import yolo 12 | from . import cascade_rcnn 13 | from . import ssd 14 | from . import fcos 15 | from . import solov2 16 | from . import ttfnet 17 | from . 
@register
class BaseArch(nn.Layer):
    """Common base class for detection architectures.

    Subclasses implement ``get_loss`` and ``get_pred`` and may override
    ``model_arch``; ``forward`` stores the batch on ``self.inputs`` and then
    dispatches on ``self.training``.

    Args:
        data_format (str): ``'NCHW'`` (default) or ``'NHWC'``. With ``'NHWC'``
            the ``'image'`` entry is transposed with axes ``[0, 2, 3, 1]``
            before the model runs.
    """

    def __init__(self, data_format='NCHW'):
        super(BaseArch, self).__init__()
        self.data_format = data_format

    def forward(self, inputs):
        """Run the model on one batch.

        Args:
            inputs (dict): Batch data; must contain ``'image'`` when
                ``data_format`` is ``'NHWC'``.

        Returns:
            The result of ``get_loss()`` in training mode, otherwise the
            result of ``get_pred()``.
        """
        if self.data_format == 'NHWC':
            # Work on a shallow copy so the caller's dict is left untouched;
            # otherwise feeding the same batch twice would transpose the
            # image twice.
            inputs = dict(inputs)
            inputs['image'] = paddle.transpose(inputs['image'], [0, 2, 3, 1])
        self.inputs = inputs
        self.model_arch()

        if self.training:
            return self.get_loss()
        return self.get_pred()

    def build_inputs(self, data, input_def):
        """Pair each name in ``input_def`` with the item of ``data`` at the
        same position and return the resulting dict."""
        return {name: data[idx] for idx, name in enumerate(input_def)}

    def model_arch(self):
        """Hook called by ``forward`` after ``self.inputs`` is set; no-op here."""
        pass

    def get_loss(self):
        """Return the training output; must be implemented by subclasses."""
        raise NotImplementedError("Should implement get_loss method!")

    def get_pred(self):
        """Return the inference output; must be implemented by subclasses."""
        raise NotImplementedError("Should implement get_pred method!")
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import vgg 16 | from . import resnet 17 | from . import darknet 18 | from . import mobilenet_v1 19 | from . import mobilenet_v3 20 | from . import hrnet 21 | from . import blazenet 22 | from . import ghostnet 23 | from . import senet 24 | 25 | from .vgg import * 26 | from .resnet import * 27 | from .darknet import * 28 | from .mobilenet_v1 import * 29 | from .mobilenet_v3 import * 30 | from .hrnet import * 31 | from .blazenet import * 32 | from .ghostnet import * 33 | from .senet import * 34 | from .swin_transformer import * 35 | -------------------------------------------------------------------------------- /ppdet/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import bbox_head 16 | from . import mask_head 17 | from . import yolo_head 18 | from . import roi_extractor 19 | from . import ssd_head 20 | from . import fcos_head 21 | from . import solov2_head 22 | from . import ttf_head 23 | from . import cascade_head 24 | from . import face_head 25 | from . import s2anet_head 26 | 27 | from .bbox_head import * 28 | from .mask_head import * 29 | from .yolo_head import * 30 | from .roi_extractor import * 31 | from .ssd_head import * 32 | from .fcos_head import * 33 | from .solov2_head import * 34 | from .ttf_head import * 35 | from .cascade_head import * 36 | from .face_head import * 37 | from .s2anet_head import * 38 | -------------------------------------------------------------------------------- /ppdet/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import yolo_loss 16 | from . import iou_aware_loss 17 | from . import iou_loss 18 | from . import ssd_loss 19 | from . import fcos_loss 20 | from . import solov2_loss 21 | from . 
import ctfocal_loss 22 | 23 | from .yolo_loss import * 24 | from .iou_aware_loss import * 25 | from .iou_loss import * 26 | from .ssd_loss import * 27 | from .fcos_loss import * 28 | from .solov2_loss import * 29 | from .ctfocal_loss import * 30 | -------------------------------------------------------------------------------- /ppdet/modeling/necks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import fpn 16 | from . import yolo_fpn 17 | from . import hrfpn 18 | from . import ttf_fpn 19 | 20 | from .fpn import * 21 | from .yolo_fpn import * 22 | from .hrfpn import * 23 | from .ttf_fpn import * 24 | -------------------------------------------------------------------------------- /ppdet/modeling/proposal_generator/__init__.py: -------------------------------------------------------------------------------- 1 | from . import rpn_head 2 | from .rpn_head import * 3 | -------------------------------------------------------------------------------- /ppdet/modeling/shape_spec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
class ShapeSpec(
        namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
    """
    Lightweight, immutable description of a tensor's shape.

    Used as an auxiliary input/output of models to make up for the lack of
    shape inference among paddle modules. Every field is optional and
    defaults to None.

    Attributes:
        channels:
        height:
        width:
        stride:
    """

    def __new__(cls, channels=None, height=None, width=None, stride=None):
        # Forward by keyword so each field is bound explicitly.
        return super(ShapeSpec, cls).__new__(
            cls, channels=channels, height=height, width=width, stride=stride)
@register
@serializable
class QAT(object):
    """Quantization-aware-training wrapper built on paddleslim's dygraph QAT.

    Args:
        quant_config: Configuration passed to paddleslim's ``QAT`` as ``config``.
        print_model: When truthy, log the model before and after quantization.
    """

    def __init__(self, quant_config, print_model):
        super(QAT, self).__init__()
        self.quant_config = quant_config
        self.print_model = print_model

    def __call__(self, model):
        """Quantize ``model`` in place and return it."""
        # paddleslim is optional, so it is only imported when QAT is used.
        slim = try_import('paddleslim')
        self.quanter = slim.dygraph.quant.QAT(config=self.quant_config)

        verbose = self.print_model
        if verbose:
            logger.info("Model before quant:")
            logger.info(model)

        self.quanter.quantize(model)

        if verbose:
            logger.info("Quantized model:")
            logger.info(model)

        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """Save ``layer`` through the quanter created by ``__call__``."""
        self.quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)
-------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/detection.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/detection.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/iou_matching.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/iou_matching.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/kalman_filter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/kalman_filter.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/linear_assignment.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/linear_assignment.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/nn_matching.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/nn_matching.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/preprocessing.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/preprocessing.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/track.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/track.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/__pycache__/tracker.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/tracker.cpython-37.pyc -------------------------------------------------------------------------------- /sort/deep_sort/detection.py: -------------------------------------------------------------------------------- 1 | # vim: expandtab:ts=4:sw=4 
class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(x, y, w, h)`.
    confidence : float
        Detector confidence score.
    feature : array_like
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`,
        stored as float64.
    confidence : float
        Detector confidence score.
    feature : ndarray
        A feature vector that describes the object contained in this image,
        stored as float32.

    """

    def __init__(self, tlwh, confidence, feature):
        # `np.float` was only an alias of the builtin float (float64) and was
        # removed in NumPy 1.24; name the dtype explicitly instead.
        self.tlwh = np.asarray(tlwh, dtype=np.float64)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def get_all(self):
        """Return `[tlwh, feature, confidence]` (the stored objects, not
        copies).
        """
        return [self.tlwh, self.feature, self.confidence]
def get_object_position_new(img_roi, img, min_confidence, net):
    """Detect pedestrians in ``img`` and extract one feature per kept box.

    The module-level ``object_detector`` is run on ``img``. A detection is
    kept when its label is ``'pedestrian'``, its box is strictly taller than
    wide, and its confidence is at least ``min_confidence``. For every kept
    box the matching region is cut out of ``img_roi`` and all crops are
    passed through ``Extractor(net)``.

    Parameters
    ----------
    img_roi : array indexed as ``[row, column]``
        Image the crops are taken from.
    img : image passed to the detector.
    min_confidence : float
        Minimum detector confidence for a box to be returned.
    net : model handed to ``Extractor``.

    Returns
    -------
    tuple
        ``(boxes, confidences, features)`` where ``boxes`` is a list of
        ``(x, y, w, h)`` tuples, ``confidences`` the matching scores and
        ``features`` the extractor output with one entry per box, in the
        same order. When no box is kept ``features`` is an empty list.
    """
    boxes = []
    confidences = []
    img_crop = []
    result = object_detector.object_detection(
        images=[img], use_gpu=True, score_thresh=0.00, visualization=False)
    img_h, img_w = img_roi.shape[:2]

    for det in result[0]['data']:
        if det['label'] != 'pedestrian':
            continue
        x = det['left']
        y = det['top']
        x1 = det['right']
        y1 = det['bottom']
        w = x1 - x
        h = y1 - y
        # Keep only boxes that are taller than wide; testing h <= w instead
        # of h / w <= 1 avoids a ZeroDivisionError on zero-width boxes.
        if w <= 0 or h <= w:
            continue
        confidence = det['confidence']
        if not confidence >= min_confidence:
            continue

        # Clamp to the image so a negative coordinate does not wrap around
        # as a negative index, and drop boxes that fall outside the image.
        left, top = max(int(x), 0), max(int(y), 0)
        right, bottom = min(int(x1), img_w), min(int(y1), img_h)
        if right <= left or bottom <= top:
            continue

        # Crop, box and score are appended together so that the extracted
        # features stay aligned one-to-one with the returned boxes.
        img_crop.append(img_roi[top:bottom, left:right])
        boxes.append((x, y, w, h))
        confidences.append(confidence)

    if not img_crop:
        # The extractor builds its batch by concatenating per-crop tensors,
        # which needs at least one crop.
        return boxes, confidences, []

    time_ = time.time()
    extr = Extractor(net)
    feature = extr(img_crop)
    time_extr = time.time()
    print('extr_time:')
    print(time_extr-time_)
    return boxes, confidences, feature
# For every sequence folder under img_root, write <img_root>/<sequence>.txt
# listing one image path per line: <img_root>/<sequence>/img1/000001.jpg, ...
# The number of lines equals the number of entries in the img1 folder
# (presumably frames are named 000001.jpg onwards without gaps - verify
# against the dataset).
for seq_name in os.listdir(img_root):
    img_path = '{}/{}/img1'.format(img_root, seq_name)
    # The list files are written into img_root itself, so a second run also
    # sees the generated .txt files here; skip anything without an img1
    # directory instead of crashing in os.listdir.
    if not os.path.isdir(img_path):
        continue
    length = len(os.listdir(img_path))
    with open('{}/{}.txt'.format(img_root, seq_name), 'w') as f:
        for frame_idx in range(1, length + 1):
            f.write('{}/{}.jpg\n'.format(img_path, '%06d' % frame_idx))