├── .gitignore
├── .idea
├── .gitignore
├── .name
├── Swin-Transformer-Object-Detection-PaddlePaddle.iml
├── deployment.xml
├── inspectionProfiles
│ ├── Project_Default.xml
│ └── profiles_settings.xml
├── misc.xml
└── modules.xml
├── LICENSE
├── README.md
├── configs
├── cascade_rcnn
│ ├── README.md
│ ├── _base_
│ │ ├── cascade_fpn_reader.yml
│ │ ├── cascade_mask_fpn_reader.yml
│ │ ├── cascade_mask_rcnn_r50_fpn.yml
│ │ ├── cascade_rcnn_r50_fpn.yml
│ │ └── optimizer_1x.yml
│ ├── cascade_mask_rcnn_r50_fpn_1x_coco.yml
│ ├── cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
│ ├── cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
│ ├── cascade_rcnn_r50_fpn_1x_coco.yml
│ ├── cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml
│ └── cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml
├── datasets
│ ├── coco_detection.yml
│ ├── coco_instance.yml
│ ├── dota.yml
│ ├── roadsign_voc.yml
│ ├── voc.yml
│ └── wider_face.yml
├── dcn
│ ├── README.md
│ ├── cascade_rcnn_dcn_r50_fpn_1x_coco.yml
│ ├── cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
│ ├── faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml
│ ├── faster_rcnn_dcn_r50_fpn_1x_coco.yml
│ ├── faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml
│ ├── faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml
│ ├── faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
│ ├── mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml
│ ├── mask_rcnn_dcn_r50_fpn_1x_coco.yml
│ ├── mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml
│ └── mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml
├── dota
│ ├── README.md
│ ├── _base_
│ │ ├── s2anet.yml
│ │ ├── s2anet_optimizer_1x.yml
│ │ └── s2anet_reader.yml
│ ├── s2anet_1x_dota.yml
│ └── s2anet_conv_1x_dota.yml
├── face_detection
│ ├── README.md
│ ├── _base_
│ │ ├── blazeface.yml
│ │ ├── face_reader.yml
│ │ └── optimizer_1000e.yml
│ └── blazeface_1000e.yml
├── faster_rcnn
│ ├── README.md
│ ├── _base_
│ │ ├── faster_fpn_reader.yml
│ │ ├── faster_rcnn_r50.yml
│ │ ├── faster_rcnn_r50_fpn.yml
│ │ ├── faster_reader.yml
│ │ └── optimizer_1x.yml
│ ├── faster_rcnn_r101_1x_coco.yml
│ ├── faster_rcnn_r101_fpn_1x_coco.yml
│ ├── faster_rcnn_r101_fpn_2x_coco.yml
│ ├── faster_rcnn_r101_vd_fpn_1x_coco.yml
│ ├── faster_rcnn_r101_vd_fpn_2x_coco.yml
│ ├── faster_rcnn_r34_fpn_1x_coco.yml
│ ├── faster_rcnn_r34_vd_fpn_1x_coco.yml
│ ├── faster_rcnn_r50_1x_coco.yml
│ ├── faster_rcnn_r50_fpn_1x_coco.yml
│ ├── faster_rcnn_r50_fpn_2x_coco.yml
│ ├── faster_rcnn_r50_vd_1x_coco.yml
│ ├── faster_rcnn_r50_vd_fpn_1x_coco.yml
│ ├── faster_rcnn_r50_vd_fpn_2x_coco.yml
│ ├── faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml
│ ├── faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml
│ ├── faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml
│ └── faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml
├── fcos
│ ├── README.md
│ ├── _base_
│ │ ├── fcos_r50_fpn.yml
│ │ ├── fcos_reader.yml
│ │ └── optimizer_1x.yml
│ ├── fcos_dcn_r50_fpn_1x_coco.yml
│ ├── fcos_r50_fpn_1x_coco.yml
│ └── fcos_r50_fpn_multiscale_2x_coco.yml
├── gn
│ ├── README.md
│ ├── cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml
│ ├── cascade_rcnn_r50_fpn_gn_2x_coco.yml
│ ├── faster_rcnn_r50_fpn_gn_2x_coco.yml
│ └── mask_rcnn_r50_fpn_gn_2x_coco.yml
├── hrnet
│ ├── README.md
│ ├── _base_
│ │ └── faster_rcnn_hrnetv2p_w18.yml
│ ├── faster_rcnn_hrnetv2p_w18_1x_coco.yml
│ └── faster_rcnn_hrnetv2p_w18_2x_coco.yml
├── mask_rcnn
│ ├── README.md
│ ├── _base_
│ │ ├── mask_fpn_reader.yml
│ │ ├── mask_rcnn_r50.yml
│ │ ├── mask_rcnn_r50_fpn.yml
│ │ ├── mask_reader.yml
│ │ └── optimizer_1x.yml
│ ├── mask_rcnn_r101_fpn_1x_coco.yml
│ ├── mask_rcnn_r101_vd_fpn_1x_coco.yml
│ ├── mask_rcnn_r50_1x_coco.yml
│ ├── mask_rcnn_r50_2x_coco.yml
│ ├── mask_rcnn_r50_fpn_1x_coco.yml
│ ├── mask_rcnn_r50_fpn_2x_coco.yml
│ ├── mask_rcnn_r50_vd_fpn_1x_coco.yml
│ ├── mask_rcnn_r50_vd_fpn_2x_coco.yml
│ ├── mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml
│ ├── mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml
│ ├── mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml
│ └── mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml
├── pedestrian
│ ├── README.md
│ ├── README_cn.md
│ ├── demo
│ │ ├── 001.png
│ │ ├── 002.png
│ │ ├── 003.png
│ │ └── 004.png
│ ├── pedestrian.json
│ └── pedestrian_yolov3_darknet.yml
├── ppyolo
│ ├── README.md
│ ├── README_cn.md
│ ├── _base_
│ │ ├── optimizer_1x.yml
│ │ ├── optimizer_2x.yml
│ │ ├── optimizer_365e.yml
│ │ ├── optimizer_650e.yml
│ │ ├── ppyolo_mbv3_large.yml
│ │ ├── ppyolo_mbv3_small.yml
│ │ ├── ppyolo_r18vd.yml
│ │ ├── ppyolo_r50vd_dcn.yml
│ │ ├── ppyolo_reader.yml
│ │ ├── ppyolo_tiny.yml
│ │ ├── ppyolo_tiny_reader.yml
│ │ ├── ppyolov2_r50vd_dcn.yml
│ │ └── ppyolov2_reader.yml
│ ├── ppyolo_mbv3_large_coco.yml
│ ├── ppyolo_mbv3_small_coco.yml
│ ├── ppyolo_r18vd_coco.yml
│ ├── ppyolo_r50vd_dcn_1x_coco.yml
│ ├── ppyolo_r50vd_dcn_1x_minicoco.yml
│ ├── ppyolo_r50vd_dcn_2x_coco.yml
│ ├── ppyolo_r50vd_dcn_voc.yml
│ ├── ppyolo_test.yml
│ ├── ppyolo_tiny_650e_coco.yml
│ ├── ppyolov2_r101vd_dcn_365e_coco.yml
│ └── ppyolov2_r50vd_dcn_365e_coco.yml
├── rcnn_enhance
│ ├── README.md
│ ├── _base_
│ │ ├── faster_rcnn_enhance.yml
│ │ ├── faster_rcnn_enhance_reader.yml
│ │ └── optimizer_3x.yml
│ └── faster_rcnn_enhance_3x_coco.yml
├── runtime.yml
├── slim
│ ├── README.md
│ ├── distill
│ │ ├── README.md
│ │ └── yolov3_mobilenet_v1_coco_distill.yml
│ ├── extensions
│ │ └── yolov3_mobilenet_v1_coco_distill_prune.yml
│ ├── prune
│ │ ├── yolov3_prune_fpgm.yml
│ │ └── yolov3_prune_l1_norm.yml
│ └── quant
│ │ ├── mask_rcnn_r50_fpn_1x_qat.yml
│ │ ├── ssd_mobilenet_v1_qat.yml
│ │ ├── yolov3_darknet_qat.yml
│ │ ├── yolov3_mobilenet_v1_qat.yml
│ │ └── yolov3_mobilenet_v3_qat.yml
├── solov2
│ ├── README.md
│ ├── _base_
│ │ ├── optimizer_1x.yml
│ │ ├── solov2_r50_fpn.yml
│ │ └── solov2_reader.yml
│ ├── solov2_r50_fpn_1x_coco.yml
│ └── solov2_r50_fpn_3x_coco.yml
├── ssd
│ ├── README.md
│ ├── _base_
│ │ ├── optimizer_120e.yml
│ │ ├── optimizer_1700e.yml
│ │ ├── optimizer_240e.yml
│ │ ├── ssd_mobilenet_reader.yml
│ │ ├── ssd_mobilenet_v1_300.yml
│ │ ├── ssd_reader.yml
│ │ ├── ssd_vgg16_300.yml
│ │ ├── ssdlite300_reader.yml
│ │ ├── ssdlite320_reader.yml
│ │ ├── ssdlite_ghostnet_320.yml
│ │ ├── ssdlite_mobilenet_v1_300.yml
│ │ ├── ssdlite_mobilenet_v3_large_320.yml
│ │ └── ssdlite_mobilenet_v3_small_320.yml
│ ├── ssd_mobilenet_v1_300_120e_voc.yml
│ ├── ssd_vgg16_300_240e_voc.yml
│ ├── ssdlite_ghostnet_320_coco.yml
│ ├── ssdlite_mobilenet_v1_300_coco.yml
│ ├── ssdlite_mobilenet_v3_large_320_coco.yml
│ └── ssdlite_mobilenet_v3_small_320_coco.yml
├── ttfnet
│ ├── README.md
│ ├── _base_
│ │ ├── optimizer_10x.yml
│ │ ├── optimizer_1x.yml
│ │ ├── optimizer_20x.yml
│ │ ├── pafnet.yml
│ │ ├── pafnet_lite.yml
│ │ ├── pafnet_lite_reader.yml
│ │ ├── pafnet_reader.yml
│ │ ├── ttfnet_darknet53.yml
│ │ └── ttfnet_reader.yml
│ ├── pafnet_10x_coco.yml
│ ├── pafnet_lite_mobilenet_v3_20x_coco.yml
│ └── ttfnet_darknet53_1x_coco.yml
├── vehicle
│ ├── README.md
│ ├── README_cn.md
│ ├── demo
│ │ ├── 001.jpeg
│ │ ├── 003.png
│ │ ├── 004.png
│ │ └── 005.png
│ ├── vehicle.json
│ └── vehicle_yolov3_darknet.yml
└── yolov3
│ ├── README.md
│ ├── _base_
│ ├── optimizer_270e.yml
│ ├── yolov3_darknet53.yml
│ ├── yolov3_mobilenet_v1.yml
│ ├── yolov3_mobilenet_v3_large.yml
│ ├── yolov3_mobilenet_v3_small.yml
│ ├── yolov3_r34.yml
│ ├── yolov3_r50vd_dcn.yml
│ └── yolov3_reader.yml
│ ├── yolov3_darknet53_270e_coco.yml
│ ├── yolov3_darknet53_270e_voc.yml
│ ├── yolov3_mobilenet_v1_270e_coco.yml
│ ├── yolov3_mobilenet_v1_270e_voc.yml
│ ├── yolov3_mobilenet_v1_roadsign.yml
│ ├── yolov3_mobilenet_v1_ssld_270e_coco.yml
│ ├── yolov3_mobilenet_v1_ssld_270e_voc.yml
│ ├── yolov3_mobilenet_v3_large_270e_coco.yml
│ ├── yolov3_mobilenet_v3_large_270e_voc.yml
│ ├── yolov3_mobilenet_v3_large_ssld_270e_voc.yml
│ ├── yolov3_r34_270e_coco.yml
│ └── yolov3_r50vd_dcn_270e_coco.yml
├── dataset
└── voc
│ ├── create_list.py
│ ├── download_voc.py
│ ├── label_list.txt
│ ├── test.txt
│ └── trainval.txt
├── demo.py
├── deploy
├── BENCHMARK_INFER.md
├── EXPORT_MODEL.md
├── README.md
├── TENSOR_RT.md
├── cpp
│ ├── CMakeLists.txt
│ ├── README.md
│ ├── cmake
│ │ └── yaml-cpp.cmake
│ ├── docs
│ │ ├── Jetson_build.md
│ │ ├── linux_build.md
│ │ └── windows_vs2019_build.md
│ ├── include
│ │ ├── config_parser.h
│ │ ├── object_detector.h
│ │ └── preprocess_op.h
│ ├── scripts
│ │ └── build.sh
│ └── src
│ │ ├── main.cc
│ │ ├── object_detector.cc
│ │ └── preprocess_op.cc
├── imgs
│ └── input_shape.png
├── python
│ ├── README.md
│ ├── infer.py
│ ├── preprocess.py
│ └── visualize.py
└── serving
│ ├── README.md
│ └── test_client.py
├── docs
├── CHANGELOG.md
├── MODEL_ZOO_cn.md
├── advanced_tutorials
│ ├── MODEL_TECHNICAL.md
│ └── READER.md
├── feature_models
│ ├── SSLD_PRETRAINED_MODEL.md
│ └── SSLD_PRETRAINED_MODEL_en.md
├── images
│ ├── 000000014439.jpg
│ ├── 12_Group_Group_12_Group_Group_12_935.jpg
│ ├── fps_map.png
│ ├── model_figure.png
│ ├── reader_figure.png
│ ├── road554.png
│ └── ssld_model.png
└── tutorials
│ ├── GETTING_STARTED.md
│ ├── GETTING_STARTED_cn.md
│ ├── INSTALL.md
│ ├── INSTALL_cn.md
│ ├── PrepareDataSet.md
│ ├── QUICK_STARTED.md
│ ├── QUICK_STARTED_cn.md
│ └── config_annotation
│ ├── faster_rcnn_r50_fpn_1x_coco_annotation.md
│ └── ppyolo_r50vd_dcn_1x_coco_annotation.md
├── faster_rcnn_swin_ti.yaml
├── ppdet
├── __init__.py
├── core
│ ├── __init__.py
│ ├── config
│ │ ├── __init__.py
│ │ ├── schema.py
│ │ └── yaml_helpers.py
│ └── workspace.py
├── data
│ ├── __init__.py
│ ├── reader.py
│ ├── shm_utils.py
│ ├── source
│ │ ├── __init__.py
│ │ ├── category.py
│ │ ├── coco.py
│ │ ├── dataset.py
│ │ ├── voc.py
│ │ └── widerface.py
│ └── transform
│ │ ├── __init__.py
│ │ ├── autoaugment_utils.py
│ │ ├── batch_operators.py
│ │ ├── gridmask_utils.py
│ │ ├── op_helper.py
│ │ └── operators.py
├── engine
│ ├── __init__.py
│ ├── callbacks.py
│ ├── env.py
│ ├── export_utils.py
│ └── trainer.py
├── ext_op
│ ├── README.md
│ ├── rbox_iou_op.cc
│ ├── rbox_iou_op.cu
│ ├── setup.py
│ └── test.py
├── metrics
│ ├── __init__.py
│ ├── coco_utils.py
│ ├── json_results.py
│ ├── map_utils.py
│ ├── metrics.py
│ └── widerface_utils.py
├── model_zoo
│ ├── __init__.py
│ └── model_zoo.py
├── modeling
│ ├── __init__.py
│ ├── architectures
│ │ ├── __init__.py
│ │ ├── cascade_rcnn.py
│ │ ├── faster_rcnn.py
│ │ ├── fcos.py
│ │ ├── mask_rcnn.py
│ │ ├── meta_arch.py
│ │ ├── s2anet.py
│ │ ├── solov2.py
│ │ ├── ssd.py
│ │ ├── ttfnet.py
│ │ └── yolo.py
│ ├── backbones
│ │ ├── __init__.py
│ │ ├── blazenet.py
│ │ ├── darknet.py
│ │ ├── ghostnet.py
│ │ ├── hrnet.py
│ │ ├── mobilenet_v1.py
│ │ ├── mobilenet_v3.py
│ │ ├── name_adapter.py
│ │ ├── resnet.py
│ │ ├── senet.py
│ │ ├── swin_transformer.py
│ │ └── vgg.py
│ ├── bbox_utils.py
│ ├── heads
│ │ ├── __init__.py
│ │ ├── bbox_head.py
│ │ ├── cascade_head.py
│ │ ├── face_head.py
│ │ ├── fcos_head.py
│ │ ├── mask_head.py
│ │ ├── roi_extractor.py
│ │ ├── s2anet_head.py
│ │ ├── solov2_head.py
│ │ ├── ssd_head.py
│ │ ├── ttf_head.py
│ │ └── yolo_head.py
│ ├── layers.py
│ ├── losses
│ │ ├── __init__.py
│ │ ├── ctfocal_loss.py
│ │ ├── fcos_loss.py
│ │ ├── iou_aware_loss.py
│ │ ├── iou_loss.py
│ │ ├── solov2_loss.py
│ │ ├── ssd_loss.py
│ │ └── yolo_loss.py
│ ├── necks
│ │ ├── __init__.py
│ │ ├── fpn.py
│ │ ├── hrfpn.py
│ │ ├── ttf_fpn.py
│ │ └── yolo_fpn.py
│ ├── ops.py
│ ├── post_process.py
│ ├── proposal_generator
│ │ ├── __init__.py
│ │ ├── anchor_generator.py
│ │ ├── proposal_generator.py
│ │ ├── rpn_head.py
│ │ ├── target.py
│ │ └── target_layer.py
│ ├── shape_spec.py
│ └── tests
│ │ ├── __init__.py
│ │ ├── test_architectures.py
│ │ ├── test_base.py
│ │ ├── test_ops.py
│ │ ├── test_transfrom.py
│ │ └── test_yolov3_loss.py
├── optimizer.py
├── slim
│ ├── __init__.py
│ ├── distill.py
│ ├── prune.py
│ └── quant.py
└── utils
│ ├── __init__.py
│ ├── check.py
│ ├── checkpoint.py
│ ├── cli.py
│ ├── colormap.py
│ ├── download.py
│ ├── logger.py
│ ├── stats.py
│ ├── visualizer.py
│ └── voc_utils.py
├── requirements.txt
├── setup.py
├── sort
├── Detector_ppyolo.py
├── Final_test
│ ├── 0.txt
│ ├── 1.txt
│ ├── 2.txt
│ └── 3.txt
├── deep_sort
│ ├── __init__.py
│ ├── __init__.pyc
│ ├── __pycache__
│ │ ├── __init__.cpython-37.pyc
│ │ ├── detection.cpython-37.pyc
│ │ ├── iou_matching.cpython-37.pyc
│ │ ├── kalman_filter.cpython-37.pyc
│ │ ├── linear_assignment.cpython-37.pyc
│ │ ├── nn_matching.cpython-37.pyc
│ │ ├── preprocessing.cpython-37.pyc
│ │ ├── track.cpython-37.pyc
│ │ └── tracker.cpython-37.pyc
│ ├── detection.py
│ ├── detection.pyc
│ ├── iou_matching.py
│ ├── iou_matching.pyc
│ ├── kalman_filter.py
│ ├── kalman_filter.pyc
│ ├── linear_assignment.py
│ ├── linear_assignment.pyc
│ ├── nn_matching.py
│ ├── nn_matching.pyc
│ ├── preprocessing.py
│ ├── preprocessing.pyc
│ ├── track.py
│ ├── track.pyc
│ ├── tracker.py
│ └── tracker.pyc
├── detector_new.py
├── extractor_new.py
└── generate_img_path.py
├── swin_transformer_paddle.py
├── swin_transformer_pytorch.py
├── tools
├── anchor_cluster.py
├── eval.py
├── export_model.py
├── infer.py
├── train.py
└── x2coco.py
└── yolov3_swin_ti.yaml
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | swin_transformer_paddle.py
--------------------------------------------------------------------------------
/.idea/Swin-Transformer-Object-Detection-PaddlePaddle.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/deployment.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/_base_/cascade_fpn_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: 32, pad_gt: true}
11 | batch_size: 1
12 | shuffle: true
13 | drop_last: true
14 |
15 |
16 | EvalReader:
17 | sample_transforms:
18 | - Decode: {}
19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
21 | - Permute: {}
22 | batch_transforms:
23 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
24 | batch_size: 1
25 | shuffle: false
26 | drop_last: false
27 | drop_empty: false
28 |
29 |
30 | TestReader:
31 | sample_transforms:
32 | - Decode: {}
33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
35 | - Permute: {}
36 | batch_transforms:
37 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
38 | batch_size: 1
39 | shuffle: false
40 | drop_last: false
41 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: 32, pad_gt: true}
11 | batch_size: 1
12 | shuffle: true
13 | drop_last: true
14 |
15 |
16 | EvalReader:
17 | sample_transforms:
18 | - Decode: {}
19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
21 | - Permute: {}
22 | batch_transforms:
23 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
24 | batch_size: 1
25 | shuffle: false
26 | drop_last: false
27 | drop_empty: false
28 |
29 |
30 | TestReader:
31 | sample_transforms:
32 | - Decode: {}
33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
35 | - Permute: {}
36 | batch_transforms:
37 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
38 | batch_size: 1
39 | shuffle: false
40 | drop_last: false
41 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml:
--------------------------------------------------------------------------------
1 | architecture: CascadeRCNN
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
3 |
4 |
5 | CascadeRCNN:
6 | backbone: ResNet
7 | neck: FPN
8 | rpn_head: RPNHead
9 | bbox_head: CascadeHead
10 | # post process
11 | bbox_post_process: BBoxPostProcess
12 |
13 | ResNet:
14 | # index 0 stands for res2
15 | depth: 50
16 | norm_type: bn
17 | freeze_at: 0
18 | return_idx: [0,1,2,3]
19 | num_stages: 4
20 |
21 | FPN:
22 | out_channel: 256
23 |
24 | RPNHead:
25 | anchor_generator:
26 | aspect_ratios: [0.5, 1.0, 2.0]
27 | anchor_sizes: [[32], [64], [128], [256], [512]]
28 | strides: [4, 8, 16, 32, 64]
29 | rpn_target_assign:
30 | batch_size_per_im: 256
31 | fg_fraction: 0.5
32 | negative_overlap: 0.3
33 | positive_overlap: 0.7
34 | use_random: True
35 | train_proposal:
36 | min_size: 0.0
37 | nms_thresh: 0.7
38 | pre_nms_top_n: 2000
39 | post_nms_top_n: 2000
40 | topk_after_collect: True
41 | test_proposal:
42 | min_size: 0.0
43 | nms_thresh: 0.7
44 | pre_nms_top_n: 1000
45 | post_nms_top_n: 1000
46 |
47 |
48 | CascadeHead:
49 | head: CascadeTwoFCHead
50 | roi_extractor:
51 | resolution: 7
52 | sampling_ratio: 0
53 | aligned: True
54 | bbox_assigner: BBoxAssigner
55 |
56 | BBoxAssigner:
57 | batch_size_per_im: 512
58 | bg_thresh: 0.5
59 | fg_thresh: 0.5
60 | fg_fraction: 0.25
61 | cascade_iou: [0.5, 0.6, 0.7]
62 | use_random: True
63 |
64 | CascadeTwoFCHead:
65 | out_channel: 1024
66 |
67 | BBoxPostProcess:
68 | decode:
69 | name: RCNNBox
70 | prior_box_var: [30.0, 30.0, 15.0, 15.0]
71 | nms:
72 | name: MultiClassNMS
73 | keep_top_k: 100
74 | score_threshold: 0.05
75 | nms_threshold: 0.5
76 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [8, 11]
9 | - !LinearWarmup
10 | start_factor: 0.001
11 | steps: 1000
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/cascade_mask_rcnn_r50_fpn.yml',
6 | '_base_/cascade_mask_fpn_reader.yml',
7 | ]
8 | weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/cascade_mask_rcnn_r50_fpn.yml',
6 | '_base_/cascade_mask_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/cascade_mask_rcnn_r50_fpn.yml',
6 | '_base_/cascade_mask_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
20 | epoch: 24
21 | LearningRate:
22 | base_lr: 0.01
23 | schedulers:
24 | - !PiecewiseDecay
25 | gamma: 0.1
26 | milestones: [12, 22]
27 | - !LinearWarmup
28 | start_factor: 0.1
29 | steps: 1000
30 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/cascade_rcnn_r50_fpn.yml',
6 | '_base_/cascade_fpn_reader.yml',
7 | ]
8 | weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/cascade_rcnn_r50_fpn.yml',
6 | '_base_/cascade_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
--------------------------------------------------------------------------------
/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/cascade_rcnn_r50_fpn.yml',
6 | '_base_/cascade_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
20 | epoch: 24
21 | LearningRate:
22 | base_lr: 0.01
23 | schedulers:
24 | - !PiecewiseDecay
25 | gamma: 0.1
26 | milestones: [12, 22]
27 | - !LinearWarmup
28 | start_factor: 0.1
29 | steps: 1000
30 |
--------------------------------------------------------------------------------
/configs/datasets/coco_detection.yml:
--------------------------------------------------------------------------------
1 | metric: COCO
2 | num_classes: 80
3 |
4 | TrainDataset:
5 | !COCODataSet
6 | image_dir: train2017
7 | anno_path: annotations/instances_train2017.json
8 | dataset_dir: dataset/coco
9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
10 |
11 | EvalDataset:
12 | !COCODataSet
13 | image_dir: val2017
14 | anno_path: annotations/instances_val2017.json
15 | dataset_dir: dataset/coco
16 |
17 | TestDataset:
18 | !ImageFolder
19 | anno_path: annotations/instances_val2017.json
20 |
--------------------------------------------------------------------------------
/configs/datasets/coco_instance.yml:
--------------------------------------------------------------------------------
1 | metric: COCO
2 | num_classes: 80
3 |
4 | TrainDataset:
5 | !COCODataSet
6 | image_dir: train2017
7 | anno_path: annotations/instances_train2017.json
8 | dataset_dir: dataset/coco
9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
10 |
11 | EvalDataset:
12 | !COCODataSet
13 | image_dir: val2017
14 | anno_path: annotations/instances_val2017.json
15 | dataset_dir: dataset/coco
16 |
17 | TestDataset:
18 | !ImageFolder
19 | anno_path: annotations/instances_val2017.json
20 |
--------------------------------------------------------------------------------
/configs/datasets/dota.yml:
--------------------------------------------------------------------------------
1 | metric: COCO
2 | num_classes: 15
3 |
4 | TrainDataset:
5 | !COCODataSet
6 | image_dir: trainval_split/images
7 | anno_path: trainval_split/s2anet_trainval_paddle_coco.json
8 | dataset_dir: dataset/DOTA_1024_s2anet
9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_rbox']
10 |
11 | EvalDataset:
12 | !COCODataSet
13 | image_dir: trainval_split/images
14 | anno_path: trainval_split/s2anet_trainval_paddle_coco.json
15 | dataset_dir: dataset/DOTA_1024_s2anet/
16 |
17 | TestDataset:
18 | !ImageFolder
19 | anno_path: trainval_split/s2anet_trainval_paddle_coco.json
20 | dataset_dir: dataset/DOTA_1024_s2anet/
21 |
--------------------------------------------------------------------------------
/configs/datasets/roadsign_voc.yml:
--------------------------------------------------------------------------------
1 | metric: VOC
2 | map_type: integral
3 | num_classes: 4
4 |
5 | TrainDataset:
6 | !VOCDataSet
7 | dataset_dir: dataset/roadsign_voc
8 | anno_path: train.txt
9 | label_list: label_list.txt
10 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
11 |
12 | EvalDataset:
13 | !VOCDataSet
14 | dataset_dir: dataset/roadsign_voc
15 | anno_path: valid.txt
16 | label_list: label_list.txt
17 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
18 |
19 | TestDataset:
20 | !ImageFolder
21 | anno_path: dataset/roadsign_voc/label_list.txt
22 |
--------------------------------------------------------------------------------
/configs/datasets/voc.yml:
--------------------------------------------------------------------------------
1 | metric: VOC
2 | map_type: 11point
3 | num_classes: 20
4 |
5 | TrainDataset:
6 | !VOCDataSet
7 | dataset_dir: dataset/voc
8 | anno_path: trainval.txt
9 | label_list: label_list.txt
10 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
11 |
12 | EvalDataset:
13 | !VOCDataSet
14 | dataset_dir: dataset/voc
15 | anno_path: test.txt
16 | label_list: label_list.txt
17 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
18 |
19 | TestDataset:
20 | !ImageFolder
21 | anno_path: dataset/voc/label_list.txt
22 |
--------------------------------------------------------------------------------
/configs/datasets/wider_face.yml:
--------------------------------------------------------------------------------
1 | metric: WiderFace
2 | num_classes: 1
3 |
4 | TrainDataset:
5 | !WIDERFaceDataSet
6 | dataset_dir: dataset/wider_face
7 | anno_path: wider_face_split/wider_face_train_bbx_gt.txt
8 | image_dir: WIDER_train/images
9 | data_fields: ['image', 'gt_bbox', 'gt_class']
10 |
11 | EvalDataset:
12 | !WIDERFaceDataSet
13 | dataset_dir: dataset/wider_face
14 | anno_path: wider_face_split/wider_face_val_bbx_gt.txt
15 | image_dir: WIDER_val/images
16 | data_fields: ['image']
17 |
18 | TestDataset:
19 | !ImageFolder
20 | use_default_label: true
21 |
--------------------------------------------------------------------------------
/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '../cascade_rcnn/_base_/optimizer_1x.yml',
5 | '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
6 | '../cascade_rcnn/_base_/cascade_fpn_reader.yml',
7 | ]
8 | weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
9 |
10 | ResNet:
11 | depth: 50
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 | dcn_v2_stages: [1,2,3]
17 |
--------------------------------------------------------------------------------
/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
5 | weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | depth: 101
9 | groups: 64
10 | base_width: 4
11 | variant: d
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 | dcn_v2_stages: [1,2,3]
17 |
--------------------------------------------------------------------------------
/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 101
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 | dcn_v2_stages: [1,2,3]
16 |
--------------------------------------------------------------------------------
/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '../faster_rcnn/_base_/optimizer_1x.yml',
5 | '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
6 | '../faster_rcnn/_base_/faster_fpn_reader.yml',
7 | ]
8 | weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
9 |
10 | ResNet:
11 | depth: 50
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 | dcn_v2_stages: [1,2,3]
17 |
--------------------------------------------------------------------------------
/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_dcn_r50_vd_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 50
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 | dcn_v2_stages: [1,2,3]
16 |
--------------------------------------------------------------------------------
/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 50
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 | dcn_v2_stages: [1,2,3]
16 |
17 | epoch: 24
18 | LearningRate:
19 | base_lr: 0.01
20 | schedulers:
21 | - !PiecewiseDecay
22 | gamma: 0.1
23 | milestones: [16, 22]
24 | - !LinearWarmup
25 | start_factor: 0.1
26 | steps: 1000
27 |
--------------------------------------------------------------------------------
/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
5 | weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # for ResNeXt: groups, base_width, base_channels
9 | depth: 101
10 | groups: 64
11 | base_width: 4
12 | variant: d
13 | norm_type: bn
14 | freeze_at: 0
15 | return_idx: [0,1,2,3]
16 | num_stages: 4
17 | dcn_v2_stages: [1,2,3]
18 |
--------------------------------------------------------------------------------
/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
5 | weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 101
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 | dcn_v2_stages: [1,2,3]
16 |
--------------------------------------------------------------------------------
/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '../mask_rcnn/_base_/optimizer_1x.yml',
5 | '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
6 | '../mask_rcnn/_base_/mask_fpn_reader.yml',
7 | ]
8 | weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
9 |
10 | ResNet:
11 | depth: 50
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 | dcn_v2_stages: [1,2,3]
17 |
--------------------------------------------------------------------------------
/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
5 | weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 50
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 | dcn_v2_stages: [1,2,3]
16 |
17 | epoch: 24
18 | LearningRate:
19 | base_lr: 0.01
20 | schedulers:
21 | - !PiecewiseDecay
22 | gamma: 0.1
23 | milestones: [16, 22]
24 | - !LinearWarmup
25 | start_factor: 0.1
26 | steps: 1000
27 |
--------------------------------------------------------------------------------
/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
5 | weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # for ResNeXt: groups, base_width, base_channels
9 | depth: 101
10 | variant: d
11 | groups: 64
12 | base_width: 4
13 | norm_type: bn
14 | freeze_at: 0
15 | return_idx: [0,1,2,3]
16 | num_stages: 4
17 | dcn_v2_stages: [1,2,3]
18 |
--------------------------------------------------------------------------------
/configs/dota/_base_/s2anet.yml:
--------------------------------------------------------------------------------
1 | architecture: S2ANet
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
3 | weights: output/s2anet_r50_fpn_1x_dota/model_final.pdparams
4 |
5 |
6 | # Model Architecture
7 | S2ANet:
8 | backbone: ResNet
9 | neck: FPN
10 | s2anet_head: S2ANetHead
11 | s2anet_bbox_post_process: S2ANetBBoxPostProcess
12 |
13 | ResNet:
14 | depth: 50
15 | norm_type: bn
16 | return_idx: [1,2,3]
17 | num_stages: 4
18 |
19 | FPN:
20 | in_channels: [256, 512, 1024]
21 | out_channel: 256
22 | spatial_scales: [0.25, 0.125, 0.0625]
23 | has_extra_convs: True
24 | extra_stage: 2
25 | relu_before_extra_convs: False
26 |
27 | S2ANetHead:
28 | anchor_strides: [8, 16, 32, 64, 128]
29 | anchor_scales: [4]
30 | anchor_ratios: [1.0]
31 | anchor_assign: RBoxAssigner
32 | stacked_convs: 2
33 | feat_in: 256
34 | feat_out: 256
35 | num_classes: 15
36 | align_conv_type: 'Conv' # options: AlignConv, Conv
37 | align_conv_size: 3
38 | use_sigmoid_cls: True
39 |
40 | RBoxAssigner:
41 | pos_iou_thr: 0.5
42 | neg_iou_thr: 0.4
43 | min_iou_thr: 0.0
44 | ignore_iof_thr: -2
45 |
46 | S2ANetBBoxPostProcess:
47 | nms_pre: 2000
48 | min_bbox_size: 0.0
49 | nms:
50 | name: MultiClassNMS
51 | keep_top_k: -1
52 | score_threshold: 0.05
53 | nms_threshold: 0.1
54 | normalized: False
55 | #background_label: -1
56 |
--------------------------------------------------------------------------------
/configs/dota/_base_/s2anet_optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [7, 10]
9 | - !LinearWarmup
10 | start_factor: 0.3333333333333333
11 | steps: 500
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 | clip_grad_by_norm: 35
21 |
--------------------------------------------------------------------------------
/configs/dota/_base_/s2anet_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 0
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - Rbox2Poly: {}
6 | # Resize can process rbox
7 | - Resize: {target_size: [1024, 1024], interp: 2, keep_ratio: False}
8 | - RandomFlip: {prob: 0.5}
9 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
10 | - Permute: {}
11 | batch_transforms:
12 | - RboxPadBatch: {pad_to_stride: 32, pad_gt: true}
13 | batch_size: 1
14 | shuffle: true
15 | drop_last: true
16 |
17 |
18 | EvalReader:
19 | sample_transforms:
20 | - Decode: {}
21 | - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
22 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
23 | - Permute: {}
24 | batch_transforms:
25 | - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
26 | batch_size: 1
27 | shuffle: false
28 | drop_last: false
29 | drop_empty: false
30 |
31 |
32 | TestReader:
33 | sample_transforms:
34 | - Decode: {}
35 | - Resize: {interp: 2, target_size: [1024, 1024], keep_ratio: True}
36 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
37 | - Permute: {}
38 | batch_transforms:
39 | - RboxPadBatch: {pad_to_stride: 32, pad_gt: false}
40 | batch_size: 1
41 | shuffle: false
42 | drop_last: false
43 |
--------------------------------------------------------------------------------
/configs/dota/s2anet_1x_dota.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/dota.yml',
3 | '../runtime.yml',
4 | '_base_/s2anet_optimizer_1x.yml',
5 | '_base_/s2anet.yml',
6 | '_base_/s2anet_reader.yml',
7 | ]
8 | weights: output/s2anet_1x_dota/model_final
9 |
--------------------------------------------------------------------------------
/configs/dota/s2anet_conv_1x_dota.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/dota.yml',
3 | '../runtime.yml',
4 | '_base_/s2anet_optimizer_1x.yml',
5 | '_base_/s2anet.yml',
6 | '_base_/s2anet_reader.yml',
7 | ]
8 | weights: output/s2anet_conv_1x_dota/model_final
9 |
10 | S2ANetHead:
11 | anchor_strides: [8, 16, 32, 64, 128]
12 | anchor_scales: [4]
13 | anchor_ratios: [1.0]
14 | anchor_assign: RBoxAssigner
15 | stacked_convs: 2
16 | feat_in: 256
17 | feat_out: 256
18 | num_classes: 15
19 | align_conv_type: 'Conv' # options: AlignConv, Conv
20 | align_conv_size: 3
21 | use_sigmoid_cls: True
22 |
--------------------------------------------------------------------------------
/configs/face_detection/_base_/blazeface.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 |
3 | SSD:
4 | backbone: BlazeNet
5 | ssd_head: FaceHead
6 | post_process: BBoxPostProcess
7 |
8 | BlazeNet:
9 | blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
10 | double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
11 | [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
12 |
13 | FaceHead:
14 | in_channels: [96, 96]
15 | anchor_generator: AnchorGeneratorSSD
16 | loss: SSDLoss
17 |
18 | SSDLoss:
19 | overlap_threshold: 0.35
20 |
21 | AnchorGeneratorSSD:
22 | steps: [8., 16.]
23 | aspect_ratios: [[1.], [1.]]
24 | min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
25 | max_sizes: [[], []]
26 | offset: 0.5
27 | flip: False
28 | min_max_aspect_ratios_order: false
29 |
30 | BBoxPostProcess:
31 | decode:
32 | name: SSDBox
33 | nms:
34 | name: MultiClassNMS
35 | keep_top_k: 750
36 | score_threshold: 0.01
37 | nms_threshold: 0.3
38 | nms_top_k: 5000
39 | nms_eta: 1.0
40 |
--------------------------------------------------------------------------------
/configs/face_detection/_base_/face_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 90
5 | sample_transforms:
6 | - Decode: {}
7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
8 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
9 | - RandomFlip: {}
10 | - CropWithDataAchorSampling: {
11 | anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
12 | batch_sampler: [
13 | [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
14 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
15 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
16 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
17 | [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
18 | ],
19 | target_size: 640}
20 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
21 | - NormalizeBox: {}
22 | - PadBox: {num_max_boxes: 90}
23 | batch_transforms:
24 | - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
25 | - Permute: {}
26 | batch_size: 8
27 | shuffle: true
28 | drop_last: true
29 |
30 |
31 | EvalReader:
32 | sample_transforms:
33 | - Decode: {}
34 | - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
35 | - Permute: {}
36 | batch_size: 1
37 | drop_empty: false
38 |
39 |
40 | TestReader:
41 | sample_transforms:
42 | - Decode: {}
43 | - NormalizeImage: {mean: [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
44 | - Permute: {}
45 | batch_size: 1
46 |
--------------------------------------------------------------------------------
/configs/face_detection/_base_/optimizer_1000e.yml:
--------------------------------------------------------------------------------
1 | epoch: 1000
2 |
3 | LearningRate:
4 | base_lr: 0.001
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 333
10 | - 800
11 | - !LinearWarmup
12 | start_factor: 0.3333333333333333
13 | steps: 500
14 |
15 | OptimizerBuilder:
16 | optimizer:
17 | momentum: 0.0
18 | type: RMSProp
19 | regularizer:
20 | factor: 0.0005
21 | type: L2
22 |
--------------------------------------------------------------------------------
/configs/face_detection/blazeface_1000e.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/wider_face.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1000e.yml',
5 | '_base_/blazeface.yml',
6 | '_base_/face_reader.yml',
7 | ]
8 | weights: output/blazeface_1000e/model_final
9 | multi_scale_eval: True
10 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/_base_/faster_fpn_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: 32, pad_gt: true}
11 | batch_size: 1
12 | shuffle: true
13 | drop_last: true
14 |
15 |
16 | EvalReader:
17 | sample_transforms:
18 | - Decode: {}
19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
21 | - Permute: {}
22 | batch_transforms:
23 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
24 | batch_size: 1
25 | shuffle: false
26 | drop_last: false
27 | drop_empty: false
28 |
29 |
30 | TestReader:
31 | sample_transforms:
32 | - Decode: {}
33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
35 | - Permute: {}
36 | batch_transforms:
37 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
38 | batch_size: 1
39 | shuffle: false
40 | drop_last: false
41 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/_base_/faster_rcnn_r50.yml:
--------------------------------------------------------------------------------
1 | architecture: FasterRCNN
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
3 |
4 | FasterRCNN:
5 | backbone: ResNet
6 | rpn_head: RPNHead
7 | bbox_head: BBoxHead
8 | # post process
9 | bbox_post_process: BBoxPostProcess
10 |
11 |
12 | ResNet:
13 | # index 0 stands for res2
14 | depth: 50
15 | norm_type: bn
16 | freeze_at: 0
17 | return_idx: [2]
18 | num_stages: 3
19 |
20 | RPNHead:
21 | anchor_generator:
22 | aspect_ratios: [0.5, 1.0, 2.0]
23 | anchor_sizes: [32, 64, 128, 256, 512]
24 | strides: [16]
25 | rpn_target_assign:
26 | batch_size_per_im: 256
27 | fg_fraction: 0.5
28 | negative_overlap: 0.3
29 | positive_overlap: 0.7
30 | use_random: True
31 | train_proposal:
32 | min_size: 0.0
33 | nms_thresh: 0.7
34 | pre_nms_top_n: 12000
35 | post_nms_top_n: 2000
36 | topk_after_collect: False
37 | test_proposal:
38 | min_size: 0.0
39 | nms_thresh: 0.7
40 | pre_nms_top_n: 6000
41 | post_nms_top_n: 1000
42 |
43 |
44 | BBoxHead:
45 | head: Res5Head
46 | roi_extractor:
47 | resolution: 14
48 | sampling_ratio: 0
49 | aligned: True
50 | bbox_assigner: BBoxAssigner
51 | with_pool: true
52 |
53 | BBoxAssigner:
54 | batch_size_per_im: 512
55 | bg_thresh: 0.5
56 | fg_thresh: 0.5
57 | fg_fraction: 0.25
58 | use_random: True
59 |
60 | BBoxPostProcess:
61 | decode: RCNNBox
62 | nms:
63 | name: MultiClassNMS
64 | keep_top_k: 100
65 | score_threshold: 0.05
66 | nms_threshold: 0.5
67 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml:
--------------------------------------------------------------------------------
1 | architecture: FasterRCNN
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
3 |
4 | FasterRCNN:
5 | backbone: ResNet
6 | neck: FPN
7 | rpn_head: RPNHead
8 | bbox_head: BBoxHead
9 | # post process
10 | bbox_post_process: BBoxPostProcess
11 |
12 |
13 | ResNet:
14 | # index 0 stands for res2
15 | depth: 50
16 | norm_type: bn
17 | freeze_at: 0
18 | return_idx: [0,1,2,3]
19 | num_stages: 4
20 |
21 | FPN:
22 | out_channel: 256
23 |
24 | RPNHead:
25 | anchor_generator:
26 | aspect_ratios: [0.5, 1.0, 2.0]
27 | anchor_sizes: [[32], [64], [128], [256], [512]]
28 | strides: [4, 8, 16, 32, 64]
29 | rpn_target_assign:
30 | batch_size_per_im: 256
31 | fg_fraction: 0.5
32 | negative_overlap: 0.3
33 | positive_overlap: 0.7
34 | use_random: True
35 | train_proposal:
36 | min_size: 0.0
37 | nms_thresh: 0.7
38 | pre_nms_top_n: 2000
39 | post_nms_top_n: 1000
40 | topk_after_collect: True
41 | test_proposal:
42 | min_size: 0.0
43 | nms_thresh: 0.7
44 | pre_nms_top_n: 1000
45 | post_nms_top_n: 1000
46 |
47 |
48 | BBoxHead:
49 | head: TwoFCHead
50 | roi_extractor:
51 | resolution: 7
52 | sampling_ratio: 0
53 | aligned: True
54 | bbox_assigner: BBoxAssigner
55 |
56 | BBoxAssigner:
57 | batch_size_per_im: 512
58 | bg_thresh: 0.5
59 | fg_thresh: 0.5
60 | fg_fraction: 0.25
61 | use_random: True
62 |
63 | TwoFCHead:
64 | out_channel: 1024
65 |
66 |
67 | BBoxPostProcess:
68 | decode: RCNNBox
69 | nms:
70 | name: MultiClassNMS
71 | keep_top_k: 100
72 | score_threshold: 0.05
73 | nms_threshold: 0.5
74 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/_base_/faster_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: -1, pad_gt: true}
11 | batch_size: 1
12 | shuffle: true
13 | drop_last: true
14 |
15 |
16 | EvalReader:
17 | sample_transforms:
18 | - Decode: {}
19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
21 | - Permute: {}
22 | batch_transforms:
23 | - PadBatch: {pad_to_stride: -1, pad_gt: false}
24 | batch_size: 1
25 | shuffle: false
26 | drop_last: false
27 | drop_empty: false
28 |
29 |
30 | TestReader:
31 | sample_transforms:
32 | - Decode: {}
33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
35 | - Permute: {}
36 | batch_transforms:
37 | - PadBatch: {pad_to_stride: -1}
38 | batch_size: 1
39 | shuffle: false
40 | drop_last: false
41 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [8, 11]
9 | - !LinearWarmup
10 | start_factor: 0.1
11 | steps: 1000
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
6 | weights: output/faster_rcnn_r101_1x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 101
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [2]
14 | num_stages: 3
15 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
6 | weights: output/faster_rcnn_r101_fpn_1x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 101
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
6 | weights: output/faster_rcnn_r101_fpn_2x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 101
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
16 | epoch: 24
17 | LearningRate:
18 | base_lr: 0.01
19 | schedulers:
20 | - !PiecewiseDecay
21 | gamma: 0.1
22 | milestones: [16, 22]
23 | - !LinearWarmup
24 | start_factor: 0.1
25 | steps: 1000
26 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_r101_vd_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 101
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r101_vd_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_r101_vd_fpn_2x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 101
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
16 | epoch: 24
17 | LearningRate:
18 | base_lr: 0.01
19 | schedulers:
20 | - !PiecewiseDecay
21 | gamma: 0.1
22 | milestones: [16, 22]
23 | - !LinearWarmup
24 | start_factor: 0.1
25 | steps: 1000
26 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r34_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams
6 | weights: output/faster_rcnn_r34_fpn_1x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 34
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r34_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_vd_pretrained.pdparams
6 | weights: output/faster_rcnn_r34_vd_fpn_1x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 34
11 | variant: d
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/faster_rcnn_r50.yml',
6 | '_base_/faster_reader.yml',
7 | ]
8 | weights: output/faster_rcnn_r50_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/faster_rcnn_r50_fpn.yml',
6 | '_base_/faster_fpn_reader.yml',
7 | ]
8 | weights: output/faster_rcnn_r50_fpn_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | weights: output/faster_rcnn_r50_fpn_2x_coco/model_final
5 |
6 | epoch: 24
7 | LearningRate:
8 | base_lr: 0.01
9 | schedulers:
10 | - !PiecewiseDecay
11 | gamma: 0.1
12 | milestones: [16, 22]
13 | - !LinearWarmup
14 | start_factor: 0.1
15 | steps: 1000
16 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_vd_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_r50_vd_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 50
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [2]
14 | num_stages: 3
15 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_r50_vd_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 50
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
5 | weights: output/faster_rcnn_r50_vd_fpn_2x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 50
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
16 | epoch: 24
17 | LearningRate:
18 | base_lr: 0.01
19 | schedulers:
20 | - !PiecewiseDecay
21 | gamma: 0.1
22 | milestones: [16, 22]
23 | - !LinearWarmup
24 | start_factor: 0.1
25 | steps: 1000
26 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/faster_rcnn_r50_fpn.yml',
6 | '_base_/faster_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/faster_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
20 | epoch: 12
21 | LearningRate:
22 | base_lr: 0.01
23 | schedulers:
24 | - !PiecewiseDecay
25 | gamma: 0.1
26 | milestones: [8, 11]
27 | - !LinearWarmup
28 | start_factor: 0.1
29 | steps: 1000
30 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/faster_rcnn_r50_fpn.yml',
6 | '_base_/faster_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/faster_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
20 | epoch: 24
21 | LearningRate:
22 | base_lr: 0.01
23 | schedulers:
24 | - !PiecewiseDecay
25 | gamma: 0.1
26 | milestones: [12, 22]
27 | - !LinearWarmup
28 | start_factor: 0.1
29 | steps: 1000
30 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
6 | weights: output/faster_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final
7 |
8 | ResNet:
9 | # for ResNeXt: groups, base_width, base_channels
10 | depth: 101
11 | groups: 64
12 | base_width: 4
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 |
--------------------------------------------------------------------------------
/configs/faster_rcnn/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'faster_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
6 | weights: output/faster_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final
7 |
8 | ResNet:
9 | # for ResNeXt: groups, base_width, base_channels
10 | depth: 101
11 | groups: 64
12 | base_width: 4
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 |
19 | epoch: 24
20 | LearningRate:
21 | base_lr: 0.01
22 | schedulers:
23 | - !PiecewiseDecay
24 | gamma: 0.1
25 | milestones: [16, 22]
26 | - !LinearWarmup
27 | start_factor: 0.1
28 | steps: 1000
29 |
--------------------------------------------------------------------------------
/configs/fcos/_base_/fcos_r50_fpn.yml:
--------------------------------------------------------------------------------
1 | architecture: FCOS
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
3 |
4 | FCOS:
5 | backbone: ResNet
6 | neck: FPN
7 | fcos_head: FCOSHead
8 | fcos_post_process: FCOSPostProcess
9 |
10 | ResNet:
11 | # index 0 stands for res2
12 | depth: 50
13 | norm_type: bn
14 | freeze_at: 0
15 | return_idx: [1,2,3]
16 | num_stages: 4
17 |
18 | FPN:
19 | out_channel: 256
20 | spatial_scales: [0.125, 0.0625, 0.03125]
21 | extra_stage: 2
22 | has_extra_convs: true
23 | use_c5: false
24 |
25 | FCOSHead:
26 | fcos_feat:
27 | name: FCOSFeat
28 | feat_in: 256
29 | feat_out: 256
30 | num_convs: 4
31 | norm_type: "gn"
32 | use_dcn: false
33 | num_classes: 80
34 | fpn_stride: [8, 16, 32, 64, 128]
35 | prior_prob: 0.01
36 | fcos_loss: FCOSLoss
37 | norm_reg_targets: true
38 | centerness_on_reg: true
39 |
40 | FCOSLoss:
41 | loss_alpha: 0.25
42 | loss_gamma: 2.0
43 | iou_loss_type: "giou"
44 | reg_weights: 1.0
45 |
46 | FCOSPostProcess:
47 | decode:
48 | name: FCOSBox
49 | num_classes: 80
50 | nms:
51 | name: MultiClassNMS
52 | nms_top_k: 1000
53 | keep_top_k: 100
54 | score_threshold: 0.025
55 | nms_threshold: 0.6
56 |
--------------------------------------------------------------------------------
/configs/fcos/_base_/fcos_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomFlip: {prob: 0.5}
6 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
7 | - Resize: {target_size: [800, 1333], keep_ratio: true, interp: 1}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: 128}
11 | - Gt2FCOSTarget:
12 | object_sizes_boundary: [64, 128, 256, 512]
13 | center_sampling_radius: 1.5
14 | downsample_ratios: [8, 16, 32, 64, 128]
15 | norm_reg_targets: True
16 | batch_size: 2
17 | shuffle: true
18 | drop_last: true
19 |
20 |
21 | EvalReader:
22 | sample_transforms:
23 | - Decode: {}
24 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
25 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
26 | - Permute: {}
27 | batch_transforms:
28 | - PadBatch: {pad_to_stride: 128}
29 | batch_size: 1
30 | shuffle: false
31 |
32 |
33 | TestReader:
34 | sample_transforms:
35 | - Decode: {}
36 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
37 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
38 | - Permute: {}
39 | batch_transforms:
40 | - PadBatch: {pad_to_stride: 128}
41 | batch_size: 1
42 | shuffle: false
43 |
--------------------------------------------------------------------------------
/configs/fcos/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [8, 11]
9 | - !LinearWarmup
10 | start_factor: 0.3333333333333333
11 | steps: 500
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/fcos/fcos_dcn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/fcos_r50_fpn.yml',
5 | '_base_/optimizer_1x.yml',
6 | '_base_/fcos_reader.yml',
7 | ]
8 |
9 | weights: output/fcos_dcn_r50_fpn_1x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | norm_type: bn
14 | freeze_at: 0
15 | return_idx: [1,2,3]
16 | num_stages: 4
17 | dcn_v2_stages: [1,2,3]
18 |
19 | FCOSHead:
20 | fcos_feat:
21 | name: FCOSFeat
22 | feat_in: 256
23 | feat_out: 256
24 | num_convs: 4
25 | norm_type: "gn"
26 | use_dcn: true
27 | num_classes: 80
28 | fpn_stride: [8, 16, 32, 64, 128]
29 | prior_prob: 0.01
30 | fcos_loss: FCOSLoss
31 | norm_reg_targets: true
32 | centerness_on_reg: true
33 |
--------------------------------------------------------------------------------
/configs/fcos/fcos_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/fcos_r50_fpn.yml',
5 | '_base_/optimizer_1x.yml',
6 | '_base_/fcos_reader.yml',
7 | ]
8 |
9 | weights: output/fcos_r50_fpn_1x_coco/model_final
10 |
--------------------------------------------------------------------------------
/configs/fcos/fcos_r50_fpn_multiscale_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/fcos_r50_fpn.yml',
5 | '_base_/optimizer_1x.yml',
6 | '_base_/fcos_reader.yml',
7 | ]
8 |
9 | weights: output/fcos_r50_fpn_multiscale_2x_coco/model_final
10 |
11 | TrainReader:
12 | sample_transforms:
13 | - Decode: {}
14 | - RandomFlip: {prob: 0.5}
15 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
16 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: true, interp: 1}
17 | - Permute: {}
18 | batch_transforms:
19 | - PadBatch: {pad_to_stride: 128}
20 | - Gt2FCOSTarget:
21 | object_sizes_boundary: [64, 128, 256, 512]
22 | center_sampling_radius: 1.5
23 | downsample_ratios: [8, 16, 32, 64, 128]
24 | norm_reg_targets: True
25 | batch_size: 2
26 | shuffle: true
27 | drop_last: true
28 |
29 | epoch: 24
30 |
31 | LearningRate:
32 | base_lr: 0.01
33 | schedulers:
34 | - !PiecewiseDecay
35 | gamma: 0.1
36 | milestones: [16, 22]
37 | - !LinearWarmup
38 | start_factor: 0.3333333333333333
39 | steps: 500
40 |
--------------------------------------------------------------------------------
/configs/gn/README.md:
--------------------------------------------------------------------------------
1 | # Group Normalization
2 |
3 | ## Model Zoo
4 |
5 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps)| Box AP | Mask AP | 下载 | 配置文件 |
6 | | :------------- | :------------- | :-----------: | :------: | :--------: |:-----: | :-----: | :----: | :----: |
7 | | ResNet50-FPN | Faster | 1 | 2x | - | 41.9 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml) |
8 | | ResNet50-FPN | Mask | 1 | 2x | - | 42.3 | 38.4 | [下载链接](https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml) |
9 | | ResNet50-FPN | Cascade Faster | 1 | 2x | - | 44.6 | - | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml) |
10 | | ResNet50-FPN | Cascade Mask | 1 | 2x | - | 45.0 | 39.3 | [下载链接](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_gn_2x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml) |
11 |
12 |
13 | **注意:** Faster R-CNN baseline仅使用 `2fc` head,而此处使用[`4conv1fc` head](https://arxiv.org/abs/1803.08494)(4层conv之间使用GN),并且FPN也使用GN,而对于Mask R-CNN是在mask head的4层conv之间也使用GN。
14 |
15 | ## Citations
16 | ```
17 | @inproceedings{wu2018group,
18 | title={Group Normalization},
19 | author={Wu, Yuxin and He, Kaiming},
20 | booktitle={Proceedings of the European Conference on Computer Vision (ECCV)},
21 | year={2018}
22 | }
23 | ```
24 |
--------------------------------------------------------------------------------
/configs/gn/cascade_mask_rcnn_r50_fpn_gn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '../cascade_rcnn/_base_/optimizer_1x.yml',
5 | '../cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml',
6 | '../cascade_rcnn/_base_/cascade_mask_fpn_reader.yml',
7 | ]
8 | weights: output/cascade_mask_rcnn_r50_fpn_gn_2x_coco/model_final
9 |
10 | CascadeRCNN:
11 | backbone: ResNet
12 | neck: FPN
13 | rpn_head: RPNHead
14 | bbox_head: CascadeHead
15 | mask_head: MaskHead
16 | # post process
17 | bbox_post_process: BBoxPostProcess
18 | mask_post_process: MaskPostProcess
19 |
20 | FPN:
21 | out_channel: 256
22 | norm_type: gn
23 |
24 | CascadeHead:
25 | head: CascadeXConvNormHead
26 | roi_extractor:
27 | resolution: 7
28 | sampling_ratio: 0
29 | aligned: True
30 | bbox_assigner: BBoxAssigner
31 |
32 | CascadeXConvNormHead:
33 | num_convs: 4
34 | out_channel: 1024
35 | norm_type: gn
36 |
37 | MaskHead:
38 | head: MaskFeat
39 | roi_extractor:
40 | resolution: 14
41 | sampling_ratio: 0
42 | aligned: True
43 | mask_assigner: MaskAssigner
44 | share_bbox_feat: False
45 |
46 | MaskFeat:
47 | num_convs: 4
48 | out_channel: 256
49 | norm_type: gn
50 |
51 |
52 | epoch: 24
53 | LearningRate:
54 | base_lr: 0.01
55 | schedulers:
56 | - !PiecewiseDecay
57 | gamma: 0.1
58 | milestones: [16, 22]
59 | - !LinearWarmup
60 | start_factor: 0.1
61 | steps: 1000
62 |
--------------------------------------------------------------------------------
/configs/gn/cascade_rcnn_r50_fpn_gn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '../cascade_rcnn/_base_/optimizer_1x.yml',
5 | '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
6 | '../cascade_rcnn/_base_/cascade_fpn_reader.yml',
7 | ]
8 | weights: output/cascade_rcnn_r50_fpn_gn_2x_coco/model_final
9 |
10 | FPN:
11 | out_channel: 256
12 | norm_type: gn
13 |
14 | CascadeHead:
15 | head: CascadeXConvNormHead
16 | roi_extractor:
17 | resolution: 7
18 | sampling_ratio: 0
19 | aligned: True
20 | bbox_assigner: BBoxAssigner
21 |
22 | CascadeXConvNormHead:
23 | num_convs: 4
24 | out_channel: 1024
25 | norm_type: gn
26 |
27 |
28 | epoch: 24
29 | LearningRate:
30 | base_lr: 0.01
31 | schedulers:
32 | - !PiecewiseDecay
33 | gamma: 0.1
34 | milestones: [16, 22]
35 | - !LinearWarmup
36 | start_factor: 0.1
37 | steps: 1000
38 |
--------------------------------------------------------------------------------
/configs/gn/faster_rcnn_r50_fpn_gn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '../faster_rcnn/_base_/optimizer_1x.yml',
5 | '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
6 | '../faster_rcnn/_base_/faster_fpn_reader.yml',
7 | ]
8 | weights: output/faster_rcnn_r50_fpn_gn_2x_coco/model_final
9 |
10 | FasterRCNN:
11 | backbone: ResNet
12 | neck: FPN
13 | rpn_head: RPNHead
14 | bbox_head: BBoxHead
15 | # post process
16 | bbox_post_process: BBoxPostProcess
17 |
18 | FPN:
19 | out_channel: 256
20 | norm_type: gn
21 |
22 | BBoxHead:
23 | head: XConvNormHead
24 | roi_extractor:
25 | resolution: 7
26 | sampling_ratio: 0
27 | aligned: True
28 | bbox_assigner: BBoxAssigner
29 |
30 | XConvNormHead:
31 | num_convs: 4
32 | out_channel: 1024
33 | norm_type: gn
34 |
35 |
36 | epoch: 24
37 | LearningRate:
38 | base_lr: 0.01
39 | schedulers:
40 | - !PiecewiseDecay
41 | gamma: 0.1
42 | milestones: [16, 22]
43 | - !LinearWarmup
44 | start_factor: 0.1
45 | steps: 1000
46 |
--------------------------------------------------------------------------------
/configs/gn/mask_rcnn_r50_fpn_gn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '../mask_rcnn/_base_/optimizer_1x.yml',
5 | '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
6 | '../mask_rcnn/_base_/mask_fpn_reader.yml',
7 | ]
8 | weights: output/mask_rcnn_r50_fpn_gn_2x_coco/model_final
9 |
10 | MaskRCNN:
11 | backbone: ResNet
12 | neck: FPN
13 | rpn_head: RPNHead
14 | bbox_head: BBoxHead
15 | mask_head: MaskHead
16 | # post process
17 | bbox_post_process: BBoxPostProcess
18 | mask_post_process: MaskPostProcess
19 |
20 | FPN:
21 | out_channel: 256
22 | norm_type: gn
23 |
24 | BBoxHead:
25 | head: XConvNormHead
26 | roi_extractor:
27 | resolution: 7
28 | sampling_ratio: 0
29 | aligned: True
30 | bbox_assigner: BBoxAssigner
31 |
32 | XConvNormHead:
33 | num_convs: 4
34 | out_channel: 1024
35 | norm_type: gn
36 |
37 | MaskHead:
38 | head: MaskFeat
39 | roi_extractor:
40 | resolution: 14
41 | sampling_ratio: 0
42 | aligned: True
43 | mask_assigner: MaskAssigner
44 | share_bbox_feat: False
45 |
46 | MaskFeat:
47 | num_convs: 4
48 | out_channel: 256
49 | norm_type: gn
50 |
51 |
52 | epoch: 24
53 | LearningRate:
54 | base_lr: 0.01
55 | schedulers:
56 | - !PiecewiseDecay
57 | gamma: 0.1
58 | milestones: [16, 22]
59 | - !LinearWarmup
60 | start_factor: 0.1
61 | steps: 1000
62 |
--------------------------------------------------------------------------------
/configs/hrnet/_base_/faster_rcnn_hrnetv2p_w18.yml:
--------------------------------------------------------------------------------
1 | architecture: FasterRCNN
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/HRNet_W18_C_pretrained.pdparams
3 |
4 | FasterRCNN:
5 | backbone: HRNet
6 | neck: HRFPN
7 | rpn_head: RPNHead
8 | bbox_head: BBoxHead
9 | # post process
10 | bbox_post_process: BBoxPostProcess
11 |
12 | HRNet:
13 | width: 18
14 | freeze_at: 0
15 | return_idx: [0, 1, 2, 3]
16 |
17 | HRFPN:
18 | out_channel: 256
19 | share_conv: false
20 |
21 | RPNHead:
22 | anchor_generator:
23 | aspect_ratios: [0.5, 1.0, 2.0]
24 | anchor_sizes: [[32], [64], [128], [256], [512]]
25 | strides: [4, 8, 16, 32, 64]
26 | rpn_target_assign:
27 | batch_size_per_im: 256
28 | fg_fraction: 0.5
29 | negative_overlap: 0.3
30 | positive_overlap: 0.7
31 | use_random: True
32 | train_proposal:
33 | min_size: 0.0
34 | nms_thresh: 0.7
35 | pre_nms_top_n: 2000
36 | post_nms_top_n: 2000
37 | topk_after_collect: True
38 | test_proposal:
39 | min_size: 0.0
40 | nms_thresh: 0.7
41 | pre_nms_top_n: 1000
42 | post_nms_top_n: 1000
43 |
44 | BBoxHead:
45 | head: TwoFCHead
46 | roi_extractor:
47 | resolution: 7
48 | sampling_ratio: 0
49 | aligned: True
50 | bbox_assigner: BBoxAssigner
51 |
52 | BBoxAssigner:
53 | batch_size_per_im: 512
54 | bg_thresh: 0.5
55 | fg_thresh: 0.5
56 | fg_fraction: 0.25
57 | use_random: True
58 |
59 | TwoFCHead:
60 | out_channel: 1024
61 |
62 | BBoxPostProcess:
63 | decode: RCNNBox
64 | nms:
65 | name: MultiClassNMS
66 | keep_top_k: 100
67 | score_threshold: 0.05
68 | nms_threshold: 0.5
69 |
--------------------------------------------------------------------------------
/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | './_base_/faster_rcnn_hrnetv2p_w18.yml',
4 | '../faster_rcnn/_base_/optimizer_1x.yml',
5 | '../faster_rcnn/_base_/faster_fpn_reader.yml',
6 | '../runtime.yml',
7 | ]
8 |
9 | weights: output/faster_rcnn_hrnetv2p_w18_1x_coco/model_final
10 | epoch: 12
11 |
12 | LearningRate:
13 | base_lr: 0.02
14 | schedulers:
15 | - !PiecewiseDecay
16 | gamma: 0.1
17 | milestones: [8, 11]
18 | - !LinearWarmup
19 | start_factor: 0.1
20 | steps: 1000
21 |
22 | TrainReader:
23 | batch_size: 2
24 |
--------------------------------------------------------------------------------
/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | './_base_/faster_rcnn_hrnetv2p_w18.yml',
4 | '../faster_rcnn/_base_/optimizer_1x.yml',
5 | '../faster_rcnn/_base_/faster_fpn_reader.yml',
6 | '../runtime.yml',
7 | ]
8 |
9 | weights: output/faster_rcnn_hrnetv2p_w18_2x_coco/model_final
10 | epoch: 24
11 |
12 | LearningRate:
13 | base_lr: 0.02
14 | schedulers:
15 | - !PiecewiseDecay
16 | gamma: 0.1
17 | milestones: [16, 22]
18 | - !LinearWarmup
19 | start_factor: 0.1
20 | steps: 1000
21 |
22 | TrainReader:
23 | batch_size: 2
24 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/_base_/mask_fpn_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: 32, pad_gt: true}
11 | batch_size: 1
12 | shuffle: true
13 | drop_last: true
14 |
15 | EvalReader:
16 | sample_transforms:
17 | - Decode: {}
18 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
19 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
20 | - Permute: {}
21 | batch_transforms:
22 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
23 | batch_size: 1
24 | shuffle: false
25 | drop_last: false
26 | drop_empty: false
27 |
28 |
29 | TestReader:
30 | sample_transforms:
31 | - Decode: {}
32 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
33 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
34 | - Permute: {}
35 | batch_transforms:
36 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
37 | batch_size: 1
38 | shuffle: false
39 | drop_last: false
40 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/_base_/mask_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
8 | - Permute: {}
9 | batch_transforms:
10 | - PadBatch: {pad_to_stride: -1, pad_gt: true}
11 | batch_size: 1
12 | shuffle: true
13 | drop_last: true
14 |
15 |
16 | EvalReader:
17 | sample_transforms:
18 | - Decode: {}
19 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
20 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
21 | - Permute: {}
22 | batch_transforms:
23 | - PadBatch: {pad_to_stride: -1}
24 | batch_size: 1
25 | shuffle: false
26 | drop_last: false
27 | drop_empty: false
28 |
29 |
30 | TestReader:
31 | sample_transforms:
32 | - Decode: {}
33 | - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
34 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
35 | - Permute: {}
36 | batch_transforms:
37 | - PadBatch: {pad_to_stride: -1}
38 | batch_size: 1
39 | shuffle: false
40 | drop_last: false
41 | drop_empty: false
42 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [8, 11]
9 | - !LinearWarmup
10 | start_factor: 0.001
11 | steps: 1000
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_pretrained.pdparams
5 | weights: output/mask_rcnn_r101_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 101
10 | norm_type: bn
11 | freeze_at: 0
12 | return_idx: [0,1,2,3]
13 | num_stages: 4
14 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r101_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
5 | weights: output/mask_rcnn_r101_vd_fpn_1x_coco/model_final
6 |
7 | ResNet:
8 | # index 0 stands for res2
9 | depth: 101
10 | variant: d
11 | norm_type: bn
12 | freeze_at: 0
13 | return_idx: [0,1,2,3]
14 | num_stages: 4
15 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/mask_rcnn_r50.yml',
6 | '_base_/mask_reader.yml',
7 | ]
8 | weights: output/mask_rcnn_r50_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_1x_coco.yml',
3 | ]
4 | weights: output/mask_rcnn_r50_2x_coco/model_final
5 |
6 | epoch: 24
7 | LearningRate:
8 | base_lr: 0.01
9 | schedulers:
10 | - !PiecewiseDecay
11 | gamma: 0.1
12 | milestones: [16, 22]
13 | - !LinearWarmup
14 | start_factor: 0.3333333333333333
15 | steps: 500
16 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/mask_rcnn_r50_fpn.yml',
6 | '_base_/mask_fpn_reader.yml',
7 | ]
8 | weights: output/mask_rcnn_r50_fpn_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 | weights: output/mask_rcnn_r50_fpn_2x_coco/model_final
5 |
6 | epoch: 24
7 | LearningRate:
8 | base_lr: 0.01
9 | schedulers:
10 | - !PiecewiseDecay
11 | gamma: 0.1
12 | milestones: [16, 22]
13 | - !LinearWarmup
14 | start_factor: 0.3333333333333333
15 | steps: 500
16 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
6 | weights: output/mask_rcnn_r50_vd_fpn_1x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 50
11 | variant: d
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
6 | weights: output/mask_rcnn_r50_vd_fpn_2x_coco/model_final
7 |
8 | ResNet:
9 | # index 0 stands for res2
10 | depth: 50
11 | variant: d
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 |
17 | epoch: 24
18 | LearningRate:
19 | base_lr: 0.01
20 | schedulers:
21 | - !PiecewiseDecay
22 | gamma: 0.1
23 | milestones: [16, 22]
24 | - !LinearWarmup
25 | start_factor: 0.3333333333333333
26 | steps: 500
27 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/mask_rcnn_r50_fpn.yml',
6 | '_base_/mask_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
20 | epoch: 12
21 | LearningRate:
22 | base_lr: 0.01
23 | schedulers:
24 | - !PiecewiseDecay
25 | gamma: 0.1
26 | milestones: [8, 11]
27 | - !LinearWarmup
28 | start_factor: 0.1
29 | steps: 1000
30 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/mask_rcnn_r50_fpn.yml',
6 | '_base_/mask_fpn_reader.yml',
7 | ]
8 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
9 | weights: output/mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
19 |
20 | epoch: 24
21 | LearningRate:
22 | base_lr: 0.01
23 | schedulers:
24 | - !PiecewiseDecay
25 | gamma: 0.1
26 | milestones: [12, 22]
27 | - !LinearWarmup
28 | start_factor: 0.1
29 | steps: 1000
30 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
6 | weights: output/mask_rcnn_x101_vd_64x4d_fpn_1x_coco/model_final
7 |
8 | ResNet:
9 | # for ResNeXt: groups, base_width, base_channels
10 | depth: 101
11 | variant: d
12 | groups: 64
13 | base_width: 4
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 |
19 | epoch: 12
20 | LearningRate:
21 | base_lr: 0.01
22 | schedulers:
23 | - !PiecewiseDecay
24 | gamma: 0.1
25 | milestones: [8, 11]
26 | - !LinearWarmup
27 | start_factor: 0.1
28 | steps: 1000
29 |
--------------------------------------------------------------------------------
/configs/mask_rcnn/mask_rcnn_x101_vd_64x4d_fpn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | 'mask_rcnn_r50_fpn_1x_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
6 | weights: output/mask_rcnn_x101_vd_64x4d_fpn_2x_coco/model_final
7 |
8 | ResNet:
9 | # for ResNeXt: groups, base_width, base_channels
10 | depth: 101
11 | variant: d
12 | groups: 64
13 | base_width: 4
14 | norm_type: bn
15 | freeze_at: 0
16 | return_idx: [0,1,2,3]
17 | num_stages: 4
18 |
19 | epoch: 24
20 | LearningRate:
21 | base_lr: 0.01
22 | schedulers:
23 | - !PiecewiseDecay
24 | gamma: 0.1
25 | milestones: [16, 22]
26 | - !LinearWarmup
27 | start_factor: 0.1
28 | steps: 1000
29 |
--------------------------------------------------------------------------------
/configs/pedestrian/README_cn.md:
--------------------------------------------------------------------------------
1 | [English](README.md) | 简体中文
2 | # 特色垂类检测模型
3 |
4 | 我们提供了针对不同场景的基于PaddlePaddle的检测模型,用户可以下载模型进行使用。
5 |
6 | | 任务 | 算法 | 精度(Box AP) | 下载 | 配置文件 |
7 | |:---------------------|:---------:|:------:| :---------------------------------------------------------------------------------: | :------:|
8 | | 行人检测 | YOLOv3 | 51.8 | [下载链接](https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/pedestrian/pedestrian_yolov3_darknet.yml) |
9 |
10 | ## 行人检测(Pedestrian Detection)
11 |
12 | 行人检测的主要应用有智能监控。在监控场景中,大多是从公共区域的监控摄像头视角拍摄行人,获取图像后再进行行人检测。
13 |
14 | ### 1. 模型结构
15 |
16 | Backbone为Darknet53的YOLOv3。
17 |
18 |
19 | ### 2. 训练参数配置
20 |
21 | PaddleDetection提供了使用COCO数据集对YOLOv3进行训练的参数配置文件[yolov3_darknet53_270e_coco.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.0/configs/yolov3/yolov3_darknet53_270e_coco.yml),与之相比,在进行行人检测的模型训练时,我们对以下参数进行了修改:
22 |
23 | * num_classes: 1
24 | * dataset_dir: dataset/pedestrian
25 |
26 | ### 3. 精度指标
27 |
28 | 模型在我们针对监控场景的内部数据上精度指标为:
29 |
30 | IOU=.5时的AP为 0.792。
31 |
32 | IOU=.5-.95时的AP为 0.518。
33 |
34 | ### 4. 预测
35 |
36 | 用户可以使用我们训练好的模型进行行人检测:
37 |
38 | ```
39 | export CUDA_VISIBLE_DEVICES=0
40 | python -u tools/infer.py -c configs/pedestrian/pedestrian_yolov3_darknet.yml \
41 | -o weights=https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams \
42 | --infer_dir configs/pedestrian/demo \
43 | --draw_threshold 0.3 \
44 | --output_dir configs/pedestrian/demo/output
45 | ```
46 |
47 | 预测结果示例:
48 |
49 | 
50 |
51 | 
52 |
--------------------------------------------------------------------------------
/configs/pedestrian/demo/001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/001.png
--------------------------------------------------------------------------------
/configs/pedestrian/demo/002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/002.png
--------------------------------------------------------------------------------
/configs/pedestrian/demo/003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/003.png
--------------------------------------------------------------------------------
/configs/pedestrian/demo/004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/pedestrian/demo/004.png
--------------------------------------------------------------------------------
/configs/pedestrian/pedestrian.json:
--------------------------------------------------------------------------------
1 | {
2 | "images": [],
3 | "annotations": [],
4 | "categories": [
5 | {
6 | "supercategory": "component",
7 | "id": 1,
8 | "name": "pedestrian"
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/configs/pedestrian/pedestrian_yolov3_darknet.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '../yolov3/_base_/optimizer_270e.yml',
5 | '../yolov3/_base_/yolov3_darknet53.yml',
6 | '../yolov3/_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: https://paddledet.bj.bcebos.com/models/pedestrian_yolov3_darknet.pdparams
11 |
12 | num_classes: 1
13 |
14 | TrainDataset:
15 | !COCODataSet
16 | dataset_dir: dataset/pedestrian
17 | anno_path: annotations/instances_train2017.json
18 | image_dir: train2017
19 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
20 |
21 | EvalDataset:
22 | !COCODataSet
23 | dataset_dir: dataset/pedestrian
24 | anno_path: annotations/instances_val2017.json
25 | image_dir: val2017
26 |
27 | TestDataset:
28 | !ImageFolder
29 | anno_path: configs/pedestrian/pedestrian.json
30 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 405
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 243
10 | - 324
11 | - !LinearWarmup
12 | start_factor: 0.
13 | steps: 4000
14 |
15 | OptimizerBuilder:
16 | optimizer:
17 | momentum: 0.9
18 | type: Momentum
19 | regularizer:
20 | factor: 0.0005
21 | type: L2
22 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/optimizer_2x.yml:
--------------------------------------------------------------------------------
1 | epoch: 811
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 649
10 | - 730
11 | - !LinearWarmup
12 | start_factor: 0.
13 | steps: 4000
14 |
15 | OptimizerBuilder:
16 | optimizer:
17 | momentum: 0.9
18 | type: Momentum
19 | regularizer:
20 | factor: 0.0005
21 | type: L2
22 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/optimizer_365e.yml:
--------------------------------------------------------------------------------
1 | epoch: 365
2 |
3 | LearningRate:
4 | base_lr: 0.005
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 243
10 | - !LinearWarmup
11 | start_factor: 0.
12 | steps: 4000
13 |
14 | OptimizerBuilder:
15 | clip_grad_by_norm: 35.
16 | optimizer:
17 | momentum: 0.9
18 | type: Momentum
19 | regularizer:
20 | factor: 0.0005
21 | type: L2
22 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/optimizer_650e.yml:
--------------------------------------------------------------------------------
1 | epoch: 650
2 |
3 | LearningRate:
4 | base_lr: 0.005
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 430
10 | - 540
11 | - 610
12 | - !LinearWarmup
13 | start_factor: 0.
14 | steps: 4000
15 |
16 | OptimizerBuilder:
17 | optimizer:
18 | momentum: 0.9
19 | type: Momentum
20 | regularizer:
21 | factor: 0.0005
22 | type: L2
23 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_mbv3_large.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | YOLOv3:
8 | backbone: MobileNetV3
9 | neck: PPYOLOFPN
10 | yolo_head: YOLOv3Head
11 | post_process: BBoxPostProcess
12 |
13 | MobileNetV3:
14 | model_name: large
15 | scale: 1.
16 | with_extra_blocks: false
17 | extra_block_filters: []
18 | feature_maps: [13, 16]
19 |
20 | PPYOLOFPN:
21 | in_channels: [160, 368]
22 | coord_conv: true
23 | conv_block_num: 0
24 | spp: true
25 | drop_block: true
26 |
27 | YOLOv3Head:
28 | anchors: [[11, 18], [34, 47], [51, 126],
29 | [115, 71], [120, 195], [254, 235]]
30 | anchor_masks: [[3, 4, 5], [0, 1, 2]]
31 | loss: YOLOv3Loss
32 |
33 | YOLOv3Loss:
34 | ignore_thresh: 0.5
35 | downsample: [32, 16]
36 | label_smooth: false
37 | scale_x_y: 1.05
38 | iou_loss: IouLoss
39 |
40 | IouLoss:
41 | loss_weight: 2.5
42 | loss_square: true
43 |
44 | BBoxPostProcess:
45 | decode:
46 | name: YOLOBox
47 | conf_thresh: 0.005
48 | downsample_ratio: 32
49 | clip_bbox: true
50 | scale_x_y: 1.05
51 | nms:
52 | name: MultiClassNMS
53 | keep_top_k: 100
54 | nms_threshold: 0.45
55 | nms_top_k: 1000
56 | score_threshold: 0.005
57 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_mbv3_small.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | YOLOv3:
8 | backbone: MobileNetV3
9 | neck: PPYOLOFPN
10 | yolo_head: YOLOv3Head
11 | post_process: BBoxPostProcess
12 |
13 | MobileNetV3:
14 | model_name: small
15 | scale: 1.
16 | with_extra_blocks: false
17 | extra_block_filters: []
18 | feature_maps: [9, 12]
19 |
20 | PPYOLOFPN:
21 | in_channels: [96, 304]
22 | coord_conv: true
23 | conv_block_num: 0
24 | spp: true
25 | drop_block: true
26 |
27 | YOLOv3Head:
28 | anchors: [[11, 18], [34, 47], [51, 126],
29 | [115, 71], [120, 195], [254, 235]]
30 | anchor_masks: [[3, 4, 5], [0, 1, 2]]
31 | loss: YOLOv3Loss
32 |
33 | YOLOv3Loss:
34 | ignore_thresh: 0.5
35 | downsample: [32, 16]
36 | label_smooth: false
37 | scale_x_y: 1.05
38 | iou_loss: IouLoss
39 |
40 | IouLoss:
41 | loss_weight: 2.5
42 | loss_square: true
43 |
44 | BBoxPostProcess:
45 | decode:
46 | name: YOLOBox
47 | conf_thresh: 0.005
48 | downsample_ratio: 32
49 | clip_bbox: true
50 | scale_x_y: 1.05
51 | nms:
52 | name: MultiClassNMS
53 | keep_top_k: 100
54 | nms_threshold: 0.45
55 | nms_top_k: 1000
56 | score_threshold: 0.005
57 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_r18vd.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | YOLOv3:
8 | backbone: ResNet
9 | neck: PPYOLOFPN
10 | yolo_head: YOLOv3Head
11 | post_process: BBoxPostProcess
12 |
13 | ResNet:
14 | depth: 18
15 | variant: d
16 | return_idx: [2, 3]
17 | freeze_at: -1
18 | freeze_norm: false
19 | norm_decay: 0.
20 |
21 | PPYOLOFPN:
22 | drop_block: true
23 | block_size: 3
24 | keep_prob: 0.9
25 | conv_block_num: 0
26 |
27 | YOLOv3Head:
28 | anchor_masks: [[3, 4, 5], [0, 1, 2]]
29 | anchors: [[10, 14], [23, 27], [37, 58],
30 | [81, 82], [135, 169], [344, 319]]
31 | loss: YOLOv3Loss
32 |
33 | YOLOv3Loss:
34 | ignore_thresh: 0.7
35 | downsample: [32, 16]
36 | label_smooth: false
37 | scale_x_y: 1.05
38 | iou_loss: IouLoss
39 |
40 | IouLoss:
41 | loss_weight: 2.5
42 | loss_square: true
43 |
44 | BBoxPostProcess:
45 | decode:
46 | name: YOLOBox
47 | conf_thresh: 0.01
48 | downsample_ratio: 32
49 | clip_bbox: true
50 | scale_x_y: 1.05
51 | nms:
52 | name: MatrixNMS
53 | keep_top_k: 100
54 | score_threshold: 0.01
55 | post_threshold: 0.01
56 | nms_top_k: -1
57 | background_label: -1
58 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_r50vd_dcn.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | YOLOv3:
8 | backbone: ResNet
9 | neck: PPYOLOFPN
10 | yolo_head: YOLOv3Head
11 | post_process: BBoxPostProcess
12 |
13 | ResNet:
14 | depth: 50
15 | variant: d
16 | return_idx: [1, 2, 3]
17 | dcn_v2_stages: [3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
22 | PPYOLOFPN:
23 | coord_conv: true
24 | drop_block: true
25 | block_size: 3
26 | keep_prob: 0.9
27 | spp: true
28 |
29 | YOLOv3Head:
30 | anchors: [[10, 13], [16, 30], [33, 23],
31 | [30, 61], [62, 45], [59, 119],
32 | [116, 90], [156, 198], [373, 326]]
33 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
34 | loss: YOLOv3Loss
35 | iou_aware: true
36 | iou_aware_factor: 0.4
37 |
38 | YOLOv3Loss:
39 | ignore_thresh: 0.7
40 | downsample: [32, 16, 8]
41 | label_smooth: false
42 | scale_x_y: 1.05
43 | iou_loss: IouLoss
44 | iou_aware_loss: IouAwareLoss
45 |
46 | IouLoss:
47 | loss_weight: 2.5
48 | loss_square: true
49 |
50 | IouAwareLoss:
51 | loss_weight: 1.0
52 |
53 | BBoxPostProcess:
54 | decode:
55 | name: YOLOBox
56 | conf_thresh: 0.01
57 | downsample_ratio: 32
58 | clip_bbox: true
59 | scale_x_y: 1.05
60 | nms:
61 | name: MatrixNMS
62 | keep_top_k: 100
63 | score_threshold: 0.01
64 | post_threshold: 0.01
65 | nms_top_k: -1
66 | background_label: -1
67 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 50
5 | sample_transforms:
6 | - Decode: {}
7 | - Mixup: {alpha: 1.5, beta: 1.5}
8 | - RandomDistort: {}
9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
10 | - RandomCrop: {}
11 | - RandomFlip: {}
12 | batch_transforms:
13 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
14 | - NormalizeBox: {}
15 | - PadBox: {num_max_boxes: 50}
16 | - BboxXYXY2XYWH: {}
17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
18 | - Permute: {}
19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
20 | batch_size: 24
21 | shuffle: true
22 | drop_last: true
23 | mixup_epoch: 25000
24 | use_shared_memory: true
25 |
26 | EvalReader:
27 | sample_transforms:
28 | - Decode: {}
29 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
30 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
31 | - Permute: {}
32 | batch_size: 8
33 | drop_empty: false
34 |
35 | TestReader:
36 | inputs_def:
37 | image_shape: [3, 608, 608]
38 | sample_transforms:
39 | - Decode: {}
40 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
41 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
42 | - Permute: {}
43 | batch_size: 1
44 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_tiny.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x0_5_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | YOLOv3:
8 | backbone: MobileNetV3
9 | neck: PPYOLOTinyFPN
10 | yolo_head: YOLOv3Head
11 | post_process: BBoxPostProcess
12 |
13 | MobileNetV3:
14 | model_name: large
15 | scale: .5
16 | with_extra_blocks: false
17 | extra_block_filters: []
18 | feature_maps: [7, 13, 16]
19 |
20 | PPYOLOTinyFPN:
21 | detection_block_channels: [160, 128, 96]
22 | spp: true
23 | drop_block: true
24 |
25 | YOLOv3Head:
26 | anchors: [[10, 15], [24, 36], [72, 42],
27 | [35, 87], [102, 96], [60, 170],
28 | [220, 125], [128, 222], [264, 266]]
29 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
30 | loss: YOLOv3Loss
31 |
32 | YOLOv3Loss:
33 | ignore_thresh: 0.5
34 | downsample: [32, 16, 8]
35 | label_smooth: false
36 | scale_x_y: 1.05
37 | iou_loss: IouLoss
38 |
39 | IouLoss:
40 | loss_weight: 2.5
41 | loss_square: true
42 |
43 | BBoxPostProcess:
44 | decode:
45 | name: YOLOBox
46 | conf_thresh: 0.005
47 | downsample_ratio: 32
48 | clip_bbox: true
49 | scale_x_y: 1.05
50 | nms:
51 | name: MultiClassNMS
52 | keep_top_k: 100
53 | nms_threshold: 0.45
54 | nms_top_k: 1000
55 | score_threshold: 0.005
56 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolo_tiny_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 4
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 100
5 | sample_transforms:
6 | - Decode: {}
7 | - Mixup: {alpha: 1.5, beta: 1.5}
8 | - RandomDistort: {}
9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
10 | - RandomCrop: {}
11 | - RandomFlip: {}
12 | batch_transforms:
13 | - BatchRandomResize: {target_size: [192, 224, 256, 288, 320, 352, 384, 416, 448, 480, 512], random_size: True, random_interp: True, keep_ratio: False}
14 | - NormalizeBox: {}
15 | - PadBox: {num_max_boxes: 100}
16 | - BboxXYXY2XYWH: {}
17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
18 | - Permute: {}
19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 15], [24, 36], [72, 42], [35, 87], [102, 96], [60, 170], [220, 125], [128, 222], [264, 266]], downsample_ratios: [32, 16, 8]}
20 | batch_size: 32
21 | shuffle: true
22 | drop_last: true
23 | mixup_epoch: 500
24 | use_shared_memory: true
25 |
26 | EvalReader:
27 | sample_transforms:
28 | - Decode: {}
29 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
30 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
31 | - Permute: {}
32 | batch_size: 8
33 | drop_empty: false
34 |
35 | TestReader:
36 | inputs_def:
37 | image_shape: [3, 320, 320]
38 | sample_transforms:
39 | - Decode: {}
40 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 2}
41 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
42 | - Permute: {}
43 | batch_size: 1
44 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolov2_r50vd_dcn.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | YOLOv3:
8 | backbone: ResNet
9 | neck: PPYOLOPAN
10 | yolo_head: YOLOv3Head
11 | post_process: BBoxPostProcess
12 |
13 | ResNet:
14 | depth: 50
15 | variant: d
16 | return_idx: [1, 2, 3]
17 | dcn_v2_stages: [3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
22 | PPYOLOPAN:
23 | drop_block: true
24 | block_size: 3
25 | keep_prob: 0.9
26 | spp: true
27 |
28 | YOLOv3Head:
29 | anchors: [[10, 13], [16, 30], [33, 23],
30 | [30, 61], [62, 45], [59, 119],
31 | [116, 90], [156, 198], [373, 326]]
32 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
33 | loss: YOLOv3Loss
34 | iou_aware: true
35 | iou_aware_factor: 0.5
36 |
37 | YOLOv3Loss:
38 | ignore_thresh: 0.7
39 | downsample: [32, 16, 8]
40 | label_smooth: false
41 | scale_x_y: 1.05
42 | iou_loss: IouLoss
43 | iou_aware_loss: IouAwareLoss
44 |
45 | IouLoss:
46 | loss_weight: 2.5
47 | loss_square: true
48 |
49 | IouAwareLoss:
50 | loss_weight: 1.0
51 |
52 | BBoxPostProcess:
53 | decode:
54 | name: YOLOBox
55 | conf_thresh: 0.01
56 | downsample_ratio: 32
57 | clip_bbox: true
58 | scale_x_y: 1.05
59 | nms:
60 | name: MatrixNMS
61 | keep_top_k: 100
62 | score_threshold: 0.01
63 | post_threshold: 0.01
64 | nms_top_k: -1
65 | background_label: -1
66 |
--------------------------------------------------------------------------------
/configs/ppyolo/_base_/ppyolov2_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 8
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 100
5 | sample_transforms:
6 | - Decode: {}
7 | - Mixup: {alpha: 1.5, beta: 1.5}
8 | - RandomDistort: {}
9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
10 | - RandomCrop: {}
11 | - RandomFlip: {}
12 | batch_transforms:
13 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768], random_size: True, random_interp: True, keep_ratio: False}
14 | - NormalizeBox: {}
15 | - PadBox: {num_max_boxes: 100}
16 | - BboxXYXY2XYWH: {}
17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
18 | - Permute: {}
19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
20 | batch_size: 12
21 | shuffle: true
22 | drop_last: true
23 | mixup_epoch: 25000
24 | use_shared_memory: true
25 |
26 | EvalReader:
27 | sample_transforms:
28 | - Decode: {}
29 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
30 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
31 | - Permute: {}
32 | batch_size: 8
33 | drop_empty: false
34 |
35 | TestReader:
36 | inputs_def:
37 | image_shape: [3, 640, 640]
38 | sample_transforms:
39 | - Decode: {}
40 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
41 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
42 | - Permute: {}
43 | batch_size: 1
44 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolo_r50vd_dcn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolo_r50vd_dcn.yml',
5 | './_base_/optimizer_1x.yml',
6 | './_base_/ppyolo_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 16
10 | weights: output/ppyolo_r50vd_dcn_1x_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolo_r50vd_dcn_1x_minicoco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolo_r50vd_dcn.yml',
5 | './_base_/optimizer_1x.yml',
6 | './_base_/ppyolo_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 8
10 | use_ema: true
11 | weights: output/ppyolo_r50vd_dcn_1x_minicoco/model_final
12 |
13 | TrainReader:
14 | batch_size: 12
15 |
16 | TrainDataset:
17 | !COCODataSet
18 | image_dir: train2017
19 | # refer to https://github.com/giddyyupp/coco-minitrain
20 | anno_path: annotations/instances_minitrain2017.json
21 | dataset_dir: dataset/coco
22 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
23 |
24 | epoch: 192
25 |
26 | LearningRate:
27 | base_lr: 0.005
28 | schedulers:
29 | - !PiecewiseDecay
30 | gamma: 0.1
31 | milestones:
32 | - 153
33 | - 173
34 | - !LinearWarmup
35 | start_factor: 0.
36 | steps: 4000
37 |
38 | OptimizerBuilder:
39 | optimizer:
40 | momentum: 0.9
41 | type: Momentum
42 | regularizer:
43 | factor: 0.0005
44 | type: L2
45 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolo_r50vd_dcn_2x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolo_r50vd_dcn.yml',
5 | './_base_/optimizer_2x.yml',
6 | './_base_/ppyolo_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 16
10 | weights: output/ppyolo_r50vd_dcn_2x_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolo_r50vd_dcn_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolo_r50vd_dcn.yml',
5 | './_base_/optimizer_1x.yml',
6 | './_base_/ppyolo_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 83
10 | weights: output/ppyolo_r50vd_dcn_voc/model_final
11 |
12 | TrainReader:
13 | mixup_epoch: 350
14 | batch_size: 12
15 |
16 | EvalReader:
17 | batch_transforms:
18 | - PadBatch: {pad_gt: True}
19 |
20 | epoch: 583
21 |
22 | LearningRate:
23 | base_lr: 0.00333
24 | schedulers:
25 | - !PiecewiseDecay
26 | gamma: 0.1
27 | milestones:
28 | - 466
29 | - 516
30 | - !LinearWarmup
31 | start_factor: 0.
32 | steps: 4000
33 |
34 | OptimizerBuilder:
35 | optimizer:
36 | momentum: 0.9
37 | type: Momentum
38 | regularizer:
39 | factor: 0.0005
40 | type: L2
41 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolo_test.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolo_r50vd_dcn.yml',
5 | './_base_/optimizer_1x.yml',
6 | './_base_/ppyolo_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 16
10 |
11 | EvalDataset:
12 | !COCODataSet
13 | image_dir: test2017
14 | anno_path: annotations/image_info_test-dev2017.json
15 | dataset_dir: dataset/coco
16 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolo_tiny_650e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolo_tiny.yml',
5 | './_base_/optimizer_650e.yml',
6 | './_base_/ppyolo_tiny_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 1
10 | weights: output/ppyolo_tiny_650e_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolov2_r101vd_dcn_365e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolov2_r50vd_dcn.yml',
5 | './_base_/optimizer_365e.yml',
6 | './_base_/ppyolov2_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 8
10 | weights: output/ppyolov2_r101vd_dcn_365e_coco/model_final
11 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
12 |
13 | ResNet:
14 | depth: 101
15 | variant: d
16 | return_idx: [1, 2, 3]
17 | dcn_v2_stages: [3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
--------------------------------------------------------------------------------
/configs/ppyolo/ppyolov2_r50vd_dcn_365e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | './_base_/ppyolov2_r50vd_dcn.yml',
5 | './_base_/optimizer_365e.yml',
6 | './_base_/ppyolov2_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 8
10 | weights: output/ppyolov2_r50vd_dcn_365e_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/rcnn_enhance/README.md:
--------------------------------------------------------------------------------
1 | ## 服务器端实用目标检测方案
2 |
3 | ### 简介
4 |
5 | * 近年来,学术界和工业界广泛关注图像中目标检测任务。基于[PaddleClas](https://github.com/PaddlePaddle/PaddleClas)中SSLD蒸馏方案训练得到的ResNet50_vd预训练模型(ImageNet1k验证集上Top1 Acc为82.39%),结合PaddleDetection中的丰富算子,飞桨提供了一种面向服务器端实用的目标检测方案PSS-DET(Practical Server Side Detection)。基于COCO2017目标检测数据集,V100单卡预测速度为61FPS时,COCO mAP可达41.2%。
6 |
7 |
8 | ### 模型库
9 |
10 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | Mask AP | 下载 | 配置文件 |
11 | | :---------------------- | :-------------: | :-------: | :-----: | :------------: | :----: | :-----: | :-------------: | :-----: |
12 | | ResNet50-vd-FPN-Dcnv2 | Faster | 2 | 3x | 61.425 | 41.5 | - | [下载链接](https://paddledet.bj.bcebos.com/models/faster_rcnn_enhance_3x_coco.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml) |
13 |
--------------------------------------------------------------------------------
/configs/rcnn_enhance/_base_/faster_rcnn_enhance.yml:
--------------------------------------------------------------------------------
1 | architecture: FasterRCNN
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
3 |
4 | FasterRCNN:
5 | backbone: ResNet
6 | neck: FPN
7 | rpn_head: RPNHead
8 | bbox_head: BBoxHead
9 | # post process
10 | bbox_post_process: BBoxPostProcess
11 |
12 |
13 | ResNet:
14 | # index 0 stands for res2
15 | depth: 50
16 | norm_type: bn
17 | variant: d
18 | freeze_at: 0
19 | return_idx: [0,1,2,3]
20 | num_stages: 4
21 | dcn_v2_stages: [1,2,3]
22 | lr_mult_list: [0.05, 0.05, 0.1, 0.15]
23 |
24 | FPN:
25 | in_channels: [256, 512, 1024, 2048]
26 | out_channel: 64
27 |
28 | RPNHead:
29 | anchor_generator:
30 | aspect_ratios: [0.5, 1.0, 2.0]
31 | anchor_sizes: [[32], [64], [128], [256], [512]]
32 | strides: [4, 8, 16, 32, 64]
33 | rpn_target_assign:
34 | batch_size_per_im: 256
35 | fg_fraction: 0.5
36 | negative_overlap: 0.3
37 | positive_overlap: 0.7
38 | use_random: True
39 | train_proposal:
40 | min_size: 0.0
41 | nms_thresh: 0.7
42 | pre_nms_top_n: 2000
43 | post_nms_top_n: 2000
44 | topk_after_collect: True
45 | test_proposal:
46 | min_size: 0.0
47 | nms_thresh: 0.7
48 | pre_nms_top_n: 500
49 | post_nms_top_n: 300
50 |
51 |
52 | BBoxHead:
53 | head: TwoFCHead
54 | roi_extractor:
55 | resolution: 7
56 | sampling_ratio: 0
57 | aligned: True
58 | bbox_assigner: BBoxLibraAssigner
59 | bbox_loss: DIouLoss
60 |
61 | TwoFCHead:
62 | out_channel: 1024
63 |
64 | BBoxLibraAssigner:
65 | batch_size_per_im: 512
66 | bg_thresh: 0.5
67 | fg_thresh: 0.5
68 | fg_fraction: 0.25
69 | use_random: True
70 |
71 | DIouLoss:
72 | loss_weight: 10.0
73 | use_complete_iou_loss: true
74 |
75 | BBoxPostProcess:
76 | decode: RCNNBox
77 | nms:
78 | name: MultiClassNMS
79 | keep_top_k: 100
80 | score_threshold: 0.05
81 | nms_threshold: 0.5
82 |
--------------------------------------------------------------------------------
/configs/rcnn_enhance/_base_/faster_rcnn_enhance_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomResize: {target_size: [[384,1000], [416,1000], [448,1000], [480,1000], [512,1000], [544,1000], [576,1000], [608,1000], [640,1000], [672,1000]], interp: 2, keep_ratio: True}
6 | - RandomFlip: {prob: 0.5}
7 | - AutoAugment: {autoaug_type: v1}
8 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
9 | - Permute: {}
10 | batch_transforms:
11 | - PadBatch: {pad_to_stride: 32, pad_gt: true}
12 | batch_size: 2
13 | shuffle: true
14 | drop_last: true
15 |
16 |
17 | EvalReader:
18 | sample_transforms:
19 | - Decode: {}
20 | - Resize: {interp: 2, target_size: [640, 640], keep_ratio: True}
21 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
22 | - Permute: {}
23 | batch_transforms:
24 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
25 | batch_size: 1
26 | shuffle: false
27 | drop_last: false
28 | drop_empty: false
29 |
30 |
31 | TestReader:
32 | sample_transforms:
33 | - Decode: {}
34 | - Resize: {interp: 2, target_size: [640, 640], keep_ratio: True}
35 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
36 | - Permute: {}
37 | batch_transforms:
38 | - PadBatch: {pad_to_stride: 32, pad_gt: false}
39 | batch_size: 1
40 | shuffle: false
41 | drop_last: false
42 |
--------------------------------------------------------------------------------
/configs/rcnn_enhance/_base_/optimizer_3x.yml:
--------------------------------------------------------------------------------
1 | epoch: 36
2 |
3 | LearningRate:
4 | base_lr: 0.02
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [24, 33]
9 | - !LinearWarmup
10 | start_factor: 0.
11 | steps: 1000
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/rcnn_enhance/faster_rcnn_enhance_3x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_3x.yml',
5 | '_base_/faster_rcnn_enhance.yml',
6 | '_base_/faster_rcnn_enhance_reader.yml',
7 | ]
8 | weights: output/faster_rcnn_enhance_3x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | use_gpu: true
2 | log_iter: 20
3 | save_dir: output
4 | snapshot_epoch: 1
5 |
--------------------------------------------------------------------------------
/configs/slim/distill/README.md:
--------------------------------------------------------------------------------
1 | # Distillation(蒸馏)
2 |
3 | ## YOLOv3模型蒸馏
4 | 以YOLOv3-MobileNetV1为例,使用YOLOv3-ResNet34作为蒸馏训练的teacher网络, 对YOLOv3-MobileNetV1结构的student网络进行蒸馏。
5 | COCO数据集作为目标检测任务的训练目标难度更大,意味着teacher网络会预测出更多的背景bbox,如果直接用teacher的预测输出作为student学习的`soft label`会有严重的类别不均衡问题。解决这个问题需要引入新的方法,详细背景请参考论文:[Object detection at 200 Frames Per Second](https://arxiv.org/abs/1805.06361)。
6 | 为了确定蒸馏的对象,我们首先需要找到student和teacher网络得到的`x,y,w,h,cls,objness`等Tensor,用teacher得到的结果指导student训练。具体实现可参考[代码](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/ppdet/slim/distill.py)。
7 |
8 | ## Citations
9 | ```
10 | @article{mehta2018object,
11 | title={Object detection at 200 Frames Per Second},
12 | author={Rakesh Mehta and Cemalettin Ozturk},
13 | year={2018},
14 | eprint={1805.06361},
15 | archivePrefix={arXiv},
16 | primaryClass={cs.CV}
17 | }
18 | ```
19 |
--------------------------------------------------------------------------------
/configs/slim/distill/yolov3_mobilenet_v1_coco_distill.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../../yolov3/yolov3_r34_270e_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams
6 |
7 |
8 | slim: Distill
9 | distill_loss: DistillYOLOv3Loss
10 |
11 | DistillYOLOv3Loss:
12 | weight: 1000
13 |
--------------------------------------------------------------------------------
/configs/slim/extensions/yolov3_mobilenet_v1_coco_distill_prune.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../../yolov3/yolov3_r34_270e_coco.yml',
3 | ]
4 |
5 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_r34_270e_coco.pdparams
6 |
7 | slim: DistillPrune
8 |
9 | distill_loss: DistillYOLOv3Loss
10 |
11 | DistillYOLOv3Loss:
12 | weight: 1000
13 |
14 | pruner: Pruner
15 |
16 | Pruner:
17 | criterion: l1_norm
18 | pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
19 | 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
20 | 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
21 | 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
22 | 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
23 | 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
24 | pruned_ratios: [0.5,0.5,0.5,0.5,0.5,0.5,0.7,0.7,0.7,0.7,0.7,0.7,0.8,0.8,0.8,0.8,0.8,0.8]
25 |
--------------------------------------------------------------------------------
/configs/slim/prune/yolov3_prune_fpgm.yml:
--------------------------------------------------------------------------------
1 | # Weights of yolov3_mobilenet_v1_voc
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
3 | slim: Pruner
4 |
5 | Pruner:
6 | criterion: fpgm
7 | pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
8 | 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
9 | 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
10 | 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
11 | 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
12 | 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
13 | pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
14 | print_params: False
15 |
--------------------------------------------------------------------------------
/configs/slim/prune/yolov3_prune_l1_norm.yml:
--------------------------------------------------------------------------------
1 | # Weights of yolov3_mobilenet_v1_voc
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_voc.pdparams
3 | slim: Pruner
4 |
5 | Pruner:
6 | criterion: l1_norm
7 | pruned_params: ['conv2d_27.w_0', 'conv2d_28.w_0', 'conv2d_29.w_0',
8 | 'conv2d_30.w_0', 'conv2d_31.w_0', 'conv2d_32.w_0',
9 | 'conv2d_34.w_0', 'conv2d_35.w_0', 'conv2d_36.w_0',
10 | 'conv2d_37.w_0', 'conv2d_38.w_0', 'conv2d_39.w_0',
11 | 'conv2d_41.w_0', 'conv2d_42.w_0', 'conv2d_43.w_0',
12 | 'conv2d_44.w_0', 'conv2d_45.w_0', 'conv2d_46.w_0']
13 | pruned_ratios: [0.1,0.2,0.2,0.2,0.2,0.1,0.2,0.3,0.3,0.3,0.2,0.1,0.3,0.4,0.4,0.4,0.4,0.3]
14 | print_params: False
15 |
--------------------------------------------------------------------------------
/configs/slim/quant/mask_rcnn_r50_fpn_1x_qat.yml:
--------------------------------------------------------------------------------
1 | pretrain_weights: https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams
2 | slim: QAT
3 |
4 | QAT:
5 | quant_config: {
6 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
7 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
8 | 'quantizable_layer_type': ['Conv2D', 'Linear']}
9 | print_model: True
10 |
11 |
12 | epoch: 5
13 |
14 | LearningRate:
15 | base_lr: 0.001
16 | schedulers:
17 | - !PiecewiseDecay
18 | gamma: 0.1
19 | milestones: [3, 4]
20 | - !LinearWarmup
21 | start_factor: 0.001
22 | steps: 100
23 |
--------------------------------------------------------------------------------
/configs/slim/quant/ssd_mobilenet_v1_qat.yml:
--------------------------------------------------------------------------------
1 | pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/dygraph/ssd_mobilenet_v1_300_120e_voc.pdparams
2 | slim: QAT
3 |
4 | QAT:
5 | quant_config: {
6 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
7 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
8 | 'quantizable_layer_type': ['Conv2D', 'Linear']}
9 | print_model: True
10 |
--------------------------------------------------------------------------------
/configs/slim/quant/yolov3_darknet_qat.yml:
--------------------------------------------------------------------------------
1 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams
2 | slim: QAT
3 |
4 | QAT:
5 | quant_config: {
6 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
7 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
8 | 'quantizable_layer_type': ['Conv2D', 'Linear']}
9 | print_model: True
10 |
11 | epoch: 50
12 |
13 | LearningRate:
14 | base_lr: 0.0001
15 | schedulers:
16 | - !PiecewiseDecay
17 | gamma: 0.1
18 | milestones:
19 | - 30
20 | - 45
21 | - !LinearWarmup
22 | start_factor: 0.
23 | steps: 1000
24 |
25 | OptimizerBuilder:
26 | optimizer:
27 | momentum: 0.9
28 | type: Momentum
29 | regularizer:
30 | factor: 0.0005
31 | type: L2
32 |
--------------------------------------------------------------------------------
/configs/slim/quant/yolov3_mobilenet_v1_qat.yml:
--------------------------------------------------------------------------------
1 | # Weights of yolov3_mobilenet_v1_coco
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams
3 | slim: QAT
4 |
5 | QAT:
6 | quant_config: {
7 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
8 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
9 | 'quantizable_layer_type': ['Conv2D', 'Linear']}
10 | print_model: True
11 |
--------------------------------------------------------------------------------
/configs/slim/quant/yolov3_mobilenet_v3_qat.yml:
--------------------------------------------------------------------------------
1 | # Weights of yolov3_mobilenet_v3_coco
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v3_large_270e_coco.pdparams
3 | slim: QAT
4 |
5 | QAT:
6 | quant_config: {
7 | 'weight_preprocess_type': 'PACT',
8 | 'weight_quantize_type': 'channel_wise_abs_max', 'activation_quantize_type': 'moving_average_abs_max',
9 | 'weight_bits': 8, 'activation_bits': 8, 'dtype': 'int8', 'window_size': 10000, 'moving_rate': 0.9,
10 | 'quantizable_layer_type': ['Conv2D', 'Linear']}
11 | print_model: True
12 |
13 | epoch: 30
14 | LearningRate:
15 | base_lr: 0.0001
16 | schedulers:
17 | - !PiecewiseDecay
18 | gamma: 0.1
19 | milestones:
20 | - 25
21 | - 28
22 | - !LinearWarmup
23 | start_factor: 0.
24 | steps: 2000
25 |
--------------------------------------------------------------------------------
/configs/solov2/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.01
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [8, 11]
9 | - !LinearWarmup
10 | start_factor: 0.
11 | steps: 1000
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0001
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/solov2/_base_/solov2_r50_fpn.yml:
--------------------------------------------------------------------------------
1 | architecture: SOLOv2
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
3 |
4 | SOLOv2:
5 | backbone: ResNet
6 | neck: FPN
7 | solov2_head: SOLOv2Head
8 | mask_head: SOLOv2MaskHead
9 |
10 | ResNet:
11 | depth: 50
12 | norm_type: bn
13 | freeze_at: 0
14 | return_idx: [0,1,2,3]
15 | num_stages: 4
16 |
17 | FPN:
18 | out_channel: 256
19 |
20 | SOLOv2Head:
21 | seg_feat_channels: 512
22 | stacked_convs: 4
23 | num_grids: [40, 36, 24, 16, 12]
24 | kernel_out_channels: 256
25 | solov2_loss: SOLOv2Loss
26 | mask_nms: MaskMatrixNMS
27 |
28 | SOLOv2MaskHead:
29 | mid_channels: 128
30 | out_channels: 256
31 | start_level: 0
32 | end_level: 3
33 |
34 | SOLOv2Loss:
35 | ins_loss_weight: 3.0
36 | focal_loss_gamma: 2.0
37 | focal_loss_alpha: 0.25
38 |
39 | MaskMatrixNMS:
40 | pre_nms_top_n: 500
41 | post_nms_top_n: 100
42 |
--------------------------------------------------------------------------------
/configs/solov2/_base_/solov2_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - Poly2Mask: {}
6 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
7 | - RandomFlip: {}
8 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
9 | - Permute: {}
10 | batch_transforms:
11 | - PadBatch: {pad_to_stride: 32}
12 | - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
13 | scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
14 | coord_sigma: 0.2}
15 | batch_size: 2
16 | shuffle: true
17 | drop_last: true
18 |
19 |
20 | EvalReader:
21 | sample_transforms:
22 | - Decode: {}
23 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
24 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
25 | - Permute: {}
26 | batch_transforms:
27 | - PadBatch: {pad_to_stride: 32}
28 | batch_size: 1
29 | shuffle: false
30 | drop_last: false
31 | drop_empty: false
32 |
33 |
34 | TestReader:
35 | sample_transforms:
36 | - Decode: {}
37 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
38 | - Resize: {interp: 1, target_size: [800, 1333], keep_ratio: True}
39 | - Permute: {}
40 | batch_transforms:
41 | - PadBatch: {pad_to_stride: 32}
42 | batch_size: 1
43 | shuffle: false
44 | drop_last: false
45 |
--------------------------------------------------------------------------------
/configs/solov2/solov2_r50_fpn_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/solov2_r50_fpn.yml',
5 | '_base_/optimizer_1x.yml',
6 | '_base_/solov2_reader.yml',
7 | ]
8 | weights: output/solov2_r50_fpn_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/solov2/solov2_r50_fpn_3x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_instance.yml',
3 | '../runtime.yml',
4 | '_base_/solov2_r50_fpn.yml',
5 | '_base_/optimizer_1x.yml',
6 | '_base_/solov2_reader.yml',
7 | ]
8 | weights: output/solov2_r50_fpn_3x_coco/model_final
9 | epoch: 36
10 |
11 | LearningRate:
12 | base_lr: 0.01
13 | schedulers:
14 | - !PiecewiseDecay
15 | gamma: 0.1
16 | milestones: [24, 33]
17 | - !LinearWarmup
18 | start_factor: 0.
19 | steps: 1000
20 |
21 | TrainReader:
22 | sample_transforms:
23 | - Decode: {}
24 | - Poly2Mask: {}
25 | - RandomResize: {interp: 1,
26 | target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]],
27 | keep_ratio: True}
28 | - RandomFlip: {}
29 | - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
30 | - Permute: {}
31 | batch_transforms:
32 | - PadBatch: {pad_to_stride: 32}
33 | - Gt2Solov2Target: {num_grids: [40, 36, 24, 16, 12],
34 | scale_ranges: [[1, 96], [48, 192], [96, 384], [192, 768], [384, 2048]],
35 | coord_sigma: 0.2}
36 | batch_size: 2
37 | shuffle: true
38 | drop_last: true
39 |
--------------------------------------------------------------------------------
/configs/ssd/README.md:
--------------------------------------------------------------------------------
1 | # SSD: Single Shot MultiBox Detector
2 |
3 | ## Model Zoo
4 |
5 | ### SSD on Pascal VOC
6 |
7 | | 骨架网络 | 网络类型 | 每张GPU图片个数 | 学习率策略 |推理时间(fps) | Box AP | 下载 | 配置文件 |
8 | | :-------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
9 | | VGG | SSD | 8 | 240e | ---- | 77.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_vgg16_300_240e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_vgg16_300_240e_voc.yml) |
10 | | MobileNet v1 | SSD | 32 | 120e | ---- | 73.8 | [下载链接](https://paddledet.bj.bcebos.com/models/ssd_mobilenet_v1_300_120e_voc.pdparams) | [配置文件](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.0/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml) |
11 |
12 | **注意:** SSD-VGG使用4GPU在总batch size为32下训练240个epoch。SSD-MobileNetv1使用2GPU在总batch size为64下训练120周期。
13 |
14 | ## Citations
15 | ```
16 | @article{Liu_2016,
17 | title={SSD: Single Shot MultiBox Detector},
18 | journal={ECCV},
19 | author={Liu, Wei and Anguelov, Dragomir and Erhan, Dumitru and Szegedy, Christian and Reed, Scott and Fu, Cheng-Yang and Berg, Alexander C.},
20 | year={2016},
21 | }
22 | ```
23 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/optimizer_120e.yml:
--------------------------------------------------------------------------------
1 | epoch: 120
2 |
3 | LearningRate:
4 | base_lr: 0.001
5 | schedulers:
6 | - !PiecewiseDecay
7 | milestones: [40, 60, 80, 100]
8 | values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001]
9 | use_warmup: false
10 |
11 | OptimizerBuilder:
12 | optimizer:
13 | momentum: 0.0
14 | type: RMSProp
15 | regularizer:
16 | factor: 0.00005
17 | type: L2
18 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/optimizer_1700e.yml:
--------------------------------------------------------------------------------
1 | epoch: 1700
2 |
3 | LearningRate:
4 | base_lr: 0.4
5 | schedulers:
6 | - !CosineDecay
7 | max_epochs: 1700
8 | - !LinearWarmup
9 | start_factor: 0.3333333333333333
10 | steps: 2000
11 |
12 | OptimizerBuilder:
13 | optimizer:
14 | momentum: 0.9
15 | type: Momentum
16 | regularizer:
17 | factor: 0.0005
18 | type: L2
19 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/optimizer_240e.yml:
--------------------------------------------------------------------------------
1 | epoch: 240
2 |
3 | LearningRate:
4 | base_lr: 0.001
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 160
10 | - 200
11 | - !LinearWarmup
12 | start_factor: 0.3333333333333333
13 | steps: 500
14 |
15 | OptimizerBuilder:
16 | optimizer:
17 | momentum: 0.9
18 | type: Momentum
19 | regularizer:
20 | factor: 0.0005
21 | type: L2
22 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssd_mobilenet_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 8
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 90
5 | sample_transforms:
6 | - Decode: {}
7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
8 | - RandomExpand: {fill_value: [127.5, 127.5, 127.5]}
9 | - RandomCrop: {allow_no_crop: False}
10 | - RandomFlip: {}
11 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
12 | - NormalizeBox: {}
13 | - PadBox: {num_max_boxes: 90}
14 | batch_transforms:
15 | - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
16 | - Permute: {}
17 | batch_size: 32
18 | shuffle: true
19 | drop_last: true
20 |
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
26 | - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
27 | - Permute: {}
28 | batch_size: 1
29 | drop_empty: false
30 |
31 |
32 | TestReader:
33 | inputs_def:
34 | image_shape: [3, 300, 300]
35 | sample_transforms:
36 | - Decode: {}
37 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
38 | - NormalizeImage: {mean: [127.5, 127.5, 127.5], std: [127.502231, 127.502231, 127.502231], is_scale: false}
39 | - Permute: {}
40 | batch_size: 1
41 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssd_mobilenet_v1_300.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ssd_mobilenet_v1_coco_pretrained.pdparams
3 |
4 | SSD:
5 | backbone: MobileNet
6 | ssd_head: SSDHead
7 | post_process: BBoxPostProcess
8 |
9 | MobileNet:
10 | norm_decay: 0.
11 | scale: 1
12 | conv_learning_rate: 0.1
13 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
14 | with_extra_blocks: true
15 | feature_maps: [11, 13, 14, 15, 16, 17]
16 |
17 | SSDHead:
18 | kernel_size: 1
19 | padding: 0
20 | anchor_generator:
21 | steps: [0, 0, 0, 0, 0, 0]
22 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
23 | min_ratio: 20
24 | max_ratio: 90
25 | base_size: 300
26 | min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
27 | max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
28 | offset: 0.5
29 | flip: true
30 | min_max_aspect_ratios_order: false
31 |
32 | BBoxPostProcess:
33 | decode:
34 | name: SSDBox
35 | nms:
36 | name: MultiClassNMS
37 | keep_top_k: 200
38 | score_threshold: 0.01
39 | nms_threshold: 0.45
40 | nms_top_k: 400
41 | nms_eta: 1.0
42 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssd_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 90
5 |
6 | sample_transforms:
7 | - Decode: {}
8 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
9 | - RandomExpand: {fill_value: [104., 117., 123.]}
10 | - RandomCrop: {allow_no_crop: true}
11 | - RandomFlip: {}
12 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
13 | - NormalizeBox: {}
14 | - PadBox: {num_max_boxes: 90}
15 |
16 | batch_transforms:
17 | - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
18 | - Permute: {}
19 |
20 | batch_size: 8
21 | shuffle: true
22 | drop_last: true
23 |
24 |
25 | EvalReader:
26 | sample_transforms:
27 | - Decode: {}
28 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
29 | - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
30 | - Permute: {}
31 | batch_size: 1
32 | drop_empty: false
33 |
34 | TestReader:
35 | inputs_def:
36 | image_shape: [3, 300, 300]
37 | sample_transforms:
38 | - Decode: {}
39 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
40 | - NormalizeImage: {mean: [104., 117., 123.], std: [1., 1., 1.], is_scale: false}
41 | - Permute: {}
42 | batch_size: 1
43 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssd_vgg16_300.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/VGG16_caffe_pretrained.pdparams
3 |
4 | # Model Architecture
5 | SSD:
6 | # model feat info flow
7 | backbone: VGG
8 | ssd_head: SSDHead
9 | # post process
10 | post_process: BBoxPostProcess
11 |
12 | VGG:
13 | depth: 16
14 | normalizations: [20., -1, -1, -1, -1, -1]
15 |
16 | SSDHead:
17 | anchor_generator:
18 | steps: [8, 16, 32, 64, 100, 300]
19 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]]
20 | min_ratio: 20
21 | max_ratio: 90
22 | min_sizes: [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
23 | max_sizes: [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
24 | offset: 0.5
25 | flip: true
26 | min_max_aspect_ratios_order: true
27 |
28 | BBoxPostProcess:
29 | decode:
30 | name: SSDBox
31 | nms:
32 | name: MultiClassNMS
33 | keep_top_k: 200
34 | score_threshold: 0.01
35 | nms_threshold: 0.45
36 | nms_top_k: 400
37 | nms_eta: 1.0
38 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssdlite300_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 8
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 90
5 | sample_transforms:
6 | - Decode: {}
7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
8 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
9 | - RandomCrop: {allow_no_crop: False}
10 | - RandomFlip: {}
11 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
12 | - NormalizeBox: {}
13 | - PadBox: {num_max_boxes: 90}
14 | batch_transforms:
15 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
16 | - Permute: {}
17 | batch_size: 64
18 | shuffle: true
19 | drop_last: true
20 |
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
26 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
27 | - Permute: {}
28 | batch_size: 1
29 | drop_empty: false
30 |
31 |
32 | TestReader:
33 | inputs_def:
34 | image_shape: [3, 300, 300]
35 | sample_transforms:
36 | - Decode: {}
37 | - Resize: {target_size: [300, 300], keep_ratio: False, interp: 1}
38 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
39 | - Permute: {}
40 | batch_size: 1
41 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssdlite320_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 8
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 90
5 | sample_transforms:
6 | - Decode: {}
7 | - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
8 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
9 | - RandomCrop: {allow_no_crop: False}
10 | - RandomFlip: {}
11 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
12 | - NormalizeBox: {}
13 | - PadBox: {num_max_boxes: 90}
14 | batch_transforms:
15 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
16 | - Permute: {}
17 | batch_size: 64
18 | shuffle: true
19 | drop_last: true
20 |
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
26 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
27 | - Permute: {}
28 | batch_size: 1
29 | drop_empty: false
30 |
31 |
32 | TestReader:
33 | inputs_def:
34 | image_shape: [3, 320, 320]
35 | sample_transforms:
36 | - Decode: {}
37 | - Resize: {target_size: [320, 320], keep_ratio: False, interp: 1}
38 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: true}
39 | - Permute: {}
40 | batch_size: 1
41 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssdlite_ghostnet_320.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/GhostNet_x1_3_ssld_pretrained.pdparams
3 |
4 | SSD:
5 | backbone: GhostNet
6 | ssd_head: SSDHead
7 | post_process: BBoxPostProcess
8 |
9 | GhostNet:
10 | scale: 1.3
11 | conv_decay: 0.00004
12 | with_extra_blocks: true
13 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
14 | feature_maps: [13, 18, 19, 20, 21, 22]
15 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
16 |
17 | SSDHead:
18 | use_sepconv: True
19 | conv_decay: 0.00004
20 | anchor_generator:
21 | steps: [16, 32, 64, 107, 160, 320]
22 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
23 | min_ratio: 20
24 | max_ratio: 95
25 | base_size: 320
26 | min_sizes: []
27 | max_sizes: []
28 | offset: 0.5
29 | flip: true
30 | clip: true
31 | min_max_aspect_ratios_order: false
32 |
33 | BBoxPostProcess:
34 | decode:
35 | name: SSDBox
36 | nms:
37 | name: MultiClassNMS
38 | keep_top_k: 200
39 | score_threshold: 0.01
40 | nms_threshold: 0.45
41 | nms_top_k: 400
42 | nms_eta: 1.0
43 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssdlite_mobilenet_v1_300.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams
3 |
4 | SSD:
5 | backbone: MobileNet
6 | ssd_head: SSDHead
7 | post_process: BBoxPostProcess
8 |
9 | MobileNet:
10 | conv_decay: 0.00004
11 | scale: 1
12 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
13 | with_extra_blocks: true
14 | feature_maps: [11, 13, 14, 15, 16, 17]
15 |
16 | SSDHead:
17 | use_sepconv: True
18 | conv_decay: 0.00004
19 | anchor_generator:
20 | steps: [16, 32, 64, 100, 150, 300]
21 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
22 | min_ratio: 20
23 | max_ratio: 95
24 | base_size: 300
25 | min_sizes: []
26 | max_sizes: []
27 | offset: 0.5
28 | flip: true
29 | clip: true
30 | min_max_aspect_ratios_order: False
31 |
32 | BBoxPostProcess:
33 | decode:
34 | name: SSDBox
35 | nms:
36 | name: MultiClassNMS
37 | keep_top_k: 200
38 | score_threshold: 0.01
39 | nms_threshold: 0.45
40 | nms_top_k: 400
41 | nms_eta: 1.0
42 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssdlite_mobilenet_v3_large_320.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
3 |
4 | SSD:
5 | backbone: MobileNetV3
6 | ssd_head: SSDHead
7 | post_process: BBoxPostProcess
8 |
9 | MobileNetV3:
10 | scale: 1.0
11 | model_name: large
12 | conv_decay: 0.00004
13 | with_extra_blocks: true
14 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
15 | feature_maps: [14, 17, 18, 19, 20, 21]
16 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
17 | multiplier: 0.5
18 |
19 | SSDHead:
20 | use_sepconv: True
21 | conv_decay: 0.00004
22 | anchor_generator:
23 | steps: [16, 32, 64, 107, 160, 320]
24 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
25 | min_ratio: 20
26 | max_ratio: 95
27 | base_size: 320
28 | min_sizes: []
29 | max_sizes: []
30 | offset: 0.5
31 | flip: true
32 | clip: true
33 | min_max_aspect_ratios_order: false
34 |
35 | BBoxPostProcess:
36 | decode:
37 | name: SSDBox
38 | nms:
39 | name: MultiClassNMS
40 | keep_top_k: 200
41 | score_threshold: 0.01
42 | nms_threshold: 0.45
43 | nms_top_k: 400
44 | nms_eta: 1.0
45 |
--------------------------------------------------------------------------------
/configs/ssd/_base_/ssdlite_mobilenet_v3_small_320.yml:
--------------------------------------------------------------------------------
1 | architecture: SSD
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
3 |
4 | SSD:
5 | backbone: MobileNetV3
6 | ssd_head: SSDHead
7 | post_process: BBoxPostProcess
8 |
9 | MobileNetV3:
10 | scale: 1.0
11 | model_name: small
12 | conv_decay: 0.00004
13 | with_extra_blocks: true
14 | extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
15 | feature_maps: [10, 13, 14, 15, 16, 17]
16 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
17 | multiplier: 0.5
18 |
19 | SSDHead:
20 | use_sepconv: True
21 | conv_decay: 0.00004
22 | anchor_generator:
23 | steps: [16, 32, 64, 107, 160, 320]
24 | aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
25 | min_ratio: 20
26 | max_ratio: 95
27 | base_size: 320
28 | min_sizes: []
29 | max_sizes: []
30 | offset: 0.5
31 | flip: true
32 | clip: true
33 | min_max_aspect_ratios_order: false
34 |
35 | BBoxPostProcess:
36 | decode:
37 | name: SSDBox
38 | nms:
39 | name: MultiClassNMS
40 | keep_top_k: 200
41 | score_threshold: 0.01
42 | nms_threshold: 0.45
43 | nms_top_k: 400
44 | nms_eta: 1.0
45 |
--------------------------------------------------------------------------------
/configs/ssd/ssd_mobilenet_v1_300_120e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_120e.yml',
5 | '_base_/ssd_mobilenet_v1_300.yml',
6 | '_base_/ssd_mobilenet_reader.yml',
7 | ]
8 | weights: output/ssd_mobilenet_v1_300_120e_voc/model_final
9 |
10 | EvalReader:
11 | batch_transforms:
12 | - PadBatch: {pad_gt: True}
13 |
--------------------------------------------------------------------------------
/configs/ssd/ssd_vgg16_300_240e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_240e.yml',
5 | '_base_/ssd_vgg16_300.yml',
6 | '_base_/ssd_reader.yml',
7 | ]
8 | weights: output/ssd_vgg16_300_240e_voc/model_final
9 |
10 | EvalReader:
11 | batch_transforms:
12 | - PadBatch: {pad_gt: True}
13 |
--------------------------------------------------------------------------------
/configs/ssd/ssdlite_ghostnet_320_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1700e.yml',
5 | '_base_/ssdlite_ghostnet_320.yml',
6 | '_base_/ssdlite320_reader.yml',
7 | ]
8 | weights: output/ssdlite_ghostnet_320_coco/model_final
9 |
10 | epoch: 1700
11 |
12 | LearningRate:
13 | base_lr: 0.2
14 | schedulers:
15 | - !CosineDecay
16 | max_epochs: 1700
17 | - !LinearWarmup
18 | start_factor: 0.33333
19 | steps: 2000
20 |
21 | OptimizerBuilder:
22 | optimizer:
23 | momentum: 0.9
24 | type: Momentum
25 | regularizer:
26 | factor: 0.0005
27 | type: L2
28 |
--------------------------------------------------------------------------------
/configs/ssd/ssdlite_mobilenet_v1_300_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1700e.yml',
5 | '_base_/ssdlite_mobilenet_v1_300.yml',
6 | '_base_/ssdlite300_reader.yml',
7 | ]
8 | weights: output/ssdlite_mobilenet_v1_300_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/ssd/ssdlite_mobilenet_v3_large_320_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1700e.yml',
5 | '_base_/ssdlite_mobilenet_v3_large_320.yml',
6 | '_base_/ssdlite320_reader.yml',
7 | ]
8 | weights: output/ssdlite_mobilenet_v3_large_320_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/ssd/ssdlite_mobilenet_v3_small_320_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1700e.yml',
5 | '_base_/ssdlite_mobilenet_v3_small_320.yml',
6 | '_base_/ssdlite320_reader.yml',
7 | ]
8 | weights: output/ssdlite_mobilenet_v3_small_320_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/optimizer_10x.yml:
--------------------------------------------------------------------------------
1 | epoch: 120
2 |
3 | LearningRate:
4 | base_lr: 0.015
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [80, 110]
9 | - !LinearWarmup
10 | start_factor: 0.2
11 | steps: 500
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0004
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/optimizer_1x.yml:
--------------------------------------------------------------------------------
1 | epoch: 12
2 |
3 | LearningRate:
4 | base_lr: 0.015
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [8, 11]
9 | - !LinearWarmup
10 | start_factor: 0.2
11 | steps: 500
12 |
13 | OptimizerBuilder:
14 | optimizer:
15 | momentum: 0.9
16 | type: Momentum
17 | regularizer:
18 | factor: 0.0004
19 | type: L2
20 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/optimizer_20x.yml:
--------------------------------------------------------------------------------
1 | epoch: 240
2 |
3 | LearningRate:
4 | base_lr: 0.015
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones: [160, 220]
9 | - !LinearWarmup
10 | start_factor: 0.2
11 | steps: 1000
12 |
13 | OptimizerBuilder:
14 | clip_grad_by_norm: 35
15 | optimizer:
16 | momentum: 0.9
17 | type: Momentum
18 | regularizer:
19 | factor: 0.0004
20 | type: L2
21 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/pafnet.yml:
--------------------------------------------------------------------------------
1 | architecture: TTFNet
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: true
5 | ema_decay: 0.9998
6 |
7 | TTFNet:
8 | backbone: ResNet
9 | neck: TTFFPN
10 | ttf_head: TTFHead
11 | post_process: BBoxPostProcess
12 |
13 | ResNet:
14 | depth: 50
15 | variant: d
16 | return_idx: [0, 1, 2, 3]
17 | freeze_at: -1
18 | norm_decay: 0.
19 | dcn_v2_stages: [1, 2, 3]
20 |
21 | TTFFPN:
22 | planes: [256, 128, 64]
23 | shortcut_num: [3, 2, 1]
24 |
25 | TTFHead:
26 | dcn_head: true
27 | hm_loss:
28 | name: CTFocalLoss
29 | loss_weight: 1.
30 | wh_loss:
31 | name: GIoULoss
32 | loss_weight: 5.
33 | reduction: sum
34 |
35 | BBoxPostProcess:
36 | decode:
37 | name: TTFBox
38 | max_per_img: 100
39 | score_thresh: 0.01
40 | down_ratio: 4
41 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/pafnet_lite.yml:
--------------------------------------------------------------------------------
1 | architecture: TTFNet
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | TTFNet:
6 | backbone: MobileNetV3
7 | neck: TTFFPN
8 | ttf_head: TTFHead
9 | post_process: BBoxPostProcess
10 |
11 | MobileNetV3:
12 | scale: 1.0
13 | model_name: large
14 | feature_maps: [5, 8, 14, 17]
15 | with_extra_blocks: true
16 | lr_mult_list: [0.25, 0.25, 0.5, 0.5, 0.75]
17 | conv_decay: 0.00001
18 | norm_decay: 0.0
19 | extra_block_filters: []
20 |
21 | TTFFPN:
22 | planes: [96, 48, 24]
23 | shortcut_num: [2, 2, 1]
24 | lite_neck: true
25 | fusion_method: concat
26 |
27 | TTFHead:
28 | hm_head_planes: 48
29 | wh_head_planes: 24
30 | lite_head: true
31 | hm_loss:
32 | name: CTFocalLoss
33 | loss_weight: 1.
34 | wh_loss:
35 | name: GIoULoss
36 | loss_weight: 5.
37 | reduction: sum
38 |
39 | BBoxPostProcess:
40 | decode:
41 | name: TTFBox
42 | max_per_img: 100
43 | score_thresh: 0.01
44 | down_ratio: 4
45 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/pafnet_lite_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - Cutmix: {alpha: 1.5, beta: 1.5}
6 | - RandomDistort: {}
7 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
8 | - RandomCrop: {aspect_ratio: NULL, cover_all_box: True}
9 | - RandomFlip: {}
10 | - GridMask: {upper_iter: 300000}
11 | batch_transforms:
12 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512], random_interp: True, keep_ratio: False}
13 | - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
14 | - Permute: {}
15 | - Gt2TTFTarget: {down_ratio: 4}
16 | - PadBatch: {pad_to_stride: 32}
17 | batch_size: 12
18 | shuffle: true
19 | drop_last: true
20 | use_shared_memory: true
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {interp: 1, target_size: [320, 320], keep_ratio: False}
26 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
27 | - Permute: {}
28 | batch_size: 1
29 | drop_last: false
30 | drop_empty: false
31 |
32 | TestReader:
33 | sample_transforms:
34 | - Decode: {}
35 | - Resize: {interp: 1, target_size: [320, 320], keep_ratio: False}
36 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
37 | - Permute: {}
38 | batch_size: 1
39 | drop_last: false
40 | drop_empty: false
41 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/pafnet_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - Cutmix: {alpha: 1.5, beta: 1.5}
6 | - RandomDistort: {random_apply: false, random_channel: true}
7 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
8 | - RandomCrop: {aspect_ratio: NULL, cover_all_box: True}
9 | - RandomFlip: {prob: 0.5}
10 | batch_transforms:
11 | - BatchRandomResize: {target_size: [416, 448, 480, 512, 544, 576, 608, 640, 672], keep_ratio: false}
12 | - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
13 | - Permute: {}
14 | - Gt2TTFTarget: {down_ratio: 4}
15 | - PadBatch: {pad_to_stride: 32}
16 | batch_size: 18
17 | shuffle: true
18 | drop_last: true
19 | use_shared_memory: true
20 | mixup_epoch: 100
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
26 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
27 | - Permute: {}
28 | batch_size: 1
29 | drop_last: false
30 | drop_empty: false
31 |
32 | TestReader:
33 | sample_transforms:
34 | - Decode: {}
35 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
36 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
37 | - Permute: {}
38 | batch_size: 1
39 | drop_last: false
40 | drop_empty: false
41 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/ttfnet_darknet53.yml:
--------------------------------------------------------------------------------
1 | architecture: TTFNet
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams
3 |
4 | TTFNet:
5 | backbone: DarkNet
6 | neck: TTFFPN
7 | ttf_head: TTFHead
8 | post_process: BBoxPostProcess
9 |
10 | DarkNet:
11 | depth: 53
12 | freeze_at: 0
13 | return_idx: [1, 2, 3, 4]
14 | norm_type: bn
15 | norm_decay: 0.0004
16 |
17 | TTFFPN:
18 | planes: [256, 128, 64]
19 | shortcut_num: [3, 2, 1]
20 |
21 | TTFHead:
22 | hm_loss:
23 | name: CTFocalLoss
24 | loss_weight: 1.
25 | wh_loss:
26 | name: GIoULoss
27 | loss_weight: 5.
28 | reduction: sum
29 |
30 | BBoxPostProcess:
31 | decode:
32 | name: TTFBox
33 | max_per_img: 100
34 | score_thresh: 0.01
35 | down_ratio: 4
36 |
--------------------------------------------------------------------------------
/configs/ttfnet/_base_/ttfnet_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomFlip: {prob: 0.5}
6 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
7 | - NormalizeImage: {mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375], is_scale: false}
8 | - Permute: {}
9 | batch_transforms:
10 | - Gt2TTFTarget: {down_ratio: 4}
11 | - PadBatch: {pad_to_stride: 32}
12 | batch_size: 12
13 | shuffle: true
14 | drop_last: true
15 | use_shared_memory: true
16 |
17 | EvalReader:
18 | sample_transforms:
19 | - Decode: {}
20 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
21 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
22 | - Permute: {}
23 | batch_size: 1
24 | drop_last: false
25 | drop_empty: false
26 |
27 | TestReader:
28 | sample_transforms:
29 | - Decode: {}
30 | - Resize: {interp: 1, target_size: [512, 512], keep_ratio: False}
31 | - NormalizeImage: {is_scale: false, mean: [123.675, 116.28, 103.53], std: [58.395, 57.12, 57.375]}
32 | - Permute: {}
33 | batch_size: 1
34 | drop_last: false
35 | drop_empty: false
36 |
--------------------------------------------------------------------------------
/configs/ttfnet/pafnet_10x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_10x.yml',
5 | '_base_/pafnet.yml',
6 | '_base_/pafnet_reader.yml',
7 | ]
8 | weights: output/pafnet_10x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/ttfnet/pafnet_lite_mobilenet_v3_20x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_20x.yml',
5 | '_base_/pafnet_lite.yml',
6 | '_base_/pafnet_lite_reader.yml',
7 | ]
8 | weights: output/pafnet_lite_mobilenet_v3_20x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/ttfnet/ttfnet_darknet53_1x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_1x.yml',
5 | '_base_/ttfnet_darknet53.yml',
6 | '_base_/ttfnet_reader.yml',
7 | ]
8 | weights: output/ttfnet_darknet53_1x_coco/model_final
9 |
--------------------------------------------------------------------------------
/configs/vehicle/demo/001.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/001.jpeg
--------------------------------------------------------------------------------
/configs/vehicle/demo/003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/003.png
--------------------------------------------------------------------------------
/configs/vehicle/demo/004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/004.png
--------------------------------------------------------------------------------
/configs/vehicle/demo/005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/configs/vehicle/demo/005.png
--------------------------------------------------------------------------------
/configs/vehicle/vehicle.json:
--------------------------------------------------------------------------------
1 | {
2 | "images": [],
3 | "annotations": [],
4 | "categories": [
5 | {
6 | "supercategory": "component",
7 | "id": 1,
8 | "name": "car"
9 | },
10 | {
11 | "supercategory": "component",
12 | "id": 2,
13 | "name": "truck"
14 | },
15 | {
16 | "supercategory": "component",
17 | "id": 3,
18 | "name": "bus"
19 | },
20 | {
21 | "supercategory": "component",
22 | "id": 4,
23 | "name": "motorbike"
24 | },
25 | {
26 | "supercategory": "component",
27 | "id": 5,
28 | "name": "tricycle"
29 | },
30 | {
31 | "supercategory": "component",
32 | "id": 6,
33 | "name": "carplate"
34 | }
35 | ]
36 | }
37 |
--------------------------------------------------------------------------------
/configs/vehicle/vehicle_yolov3_darknet.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '../yolov3/_base_/optimizer_270e.yml',
5 | '../yolov3/_base_/yolov3_darknet53.yml',
6 | '../yolov3/_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: https://paddledet.bj.bcebos.com/models/vehicle_yolov3_darknet.pdparams
11 |
12 | YOLOv3Head:
13 | anchors: [[8, 9], [10, 23], [19, 15],
14 | [23, 33], [40, 25], [54, 50],
15 | [101, 80], [139, 145], [253, 224]]
16 |
17 | BBoxPostProcess:
18 | nms:
19 | name: MultiClassNMS
20 | keep_top_k: 100
21 | score_threshold: 0.005
22 | nms_threshold: 0.45
23 | nms_top_k: 400
24 |
25 | num_classes: 6
26 |
27 | TrainDataset:
28 | !COCODataSet
29 | dataset_dir: dataset/vehicle
30 | anno_path: annotations/instances_train2017.json
31 | image_dir: train2017
32 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
33 |
34 | EvalDataset:
35 | !COCODataSet
36 | dataset_dir: dataset/vehicle
37 | anno_path: annotations/instances_val2017.json
38 | image_dir: val2017
39 |
40 | TestDataset:
41 | !ImageFolder
42 | anno_path: configs/vehicle/vehicle.json
43 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/optimizer_270e.yml:
--------------------------------------------------------------------------------
1 | epoch: 270
2 |
3 | LearningRate:
4 | base_lr: 0.001
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 0.1
8 | milestones:
9 | - 216
10 | - 243
11 | - !LinearWarmup
12 | start_factor: 0.
13 | steps: 4000
14 |
15 | OptimizerBuilder:
16 | optimizer:
17 | momentum: 0.9
18 | type: Momentum
19 | regularizer:
20 | factor: 0.0005
21 | type: L2
22 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_darknet53.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/DarkNet53_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | YOLOv3:
6 | backbone: DarkNet
7 | neck: YOLOv3FPN
8 | yolo_head: YOLOv3Head
9 | post_process: BBoxPostProcess
10 |
11 | DarkNet:
12 | depth: 53
13 | return_idx: [2, 3, 4]
14 |
15 | # use default config
16 | # YOLOv3FPN:
17 |
18 | YOLOv3Head:
19 | anchors: [[10, 13], [16, 30], [33, 23],
20 | [30, 61], [62, 45], [59, 119],
21 | [116, 90], [156, 198], [373, 326]]
22 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
23 | loss: YOLOv3Loss
24 |
25 | YOLOv3Loss:
26 | ignore_thresh: 0.7
27 | downsample: [32, 16, 8]
28 | label_smooth: false
29 |
30 | BBoxPostProcess:
31 | decode:
32 | name: YOLOBox
33 | conf_thresh: 0.005
34 | downsample_ratio: 32
35 | clip_bbox: true
36 | nms:
37 | name: MultiClassNMS
38 | keep_top_k: 100
39 | score_threshold: 0.01
40 | nms_threshold: 0.45
41 | nms_top_k: 1000
42 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_mobilenet_v1.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | YOLOv3:
6 | backbone: MobileNet
7 | neck: YOLOv3FPN
8 | yolo_head: YOLOv3Head
9 | post_process: BBoxPostProcess
10 |
11 | MobileNet:
12 | scale: 1
13 | feature_maps: [4, 6, 13]
14 | with_extra_blocks: false
15 | extra_block_filters: []
16 |
17 | # use default config
18 | # YOLOv3FPN:
19 |
20 | YOLOv3Head:
21 | anchors: [[10, 13], [16, 30], [33, 23],
22 | [30, 61], [62, 45], [59, 119],
23 | [116, 90], [156, 198], [373, 326]]
24 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
25 | loss: YOLOv3Loss
26 |
27 | YOLOv3Loss:
28 | ignore_thresh: 0.7
29 | downsample: [32, 16, 8]
30 | label_smooth: false
31 |
32 | BBoxPostProcess:
33 | decode:
34 | name: YOLOBox
35 | conf_thresh: 0.005
36 | downsample_ratio: 32
37 | clip_bbox: true
38 | nms:
39 | name: MultiClassNMS
40 | keep_top_k: 100
41 | score_threshold: 0.01
42 | nms_threshold: 0.45
43 | nms_top_k: 1000
44 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_mobilenet_v3_large.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | YOLOv3:
6 | backbone: MobileNetV3
7 | neck: YOLOv3FPN
8 | yolo_head: YOLOv3Head
9 | post_process: BBoxPostProcess
10 |
11 | MobileNetV3:
12 | model_name: large
13 | scale: 1.
14 | with_extra_blocks: false
15 | extra_block_filters: []
16 | feature_maps: [7, 13, 16]
17 |
18 | # use default config
19 | # YOLOv3FPN:
20 |
21 | YOLOv3Head:
22 | anchors: [[10, 13], [16, 30], [33, 23],
23 | [30, 61], [62, 45], [59, 119],
24 | [116, 90], [156, 198], [373, 326]]
25 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
26 | loss: YOLOv3Loss
27 |
28 | YOLOv3Loss:
29 | ignore_thresh: 0.7
30 | downsample: [32, 16, 8]
31 | label_smooth: false
32 |
33 | BBoxPostProcess:
34 | decode:
35 | name: YOLOBox
36 | conf_thresh: 0.005
37 | downsample_ratio: 32
38 | clip_bbox: true
39 | nms:
40 | name: MultiClassNMS
41 | keep_top_k: 100
42 | score_threshold: 0.01
43 | nms_threshold: 0.45
44 | nms_top_k: 1000
45 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_mobilenet_v3_small.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | YOLOv3:
6 | backbone: MobileNetV3
7 | neck: YOLOv3FPN
8 | yolo_head: YOLOv3Head
9 | post_process: BBoxPostProcess
10 |
11 | MobileNetV3:
12 | model_name: small
13 | scale: 1.
14 | with_extra_blocks: false
15 | extra_block_filters: []
16 | feature_maps: [4, 9, 12]
17 |
18 | # use default config
19 | # YOLOv3FPN:
20 |
21 | YOLOv3Head:
22 | anchors: [[10, 13], [16, 30], [33, 23],
23 | [30, 61], [62, 45], [59, 119],
24 | [116, 90], [156, 198], [373, 326]]
25 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
26 | loss: YOLOv3Loss
27 |
28 | YOLOv3Loss:
29 | ignore_thresh: 0.7
30 | downsample: [32, 16, 8]
31 | label_smooth: false
32 |
33 | BBoxPostProcess:
34 | decode:
35 | name: YOLOBox
36 | conf_thresh: 0.005
37 | downsample_ratio: 32
38 | clip_bbox: true
39 | nms:
40 | name: MultiClassNMS
41 | keep_top_k: 100
42 | score_threshold: 0.01
43 | nms_threshold: 0.45
44 | nms_top_k: 1000
45 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_r34.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet34_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | YOLOv3:
6 | backbone: ResNet
7 | neck: YOLOv3FPN
8 | yolo_head: YOLOv3Head
9 | post_process: BBoxPostProcess
10 |
11 | ResNet:
12 | depth: 34
13 | return_idx: [1, 2, 3]
14 | freeze_at: -1
15 | freeze_norm: false
16 | norm_decay: 0.
17 |
18 | YOLOv3Head:
19 | anchors: [[10, 13], [16, 30], [33, 23],
20 | [30, 61], [62, 45], [59, 119],
21 | [116, 90], [156, 198], [373, 326]]
22 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
23 | loss: YOLOv3Loss
24 |
25 | YOLOv3Loss:
26 | ignore_thresh: 0.7
27 | downsample: [32, 16, 8]
28 | label_smooth: false
29 |
30 | BBoxPostProcess:
31 | decode:
32 | name: YOLOBox
33 | conf_thresh: 0.005
34 | downsample_ratio: 32
35 | clip_bbox: true
36 | nms:
37 | name: MultiClassNMS
38 | keep_top_k: 100
39 | score_threshold: 0.01
40 | nms_threshold: 0.45
41 | nms_top_k: 1000
42 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_r50vd_dcn.yml:
--------------------------------------------------------------------------------
1 | architecture: YOLOv3
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
3 | norm_type: sync_bn
4 |
5 | YOLOv3:
6 | backbone: ResNet
7 | neck: YOLOv3FPN
8 | yolo_head: YOLOv3Head
9 | post_process: BBoxPostProcess
10 |
11 | ResNet:
12 | depth: 50
13 | variant: d
14 | return_idx: [1, 2, 3]
15 | dcn_v2_stages: [3]
16 | freeze_at: -1
17 | freeze_norm: false
18 | norm_decay: 0.
19 |
20 | # YOLOv3FPN:
21 |
22 | YOLOv3Head:
23 | anchors: [[10, 13], [16, 30], [33, 23],
24 | [30, 61], [62, 45], [59, 119],
25 | [116, 90], [156, 198], [373, 326]]
26 | anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
27 | loss: YOLOv3Loss
28 |
29 | YOLOv3Loss:
30 | ignore_thresh: 0.7
31 | downsample: [32, 16, 8]
32 | label_smooth: false
33 |
34 | BBoxPostProcess:
35 | decode:
36 | name: YOLOBox
37 | conf_thresh: 0.005
38 | downsample_ratio: 32
39 | clip_bbox: true
40 | nms:
41 | name: MultiClassNMS
42 | keep_top_k: 100
43 | score_threshold: 0.01
44 | nms_threshold: 0.45
45 | nms_top_k: 1000
46 |
--------------------------------------------------------------------------------
/configs/yolov3/_base_/yolov3_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 2
2 | TrainReader:
3 | inputs_def:
4 | num_max_boxes: 50
5 | sample_transforms:
6 | - Decode: {}
7 | - Mixup: {alpha: 1.5, beta: 1.5}
8 | - RandomDistort: {}
9 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
10 | - RandomCrop: {}
11 | - RandomFlip: {}
12 | batch_transforms:
13 | - BatchRandomResize: {target_size: [320, 352, 384, 416, 448, 480, 512, 544, 576, 608], random_size: True, random_interp: True, keep_ratio: False}
14 | - NormalizeBox: {}
15 | - PadBox: {num_max_boxes: 50}
16 | - BboxXYXY2XYWH: {}
17 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
18 | - Permute: {}
19 | - Gt2YoloTarget: {anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]], anchors: [[10, 13], [16, 30], [33, 23], [30, 61], [62, 45], [59, 119], [116, 90], [156, 198], [373, 326]], downsample_ratios: [32, 16, 8]}
20 | batch_size: 8
21 | shuffle: true
22 | drop_last: true
23 | mixup_epoch: 250
24 | use_shared_memory: true
25 |
26 | EvalReader:
27 | inputs_def:
28 | num_max_boxes: 50
29 | sample_transforms:
30 | - Decode: {}
31 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
32 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
33 | - Permute: {}
34 | batch_size: 1
35 | drop_empty: false
36 |
37 | TestReader:
38 | inputs_def:
39 | image_shape: [3, 608, 608]
40 | sample_transforms:
41 | - Decode: {}
42 | - Resize: {target_size: [608, 608], keep_ratio: False, interp: 2}
43 | - NormalizeImage: {mean: [0.485, 0.456, 0.406], std: [0.229, 0.224, 0.225], is_scale: True}
44 | - Permute: {}
45 | batch_size: 1
46 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_darknet53_270e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_darknet53.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_darknet53_270e_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_darknet53_270e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_darknet53.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_darknet53_270e_voc/model_final
11 |
12 | EvalReader:
13 | batch_transforms:
14 | - PadBatch: {pad_gt: True}
15 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v1_270e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v1.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_mobilenet_v1_270e_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v1_270e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v1.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_mobilenet_v1_270e_voc/model_final
11 |
12 | LearningRate:
13 | base_lr: 0.001
14 | schedulers:
15 | - !PiecewiseDecay
16 | gamma: 0.1
17 | milestones:
18 | - 216
19 | - 243
20 | - !LinearWarmup
21 | start_factor: 0.
22 | steps: 1000
23 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v1_roadsign.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/roadsign_voc.yml',
3 | '../runtime.yml',
4 | '_base_/yolov3_mobilenet_v1.yml',
5 | '_base_/yolov3_reader.yml',
6 | ]
7 | pretrain_weights: https://paddledet.bj.bcebos.com/models/yolov3_mobilenet_v1_270e_coco.pdparams
8 | weights: output/yolov3_mobilenet_v1_roadsign/model_final
9 |
10 | YOLOv3Loss:
11 | ignore_thresh: 0.7
12 | label_smooth: true
13 |
14 | snapshot_epoch: 2
15 | epoch: 40
16 |
17 | LearningRate:
18 | base_lr: 0.0001
19 | schedulers:
20 | - !PiecewiseDecay
21 | gamma: 0.1
22 | milestones: [32, 36]
23 | - !LinearWarmup
24 | start_factor: 0.3333333333333333
25 | steps: 100
26 |
27 | OptimizerBuilder:
28 | optimizer:
29 | momentum: 0.9
30 | type: Momentum
31 | regularizer:
32 | factor: 0.0005
33 | type: L2
34 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v1.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams
11 | weights: output/yolov3_mobilenet_v1_ssld_270e_coco/model_final
12 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v1_ssld_270e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v1.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_ssld_pretrained.pdparams
11 | weights: output/yolov3_mobilenet_v1_ssld_270e_voc/model_final
12 |
13 | LearningRate:
14 | base_lr: 0.001
15 | schedulers:
16 | - !PiecewiseDecay
17 | gamma: 0.1
18 | milestones:
19 | - 216
20 | - 243
21 | - !LinearWarmup
22 | start_factor: 0.
23 | steps: 1000
24 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v3_large_270e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v3_large.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_mobilenet_v3_large_270e_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v3_large_270e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v3_large.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_mobilenet_v3_large_270e_voc/model_final
11 |
12 | LearningRate:
13 | base_lr: 0.001
14 | schedulers:
15 | - !PiecewiseDecay
16 | gamma: 0.1
17 | milestones:
18 | - 216
19 | - 243
20 | - !LinearWarmup
21 | start_factor: 0.
22 | steps: 1000
23 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_mobilenet_v3_large_ssld_270e_voc.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/voc.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_mobilenet_v3_large.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
11 | weights: output/yolov3_mobilenet_v3_large_ssld_270e_voc/model_final
12 |
13 | LearningRate:
14 | base_lr: 0.001
15 | schedulers:
16 | - !PiecewiseDecay
17 | gamma: 0.1
18 | milestones:
19 | - 216
20 | - 243
21 | - !LinearWarmup
22 | start_factor: 0.
23 | steps: 1000
24 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_r34_270e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_r34.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_r34_270e_coco/model_final
11 |
--------------------------------------------------------------------------------
/configs/yolov3/yolov3_r50vd_dcn_270e_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_270e.yml',
5 | '_base_/yolov3_r50vd_dcn.yml',
6 | '_base_/yolov3_reader.yml',
7 | ]
8 |
9 | snapshot_epoch: 5
10 | weights: output/yolov3_r50vd_dcn_270e_coco/model_final
11 |
--------------------------------------------------------------------------------
/dataset/voc/create_list.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add the PaddleDetection root (three levels up from this file) to sys.path so ppdet can be imported
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import create_voc_list
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | create_voc_list(voc_path)
29 |
--------------------------------------------------------------------------------
/dataset/voc/download_voc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add the PaddleDetection root (three levels up from this file) to sys.path so ppdet can be imported
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'voc')
--------------------------------------------------------------------------------
/dataset/voc/label_list.txt:
--------------------------------------------------------------------------------
1 | person
2 |
--------------------------------------------------------------------------------
/dataset/voc/test.txt:
--------------------------------------------------------------------------------
1 | images/05000143.jpg annotation/05000143.xml
2 |
--------------------------------------------------------------------------------
/dataset/voc/trainval.txt:
--------------------------------------------------------------------------------
1 | images/05000143.jpg annotation/05000143.xml
2 |
--------------------------------------------------------------------------------
/deploy/cpp/README.md:
--------------------------------------------------------------------------------
1 | # C++端预测部署
2 |
3 | ## 本教程结构
4 |
5 | [1.说明](#1说明)
6 |
7 | [2.主要目录和文件](#2主要目录和文件)
8 |
9 | [3.编译部署](#3编译部署)
10 |
11 |
12 |
13 | ## 1.说明
14 |
15 | 本目录为用户提供一个跨平台的`C++`部署方案,让用户通过`PaddleDetection`训练的模型导出后,即可基于本项目快速运行,也可以快速集成代码结合到自己的项目实际应用中去。
16 |
17 | 主要设计的目标包括以下四点:
18 | - 跨平台,支持在 `Windows` 和 `Linux` 完成编译、二次开发集成和部署运行
19 | - 可扩展性,支持用户针对新模型开发自己特殊的数据预处理等逻辑
20 | - 高性能,除了`PaddlePaddle`自身带来的性能优势,我们还针对图像检测的特点对关键步骤进行了性能优化
21 | - 支持各种不同检测模型结构,包括`Yolov3`/`Faster_RCNN`/`SSD`等
22 |
23 | ## 2.主要目录和文件
24 |
25 | ```bash
26 | deploy/cpp
27 | |
28 | ├── src
29 | │ ├── main.cc # 集成代码示例, 程序入口
30 | │ ├── object_detector.cc # 模型加载和预测主要逻辑封装类实现
31 | │ └── preprocess_op.cc # 预处理相关主要逻辑封装实现
32 | |
33 | ├── include
34 | │ ├── config_parser.h # 导出模型配置yaml文件解析
35 | │ ├── object_detector.h # 模型加载和预测主要逻辑封装类
36 | │ └── preprocess_op.h # 预处理相关主要逻辑类封装
37 | |
38 | ├── docs
39 | │ ├── linux_build.md # Linux 编译指南
40 | │ └── windows_vs2019_build.md # Windows VS2019编译指南
41 | │
42 | ├── build.sh # 编译命令脚本
43 | │
44 | ├── CMakeLists.txt # cmake编译入口文件
45 | |
46 | ├── CMakeSettings.json # Visual Studio 2019 CMake项目编译设置
47 | │
48 | └── cmake # 依赖的外部项目cmake(目前仅有yaml-cpp)
49 |
50 | ```
51 |
52 | ## 3.编译部署
53 |
54 | ### 3.1 导出模型
55 | 请确认您已经基于`PaddleDetection`的[export_model.py](https://github.com/PaddlePaddle/PaddleDetection/blob/dygraph/tools/export_model.py)导出您的模型,并妥善保存到合适的位置。导出模型细节请参考 [导出模型教程](https://github.com/PaddlePaddle/PaddleDetection/tree/dygraph/deploy/EXPORT_MODEL.md)。
56 |
57 | 模型导出后, 目录结构如下(以`yolov3_darknet`为例):
58 | ```
59 | yolov3_darknet # 模型目录
60 | ├── infer_cfg.yml # 模型配置信息
61 | ├── model.pdmodel # 模型文件
62 | ├── model.pdiparams.info #模型公用信息
63 | └── model.pdiparams # 参数文件
64 | ```
65 |
66 | 预测时,该目录所在的路径会作为程序的输入参数。
67 |
68 | ### 3.2 编译
69 |
70 | 仅支持在`Windows`和`Linux`平台编译和使用
71 | - [Linux 编译指南](docs/linux_build.md)
72 | - [Windows编译指南(使用Visual Studio 2019)](docs/windows_vs2019_build.md)
73 |
--------------------------------------------------------------------------------
/deploy/cpp/cmake/yaml-cpp.cmake:
--------------------------------------------------------------------------------
1 |
# Downloads yaml-cpp as an external project and builds it as a static
# library, with tests, tools, contrib and the install step switched off.
find_package(Git REQUIRED)

include(ExternalProject)

message("${CMAKE_BUILD_TYPE}")

ExternalProject_Add(
    ext-yaml-cpp
    PREFIX "${CMAKE_BINARY_DIR}/ext/yaml-cpp"
    URL https://bj.bcebos.com/paddlex/deploy/deps/yaml-cpp.zip
    URL_MD5 9542d6de397d1fbd649ed468cb5850e6
    CMAKE_ARGS
        # yaml-cpp feature switches
        -DYAML_CPP_BUILD_TESTS=OFF
        -DYAML_CPP_BUILD_TOOLS=OFF
        -DYAML_CPP_BUILD_CONTRIB=OFF
        -DYAML_CPP_INSTALL=OFF
        # Linkage
        -DMSVC_SHARED_RT=OFF
        -DBUILD_SHARED_LIBS=OFF
        # Forward the parent project's build type and C++ flags
        -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
        -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
        -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
        -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
        # Place the built libraries under the parent build tree
        -DCMAKE_LIBRARY_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
        -DCMAKE_ARCHIVE_OUTPUT_DIRECTORY=${CMAKE_BINARY_DIR}/ext/yaml-cpp/lib
    # Disable install step
    INSTALL_COMMAND ""
    LOG_DOWNLOAD ON
    LOG_BUILD 1
)
31 |
--------------------------------------------------------------------------------
/deploy/imgs/input_shape.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/deploy/imgs/input_shape.png
--------------------------------------------------------------------------------
/deploy/serving/test_client.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
import sys
import numpy as np
from paddle_serving_client import Client
from paddle_serving_app.reader import *
import cv2

# Preprocessing pipeline: load the file, convert BGR -> RGB, resize to
# 608x608 with linear interpolation, divide by 255 and transpose with
# axes (2, 0, 1).
preprocess = Sequential([
    File2Image(),
    BGR2RGB(),
    Resize((608, 608), interpolation=cv2.INTER_LINEAR),
    Div(255.0),
    Transpose((2, 0, 1)),
])

postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])

# Connect to the serving endpoint described by the client config.
client = Client()
client.load_client_config("serving_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9393'])

# The image to run inference on is the first command-line argument.
image_path = sys.argv[1]
im = preprocess(image_path)
feed_dict = {
    "image": im,
    "im_size": np.array(list(im.shape[1:])),
}
fetch_map = client.predict(feed=feed_dict, fetch=["multiclass_nms_0.tmp_0"])

# The post-processor is handed the source image path alongside the results.
fetch_map["image"] = image_path
postprocess(fetch_map)
41 |
--------------------------------------------------------------------------------
/docs/images/000000014439.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/000000014439.jpg
--------------------------------------------------------------------------------
/docs/images/12_Group_Group_12_Group_Group_12_935.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/12_Group_Group_12_Group_Group_12_935.jpg
--------------------------------------------------------------------------------
/docs/images/fps_map.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/fps_map.png
--------------------------------------------------------------------------------
/docs/images/model_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/model_figure.png
--------------------------------------------------------------------------------
/docs/images/reader_figure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/reader_figure.png
--------------------------------------------------------------------------------
/docs/images/road554.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/road554.png
--------------------------------------------------------------------------------
/docs/images/ssld_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/docs/images/ssld_model.png
--------------------------------------------------------------------------------
/ppdet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import (core, data, engine, modeling, model_zoo, optimizer, metrics,
16 | utils, slim)
17 |
--------------------------------------------------------------------------------
/ppdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import config
16 |
--------------------------------------------------------------------------------
/ppdet/core/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppdet/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import source
16 | from . import transform
17 | from . import reader
18 |
19 | from .source import *
20 | from .transform import *
21 | from .reader import *
22 |
--------------------------------------------------------------------------------
/ppdet/data/source/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import coco
16 | from . import voc
17 | from . import widerface
18 | from . import category
19 |
20 | from .coco import *
21 | from .voc import *
22 | from .widerface import *
23 | from .category import *
24 |
--------------------------------------------------------------------------------
/ppdet/data/transform/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import operators
16 | from . import batch_operators
17 |
18 | from .operators import *
19 | from .batch_operators import *
20 |
21 | __all__ = []
22 | __all__ += registered_ops
23 |
--------------------------------------------------------------------------------
/ppdet/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import trainer
16 | from .trainer import *
17 |
18 | from . import callbacks
19 | from .callbacks import *
20 |
21 | from . import env
22 | from .env import *
23 |
24 | __all__ = trainer.__all__ \
25 | + callbacks.__all__ \
26 | + env.__all__
27 |
--------------------------------------------------------------------------------
/ppdet/engine/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import random
21 | import numpy as np
22 |
23 | import paddle
24 | from paddle.distributed import fleet
25 |
26 | __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
27 |
28 |
def init_fleet_env():
    """Initialize ``paddle.distributed.fleet`` with ``is_collective=True``."""
    fleet.init(is_collective=True)
31 |
32 |
def init_parallel_env():
    """Seed the Python/NumPy RNGs per trainer, then init the parallel env.

    When both ``PADDLE_TRAINER_ID`` and ``PADDLE_TRAINERS_NUM`` are set in
    the environment, ``random`` and ``numpy.random`` are seeded with
    ``99 + trainer_id``. Afterwards ``paddle.distributed.init_parallel_env()``
    is called.

    NOTE(review): the final call is assumed to be unconditional (outside the
    ``if``); confirm against the original indentation.
    """
    environ = os.environ
    if 'PADDLE_TRAINER_ID' in environ and 'PADDLE_TRAINERS_NUM' in environ:
        # Offset the base seed by the trainer id so trainers differ.
        trainer_seed = 99 + int(environ['PADDLE_TRAINER_ID'])
        for seed_fn in (random.seed, np.random.seed):
            seed_fn(trainer_seed)

    paddle.distributed.init_parallel_env()
43 |
44 |
def set_random_seed(seed):
    """Seed Python's ``random`` module and NumPy's global RNG.

    Args:
        seed: value passed unchanged to ``random.seed`` and then to
            ``np.random.seed``.
    """
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)
48 |
--------------------------------------------------------------------------------
/ppdet/ext_op/README.md:
--------------------------------------------------------------------------------
1 | # 自定义OP编译
2 | 旋转框IOU计算OP是参考[自定义外部算子](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/07_new_op/new_custom_op.html)实现的。
3 |
4 | ## 1. 环境依赖
5 | - Paddle >= 2.0.1
6 | - gcc 8.2
7 |
8 | ## 2. 安装
9 | ```
10 | python3.7 setup.py install
11 | ```
12 |
13 | 按照如下方式使用
14 | ```
15 | # 引入自定义op
16 | from rbox_iou_ops import rbox_iou
17 |
18 | paddle.set_device('gpu:0')
19 | paddle.disable_static()
20 |
21 | rbox1 = np.random.rand(13000, 5)
22 | rbox2 = np.random.rand(7, 5)
23 |
24 | pd_rbox1 = paddle.to_tensor(rbox1)
25 | pd_rbox2 = paddle.to_tensor(rbox2)
26 |
27 | iou = rbox_iou(pd_rbox1, pd_rbox2)
28 | print('iou', iou)
29 | ```
30 |
31 | ## 3. 单元测试
32 | 单元测试`test.py`文件中,通过对比python实现的结果和自定义op的结果来验证OP的正确性。
33 |
34 | 由于python计算细节与cpp计算细节略有区别,误差区间设置为0.02。
35 | ```
36 | python3.7 test.py
37 | ```
38 | 提示`rbox_iou OP compute right!`说明OP测试通过。
39 |
--------------------------------------------------------------------------------
/ppdet/ext_op/rbox_iou_op.cc:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | Licensed under the Apache License, Version 2.0 (the "License");
3 | you may not use this file except in compliance with the License.
4 | You may obtain a copy of the License at
5 |
6 | http://www.apache.org/licenses/LICENSE-2.0
7 |
8 | Unless required by applicable law or agreed to in writing, software
9 | distributed under the License is distributed on an "AS IS" BASIS,
10 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 | See the License for the specific language governing permissions and
12 | limitations under the License. */
13 |
14 | #include "paddle/extension.h"
15 |
16 | #include
17 |
18 | std::vector RboxIouCPUForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
19 | std::vector RboxIouCUDAForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2);
20 |
21 |
22 | #define CHECK_INPUT_SAME(x1, x2) PD_CHECK(x1.place() == x2.place(), "input must be smae pacle.")
23 | std::vector RboxIouForward(const paddle::Tensor& rbox1, const paddle::Tensor& rbox2) {
24 | CHECK_INPUT_SAME(rbox1, rbox2);
25 | if (rbox1.place() == paddle::PlaceType::kCPU) {
26 | return RboxIouCPUForward(rbox1, rbox2);
27 | }
28 | else if (rbox1.place() == paddle::PlaceType::kGPU) {
29 | return RboxIouCUDAForward(rbox1, rbox2);
30 | }
31 | }
32 |
33 | std::vector> InferShape(std::vector rbox1_shape, std::vector rbox2_shape) {
34 | return {{rbox1_shape[0], rbox2_shape[0]}};
35 | }
36 |
37 | std::vector InferDtype(paddle::DataType t1, paddle::DataType t2) {
38 | return {t1};
39 | }
40 |
41 | PD_BUILD_OP(rbox_iou)
42 | .Inputs({"RBOX1", "RBOX2"})
43 | .Outputs({"Output"})
44 | .SetKernelFn(PD_KERNEL(RboxIouForward))
45 | .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
46 | .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
47 |
--------------------------------------------------------------------------------
/ppdet/ext_op/setup.py:
--------------------------------------------------------------------------------
1 | from paddle.utils.cpp_extension import CppExtension, CUDAExtension, setup
2 |
if __name__ == "__main__":
    # Build the `rbox_iou_ops` extension from the C++ and CUDA sources.
    rbox_iou_extension = CUDAExtension(
        sources=['rbox_iou_op.cc', 'rbox_iou_op.cu'])
    setup(name='rbox_iou_ops', ext_modules=rbox_iou_extension)
7 |
--------------------------------------------------------------------------------
/ppdet/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import metrics
16 | from .metrics import *
17 |
18 | __all__ = metrics.__all__
19 |
--------------------------------------------------------------------------------
/ppdet/model_zoo/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import model_zoo
16 | from .model_zoo import *
17 |
18 | __all__ = model_zoo.__all__
19 |
--------------------------------------------------------------------------------
/ppdet/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # OP docs may contains math formula which may cause
2 | # DeprecationWarning in string parsing
3 | import warnings
4 | warnings.filterwarnings(
5 | action='ignore', category=DeprecationWarning, module='ops')
6 |
7 | from . import ops
8 | from . import backbones
9 | from . import necks
10 | from . import proposal_generator
11 | from . import heads
12 | from . import losses
13 | from . import architectures
14 | from . import post_process
15 | from . import layers
16 |
17 | from .ops import *
18 | from .backbones import *
19 | from .necks import *
20 | from .proposal_generator import *
21 | from .heads import *
22 | from .losses import *
23 | from .architectures import *
24 | from .post_process import *
25 | from .layers import *
26 |
--------------------------------------------------------------------------------
/ppdet/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | from . import meta_arch
9 | from . import faster_rcnn
10 | from . import mask_rcnn
11 | from . import yolo
12 | from . import cascade_rcnn
13 | from . import ssd
14 | from . import fcos
15 | from . import solov2
16 | from . import ttfnet
17 | from . import s2anet
18 |
19 | from .meta_arch import *
20 | from .faster_rcnn import *
21 | from .mask_rcnn import *
22 | from .yolo import *
23 | from .cascade_rcnn import *
24 | from .ssd import *
25 | from .fcos import *
26 | from .solov2 import *
27 | from .ttfnet import *
28 | from .s2anet import *
29 |
--------------------------------------------------------------------------------
/ppdet/modeling/architectures/meta_arch.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import paddle
7 | import paddle.nn as nn
8 | from ppdet.core.workspace import register
9 |
10 | __all__ = ['BaseArch']
11 |
12 |
@register
class BaseArch(nn.Layer):
    """Common base layer for the detection architectures.

    ``forward`` stores the inputs on the instance, calls ``model_arch`` and
    returns ``get_loss()`` when ``self.training`` is truthy, otherwise
    ``get_pred()``. The base ``get_loss`` / ``get_pred`` raise
    ``NotImplementedError``; the base ``model_arch`` does nothing.

    Args:
        data_format (str): defaults to 'NCHW'. When set to 'NHWC', the
            ``image`` entry of the inputs is transposed with perm
            ``[0, 2, 3, 1]`` before the model runs.
    """

    def __init__(self, data_format='NCHW'):
        super(BaseArch, self).__init__()
        self.data_format = data_format

    def forward(self, inputs):
        """Run the architecture on ``inputs`` and return loss or prediction."""
        if self.data_format == 'NHWC':
            # NOTE: this overwrites the 'image' entry of the caller's dict.
            inputs['image'] = paddle.transpose(inputs['image'], [0, 2, 3, 1])
        self.inputs = inputs
        self.model_arch()

        return self.get_loss() if self.training else self.get_pred()

    def build_inputs(self, data, input_def):
        """Map each name in ``input_def`` to the item of ``data`` at the same index."""
        return {name: data[idx] for idx, name in enumerate(input_def)}

    def model_arch(self):
        """Hook called by ``forward`` before the loss/prediction; no-op here."""
        pass

    def get_loss(self):
        """Return the training output; not implemented in the base class."""
        raise NotImplementedError("Should implement get_loss method!")

    def get_pred(self):
        """Return the inference output; not implemented in the base class."""
        raise NotImplementedError("Should implement get_pred method!")
46 |
--------------------------------------------------------------------------------
/ppdet/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import vgg
16 | from . import resnet
17 | from . import darknet
18 | from . import mobilenet_v1
19 | from . import mobilenet_v3
20 | from . import hrnet
21 | from . import blazenet
22 | from . import ghostnet
23 | from . import senet
24 |
25 | from .vgg import *
26 | from .resnet import *
27 | from .darknet import *
28 | from .mobilenet_v1 import *
29 | from .mobilenet_v3 import *
30 | from .hrnet import *
31 | from .blazenet import *
32 | from .ghostnet import *
33 | from .senet import *
34 | from .swin_transformer import *
35 |
--------------------------------------------------------------------------------
/ppdet/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import bbox_head
16 | from . import mask_head
17 | from . import yolo_head
18 | from . import roi_extractor
19 | from . import ssd_head
20 | from . import fcos_head
21 | from . import solov2_head
22 | from . import ttf_head
23 | from . import cascade_head
24 | from . import face_head
25 | from . import s2anet_head
26 |
27 | from .bbox_head import *
28 | from .mask_head import *
29 | from .yolo_head import *
30 | from .roi_extractor import *
31 | from .ssd_head import *
32 | from .fcos_head import *
33 | from .solov2_head import *
34 | from .ttf_head import *
35 | from .cascade_head import *
36 | from .face_head import *
37 | from .s2anet_head import *
38 |
--------------------------------------------------------------------------------
/ppdet/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import yolo_loss
16 | from . import iou_aware_loss
17 | from . import iou_loss
18 | from . import ssd_loss
19 | from . import fcos_loss
20 | from . import solov2_loss
21 | from . import ctfocal_loss
22 |
23 | from .yolo_loss import *
24 | from .iou_aware_loss import *
25 | from .iou_loss import *
26 | from .ssd_loss import *
27 | from .fcos_loss import *
28 | from .solov2_loss import *
29 | from .ctfocal_loss import *
30 |
--------------------------------------------------------------------------------
/ppdet/modeling/necks/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import fpn
16 | from . import yolo_fpn
17 | from . import hrfpn
18 | from . import ttf_fpn
19 |
20 | from .fpn import *
21 | from .yolo_fpn import *
22 | from .hrfpn import *
23 | from .ttf_fpn import *
24 |
--------------------------------------------------------------------------------
/ppdet/modeling/proposal_generator/__init__.py:
--------------------------------------------------------------------------------
1 | from . import rpn_head
2 | from .rpn_head import *
3 |
--------------------------------------------------------------------------------
/ppdet/modeling/shape_spec.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from collections import namedtuple
16 |
17 |
class ShapeSpec(
        namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
    """
    Lightweight, immutable record of basic shape information about a tensor.

    Typically handed between model components as an auxiliary input/output
    to make up for the lack of shape inference among paddle modules.
    Every field is optional and falls back to ``None`` when not supplied.

    Attributes:
        channels: first field of the record, ``None`` if unspecified.
        height: second field of the record, ``None`` if unspecified.
        width: third field of the record, ``None`` if unspecified.
        stride: fourth field of the record, ``None`` if unspecified.
    """

    def __new__(cls, channels=None, height=None, width=None, stride=None):
        # The underlying namedtuple has no defaults of its own, so the
        # defaults are provided here and forwarded positionally.
        values = (channels, height, width, stride)
        return super(ShapeSpec, cls).__new__(cls, *values)
34 |
--------------------------------------------------------------------------------
/ppdet/modeling/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/ppdet/slim/quant.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | from paddle.utils import try_import
21 |
22 | from ppdet.core.workspace import register, serializable
23 | from ppdet.utils.logger import setup_logger
24 | logger = setup_logger(__name__)
25 |
26 |
# Quantization-aware-training hook: callable that quantizes a model in place
# through paddleslim and can afterwards export the quantized model.
@register
@serializable
class QAT(object):
    def __init__(self, quant_config, print_model):
        """
        Args:
            quant_config: passed as ``config`` to ``paddleslim.dygraph.quant.QAT``.
            print_model: when truthy, the model is logged before and after
                quantization.
        """
        super(QAT, self).__init__()
        self.print_model = print_model
        self.quant_config = quant_config

    def _log_model(self, title, model):
        """Log ``title`` followed by ``model`` when ``print_model`` is set."""
        if self.print_model:
            logger.info(title)
            logger.info(model)

    def __call__(self, model):
        """Quantize ``model`` and return the same object.

        The paddleslim quanter is created on every call and stored on
        ``self.quanter`` so that ``save_quantized_model`` can reuse it.
        """
        # paddleslim is imported lazily, only when quantization is requested.
        slim = try_import('paddleslim')
        self.quanter = slim.dygraph.quant.QAT(config=self.quant_config)

        self._log_model("Model before quant:", model)
        self.quanter.quantize(model)
        self._log_model("Quantized model:", model)

        return model

    def save_quantized_model(self, layer, path, input_spec=None, **config):
        """Forward to the quanter's ``save_quantized_model``.

        Relies on ``self.quanter``, which only exists after the instance has
        been called on a model.
        """
        quanter = self.quanter
        quanter.save_quantized_model(
            model=layer, path=path, input_spec=input_spec, **config)
53 |
--------------------------------------------------------------------------------
/ppdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tqdm
2 | typeguard ; python_version >= '3.4'
3 | visualdl>=2.1.0
4 | opencv-python
5 | PyYAML
6 | shapely
7 | scipy
8 | terminaltables
9 | pycocotools
10 | setuptools>=42.0.0
11 |
--------------------------------------------------------------------------------
/sort/deep_sort/__init__.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 |
--------------------------------------------------------------------------------
/sort/deep_sort/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__init__.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/detection.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/detection.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/iou_matching.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/iou_matching.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/kalman_filter.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/kalman_filter.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/linear_assignment.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/linear_assignment.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/nn_matching.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/nn_matching.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/preprocessing.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/preprocessing.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/track.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/track.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/__pycache__/tracker.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/__pycache__/tracker.cpython-37.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/detection.py:
--------------------------------------------------------------------------------
1 | # vim: expandtab:ts=4:sw=4
2 | import numpy as np
3 |
4 |
class Detection(object):
    """
    This class represents a bounding box detection in a single image.

    Parameters
    ----------
    tlwh : array_like
        Bounding box in format `(top left x, top left y, width, height)`.
    confidence : float
        Detector confidence score.
    feature : array_like
        A feature vector that describes the object contained in this image.

    Attributes
    ----------
    tlwh : ndarray
        Bounding box in format `(top left x, top left y, width, height)`,
        stored as float64.
    confidence : float
        Detector confidence score.
    feature : ndarray
        The feature vector converted to a float32 array.

    """

    def __init__(self, tlwh, confidence, feature):
        # Use the builtin `float` (== float64): the `np.float` alias was
        # deprecated in NumPy 1.20 and removed in 1.24, where it raises
        # AttributeError.
        self.tlwh = np.asarray(tlwh, dtype=float)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)

    def to_tlbr(self):
        """Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,
        `(top left, bottom right)`.

        Returns a new array; `self.tlwh` is left untouched.
        """
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret

    def to_xyah(self):
        """Convert bounding box to format `(center x, center y, aspect ratio,
        height)`, where the aspect ratio is `width / height`.

        Returns a new array; `self.tlwh` is left untouched.
        """
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret

    def get_all(self):
        """Return `[tlwh, feature, confidence]` (the stored objects, not copies)."""
        return [self.tlwh, self.feature, self.confidence]
54 |
--------------------------------------------------------------------------------
/sort/deep_sort/detection.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/detection.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/iou_matching.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/iou_matching.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/kalman_filter.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/kalman_filter.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/linear_assignment.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/linear_assignment.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/nn_matching.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/nn_matching.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/preprocessing.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/preprocessing.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/track.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/track.pyc
--------------------------------------------------------------------------------
/sort/deep_sort/tracker.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/dddlli/Swin-Transformer-Object-Detection-PaddlePaddle/67244dd49a523146b4bd33e0ed5b97072c727609/sort/deep_sort/tracker.pyc
--------------------------------------------------------------------------------
/sort/detector_new.py:
--------------------------------------------------------------------------------
1 | import paddlehub as hub
2 | import os
3 | import time
4 | from extractor_new import *
5 | from PIL import Image
6 |
# Make only GPU 0 visible to CUDA for this process; set at import time,
# before the detector module below is created.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Module-level PaddleHub detector, created once at import and used by
# get_object_position_new. Presumably a YOLOv3/DarkNet-53 pedestrian
# detector, judging by the module name -- confirm against PaddleHub.
object_detector = hub.Module(name="yolov3_darknet53_pedestrian")
10 |
def get_object_position_new(img_roi, img, min_confidence, net):
    """Detect pedestrians in ``img`` and extract one feature per kept box.

    Args:
        img_roi: image array the crops are sliced from as ``[y:y1, x:x1]``.
        img: image passed to the module-level ``object_detector``.
        min_confidence: detections scoring below this value are discarded.
        net: model handed to ``Extractor`` to compute the crop features.

    Returns:
        A ``(boxes, confidences, features)`` triple. ``boxes`` is a list of
        ``(x, y, w, h)`` tuples, ``confidences`` the matching scores and
        ``features`` the extractor output for the same detections, in the
        same order. When nothing is kept, ``features`` is an empty list.
    """
    list_ = []
    confidences = []
    img_crop = []
    result = object_detector.object_detection(images=[img], use_gpu=True, score_thresh=0.00, visualization=False)
    position = result[0]['data']
    for dict_position in position:
        if dict_position['label'] != 'pedestrian':
            continue
        x = dict_position['left']
        y = dict_position['top']
        x1 = dict_position['right']
        y1 = dict_position['bottom']
        w = x1 - x
        h = y1 - y
        # Degenerate boxes (w <= 0) would divide by zero below; boxes that
        # are not taller than wide are dropped as before.
        if w <= 0 or h / w <= 1:
            continue
        confidence = dict_position['confidence']
        roi = img_roi[int(y):int(y1), int(x):int(x1)]
        # Debug dump of the most recent ROI (overwritten on every detection).
        cv2.imwrite('deep_sort_paddle/roi_img/1.jpg', roi)

        if confidence >= min_confidence:
            # Collect the crop together with its box and score so that the
            # returned features stay index-aligned with the boxes.
            list_.append((x, y, w, h))
            confidences.append(confidence)
            img_crop.append(roi)

    if not img_crop:
        # Nothing to extract; the extractor cannot build an empty batch.
        return list_, confidences, []

    time_ = time.time()
    extr = Extractor(net)
    feature = extr(img_crop)
    time_extr = time.time()
    print('extr_time:')
    print(time_extr-time_)
    return list_, confidences, feature
--------------------------------------------------------------------------------
/sort/extractor_new.py:
--------------------------------------------------------------------------------
1 | import paddle.vision.transforms as transforms
2 | import numpy as np
3 | import cv2
4 | import paddle
5 |
class Extractor(object):
    """Feature extractor: preprocesses image crops and runs them through a network."""

    def __init__(self, Net):
        # Net is an already-trained, already-loaded model; it is used as-is
        # (it is not switched to eval mode here).
        self.net = Net
        # Target size handed to cv2.resize for every crop.
        self.size = (64, 128)
        to_tensor = transforms.ToTensor()
        normalize = transforms.Normalize(
            [0.58666682, 0.58484647, 0.57418193],
            [0.20736474, 0.19249499, 0.1870952])
        self.norm = transforms.Compose([to_tensor, normalize])

    def _preprocess(self, im_crops):
        """Resize and normalize each ROI crop, then concatenate them into one batch."""
        tensors = []
        for crop in im_crops:
            # Scale pixel values by 1/255 as float32 before resizing.
            scaled = crop.astype(np.float32) / 255.
            resized = cv2.resize(scaled, self.size)
            tensors.append(self.norm(resized).unsqueeze(0))
        return paddle.concat(tensors, axis=0)

    def __call__(self, im_crops):
        """Return the network's features for ``im_crops`` as a numpy array."""
        batch = self._preprocess(im_crops)
        # Feed the ROI batch to the network without tracking gradients.
        with paddle.no_grad():
            features = self.net(batch)
        return features.cpu().numpy()
29 |
--------------------------------------------------------------------------------
/sort/generate_img_path.py:
--------------------------------------------------------------------------------
1 | import os
2 |
# Root folder holding one sub-directory per sequence; each sequence keeps its
# frames in an `img1` folder, named 000001.jpg, 000002.jpg, ...
img_root = '/media/hansansui/han-cv/deep_sort_paddle/test'
for seq_name in os.listdir(img_root):
    img_path = '{}/{}/img1'.format(img_root, seq_name)
    # Skip entries without an `img1` folder. This includes the <sequence>.txt
    # lists written below, which would otherwise crash a second run.
    if not os.path.isdir(img_path):
        continue
    length = len(os.listdir(img_path))
    # One list file per sequence, written next to the sequence folder, with
    # one frame path per line.
    with open('{}/{}.txt'.format(img_root, seq_name), 'w') as f:
        for i in range(1, length + 1):
            f.write('{}/{:06d}.jpg\n'.format(img_path, i))
11 |
12 |
13 |
--------------------------------------------------------------------------------