├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ └── bug_report.md ├── .gitignore ├── LICENSE ├── README.md ├── README_cn.md ├── benchmark ├── README.md ├── dataset.py ├── trtexec.md ├── trtinfer.py ├── utils.py └── yolov8_onnx.py ├── hubconf.py ├── rtdetr_paddle ├── README.md ├── README_cn.md ├── configs │ ├── datasets │ │ ├── coco_detection.yml │ │ └── voc.yml │ ├── rtdetr │ │ ├── _base_ │ │ │ ├── optimizer_6x.yml │ │ │ ├── rtdetr_r50vd.yml │ │ │ └── rtdetr_reader.yml │ │ ├── rtdetr_hgnetv2_l_6x_coco.yml │ │ ├── rtdetr_hgnetv2_x_6x_coco.yml │ │ ├── rtdetr_r101vd_6x_coco.yml │ │ ├── rtdetr_r18vd_6x_coco.yml │ │ ├── rtdetr_r34vd_6x_coco.yml │ │ ├── rtdetr_r50vd_6x_coco.yml │ │ └── rtdetr_r50vd_m_6x_coco.yml │ └── runtime.yml ├── dataset │ ├── coco │ │ └── download_coco.py │ └── voc │ │ ├── create_list.py │ │ ├── download_voc.py │ │ └── label_list.txt ├── ppdet │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── schema.py │ │ │ └── yaml_helpers.py │ │ └── workspace.py │ ├── data │ │ ├── __init__.py │ │ ├── reader.py │ │ ├── shm_utils.py │ │ ├── source │ │ │ ├── __init__.py │ │ │ ├── category.py │ │ │ ├── coco.py │ │ │ ├── dataset.py │ │ │ └── voc.py │ │ ├── transform │ │ │ ├── __init__.py │ │ │ ├── batch_operators.py │ │ │ ├── op_helper.py │ │ │ └── operators.py │ │ └── utils.py │ ├── engine │ │ ├── __init__.py │ │ ├── callbacks.py │ │ ├── env.py │ │ ├── export_utils.py │ │ └── trainer.py │ ├── metrics │ │ ├── __init__.py │ │ ├── coco_utils.py │ │ ├── json_results.py │ │ ├── keypoint_metrics.py │ │ ├── map_utils.py │ │ ├── mcmot_metrics.py │ │ ├── metrics.py │ │ ├── mot_metrics.py │ │ ├── munkres.py │ │ ├── pose3d_metrics.py │ │ └── widerface_utils.py │ ├── modeling │ │ ├── __init__.py │ │ ├── architectures │ │ │ ├── __init__.py │ │ │ ├── detr.py │ │ │ └── meta_arch.py │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── convnext.py │ │ │ ├── csp_darknet.py │ │ │ ├── cspresnet.py │ │ │ ├── darknet.py │ │ │ ├── focalnet.py │ │ │ ├── hgnet_v2.py │ │ │ ├── lcnet.py │ │ │ ├── mobilenet_v1.py │ │ │ ├── mobilenet_v3.py │ │ │ ├── mobileone.py │ │ │ ├── name_adapter.py │ │ │ ├── resnet.py │ │ │ ├── shufflenet_v2.py │ │ │ ├── swin_transformer.py │ │ │ ├── trans_encoder.py │ │ │ ├── transformer_utils.py │ │ │ ├── vision_transformer.py │ │ │ └── vit_mae.py │ │ ├── bbox_utils.py │ │ ├── cls_utils.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ └── detr_head.py │ │ ├── initializer.py │ │ ├── keypoint_utils.py │ │ ├── layers.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── detr_loss.py │ │ │ ├── focal_loss.py │ │ │ ├── gfocal_loss.py │ │ │ ├── iou_loss.py │ │ │ ├── smooth_l1_loss.py │ │ │ └── varifocal_loss.py │ │ ├── ops.py │ │ ├── post_process.py │ │ ├── shape_spec.py │ │ └── transformers │ │ │ ├── __init__.py │ │ │ ├── deformable_transformer.py │ │ │ ├── detr_transformer.py │ │ │ ├── dino_transformer.py │ │ │ ├── ext_op │ │ │ ├── README.md │ │ │ ├── ms_deformable_attn_op.cc │ │ │ ├── ms_deformable_attn_op.cu │ │ │ ├── setup_ms_deformable_attn_op.py │ │ │ └── test_ms_deformable_attn_op.py │ │ │ ├── hybrid_encoder.py │ │ │ ├── matchers.py │ │ │ ├── position_encoding.py │ │ │ ├── rtdetr_transformer.py │ │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── optimizer.py │ │ └── utils.py │ ├── utils │ │ ├── __init__.py │ │ ├── cam_utils.py │ │ ├── check.py │ │ ├── checkpoint.py │ │ ├── cli.py │ │ ├── colormap.py │ │ ├── download.py │ │ ├── fuse_utils.py │ │ ├── logger.py │ │ ├── profiler.py │ │ ├── stats.py │ │ ├── visualizer.py │ │ └── 
voc_utils.py │ └── version.py ├── requirements.txt └── tools │ ├── eval.py │ ├── export_model.py │ ├── infer.py │ ├── slice_image.py │ ├── train.py │ └── x2coco.py ├── rtdetr_pytorch ├── README.md ├── configs │ ├── dataset │ │ └── coco_detection.yml │ ├── rtdetr │ │ ├── include │ │ │ ├── dataloader.yml │ │ │ ├── dataloader_regnet.yml │ │ │ ├── optimizer.yml │ │ │ ├── optimizer_regnet.yml │ │ │ ├── rtdetr_dla34.yml │ │ │ ├── rtdetr_r50vd.yml │ │ │ └── rtdetr_regnet.yml │ │ ├── rtdetr_dla34_6x_coco.yml │ │ ├── rtdetr_r101vd_6x_coco.yml │ │ ├── rtdetr_r18vd_6x_coco.yml │ │ ├── rtdetr_r34vd_6x_coco.yml │ │ ├── rtdetr_r50vd_6x_coco.yml │ │ ├── rtdetr_r50vd_m_6x_coco.yml │ │ └── rtdetr_regnet_6x_coco.yml │ └── runtime.yml ├── requirements.txt ├── src │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── config.py │ │ ├── yaml_config.py │ │ └── yaml_utils.py │ ├── data │ │ ├── __init__.py │ │ ├── cifar10 │ │ │ └── __init__.py │ │ ├── coco │ │ │ ├── __init__.py │ │ │ ├── coco_dataset.py │ │ │ ├── coco_eval.py │ │ │ └── coco_utils.py │ │ ├── dataloader.py │ │ ├── functional.py │ │ └── transforms.py │ ├── misc │ │ ├── __init__.py │ │ ├── dist.py │ │ ├── logger.py │ │ └── visualizer.py │ ├── nn │ │ ├── __init__.py │ │ ├── arch │ │ │ ├── __init__.py │ │ │ └── classification.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── dla.py │ │ │ ├── presnet.py │ │ │ ├── regnet.py │ │ │ ├── test_resnet.py │ │ │ └── utils.py │ │ └── criterion │ │ │ ├── __init__.py │ │ │ └── utils.py │ ├── optim │ │ ├── __init__.py │ │ ├── amp.py │ │ ├── ema.py │ │ └── optim.py │ ├── solver │ │ ├── __init__.py │ │ ├── det_engine.py │ │ ├── det_solver.py │ │ └── solver.py │ └── zoo │ │ ├── __init__.py │ │ └── rtdetr │ │ ├── __init__.py │ │ ├── box_ops.py │ │ ├── denoising.py │ │ ├── hybrid_encoder.py │ │ ├── matcher.py │ │ ├── rtdetr.py │ │ ├── rtdetr_criterion.py │ │ ├── rtdetr_decoder.py │ │ ├── rtdetr_postprocessor.py │ │ └── utils.py └── tools │ ├── README.md │ ├── export_onnx.py │ ├── infer.py │ └── train.py ├── rtdetrv2_paddle └── readme.md └── rtdetrv2_pytorch ├── Dockerfile ├── README.md ├── configs ├── dataset │ ├── coco_detection.yml │ └── voc_detection.yml ├── rtdetr │ ├── include │ │ ├── dataloader.yml │ │ ├── optimizer.yml │ │ └── rtdetr_r50vd.yml │ ├── readme.md │ ├── rtdetr_r101vd_6x_coco.yml │ ├── rtdetr_r18vd_6x_coco.yml │ ├── rtdetr_r34vd_6x_coco.yml │ ├── rtdetr_r50vd_6x_coco.yml │ └── rtdetr_r50vd_m_6x_coco.yml ├── rtdetrv2 │ ├── include │ │ ├── dataloader.yml │ │ ├── optimizer.yml │ │ └── rtdetrv2_r50vd.yml │ ├── rtdetrv2_hgnetv2_h_6x_coco.yml │ ├── rtdetrv2_hgnetv2_l_6x_coco.yml │ ├── rtdetrv2_hgnetv2_x_6x_coco.yml │ ├── rtdetrv2_r101vd_6x_coco.yml │ ├── rtdetrv2_r18vd_120e_coco.yml │ ├── rtdetrv2_r18vd_120e_voc.yml │ ├── rtdetrv2_r18vd_dsp_3x_coco.yml │ ├── rtdetrv2_r18vd_sp1_120e_coco.yml │ ├── rtdetrv2_r18vd_sp2_120e_coco.yml │ ├── rtdetrv2_r18vd_sp3_120e_coco.yml │ ├── rtdetrv2_r34vd_120e_coco.yml │ ├── rtdetrv2_r34vd_dsp_1x_coco.yml │ ├── rtdetrv2_r50vd_6x_coco.yml │ ├── rtdetrv2_r50vd_dsp_1x_coco.yml │ ├── rtdetrv2_r50vd_m_7x_coco.yml │ └── rtdetrv2_r50vd_m_dsp_3x_coco.yml └── runtime.yml ├── dataset └── readme.md ├── docker-compose.yml ├── references └── deploy │ ├── readme.md │ ├── rtdetrv2_onnxruntime.py │ ├── rtdetrv2_openvino.py │ ├── rtdetrv2_tensorrt.py │ └── rtdetrv2_torch.py ├── requirements.txt ├── src ├── __init__.py ├── core │ ├── __init__.py │ ├── _config.py │ ├── workspace.py │ ├── yaml_config.py │ └── yaml_utils.py ├── data │ ├── __init__.py │ ├── _misc.py │ ├── 
dataloader.py │ ├── dataset │ │ ├── __init__.py │ │ ├── _dataset.py │ │ ├── cifar_dataset.py │ │ ├── coco_dataset.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── voc_detection.py │ │ └── voc_eval.py │ └── transforms │ │ ├── __init__.py │ │ ├── _transforms.py │ │ ├── container.py │ │ ├── functional.py │ │ ├── mosaic.py │ │ └── presets.py ├── misc │ ├── __init__.py │ ├── box_ops.py │ ├── dist_utils.py │ ├── lazy_loader.py │ ├── logger.py │ ├── profiler_utils.py │ └── visualizer.py ├── nn │ ├── __init__.py │ ├── arch │ │ ├── __init__.py │ │ ├── classification.py │ │ └── yolo.py │ ├── backbone │ │ ├── __init__.py │ │ ├── common.py │ │ ├── csp_darknet.py │ │ ├── csp_resnet.py │ │ ├── hgnetv2.py │ │ ├── presnet.py │ │ ├── test_resnet.py │ │ ├── timm_model.py │ │ ├── torchvision_model.py │ │ └── utils.py │ ├── criterion │ │ ├── __init__.py │ │ └── det_criterion.py │ └── postprocessor │ │ ├── __init__.py │ │ ├── box_revert.py │ │ ├── detr_postprocessor.py │ │ └── nms_postprocessor.py ├── optim │ ├── __init__.py │ ├── amp.py │ ├── ema.py │ ├── optim.py │ └── warmup.py ├── solver │ ├── __init__.py │ ├── _solver.py │ ├── clas_engine.py │ ├── clas_solver.py │ ├── det_engine.py │ └── det_solver.py └── zoo │ ├── __init__.py │ └── rtdetr │ ├── __init__.py │ ├── box_ops.py │ ├── conver_params.py │ ├── denoising.py │ ├── hybrid_encoder.py │ ├── matcher.py │ ├── rtdetr.py │ ├── rtdetr_criterion.py │ ├── rtdetr_decoder.py │ ├── rtdetr_postprocessor.py │ ├── rtdetrv2_criterion.py │ ├── rtdetrv2_decoder.py │ └── utils.py └── tools ├── README.md ├── export_onnx.py ├── export_trt.py ├── run_profile.py └── train.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: ['https://github.com/lyuwenyu/cvperception/assets/17582080/2b4bfcd5-5c0f-45fd-badf-3f6e5b0249ac'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: lyuwenyu 7 | 8 | --- 9 | 10 | **Star RTDETR** 11 | Please click **star** on the RTDETR homepage first to support this project. 12 | Star RTDETR to help more people discover this project. 13 | 14 | --- 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | If applicable, add screenshots to help explain your problem. 19 | 20 | **To Reproduce** 21 | Steps to reproduce the behavior.
22 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Some of the code and tools used for speed benchmarking in the paper 2 | 3 | 4 | ## Benchmarking the speed of the YOLO series [in progress] 5 | Using [yolov8](https://github.com/ultralytics/ultralytics) as an example 6 | 7 |
8 | 1. Export to ONNX 9 | 10 | Run the `export_onnx` function in `yolov8_onnx.py`; the newly added code mainly handles converting the output format 11 |
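For reference, steps 1 and 2 can be driven end to end from the command line; the flags below are taken from the argparse setup at the bottom of `yolov8_onnx.py`:

```bash
python yolov8_onnx.py --name yolov8l --score_threshold 0.001 --iou_threshold 0.7 --max_output_boxes 300
```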
12 | 13 | 14 |
15 | 2. Insert NMS 16 | 17 | Use the `yolo_insert_nms` function in `utils.py`; after exporting the ONNX model, inspect its structure with [Netron](https://netron.app/). 18 |
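A minimal sketch of calling it directly, with the default thresholds declared in `utils.py` (note that the current implementation always writes `yolo_w_nms.onnx`, regardless of the input file name):

```python
from utils import yolo_insert_nms

# appends an EfficientNMS_TRT plugin node after the exported boxes/scores outputs
yolo_insert_nms('yolov8l.onnx', score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300)
```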
19 | 20 | 21 |
22 | 3. Convert to TensorRT 23 | 24 | Convert with the script in `trtexec.md`, or with the Python code in `utils.py` 25 | ```bash 26 | # trtexec -h 27 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16 28 | ``` 29 |
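The TensorRT-building Python code referenced above is not part of the `utils.py` shown in this snapshot, so the following is only a minimal sketch using the standard TensorRT 8.x Python API (file names follow the trtexec command above):

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(logger, '')  # registers built-in plugins such as EfficientNMS_TRT

builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open('yolov8l_w_nms.onnx', 'rb') as f:
    if not parser.parse(f.read()):  # parse the ONNX graph, including the NMS node
        raise RuntimeError(parser.get_error(0))

config = builder.create_builder_config()
config.set_flag(trt.BuilderFlag.FP16)  # mirrors trtexec's --fp16
engine = builder.build_serialized_network(network, config)

with open('yolov8l_w_nms.engine', 'wb') as f:
    f.write(engine)
```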
30 | 31 | 32 |
33 | 4. Benchmark with trtexec 34 | 35 | Use the script in `trtexec.md`, with the `--buildOnly` flag removed 36 | 37 |
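Concretely, this means loading the prebuilt engine instead of rebuilding it, e.g. (flags as in `trtexec.md`):

```bash
trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
```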
38 | 39 | 40 | 41 |
42 | 5. Profiling analysis (optional) 43 | 44 | Prepend the following command to the command from step 4 45 | ```bash 46 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms 47 | ``` 48 | The captured timeline can then be visualized and analyzed with nsys 49 | 50 | 51 |
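Combined with the step-4 command, the full profiling invocation (also listed in `trtexec.md`) is:

```bash
nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms \
    trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
```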
52 | 53 | 54 |
55 | 6. Benchmark or deploy in Python 56 | 57 | To measure the model's average speed on the COCO val dataset, run inference with the code in `trtinfer.py` 58 | 59 |
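As a rough sketch of how the average latency can be measured with the `TimeProfiler` helper from `utils.py` (`images` and `run_engine` are hypothetical placeholders here; the actual inference loop lives in `trtinfer.py`):

```python
from utils import TimeProfiler

profiler = TimeProfiler()
for image in images:   # e.g. preprocessed COCO val images
    with profiler:     # synchronizes CUDA before and after timing
        outputs = run_engine(image)

print('average latency (ms):', profiler.total / len(images) * 1000)
```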
60 | -------------------------------------------------------------------------------- /benchmark/trtexec.md: -------------------------------------------------------------------------------- 1 | 2 | ```bash 3 | # build tensorrt engine 4 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16 5 | 6 | # using dynamic shapes 7 | # --explicitBatch --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640 --shapes=image:8x3x640x640 8 | 9 | # timeline 10 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin' 11 | 12 | # https://forums.developer.nvidia.com/t/about-loadinputs-in-trtexec/218880 13 | ``` 14 | -------------------------------------------------------------------------------- /benchmark/utils.py: -------------------------------------------------------------------------------- 1 | '''by lyuwenyu 2 | ''' 3 | 4 | import time 5 | import contextlib 6 | import numpy as np 7 | from PIL import Image 8 | from collections import OrderedDict 9 | 10 | import onnx 11 | import torch 12 | import onnx_graphsurgeon 13 | 14 | 15 | def to_binary_data(path, size=(640, 640), output_name='input_tensor.bin'): 16 | '''--loadInputs='image:input_tensor.bin' 17 | ''' 18 | im = Image.open(path).resize(size) 19 | data = np.asarray(im, dtype=np.float32).transpose(2, 0, 1)[None] / 255. 20 | data.tofile(output_name) 21 | 22 | 23 | def yolo_insert_nms(path, score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300, simplify=False): 24 | ''' 25 | http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/api/onnxops/onnx__EfficientNMS_TRT.html 26 | https://huggingface.co/spaces/muttalib1326/Punjabi_Character_Detection/blob/3dd1e17054c64e5f6b2254278f96cfa2bf418cd4/utils/add_nms.py 27 | ''' 28 | onnx_model = onnx.load(path) 29 | 30 | if simplify: 31 | from onnxsim import simplify 32 | onnx_model, _ = simplify(onnx_model, overwrite_input_shapes={'image': [1, 3, 640, 640]}) 33 | 34 | graph = onnx_graphsurgeon.import_onnx(onnx_model) 35 | graph.toposort() 36 | graph.fold_constants() 37 | graph.cleanup() 38 | 39 | topk = max_output_boxes 40 | attrs = OrderedDict(plugin_version='1', 41 | background_class=-1, 42 | max_output_boxes=topk, 43 | score_threshold=score_threshold, 44 | iou_threshold=iou_threshold, 45 | score_activation=False, 46 | box_coding=0, ) 47 | 48 | outputs = [onnx_graphsurgeon.Variable('num_dets', np.int32, [-1, 1]), 49 | onnx_graphsurgeon.Variable('det_boxes', np.float32, [-1, topk, 4]), 50 | onnx_graphsurgeon.Variable('det_scores', np.float32, [-1, topk]), 51 | onnx_graphsurgeon.Variable('det_classes', np.int32, [-1, topk])] 52 | 53 | graph.layer(op='EfficientNMS_TRT', 54 | name="batched_nms", 55 | inputs=[graph.outputs[0], 56 | graph.outputs[1]], 57 | outputs=outputs, 58 | attrs=attrs, ) 59 | 60 | graph.outputs = outputs 61 | graph.cleanup().toposort() 62 | 63 | onnx.save(onnx_graphsurgeon.export_onnx(graph), f'yolo_w_nms.onnx') 64 | 65 | 66 | class TimeProfiler(contextlib.ContextDecorator): 67 | def __init__(self, ): 68 | self.total = 0 69 | 70 | def __enter__(self, ): 71 | self.start = self.time() 72 | return self 73 | 74 | def __exit__(self, type, value, traceback): 75 | self.total += self.time() - self.start 76 | 77 | def reset(self, ): 78 | self.total = 0 79 | 80 | def time(self, ): 81 | if torch.cuda.is_available(): 82 | torch.cuda.synchronize() 83 | return time.time() 84 | 
-------------------------------------------------------------------------------- /benchmark/yolov8_onnx.py: -------------------------------------------------------------------------------- 1 | '''by lyuwenyu 2 | ''' 3 | 4 | import torch 5 | import torchvision 6 | 7 | import numpy as np 8 | import onnxruntime as ort 9 | 10 | from utils import yolo_insert_nms 11 | 12 | class YOLOv8(torch.nn.Module): 13 | def __init__(self, name) -> None: 14 | super().__init__() 15 | from ultralytics import YOLO 16 | # Load a model 17 | # build a new model from scratch 18 | # model = YOLO(f'{name}.yaml') 19 | 20 | # load a pretrained model (recommended for training) 21 | model = YOLO(f'{name}.pt') 22 | self.model = model.model 23 | 24 | def forward(self, x): 25 | '''https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py#L216 26 | ''' 27 | pred: torch.Tensor = self.model(x)[0] # n 84 8400, 28 | pred = pred.permute(0, 2, 1) 29 | boxes, scores = pred.split([4, 80], dim=-1) 30 | boxes = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy') 31 | 32 | return boxes, scores 33 | 34 | 35 | 36 | def export_onnx(name='yolov8n'): 37 | '''export onnx 38 | ''' 39 | m = YOLOv8(name) 40 | 41 | x = torch.rand(1, 3, 640, 640) 42 | dynamic_axes = { 43 | 'image': {0: '-1'} 44 | } 45 | torch.onnx.export(m, x, f'{name}.onnx', 46 | input_names=['image'], 47 | output_names=['boxes', 'scores'], 48 | opset_version=13, 49 | dynamic_axes=dynamic_axes) 50 | 51 | data = np.random.rand(1, 3, 640, 640).astype(np.float32) 52 | sess = ort.InferenceSession(f'{name}.onnx') 53 | _ = sess.run(output_names=None, input_feed={'image': data}) 54 | 55 | 56 | if __name__ == '__main__': 57 | 58 | import argparse 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument('--name', type=str, default='yolov8l') 61 | parser.add_argument('--score_threshold', type=float, default=0.001) 62 | parser.add_argument('--iou_threshold', type=float, default=0.7) 63 | parser.add_argument('--max_output_boxes', type=int, default=300) 64 | args = parser.parse_args() 65 | 66 | export_onnx(name=args.name) 67 | 68 | yolo_insert_nms(path=f'{args.name}.onnx', 69 | score_threshold=args.score_threshold, 70 | iou_threshold=args.iou_threshold, 71 | max_output_boxes=args.max_output_boxes, ) 72 | 73 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/datasets/coco_detection.yml: -------------------------------------------------------------------------------- 1 | metric: COCO 2 | num_classes: 80 3 | 4 | TrainDataset: 5 | name: COCODataSet 6 | image_dir: train2017 7 | anno_path: annotations/instances_train2017.json 8 | dataset_dir: dataset/coco 9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] 10 | 11 | EvalDataset: 12 | name: COCODataSet 13 | image_dir: val2017 14 | anno_path: annotations/instances_val2017.json 15 | dataset_dir: dataset/coco 16 | allow_empty: true 17 | 18 | TestDataset: 19 | name: ImageFolder 20 | anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt) 21 | dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path' 22 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/datasets/voc.yml: -------------------------------------------------------------------------------- 1 | metric: VOC 2 | map_type: 11point 3 | num_classes: 20 4 | 5 | TrainDataset: 6 | name: VOCDataSet 7 | dataset_dir: dataset/voc 8 | anno_path: trainval.txt 9 | label_list: label_list.txt 
10 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 11 | 12 | EvalDataset: 13 | name: VOCDataSet 14 | dataset_dir: dataset/voc 15 | anno_path: test.txt 16 | label_list: label_list.txt 17 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 18 | 19 | TestDataset: 20 | name: ImageFolder 21 | anno_path: dataset/voc/label_list.txt 22 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/_base_/optimizer_6x.yml: -------------------------------------------------------------------------------- 1 | epoch: 72 2 | 3 | LearningRate: 4 | base_lr: 0.0001 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 1.0 8 | milestones: [100] 9 | use_warmup: true 10 | - !LinearWarmup 11 | start_factor: 0.001 12 | steps: 2000 13 | 14 | OptimizerBuilder: 15 | clip_grad_by_norm: 0.1 16 | regularizer: false 17 | optimizer: 18 | type: AdamW 19 | weight_decay: 0.0001 20 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/_base_/rtdetr_r50vd.yml: -------------------------------------------------------------------------------- 1 | architecture: DETR 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: True 5 | ema_decay: 0.9999 6 | ema_decay_type: "exponential" 7 | ema_filter_no_grad: True 8 | hidden_dim: 256 9 | use_focal_loss: True 10 | eval_size: [640, 640] # h, w 11 | 12 | 13 | DETR: 14 | backbone: ResNet 15 | neck: HybridEncoder 16 | transformer: RTDETRTransformer 17 | detr_head: DINOHead 18 | post_process: DETRPostProcess 19 | 20 | ResNet: 21 | # index 0 stands for res2 22 | depth: 50 23 | variant: d 24 | norm_type: bn 25 | freeze_at: 0 26 | return_idx: [1, 2, 3] 27 | lr_mult_list: [0.1, 0.1, 0.1, 0.1] 28 | num_stages: 4 29 | freeze_stem_only: True 30 | 31 | HybridEncoder: 32 | hidden_dim: 256 33 | use_encoder_idx: [2] 34 | num_encoder_layers: 1 35 | encoder_layer: 36 | name: TransformerLayer 37 | d_model: 256 38 | nhead: 8 39 | dim_feedforward: 1024 40 | dropout: 0. 
41 | activation: 'gelu' 42 | expansion: 1.0 43 | 44 | 45 | RTDETRTransformer: 46 | num_queries: 300 47 | position_embed_type: sine 48 | feat_strides: [8, 16, 32] 49 | num_levels: 3 50 | nhead: 8 51 | num_decoder_layers: 6 52 | dim_feedforward: 1024 53 | dropout: 0.0 54 | activation: relu 55 | num_denoising: 100 56 | label_noise_ratio: 0.5 57 | box_noise_scale: 1.0 58 | learnt_init_query: False 59 | 60 | DINOHead: 61 | loss: 62 | name: DINOLoss 63 | loss_coeff: {class: 1, bbox: 5, giou: 2} 64 | aux_loss: True 65 | use_vfl: True 66 | matcher: 67 | name: HungarianMatcher 68 | matcher_coeff: {class: 2, bbox: 5, giou: 2} 69 | 70 | DETRPostProcess: 71 | num_top_queries: 300 72 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/_base_/rtdetr_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 4 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomDistort: {prob: 0.8} 6 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 7 | - RandomCrop: {prob: 0.8} 8 | - RandomFlip: {} 9 | batch_transforms: 10 | - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False} 11 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} 12 | - NormalizeBox: {} 13 | - BboxXYXY2XYWH: {} 14 | - Permute: {} 15 | batch_size: 4 16 | shuffle: true 17 | drop_last: true 18 | collate_batch: false 19 | use_shared_memory: false 20 | 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} # target_size: (h, w) 26 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} 27 | - Permute: {} 28 | batch_size: 4 29 | shuffle: false 30 | drop_last: false 31 | 32 | 33 | TestReader: 34 | inputs_def: 35 | image_shape: [3, 640, 640] 36 | sample_transforms: 37 | - Decode: {} 38 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} 39 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} 40 | - Permute: {} 41 | batch_size: 1 42 | shuffle: false 43 | drop_last: false 44 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_hgnetv2_l_6x_coco/model_final 10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams 11 | find_unused_parameters: True 12 | log_iter: 200 13 | 14 | 15 | DETR: 16 | backbone: PPHGNetV2 17 | 18 | PPHGNetV2: 19 | arch: 'L' 20 | return_idx: [1, 2, 3] 21 | freeze_stem_only: True 22 | freeze_at: 0 23 | freeze_norm: True 24 | lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05] 25 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: 
output/rtdetr_hgnetv2_x_6x_coco/model_final 10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams 11 | find_unused_parameters: True 12 | log_iter: 200 13 | 14 | 15 | 16 | DETR: 17 | backbone: PPHGNetV2 18 | 19 | 20 | PPHGNetV2: 21 | arch: 'X' 22 | return_idx: [1, 2, 3] 23 | freeze_stem_only: True 24 | freeze_at: 0 25 | freeze_norm: True 26 | lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01] 27 | 28 | 29 | HybridEncoder: 30 | hidden_dim: 384 31 | use_encoder_idx: [2] 32 | num_encoder_layers: 1 33 | encoder_layer: 34 | name: TransformerLayer 35 | d_model: 384 36 | nhead: 8 37 | dim_feedforward: 2048 38 | dropout: 0. 39 | activation: 'gelu' 40 | expansion: 1.0 41 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r101vd_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams 14 | 15 | ResNet: 16 | # index 0 stands for res2 17 | depth: 101 18 | variant: d 19 | norm_type: bn 20 | freeze_at: 0 21 | return_idx: [1, 2, 3] 22 | lr_mult_list: [0.01, 0.01, 0.01, 0.01] 23 | num_stages: 4 24 | freeze_stem_only: True 25 | 26 | HybridEncoder: 27 | hidden_dim: 384 28 | use_encoder_idx: [2] 29 | num_encoder_layers: 1 30 | encoder_layer: 31 | name: TransformerLayer 32 | d_model: 384 33 | nhead: 8 34 | dim_feedforward: 2048 35 | dropout: 0. 36 | activation: 'gelu' 37 | expansion: 1.0 38 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r18vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r18_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams 14 | ResNet: 15 | depth: 18 16 | variant: d 17 | return_idx: [1, 2, 3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 21 | 22 | HybridEncoder: 23 | hidden_dim: 256 24 | use_encoder_idx: [2] 25 | num_encoder_layers: 1 26 | encoder_layer: 27 | name: TransformerLayer 28 | d_model: 256 29 | nhead: 8 30 | dim_feedforward: 1024 31 | dropout: 0.
32 | activation: 'gelu' 33 | expansion: 0.5 34 | depth_mult: 1.0 35 | 36 | RTDETRTransformer: 37 | eval_idx: -1 38 | num_decoder_layers: 3 39 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r34vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r34vd_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams 14 | ResNet: 15 | depth: 34 16 | variant: d 17 | return_idx: [1, 2, 3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 21 | 22 | HybridEncoder: 23 | hidden_dim: 256 24 | use_encoder_idx: [2] 25 | num_encoder_layers: 1 26 | encoder_layer: 27 | name: TransformerLayer 28 | d_model: 256 29 | nhead: 8 30 | dim_feedforward: 1024 31 | dropout: 0. 32 | activation: 'gelu' 33 | expansion: 0.5 34 | depth_mult: 1.0 35 | 36 | RTDETRTransformer: 37 | eval_idx: -1 38 | num_decoder_layers: 4 39 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r50vd_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r50vd_m_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | HybridEncoder: 14 | hidden_dim: 256 15 | use_encoder_idx: [2] 16 | num_encoder_layers: 1 17 | encoder_layer: 18 | name: TransformerLayer 19 | d_model: 256 20 | nhead: 8 21 | dim_feedforward: 1024 22 | dropout: 0. 23 | activation: 'gelu' 24 | expansion: 0.5 25 | depth_mult: 1.0 26 | 27 | RTDETRTransformer: 28 | eval_idx: 2 # use the 3rd decoder layer for eval 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/runtime.yml: -------------------------------------------------------------------------------- 1 | use_gpu: true 2 | use_xpu: false 3 | use_mlu: false 4 | use_npu: false 5 | log_iter: 20 6 | save_dir: output 7 | snapshot_epoch: 1 8 | print_flops: false 9 | print_params: false 10 | 11 | # Exporting the model 12 | export: 13 | post_process: True # Whether post-processing is included in the network when exporting the model. 14 | nms: True # Whether NMS is included in the network when exporting the model. 15 | benchmark: False # Used to test model performance; if set `True`, post-processing and NMS will not be exported.
16 | fuse_conv_bn: False 17 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/coco/download_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import download_dataset 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | download_dataset(download_path, 'coco') 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/voc/create_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import create_voc_list 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | create_voc_list(voc_path) 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/voc/download_voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import download_dataset 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | download_dataset(download_path, 'voc') 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/voc/label_list.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import (core, data, engine, modeling, optimizer, metrics, utils) 16 | 17 | 18 | try: 19 | from .version import full_version as __version__ 20 | from .version import commit as __git_commit__ 21 | except ImportError: 22 | import sys 23 | sys.stderr.write("Warning: import ppdet from source directory " \ 24 | "without installing, run 'python setup.py install' to " \ 25 | "install ppdet firstly\n") 26 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import config 16 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/core/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import source 16 | from . import transform 17 | from . import reader 18 | 19 | from .source import * 20 | from .transform import * 21 | from .reader import * 22 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/shm_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | SIZE_UNIT = ['K', 'M', 'G', 'T'] 18 | SHM_QUERY_CMD = 'df -h' 19 | SHM_KEY = 'shm' 20 | SHM_DEFAULT_MOUNT = '/dev/shm' 21 | 22 | # [ shared memory size check ] 23 | # In detection models, image/target data occupy a lot of memory and can 24 | # use up shared memory in a multi-process DataLoader, so we use the 25 | # following code to get the shared memory size and perform a size check 26 | # that disables shared memory use if its size is not enough. 27 | # The shared memory size is obtained as follows: 28 | # 1. use `df -h` to get all mount info 29 | # 2. pick up the spaces whose mount info contains 'shm' 30 | # 3. if there is only one 'shm' space, return its size 31 | # 4. if there are multiple 'shm' spaces, try to find the default mount 32 | # directory '/dev/shm' of Linux-like systems; otherwise return the 33 | # biggest space size.
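# Worked example (hypothetical `df -h` row, added for illustration):
#   Filesystem   Size  Used  Avail  Use%  Mounted on
#   tmpfs         64G     0    64G    0%  /dev/shm
# Step 2 keeps this row because it contains 'shm'; its 'Avail' column
# (index 3 after split(), '64G' here) is what _parse_size_in_M below
# converts to MB, via 1024 ** (SIZE_UNIT.index('G') - 1) == 1024.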
34 | 35 | 36 | def _parse_size_in_M(size_str): 37 | if size_str[-1] == 'B': 38 | num, unit = size_str[:-2], size_str[-2] 39 | else: 40 | num, unit = size_str[:-1], size_str[-1] 41 | assert unit in SIZE_UNIT, \ 42 | "unknown shm size unit {}".format(unit) 43 | return float(num) * \ 44 | (1024 ** (SIZE_UNIT.index(unit) - 1)) 45 | 46 | 47 | def _get_shared_memory_size_in_M(): 48 | try: 49 | df_infos = os.popen(SHM_QUERY_CMD).readlines() 50 | except: 51 | return None 52 | else: 53 | shm_infos = [] 54 | for df_info in df_infos: 55 | info = df_info.strip() 56 | if info.find(SHM_KEY) >= 0: 57 | shm_infos.append(info.split()) 58 | 59 | if len(shm_infos) == 0: 60 | return None 61 | elif len(shm_infos) == 1: 62 | return _parse_size_in_M(shm_infos[0][3]) 63 | else: 64 | default_mount_infos = [ 65 | si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT 66 | ] 67 | if default_mount_infos: 68 | return _parse_size_in_M(default_mount_infos[0][3]) 69 | else: 70 | return max([_parse_size_in_M(si[3]) for si in shm_infos]) 71 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/source/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .coco import * 16 | from .voc import * 17 | from .category import * 18 | from .dataset import ImageFolder 19 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/transform/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import operators 16 | from . import batch_operators 17 | 18 | 19 | from .operators import * 20 | from .batch_operators import * 21 | 22 | 23 | __all__ = [] 24 | __all__ += registered_ops 25 | 26 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | import numpy as np 17 | 18 | try: 19 | from collections.abc import Sequence, Mapping 20 | except: 21 | from collections import Sequence, Mapping 22 | 23 | 24 | def default_collate_fn(batch): 25 | """ 26 | Default batch collating function for :code:`paddle.io.DataLoader`. 27 | It gets the input data as a list of samples, where each element in 28 | the list is the data of one sample, and sample data should be composed 29 | of list, dictionary, string, number or numpy array. This 30 | function parses the input data recursively and stacks number, 31 | numpy array and paddle.Tensor data as batch data. e.g. for the 32 | following input data: 33 | [{'image': np.array(shape=[3, 224, 224]), 'label': 1}, 34 | {'image': np.array(shape=[3, 224, 224]), 'label': 3}, 35 | {'image': np.array(shape=[3, 224, 224]), 'label': 4}, 36 | {'image': np.array(shape=[3, 224, 224]), 'label': 5},] 37 | 38 | 39 | This default collate function zips each number and numpy array 40 | field together and stacks each field as the batch field as follows: 41 | {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])} 42 | Args: 43 | batch(list of sample data): batch should be a list of sample data. 44 | 45 | Returns: 46 | Batched data: batched each number, numpy array and paddle.Tensor 47 | in input data. 48 | """ 49 | sample = batch[0] 50 | if isinstance(sample, np.ndarray): 51 | batch = np.stack(batch, axis=0) 52 | return batch 53 | elif isinstance(sample, numbers.Number): 54 | batch = np.array(batch) 55 | return batch 56 | elif isinstance(sample, (str, bytes)): 57 | return batch 58 | elif isinstance(sample, Mapping): 59 | return { 60 | key: default_collate_fn([d[key] for d in batch]) 61 | for key in sample 62 | } 63 | elif isinstance(sample, Sequence): 64 | sample_fields_num = len(sample) 65 | if not all(len(sample) == sample_fields_num for sample in iter(batch)): 66 | raise RuntimeError( 67 | "fields number not the same among samples in a batch") 68 | return [default_collate_fn(fields) for fields in zip(*batch)] 69 | 70 | raise TypeError("batch data can only contain: tensor, numpy.ndarray, " 71 | "dict, list, number, but got {}".format(type(sample))) 72 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .
import trainer 16 | from .trainer import * 17 | 18 | from . import callbacks 19 | from .callbacks import * 20 | 21 | from . import env 22 | from .env import * 23 | 24 | __all__ = trainer.__all__ \ 25 | + callbacks.__all__ \ 26 | + env.__all__ 27 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/engine/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | import random 21 | import numpy as np 22 | 23 | import paddle 24 | from paddle.distributed import fleet 25 | 26 | __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env'] 27 | 28 | 29 | def init_fleet_env(find_unused_parameters=False): 30 | strategy = fleet.DistributedStrategy() 31 | strategy.find_unused_parameters = find_unused_parameters 32 | fleet.init(is_collective=True, strategy=strategy) 33 | 34 | 35 | def init_parallel_env(): 36 | env = os.environ 37 | dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env 38 | if dist: 39 | trainer_id = int(env['PADDLE_TRAINER_ID']) 40 | local_seed = (99 + trainer_id) 41 | random.seed(local_seed) 42 | np.random.seed(local_seed) 43 | 44 | paddle.distributed.init_parallel_env() 45 | 46 | 47 | def set_random_seed(seed): 48 | paddle.seed(seed) 49 | random.seed(seed) 50 | np.random.seed(seed) 51 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import metrics 16 | 17 | from .metrics import * 18 | from .pose3d_metrics import * 19 | 20 | from . import mot_metrics 21 | from .mot_metrics import * 22 | __all__ = metrics.__all__ + mot_metrics.__all__ 23 | 24 | from . 
import mcmot_metrics 25 | from .mcmot_metrics import * 26 | __all__ += mcmot_metrics.__all__  # extend, so mot_metrics entries are kept -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import warnings 16 | warnings.filterwarnings( 17 | action='ignore', category=DeprecationWarning, module='ops') 18 | 19 | 20 | from .ops import * 21 | from .backbones import * 22 | from .heads import * 23 | from .losses import * 24 | from .architectures import * 25 | from .post_process import * 26 | from .layers import * 27 | from .transformers import * 28 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .meta_arch import * 16 | from .detr import * 17 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .resnet import * 16 | from .darknet import * 17 | from .mobilenet_v1 import * 18 | from .mobilenet_v3 import * 19 | from .shufflenet_v2 import * 20 | from .swin_transformer import * 21 | from .lcnet import * 22 | from .cspresnet import * 23 | from .csp_darknet import * 24 | from .convnext import * 25 | from .vision_transformer import * 26 | from .mobileone import * 27 | from .trans_encoder import * 28 | from .focalnet import * 29 | from .vit_mae import * 30 | from .hgnet_v2 import * 31 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/backbones/name_adapter.py: -------------------------------------------------------------------------------- 1 | class NameAdapter(object): 2 | """Fix the backbones variable names for pretrained weight""" 3 | 4 | def __init__(self, model): 5 | super(NameAdapter, self).__init__() 6 | self.model = model 7 | 8 | @property 9 | def model_type(self): 10 | return getattr(self.model, '_model_type', '') 11 | 12 | @property 13 | def variant(self): 14 | return getattr(self.model, 'variant', '') 15 | 16 | def fix_conv_norm_name(self, name): 17 | if name == "conv1": 18 | bn_name = "bn_" + name 19 | else: 20 | bn_name = "bn" + name[3:] 21 | # the naming rule is same as pretrained weight 22 | if self.model_type == 'SEResNeXt': 23 | bn_name = name + "_bn" 24 | return bn_name 25 | 26 | def fix_shortcut_name(self, name): 27 | if self.model_type == 'SEResNeXt': 28 | name = 'conv' + name + '_prj' 29 | return name 30 | 31 | def fix_bottleneck_name(self, name): 32 | if self.model_type == 'SEResNeXt': 33 | conv_name1 = 'conv' + name + '_x1' 34 | conv_name2 = 'conv' + name + '_x2' 35 | conv_name3 = 'conv' + name + '_x3' 36 | shortcut_name = name 37 | else: 38 | conv_name1 = name + "_branch2a" 39 | conv_name2 = name + "_branch2b" 40 | conv_name3 = name + "_branch2c" 41 | shortcut_name = name + "_branch1" 42 | return conv_name1, conv_name2, conv_name3, shortcut_name 43 | 44 | def fix_basicblock_name(self, name): 45 | if self.model_type == 'SEResNeXt': 46 | conv_name1 = 'conv' + name + '_x1' 47 | conv_name2 = 'conv' + name + '_x2' 48 | shortcut_name = name 49 | else: 50 | conv_name1 = name + "_branch2a" 51 | conv_name2 = name + "_branch2b" 52 | shortcut_name = name + "_branch1" 53 | return conv_name1, conv_name2, shortcut_name 54 | 55 | def fix_layer_warp_name(self, stage_num, count, i): 56 | name = 'res' + str(stage_num) 57 | if count > 10 and stage_num == 4: 58 | if i == 0: 59 | conv_name = name + "a" 60 | else: 61 | conv_name = name + "b" + str(i) 62 | else: 63 | conv_name = name + chr(ord("a") + i) 64 | if self.model_type == 'SEResNeXt': 65 | conv_name = str(stage_num + 2) + '_' + str(i + 1) 66 | return conv_name 67 | 68 | def fix_c1_stage_name(self): 69 | return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" 70 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/cls_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def _get_class_default_kwargs(cls, *args, **kwargs): 17 | """ 18 | Get the default __init__ arguments of a class as a dict; if args or 19 | kwargs are specified, they override the corresponding defaults 20 | """ 21 | varnames = cls.__init__.__code__.co_varnames 22 | argcount = cls.__init__.__code__.co_argcount 23 | keys = varnames[:argcount] 24 | assert keys[0] == 'self' 25 | keys = keys[1:] 26 | 27 | values = list(cls.__init__.__defaults__) 28 | assert len(values) == len(keys) 29 | 30 | if len(args) > 0: 31 | for i, arg in enumerate(args): 32 | values[i] = arg 33 | 34 | default_kwargs = dict(zip(keys, values)) 35 | 36 | if len(kwargs) > 0: 37 | for k, v in kwargs.items(): 38 | default_kwargs[k] = v 39 | 40 | return default_kwargs 41 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .detr_head import * 16 | 17 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .iou_loss import * 16 | from .gfocal_loss import * 17 | from .detr_loss import * 18 | from .focal_loss import * 19 | from .smooth_l1_loss import * 20 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import paddle 20 | import paddle.nn as nn 21 | import paddle.nn.functional as F 22 | from ppdet.core.workspace import register 23 | 24 | __all__ = ['SmoothL1Loss'] 25 | 26 | @register 27 | class SmoothL1Loss(nn.Layer): 28 | """Smooth L1 Loss. 29 | Args: 30 | beta (float): the size of the smooth (quadratic) region; the loss degenerates to L1 when beta=0.0 31 | loss_weight (float): the final loss is multiplied by this factor 32 | """ 33 | def __init__(self, 34 | beta=1.0, 35 | loss_weight=1.0): 36 | super(SmoothL1Loss, self).__init__() 37 | assert beta >= 0 38 | self.beta = beta 39 | self.loss_weight = loss_weight 40 | 41 | def forward(self, pred, target, reduction='none'): 42 | """Forward function, based on fvcore. 43 | Args: 44 | pred (Tensor): prediction tensor 45 | target (Tensor): target tensor, pred.shape must be the same as target.shape 46 | reduction (str): the way to reduce loss, one of (none, sum, mean) 47 | """ 48 | assert reduction in ('none', 'sum', 'mean') 49 | target = target.detach() 50 | if self.beta < 1e-5: 51 | loss = paddle.abs(pred - target) 52 | else: 53 | n = paddle.abs(pred - target) 54 | cond = n < self.beta 55 | loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta) 56 | if reduction == 'mean': 57 | loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum() 58 | elif reduction == 'sum': 59 | loss = loss.sum() 60 | return loss * self.loss_weight 61 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/shape_spec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
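For reference, `SmoothL1Loss.forward` above is the usual Huber-style piecewise form: quadratic (0.5 * n^2 / beta) where n = |pred - target| is below beta, linear (n - 0.5 * beta) elsewhere, falling back to plain L1 when beta is effectively zero. A framework-free NumPy sketch (not part of the repo) that mirrors the branch logic:

```python
import numpy as np

def smooth_l1(pred, target, beta=1.0):
    # elementwise |pred - target|: quadratic near zero, linear in the tails
    n = np.abs(pred - target)
    if beta < 1e-5:  # degenerates to plain L1, matching the paddle code above
        return n
    return np.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)

print(smooth_l1(np.array([0.2, 2.0]), np.array([0.0, 0.0])))  # [0.02 1.5 ]
```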
14 | 15 | # The code is based on: 16 | # https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/shape_spec.py 17 | 18 | from collections import namedtuple 19 | 20 | 21 | class ShapeSpec( 22 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 23 | def __new__(cls, channels=None, height=None, width=None, stride=None): 24 | return super(ShapeSpec, cls).__new__(cls, channels, height, width, 25 | stride) 26 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import * 16 | from .matchers import * 17 | from .position_encoding import * 18 | from .rtdetr_transformer import * 19 | from .dino_transformer import * 20 | from .hybrid_encoder import * -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/transformers/ext_op/setup_ms_deformable_attn_op.py: -------------------------------------------------------------------------------- 1 | from paddle.utils.cpp_extension import CUDAExtension, setup 2 | 3 | if __name__ == "__main__": 4 | setup( 5 | name='deformable_detr_ops', 6 | ext_modules=CUDAExtension( 7 | sources=['ms_deformable_attn_op.cc', 'ms_deformable_attn_op.cu'])) 8 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import optimizer 16 | from . import ema 17 | 18 | from .optimizer import * 19 | from .ema import * 20 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/optimizer/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import paddle.nn as nn 20 | 21 | from typing import List 22 | 23 | 24 | def get_bn_running_state_names(model: nn.Layer) -> List[str]: 25 | """Get all bn state full names including running mean and variance 26 | """ 27 | names = [] 28 | for n, m in model.named_sublayers(): 29 | if isinstance(m, (nn.BatchNorm2D, nn.SyncBatchNorm)): 30 | assert hasattr(m, '_mean'), f'assert {m} has _mean' 31 | assert hasattr(m, '_variance'), f'assert {m} has _variance' 32 | running_mean = f'{n}._mean' 33 | running_var = f'{n}._variance' 34 | names.extend([running_mean, running_var]) 35 | 36 | return names 37 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
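`get_bn_running_state_names` above matters because batch-norm running statistics live in the state dict but are not trainable parameters, so utilities such as EMA or weight-decay grouping may want to skip them. A small sketch of one possible use; the exclusion step is an assumption for illustration, not code from this repo:

```python
import paddle.nn as nn
from ppdet.optimizer.utils import get_bn_running_state_names

model = nn.Sequential(('conv', nn.Conv2D(3, 8, 3)), ('bn', nn.BatchNorm2D(8)))
bn_states = set(get_bn_running_state_names(model))  # {'bn._mean', 'bn._variance'}

# e.g. track EMA only over entries that are not BN running statistics
ema_state = {k: v.clone() for k, v in model.state_dict().items()
             if k not in bn_states}
print(sorted(bn_states))
```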
14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | from __future__ import unicode_literals 19 | 20 | import numpy as np 21 | 22 | 23 | def colormap(rgb=False): 24 | """ 25 | Get colormap 26 | 27 | The code of this function is copied from https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/colormap.py 28 | """ 29 | color_list = np.array([ 30 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, 31 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, 32 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, 33 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, 34 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, 35 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 36 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 37 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 38 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, 39 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, 40 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, 41 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, 42 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, 43 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, 44 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 45 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, 46 | 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 47 | 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 48 | 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 49 | 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 50 | 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 51 | 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, 52 | 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, 53 | 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000 54 | ]).astype(np.float32) 55 | color_list = color_list.reshape((-1, 3)) * 255 56 | if not rgb: 57 | color_list = color_list[:, ::-1] 58 | return color_list.astype('int32') 59 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
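The `colormap` helper above returns an (N, 3) int32 palette (BGR by default, RGB when `rgb=True`); visualizers typically index it by class id modulo the palette length. A minimal usage sketch, with an arbitrary class id:

```python
from ppdet.utils.colormap import colormap

colors = colormap(rgb=True)  # (N, 3) int32 palette
class_id = 17
color = tuple(int(c) for c in colors[class_id % len(colors)])
print(color)  # an (R, G, B) triple to pass to a box-drawing routine
```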
14 | 15 | import logging 16 | import os 17 | import sys 18 | 19 | import paddle.distributed as dist 20 | 21 | __all__ = ['setup_logger'] 22 | 23 | logger_initialized = [] 24 | 25 | 26 | def setup_logger(name="ppdet", output=None): 27 | """ 28 | Initialize logger and set its verbosity level to INFO. 29 | Args: 30 | output (str): a file name or a directory to save log. If None, will not save log file. 31 | If it ends with ".txt" or ".log", it is assumed to be a file name. 32 | Otherwise, logs will be saved to `output/log.txt`. 33 | name (str): the root module name of this logger 34 | 35 | Returns: 36 | logging.Logger: a logger 37 | """ 38 | logger = logging.getLogger(name) 39 | if name in logger_initialized: 40 | return logger 41 | 42 | logger.setLevel(logging.INFO) 43 | logger.propagate = False 44 | 45 | formatter = logging.Formatter( 46 | "[%(asctime)s] %(name)s %(levelname)s: %(message)s", 47 | datefmt="%m/%d %H:%M:%S") 48 | # stdout logging: master only 49 | local_rank = dist.get_rank() 50 | if local_rank == 0: 51 | ch = logging.StreamHandler(stream=sys.stdout) 52 | ch.setLevel(logging.DEBUG) 53 | ch.setFormatter(formatter) 54 | logger.addHandler(ch) 55 | 56 | # file logging: all workers 57 | if output is not None: 58 | if output.endswith(".txt") or output.endswith(".log"): 59 | filename = output 60 | else: 61 | filename = os.path.join(output, "log.txt") 62 | if local_rank > 0: 63 | filename = filename + ".rank{}".format(local_rank) 64 | os.makedirs(os.path.dirname(filename), exist_ok=True) 65 | fh = logging.FileHandler(filename, mode='a') 66 | fh.setLevel(logging.DEBUG) 67 | fh.setFormatter(logging.Formatter()) 68 | logger.addHandler(fh) 69 | logger_initialized.append(name) 70 | return logger 71 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/stats.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections 16 | import numpy as np 17 | 18 | __all__ = ['SmoothedValue', 'TrainingStats'] 19 | 20 | 21 | class SmoothedValue(object): 22 | """Track a series of values and provide access to smoothed values over a 23 | window or the global series average. 24 | """ 25 | 26 | def __init__(self, window_size=20, fmt=None): 27 | if fmt is None: 28 | fmt = "{median:.4f} ({avg:.4f})" 29 | self.deque = collections.deque(maxlen=window_size) 30 | self.fmt = fmt 31 | self.total = 0.
32 | self.count = 0 33 | 34 | def update(self, value, n=1): 35 | self.deque.append(value) 36 | self.count += n 37 | self.total += value * n 38 | 39 | @property 40 | def median(self): 41 | return np.median(self.deque) 42 | 43 | @property 44 | def avg(self): 45 | return np.mean(self.deque) 46 | 47 | @property 48 | def max(self): 49 | return np.max(self.deque) 50 | 51 | @property 52 | def value(self): 53 | return self.deque[-1] 54 | 55 | @property 56 | def global_avg(self): 57 | return self.total / self.count 58 | 59 | def __str__(self): 60 | return self.fmt.format( 61 | median=self.median, avg=self.avg, max=self.max, value=self.value) 62 | 63 | 64 | class TrainingStats(object): 65 | def __init__(self, window_size, delimiter=' '): 66 | self.meters = None 67 | self.window_size = window_size 68 | self.delimiter = delimiter 69 | 70 | def update(self, stats): 71 | if self.meters is None: 72 | self.meters = { 73 | k: SmoothedValue(self.window_size) 74 | for k in stats.keys() 75 | } 76 | for k, v in self.meters.items(): 77 | v.update(float(stats[k])) 78 | 79 | def get(self, extras=None): 80 | stats = collections.OrderedDict() 81 | if extras: 82 | for k, v in extras.items(): 83 | stats[k] = v 84 | for k, v in self.meters.items(): 85 | stats[k] = format(v.median, '.6f') 86 | 87 | return stats 88 | 89 | def log(self, extras=None): 90 | d = self.get(extras) 91 | strs = [] 92 | for k, v in d.items(): 93 | strs.append("{}: {}".format(k, str(v))) 94 | return self.delimiter.join(strs) 95 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/voc_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | import os.path as osp 21 | import re 22 | import random 23 | 24 | __all__ = ['create_list'] 25 | 26 | 27 | def create_list(devkit_dir, years, output_dir): 28 | """ 29 | create following list: 30 | 1. trainval.txt 31 | 2. 
test.txt 32 | """ 33 | trainval_list = [] 34 | test_list = [] 35 | for year in years: 36 | trainval, test = _walk_voc_dir(devkit_dir, year, output_dir) 37 | trainval_list.extend(trainval) 38 | test_list.extend(test) 39 | 40 | random.shuffle(trainval_list) 41 | with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval: 42 | for item in trainval_list: 43 | ftrainval.write(item[0] + ' ' + item[1] + '\n') 44 | 45 | with open(osp.join(output_dir, 'test.txt'), 'w') as fval: 46 | ct = 0 47 | for item in test_list: 48 | ct += 1 49 | fval.write(item[0] + ' ' + item[1] + '\n') 50 | 51 | 52 | def _get_voc_dir(devkit_dir, year, type): 53 | return osp.join(devkit_dir, 'VOC' + year, type) 54 | 55 | 56 | def _walk_voc_dir(devkit_dir, year, output_dir): 57 | filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main') 58 | annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations') 59 | img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages') 60 | trainval_list = [] 61 | test_list = [] 62 | added = set() 63 | 64 | for _, _, files in os.walk(filelist_dir): 65 | for fname in files: 66 | img_ann_list = [] 67 | if re.match(r'[a-z]+_trainval\.txt', fname): 68 | img_ann_list = trainval_list 69 | elif re.match(r'[a-z]+_test\.txt', fname): 70 | img_ann_list = test_list 71 | else: 72 | continue 73 | fpath = osp.join(filelist_dir, fname) 74 | for line in open(fpath): 75 | name_prefix = line.strip().split()[0] 76 | if name_prefix in added: 77 | continue 78 | added.add(name_prefix) 79 | ann_path = osp.join( 80 | osp.relpath(annotation_dir, output_dir), 81 | name_prefix + '.xml') 82 | img_path = osp.join( 83 | osp.relpath(img_dir, output_dir), name_prefix + '.jpg') 84 | img_ann_list.append((img_path, ann_path)) 85 | 86 | return trainval_list, test_list 87 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/version.py: -------------------------------------------------------------------------------- 1 | # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY 2 | # 3 | full_version = '2.4.0' 4 | commit = '87ed5ba91eaeb332e8e5c3f4e7d5b1d765c75644' 5 | -------------------------------------------------------------------------------- /rtdetr_paddle/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle-gpu==2.4.2 2 | tqdm 3 | typeguard 4 | visualdl>=2.2.0 5 | opencv-python <= 4.6.0 6 | PyYAML 7 | shapely 8 | scipy 9 | terminaltables 10 | Cython 11 | pycocotools 12 | setuptools 13 | -------------------------------------------------------------------------------- /rtdetr_paddle/tools/slice_image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
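`create_list` above takes a VOCdevkit root, a list of years, and an output directory; it shuffles the combined trainval pairs and writes `trainval.txt` / `test.txt` whose image and annotation paths are relative to `output_dir`. A hypothetical invocation (the paths are placeholders and assume VOC2007/VOC2012 are already on disk):

```python
from ppdet.utils.voc_utils import create_list

# writes dataset/voc/trainval.txt and dataset/voc/test.txt
create_list('dataset/voc/VOCdevkit', ['2007', '2012'], 'dataset/voc')
```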
14 | 15 | import argparse 16 | from tqdm import tqdm 17 | 18 | 19 | def slice_data(image_dir, dataset_json_path, output_dir, slice_size, 20 | overlap_ratio): 21 | try: 22 | from sahi.scripts.slice_coco import slice 23 | except Exception as e: 24 | raise RuntimeError( 25 | 'Unable to use sahi to slice images, please install sahi, for example: `pip install sahi`, see https://github.com/obss/sahi' 26 | ) 27 | tqdm.write( 28 | f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}") 29 | slice( 30 | image_dir=image_dir, 31 | dataset_json_path=dataset_json_path, 32 | output_dir=output_dir, 33 | slice_size=slice_size, 34 | overlap_ratio=overlap_ratio, ) 35 | 36 | 37 | def main(): 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument( 40 | '--image_dir', type=str, default=None, help="The image folder path.") 41 | parser.add_argument( 42 | '--json_path', type=str, default=None, help="Dataset json path.") 43 | parser.add_argument( 44 | '--output_dir', type=str, default=None, help="Output dir.") 45 | parser.add_argument( 46 | '--slice_size', type=int, default=500, help="slice_size") 47 | parser.add_argument( 48 | '--overlap_ratio', type=float, default=0.25, help="overlap_ratio") 49 | args = parser.parse_args() 50 | 51 | slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size, 52 | args.overlap_ratio) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/dataset/coco_detection.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | num_classes: 80 4 | remap_mscoco_category: True 5 | 6 | train_dataloader: 7 | type: DataLoader 8 | dataset: 9 | type: CocoDetection 10 | img_folder: ./dataset/coco/train2017/ 11 | ann_file: ./dataset/coco/annotations/instances_train2017.json 12 | transforms: 13 | type: Compose 14 | ops: ~ 15 | shuffle: True 16 | batch_size: 8 17 | num_workers: 4 18 | drop_last: True 19 | 20 | 21 | val_dataloader: 22 | type: DataLoader 23 | dataset: 24 | type: CocoDetection 25 | img_folder: ./dataset/coco/val2017/ 26 | ann_file: ./dataset/coco/annotations/instances_val2017.json 27 | transforms: 28 | type: Compose 29 | ops: ~ 30 | 31 | shuffle: False 32 | batch_size: 8 33 | num_workers: 4 34 | drop_last: False -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/dataloader.yml: -------------------------------------------------------------------------------- 1 | # num_classes: 91 2 | # remap_mscoco_category: True 3 | 4 | train_dataloader: 5 | dataset: 6 | return_masks: False 7 | transforms: 8 | ops: 9 | - {type: RandomPhotometricDistort, p: 0.5} 10 | - {type: RandomZoomOut, fill: 0} 11 | - {type: RandomIoUCrop, p: 0.8} 12 | - {type: SanitizeBoundingBox, min_size: 1} 13 | - {type: RandomHorizontalFlip} 14 | - {type: Resize, size: [640, 640], } 15 | # - {type: Resize, size: 639, max_size: 640} 16 | # - {type: PadToSize, spatial_size: 640} 17 | - {type: ToImageTensor} 18 | - {type: ConvertDtype} 19 | - {type: SanitizeBoundingBox, min_size: 1} 20 | - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True} 21 | shuffle: True 22 | batch_size: 4 23 | num_workers: 4 24 | collate_fn: default_collate_fn 25 | 26 | 27 | val_dataloader: 28 | dataset: 29 | transforms: 30 | ops: 31 | # - {type: Resize, size: 639, max_size: 640} 32 | # - {type: PadToSize, spatial_size: 640} 33 | - {type: Resize, size: [640, 640]} 34 | - {type: 
ToImageTensor} 35 | - {type: ConvertDtype} 36 | shuffle: False 37 | batch_size: 8 38 | num_workers: 4 39 | collate_fn: default_collate_fn 40 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/dataloader_regnet.yml: -------------------------------------------------------------------------------- 1 | # num_classes: 91 2 | # remap_mscoco_category: True 3 | 4 | train_dataloader: 5 | dataset: 6 | return_masks: False 7 | transforms: 8 | ops: 9 | - {type: RandomPhotometricDistort, p: 0.5} 10 | - {type: RandomZoomOut, fill: 0} 11 | - {type: RandomIoUCrop, p: 0.8} 12 | - {type: SanitizeBoundingBox, min_size: 1} 13 | - {type: RandomHorizontalFlip} 14 | - {type: Resize, size: [640, 640], } 15 | # - {type: Resize, size: 639, max_size: 640} 16 | # - {type: PadToSize, spatial_size: 640} 17 | - {type: ToImageTensor} 18 | - {type: ConvertDtype} 19 | - {type: SanitizeBoundingBox, min_size: 1} 20 | - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True} 21 | shuffle: True 22 | batch_size: 8 23 | num_workers: 2 24 | collate_fn: default_collate_fn 25 | 26 | 27 | val_dataloader: 28 | dataset: 29 | transforms: 30 | ops: 31 | # - {type: Resize, size: 639, max_size: 640} 32 | # - {type: PadToSize, spatial_size: 640} 33 | - {type: Resize, size: [640, 640]} 34 | - {type: ToImageTensor} 35 | - {type: ConvertDtype} 36 | shuffle: False 37 | batch_size: 8 38 | num_workers: 2 39 | collate_fn: default_collate_fn 40 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/optimizer.yml: -------------------------------------------------------------------------------- 1 | 2 | use_ema: True 3 | ema: 4 | type: ModelEMA 5 | decay: 0.9999 6 | warmups: 2000 7 | 8 | 9 | find_unused_parameters: True 10 | 11 | epoches: 72 12 | clip_max_norm: 0.1 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: 'backbone' 19 | lr: 0.00001 20 | - 21 | params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$' 22 | weight_decay: 0. 23 | - 24 | params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$' 25 | weight_decay: 0. 26 | 27 | lr: 0.0001 28 | betas: [0.9, 0.999] 29 | weight_decay: 0.0001 30 | 31 | 32 | lr_scheduler: 33 | type: MultiStepLR 34 | milestones: [1000] 35 | gamma: 0.1 36 | 37 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/optimizer_regnet.yml: -------------------------------------------------------------------------------- 1 | 2 | use_ema: True 3 | ema: 4 | type: ModelEMA 5 | decay: 0.9999 6 | warmups: 2000 7 | 8 | 9 | find_unused_parameters: True 10 | 11 | epoches: 72 12 | clip_max_norm: 0.1 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$' 19 | weight_decay: 0. 20 | - 21 | params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$' 22 | weight_decay: 0. 
23 | 24 | lr: 0.0001 25 | betas: [0.9, 0.999] 26 | weight_decay: 0.0001 27 | 28 | 29 | lr_scheduler: 30 | type: MultiStepLR 31 | milestones: [1000] 32 | gamma: 0.1 33 | 34 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/rtdetr_dla34.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: SetCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | RTDETR: 9 | backbone: DLANet 10 | encoder: HybridEncoder 11 | decoder: RTDETRTransformer 12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 13 | 14 | DLANet: 15 | dla: dla34 16 | pretrained: True 17 | return_idx: [1, 2, 3] 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | feat_strides: [8, 16, 32] 23 | 24 | # intra 25 | hidden_dim: 256 26 | use_encoder_idx: [2] 27 | num_encoder_layers: 1 28 | nhead: 8 29 | dim_feedforward: 1024 30 | dropout: 0. 31 | enc_act: 'gelu' 32 | pe_temperature: 10000 33 | 34 | # cross 35 | expansion: 1.0 36 | depth_mult: 1 37 | act: 'silu' 38 | 39 | # eval 40 | eval_spatial_size: [640, 640] 41 | 42 | 43 | RTDETRTransformer: 44 | feat_channels: [256, 256, 256] 45 | feat_strides: [8, 16, 32] 46 | hidden_dim: 256 47 | num_levels: 3 48 | 49 | num_queries: 300 50 | 51 | num_decoder_layers: 6 52 | num_denoising: 100 53 | 54 | eval_idx: -1 55 | eval_spatial_size: [640, 640] 56 | 57 | 58 | use_focal_loss: True 59 | 60 | RTDETRPostProcessor: 61 | num_top_queries: 300 62 | 63 | 64 | SetCriterion: 65 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 66 | losses: ['vfl', 'boxes', ] 67 | alpha: 0.75 68 | gamma: 2.0 69 | 70 | matcher: 71 | type: HungarianMatcher 72 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 73 | # use_focal_loss: True 74 | alpha: 0.25 75 | gamma: 2.0 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: SetCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | RTDETR: 9 | backbone: PResNet 10 | encoder: HybridEncoder 11 | decoder: RTDETRTransformer 12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 13 | 14 | PResNet: 15 | depth: 50 16 | variant: d 17 | freeze_at: 0 18 | return_idx: [1, 2, 3] 19 | num_stages: 4 20 | freeze_norm: True 21 | pretrained: True 22 | 23 | HybridEncoder: 24 | in_channels: [512, 1024, 2048] 25 | feat_strides: [8, 16, 32] 26 | 27 | # intra 28 | hidden_dim: 256 29 | use_encoder_idx: [2] 30 | num_encoder_layers: 1 31 | nhead: 8 32 | dim_feedforward: 1024 33 | dropout: 0. 
34 | enc_act: 'gelu' 35 | pe_temperature: 10000 36 | 37 | # cross 38 | expansion: 1.0 39 | depth_mult: 1 40 | act: 'silu' 41 | 42 | # eval 43 | eval_spatial_size: [640, 640] 44 | 45 | 46 | RTDETRTransformer: 47 | feat_channels: [256, 256, 256] 48 | feat_strides: [8, 16, 32] 49 | hidden_dim: 256 50 | num_levels: 3 51 | 52 | num_queries: 300 53 | 54 | num_decoder_layers: 6 55 | num_denoising: 100 56 | 57 | eval_idx: -1 58 | eval_spatial_size: [640, 640] 59 | 60 | 61 | use_focal_loss: True 62 | 63 | RTDETRPostProcessor: 64 | num_top_queries: 300 65 | 66 | 67 | SetCriterion: 68 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 69 | losses: ['vfl', 'boxes', ] 70 | alpha: 0.75 71 | gamma: 2.0 72 | 73 | matcher: 74 | type: HungarianMatcher 75 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 76 | # use_focal_loss: True 77 | alpha: 0.25 78 | gamma: 2.0 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/rtdetr_regnet.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: SetCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | RTDETR: 9 | backbone: RegNet 10 | encoder: HybridEncoder 11 | decoder: RTDETRTransformer 12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 13 | 14 | 15 | RegNet: 16 | return_idx: [1, 2, 3] 17 | configuration: RegNetConfig() 18 | 19 | HybridEncoder: 20 | in_channels: [192, 512, 1088] 21 | feat_strides: [8, 16, 32] 22 | 23 | # intra 24 | hidden_dim: 256 25 | use_encoder_idx: [2] 26 | num_encoder_layers: 1 27 | nhead: 8 28 | dim_feedforward: 1024 29 | dropout: 0. 30 | enc_act: 'gelu' 31 | pe_temperature: 10000 32 | 33 | # cross 34 | expansion: 1.0 35 | depth_mult: 1 36 | act: 'silu' 37 | 38 | # eval 39 | eval_spatial_size: [640, 640] 40 | 41 | 42 | RTDETRTransformer: 43 | feat_channels: [256, 256, 256] 44 | feat_strides: [8, 16, 32] 45 | hidden_dim: 256 46 | num_levels: 3 47 | 48 | num_queries: 300 49 | 50 | num_decoder_layers: 6 51 | num_denoising: 100 52 | 53 | eval_idx: -1 54 | eval_spatial_size: [640, 640] 55 | 56 | 57 | use_focal_loss: True 58 | 59 | RTDETRPostProcessor: 60 | num_top_queries: 300 61 | 62 | 63 | SetCriterion: 64 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 65 | losses: ['vfl', 'boxes', ] 66 | alpha: 0.75 67 | gamma: 2.0 68 | 69 | matcher: 70 | type: HungarianMatcher 71 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 72 | # use_focal_loss: True 73 | alpha: 0.25 74 | gamma: 2.0 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_dla34_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_dla34.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_dla34_6x_coco 10 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | PResNet: 10 | depth: 101 11 | 12 | 13 | HybridEncoder: 14 | # 
intra 15 | hidden_dim: 384 16 | dim_feedforward: 2048 17 | 18 | 19 | RTDETRTransformer: 20 | feat_channels: [384, 384, 384] 21 | 22 | 23 | optimizer: 24 | type: AdamW 25 | params: 26 | - 27 | params: 'backbone' 28 | lr: 0.000001 -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r18vd_6x_coco 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | HybridEncoder: 20 | in_channels: [128, 256, 512] 21 | hidden_dim: 256 22 | expansion: 0.5 23 | 24 | 25 | RTDETRTransformer: 26 | eval_idx: -1 27 | num_decoder_layers: 3 28 | num_denoising: 100 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm).*$' 37 | lr: 0.00001 38 | weight_decay: 0. 39 | - 40 | params: '^(?=.*backbone)(?!.*norm).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$' 44 | weight_decay: 0. 45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | 50 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r34vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 34 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformer: 28 | num_decoder_layers: 4 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 37 | weight_decay: 0. 38 | lr: 0.00001 39 | - 40 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 
45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_r50vd_6x_coco 10 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco 10 | 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | RTDETRTransformer: 16 | eval_idx: 2 # use the 3rd decoder layer for eval -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_regnet_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader_regnet.yml', 5 | './include/optimizer_regnet.yml', 6 | './include/rtdetr_regnet.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_regnet_6x_coco 10 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/runtime.yml: -------------------------------------------------------------------------------- 1 | sync_bn: True 2 | find_unused_parameters: False 3 | 4 | 5 | use_amp: False 6 | 7 | scaler: 8 | type: GradScaler 9 | enabled: True 10 | 11 | 12 | use_ema: False 13 | ema: 14 | type: ModelEMA 15 | decay: 0.9999 16 | warmups: 2000 17 | 18 | -------------------------------------------------------------------------------- /rtdetr_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.0.1 2 | torchvision==0.15.2 3 | onnx==1.14.0 4 | onnxruntime==1.15.1 5 | pycocotools 6 | PyYAML 7 | scipy 8 | transformers 9 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from . import data 3 | from . import nn 4 | from . import optim 5 | from . 
import zoo 6 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/core/__init__.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | # from .yaml_utils import register, create, load_config, merge_config, merge_dict 5 | from .yaml_utils import * 6 | from .config import BaseConfig 7 | from .yaml_config import YAMLConfig 8 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .coco import * 3 | from .cifar10 import CIFAR10 4 | 5 | from .dataloader import * 6 | from .transforms import * 7 | 8 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/cifar10/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import torchvision 3 | from typing import Optional, Callable 4 | 5 | from src.core import register 6 | 7 | 8 | @register 9 | class CIFAR10(torchvision.datasets.CIFAR10): 10 | __inject__ = ['transform', 'target_transform'] 11 | 12 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None: 13 | super().__init__(root, train, transform, target_transform, download) 14 | 15 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_dataset import ( 2 | CocoDetection, 3 | mscoco_category2label, 4 | mscoco_label2category, 5 | mscoco_category2name, 6 | ) 7 | from .coco_eval import * 8 | 9 | from .coco_utils import get_coco_api_from_dataset -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | 4 | from src.core import register 5 | 6 | 7 | __all__ = ['DataLoader'] 8 | 9 | 10 | @register 11 | class DataLoader(data.DataLoader): 12 | __inject__ = ['dataset', 'collate_fn'] 13 | 14 | def __repr__(self) -> str: 15 | format_string = self.__class__.__name__ + "(" 16 | for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']: 17 | format_string += "\n" 18 | format_string += " {0}: {1}".format(n, getattr(self, n)) 19 | format_string += "\n)" 20 | return format_string 21 | 22 | 23 | 24 | @register 25 | def default_collate_fn(items): 26 | '''default collate_fn 27 | ''' 28 | return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items] 29 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/misc/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .logger import * 3 | from .visualizer import * 4 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/misc/visualizer.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | import torch 5 | import torch.utils.data 6 | 7 | import torchvision 8 | torchvision.disable_beta_transforms_warning() 9 | 10 | import PIL 11 | 12 | __all__ = ['show_sample'] 13 | 14 | def show_sample(sample): 15
| """for coco dataset/dataloader 16 | """ 17 | import matplotlib.pyplot as plt 18 | from torchvision.transforms.v2 import functional as F 19 | from torchvision.utils import draw_bounding_boxes 20 | 21 | image, target = sample 22 | if isinstance(image, PIL.Image.Image): 23 | image = F.to_image_tensor(image) 24 | 25 | image = F.convert_dtype(image, torch.uint8) 26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3) 27 | 28 | fig, ax = plt.subplots() 29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy()) 30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) 31 | fig.tight_layout() 32 | fig.show() 33 | plt.show() 34 | 35 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .arch import * 3 | from .criterion import * 4 | 5 | # 6 | from .backbone import * 7 | 8 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/arch/__init__.py: -------------------------------------------------------------------------------- 1 | from .classification import * 2 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/arch/classification.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from src.core import register 5 | 6 | 7 | __all__ = ['Classification', 'ClassHead'] 8 | 9 | 10 | @register 11 | class Classification(nn.Module): 12 | __inject__ = ['backbone', 'head'] 13 | 14 | def __init__(self, backbone: nn.Module, head: nn.Module=None): 15 | super().__init__() 16 | 17 | self.backbone = backbone 18 | self.head = head 19 | 20 | def forward(self, x): 21 | x = self.backbone(x) 22 | 23 | if self.head is not None: 24 | x = self.head(x) 25 | 26 | return x 27 | 28 | 29 | @register 30 | class ClassHead(nn.Module): 31 | def __init__(self, hidden_dim, num_classes): 32 | super().__init__() 33 | self.pool = nn.AdaptiveAvgPool2d(1) 34 | self.proj = nn.Linear(hidden_dim, num_classes) 35 | 36 | def forward(self, x): 37 | x = x[0] if isinstance(x, (list, tuple)) else x 38 | x = self.pool(x) 39 | x = x.reshape(x.shape[0], -1) 40 | x = self.proj(x) 41 | return x 42 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .presnet import * 3 | from .test_resnet import * 4 | from .regnet import * 5 | from .common import * 6 | from .dla import * -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/regnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import RegNetModel 4 | 5 | 6 | from src.core import register 7 | 8 | __all__ = ['RegNet'] 9 | 10 | @register 11 | class RegNet(nn.Module): 12 | def __init__(self, configuration, return_idx=[0, 1, 2, 3]): 13 | super(RegNet, self).__init__() 14 | self.model = RegNetModel.from_pretrained("facebook/regnet-y-040") 15 | self.return_idx = return_idx 16 | 17 | 18 | def forward(self, x): 19 | 20 | outputs = self.model(x, output_hidden_states = True) 21 | x = outputs.hidden_states[2:5] 22 | 23 | return x 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/test_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from collections import OrderedDict 6 | 7 | 8 | from src.core import register 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | expansion = 1 13 | 14 | def __init__(self, in_planes, planes, stride=1): 15 | super(BasicBlock, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False), 27 | nn.BatchNorm2d(self.expansion*planes) 28 | ) 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | out += self.shortcut(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | 38 | class _ResNet(nn.Module): 39 | def __init__(self, block, num_blocks, num_classes=10): 40 | super().__init__() 41 | self.in_planes = 64 42 | 43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(64) 45 | 46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 50 | 51 | self.linear = nn.Linear(512 * block.expansion, num_classes) 52 | 53 | def _make_layer(self, block, planes, num_blocks, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for stride in strides: 57 | layers.append(block(self.in_planes, planes, stride)) 58 | self.in_planes = planes * block.expansion 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | @register 74 | class MResNet(nn.Module): 75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None: 76 | super().__init__() 77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes) 78 | 79 | def forward(self, x): 80 | return self.model(x) 81 | 82 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py 3 | 4 | by lyuwenyu 5 | """ 6 | 7 | from collections import OrderedDict 8 | from typing import Dict, List 9 | 10 | 11 | import torch.nn as nn 12 | 13 | 14 | class IntermediateLayerGetter(nn.ModuleDict): 15 | """ 16 | Module wrapper that returns intermediate layers from a model 17 | 18 | It has a strong assumption that the modules have been registered 19 | into the model in the same order as they are used. 
20 | This means that one should **not** reuse the same nn.Module 21 | twice in the forward if you want this to work. 22 | 23 | Additionally, it is only able to query submodules that are directly 24 | assigned to the model. So if `model` is passed, `model.feature1` can 25 | be returned, but not `model.feature1.layer2`. 26 | """ 27 | 28 | _version = 3 29 | 30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None: 31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]): 32 | raise ValueError("return_layers are not present in model. {}"\ 33 | .format([name for name, _ in model.named_children()])) 34 | orig_return_layers = return_layers 35 | return_layers = {str(k): str(k) for k in return_layers} 36 | layers = OrderedDict() 37 | for name, module in model.named_children(): 38 | layers[name] = module 39 | if name in return_layers: 40 | del return_layers[name] 41 | if not return_layers: 42 | break 43 | 44 | super().__init__(layers) 45 | self.return_layers = orig_return_layers 46 | 47 | def forward(self, x): 48 | # out = OrderedDict() 49 | outputs = [] 50 | for name, module in self.items(): 51 | x = module(x) 52 | if name in self.return_layers: 53 | # out_name = self.return_layers[name] 54 | # out[out_name] = x 55 | outputs.append(x) 56 | 57 | return outputs 58 | 59 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/criterion/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | from src.core import register 4 | 5 | CrossEntropyLoss = register(nn.CrossEntropyLoss) 6 | 7 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/criterion/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | 5 | 6 | def format_target(targets): 7 | ''' 8 | Args: 9 | targets (List[Dict]), 10 | Return: 11 | tensor (Tensor), [im_id, label, bbox,] 12 | ''' 13 | outputs = [] 14 | for i, tgt in enumerate(targets): 15 | boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh') 16 | labels = tgt['labels'].reshape(-1, 1) 17 | im_ids = torch.ones_like(labels) * i 18 | outputs.append(torch.cat([im_ids, labels, boxes], dim=1)) 19 | 20 | return torch.cat(outputs, dim=0) 21 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/optim/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .ema import * 3 | from .optim import * 4 | from .amp import * -------------------------------------------------------------------------------- /rtdetr_pytorch/src/optim/amp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.cuda.amp as amp 4 | 5 | 6 | from src.core import register 7 | import src.misc.dist as dist 8 | 9 | 10 | __all__ = ['GradScaler'] 11 | 12 | GradScaler = register(amp.grad_scaler.GradScaler) 13 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/optim/optim.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_scheduler 6 | 7 | from src.core import register 8 | 9 | 10 | __all__ = ['AdamW', 'SGD', 'Adam', 
'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR']
11 | 
12 | 
13 | 
14 | SGD = register(optim.SGD)
15 | Adam = register(optim.Adam)
16 | AdamW = register(optim.AdamW)
17 | 
18 | 
19 | MultiStepLR = register(lr_scheduler.MultiStepLR)
20 | CosineAnnealingLR = register(lr_scheduler.CosineAnnealingLR)
21 | OneCycleLR = register(lr_scheduler.OneCycleLR)
22 | LambdaLR = register(lr_scheduler.LambdaLR)
23 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/solver/__init__.py: --------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 | 
4 | from .solver import BaseSolver
5 | from .det_solver import DetSolver
6 | 
7 | 
8 | from typing import Dict, Type
9 | 
10 | TASKS: Dict[str, Type[BaseSolver]] = {
11 |     'detection': DetSolver,
12 | }
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/__init__.py: --------------------------------------------------------------------------------
1 | 
2 | from .rtdetr import *
3 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/__init__.py: --------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 | 
4 | 
5 | from .rtdetr import *
6 | 
7 | from .hybrid_encoder import *
8 | from .rtdetr_decoder import *
9 | from .rtdetr_postprocessor import *
10 | from .rtdetr_criterion import *
11 | 
12 | from .matcher import *
13 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/box_ops.py: --------------------------------------------------------------------------------
1 | '''
2 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py 4 | ''' 5 | 6 | import torch 7 | from torchvision.ops.boxes import box_area 8 | 9 | 10 | def box_cxcywh_to_xyxy(x): 11 | x_c, y_c, w, h = x.unbind(-1) 12 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 13 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 14 | return torch.stack(b, dim=-1) 15 | 16 | 17 | def box_xyxy_to_cxcywh(x): 18 | x0, y0, x1, y1 = x.unbind(-1) 19 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 20 | (x1 - x0), (y1 - y0)] 21 | return torch.stack(b, dim=-1) 22 | 23 | 24 | # modified from torchvision to also return the union 25 | def box_iou(boxes1, boxes2): 26 | area1 = box_area(boxes1) 27 | area2 = box_area(boxes2) 28 | 29 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 30 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 31 | 32 | wh = (rb - lt).clamp(min=0) # [N,M,2] 33 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 34 | 35 | union = area1[:, None] + area2 - inter 36 | 37 | iou = inter / union 38 | return iou, union 39 | 40 | 41 | def generalized_box_iou(boxes1, boxes2): 42 | """ 43 | Generalized IoU from https://giou.stanford.edu/ 44 | 45 | The boxes should be in [x0, y0, x1, y1] format 46 | 47 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 48 | and M = len(boxes2) 49 | """ 50 | # degenerate boxes gives inf / nan results 51 | # so do an early check 52 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 53 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 54 | iou, union = box_iou(boxes1, boxes2) 55 | 56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 58 | 59 | wh = (rb - lt).clamp(min=0) # [N,M,2] 60 | area = wh[:, :, 0] * wh[:, :, 1] 61 | 62 | return iou - (area - union) / area 63 | 64 | 65 | def masks_to_boxes(masks): 66 | """Compute the bounding boxes around the provided masks 67 | 68 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
69 | 70 | Returns a [N, 4] tensors, with the boxes in xyxy format 71 | """ 72 | if masks.numel() == 0: 73 | return torch.zeros((0, 4), device=masks.device) 74 | 75 | h, w = masks.shape[-2:] 76 | 77 | y = torch.arange(0, h, dtype=torch.float) 78 | x = torch.arange(0, w, dtype=torch.float) 79 | y, x = torch.meshgrid(y, x) 80 | 81 | x_mask = (masks * x.unsqueeze(0)) 82 | x_max = x_mask.flatten(1).max(-1)[0] 83 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 84 | 85 | y_mask = (masks * y.unsqueeze(0)) 86 | y_max = y_mask.flatten(1).max(-1)[0] 87 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 88 | 89 | return torch.stack([x_min, y_min, x_max, y_max], 1) -------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/rtdetr.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import random 9 | import numpy as np 10 | 11 | from src.core import register 12 | 13 | 14 | __all__ = ['RTDETR', ] 15 | 16 | 17 | @register 18 | class RTDETR(nn.Module): 19 | __inject__ = ['backbone', 'encoder', 'decoder', ] 20 | 21 | def __init__(self, backbone: nn.Module, encoder, decoder, multi_scale=None): 22 | super().__init__() 23 | self.backbone = backbone 24 | self.decoder = decoder 25 | self.encoder = encoder 26 | self.multi_scale = multi_scale 27 | 28 | def forward(self, x, targets=None): 29 | if self.multi_scale and self.training: 30 | sz = np.random.choice(self.multi_scale) 31 | x = F.interpolate(x, size=[sz, sz]) 32 | 33 | x = self.backbone(x) 34 | x = self.encoder(x) 35 | x = self.decoder(x, targets) 36 | 37 | return x 38 | 39 | def deploy(self, ): 40 | self.eval() 41 | for m in self.modules(): 42 | if hasattr(m, 'convert_to_deploy'): 43 | m.convert_to_deploy() 44 | return self 45 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import torchvision 9 | 10 | from src.core import register 11 | 12 | 13 | __all__ = ['RTDETRPostProcessor'] 14 | 15 | 16 | @register 17 | class RTDETRPostProcessor(nn.Module): 18 | __share__ = ['num_classes', 'use_focal_loss', 'num_top_queries', 'remap_mscoco_category'] 19 | 20 | def __init__(self, num_classes=80, use_focal_loss=True, num_top_queries=300, remap_mscoco_category=False) -> None: 21 | super().__init__() 22 | self.use_focal_loss = use_focal_loss 23 | self.num_top_queries = num_top_queries 24 | self.num_classes = num_classes 25 | self.remap_mscoco_category = remap_mscoco_category 26 | self.deploy_mode = False 27 | 28 | def extra_repr(self) -> str: 29 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}' 30 | 31 | # def forward(self, outputs, orig_target_sizes): 32 | def forward(self, outputs, orig_target_sizes): 33 | 34 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] 35 | # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) 36 | 37 | bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy') 38 | bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1) 39 | 40 | if self.use_focal_loss: 41 | scores = F.sigmoid(logits) 42 | 
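            # NOTE: the top-k below runs over scores flattened to (batch, num_queries * num_classes),
            # ranking all query-class pairs jointly, so a single query may yield several labels; the
            # flat index encodes query_idx * num_classes + class_idx, recovered by the `%` and `//` lines.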
scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
43 |             labels = index % self.num_classes
44 |             index = index // self.num_classes
45 |             boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]))
46 | 
47 |         else:
48 |             scores = F.softmax(logits, dim=-1)[:, :, :-1]
49 |             scores, labels = scores.max(dim=-1)
50 |             boxes = bbox_pred
51 |             if scores.shape[1] > self.num_top_queries:
52 |                 scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
53 |                 labels = torch.gather(labels, dim=1, index=index)
54 |                 boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
55 | 
56 |         # TODO for onnx export
57 |         if self.deploy_mode:
58 |             return labels, boxes, scores
59 | 
60 |         # TODO
61 |         if self.remap_mscoco_category:
62 |             from ...data.coco import mscoco_label2category
63 |             labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
64 |                 .to(boxes.device).reshape(labels.shape)
65 | 
66 |         results = []
67 |         for lab, box, sco in zip(labels, boxes, scores):
68 |             result = dict(labels=lab, boxes=box, scores=sco)
69 |             results.append(result)
70 | 
71 |         return results
72 | 
73 | 
74 |     def deploy(self, ):
75 |         self.eval()
76 |         self.deploy_mode = True
77 |         return self
78 | 
79 |     @property
80 |     def iou_types(self, ):
81 |         return ('bbox', )
82 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/tools/README.md: --------------------------------------------------------------------------------
1 | 
2 | 
3 | Train/test script examples
4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master-port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &`
5 | - `-r path/to/checkpoint`
6 | - `--amp`
7 | - `--test-only`
8 | 
9 | 
10 | Tuning script examples
11 | - `torchrun --master_port=8844 --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -t https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth`
12 | 
13 | 
14 | Export script examples
15 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check`
16 | 
17 | 
18 | GPU does not release memory
19 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9`
20 | 
21 | 
22 | Save all logs
23 | - Append `&> train.log 2>&1 &` (background) or `&> train.log 2>&1` to the command
24 | 
25 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/tools/train.py: --------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 | 
4 | import os
5 | import sys
6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
7 | import argparse
8 | 
9 | import src.misc.dist as dist
10 | from src.core import YAMLConfig
11 | from src.solver import TASKS
12 | 
13 | 
14 | def main(args, ) -> None:
15 |     '''main
16 |     '''
17 |     dist.init_distributed()
18 |     if args.seed is not None:
19 |         dist.set_seed(args.seed)
20 | 
21 |     assert not all([args.tuning, args.resume]), \
22 |         'Only support training from scratch, resuming, or tuning, one at a time'
23 | 
24 |     cfg = YAMLConfig(
25 |         args.config,
26 |         resume=args.resume,
27 |         use_amp=args.amp,
28 |         tuning=args.tuning
29 |     )
30 | 
31 |     solver = TASKS[cfg.yaml_cfg['task']](cfg)
32 | 
33 |     if args.test_only:
34 |         solver.val()
35 |     else:
36 |         solver.fit()
37 | 
38 | 
39 | if __name__ == '__main__':
40 | 
41 |     parser = argparse.ArgumentParser()
42 |     parser.add_argument('--config', '-c', type=str, )
43 |     parser.add_argument('--resume', '-r',
type=str, ) 44 | parser.add_argument('--tuning', '-t', type=str, ) 45 | parser.add_argument('--test-only', action='store_true', default=False,) 46 | parser.add_argument('--amp', action='store_true', default=False,) 47 | parser.add_argument('--seed', type=int, help='seed',) 48 | args = parser.parse_args() 49 | 50 | main(args) 51 | -------------------------------------------------------------------------------- /rtdetrv2_paddle/readme.md: -------------------------------------------------------------------------------- 1 | see https://github.com/PaddlePaddle/PaddleDetection -------------------------------------------------------------------------------- /rtdetrv2_pytorch/Dockerfile: -------------------------------------------------------------------------------- 1 | # tensorrt:23.01-py3 (8.5.2.2) 2 | FROM nvcr.io/nvidia/tensorrt:23.01-py3 3 | 4 | WORKDIR /workspace 5 | 6 | COPY requirements.txt . 7 | 8 | RUN pip install --upgrade pip && \ 9 | pip install -r requirements.txt 10 | 11 | CMD ["/bin/bash"] 12 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/dataset/coco_detection.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | evaluator: 4 | type: CocoEvaluator 5 | iou_types: ['bbox', ] 6 | 7 | # num_classes: 365 8 | # remap_mscoco_category: False 9 | 10 | # num_classes: 91 11 | # remap_mscoco_category: False 12 | 13 | num_classes: 80 14 | remap_mscoco_category: True 15 | 16 | 17 | train_dataloader: 18 | type: DataLoader 19 | dataset: 20 | type: CocoDetection 21 | img_folder: ./dataset/coco/train2017/ 22 | ann_file: ./dataset/coco/annotations/instances_train2017.json 23 | return_masks: False 24 | transforms: 25 | type: Compose 26 | ops: ~ 27 | shuffle: True 28 | num_workers: 4 29 | drop_last: True 30 | collate_fn: 31 | type: BatchImageCollateFuncion 32 | 33 | 34 | val_dataloader: 35 | type: DataLoader 36 | dataset: 37 | type: CocoDetection 38 | img_folder: ./dataset/coco/val2017/ 39 | ann_file: ./dataset/coco/annotations/instances_val2017.json 40 | return_masks: False 41 | transforms: 42 | type: Compose 43 | ops: ~ 44 | shuffle: False 45 | num_workers: 4 46 | drop_last: False 47 | collate_fn: 48 | type: BatchImageCollateFuncion 49 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/dataset/voc_detection.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | evaluator: 4 | type: CocoEvaluator 5 | iou_types: ['bbox', ] 6 | 7 | num_classes: 20 8 | 9 | train_dataloader: 10 | type: DataLoader 11 | dataset: 12 | type: VOCDetection 13 | root: ./dataset/voc/ 14 | ann_file: trainval.txt 15 | label_file: label_list.txt 16 | transforms: 17 | type: Compose 18 | ops: ~ 19 | shuffle: True 20 | num_workers: 4 21 | drop_last: True 22 | collate_fn: 23 | type: BatchImageCollateFuncion 24 | 25 | 26 | val_dataloader: 27 | type: DataLoader 28 | dataset: 29 | type: VOCDetection 30 | root: ./dataset/voc/ 31 | ann_file: test.txt 32 | label_file: label_list.txt 33 | transforms: 34 | type: Compose 35 | ops: ~ 36 | shuffle: False 37 | num_workers: 4 38 | drop_last: False 39 | collate_fn: 40 | type: BatchImageCollateFuncion 41 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml: -------------------------------------------------------------------------------- 1 | 2 | train_dataloader: 3 | dataset: 4 | 
return_masks: False 5 | transforms: 6 | ops: 7 | - {type: RandomPhotometricDistort, p: 0.5} 8 | - {type: RandomZoomOut, fill: 0} 9 | - {type: RandomIoUCrop, p: 0.8} 10 | - {type: SanitizeBoundingBoxes, min_size: 1} 11 | - {type: RandomHorizontalFlip} 12 | - {type: Resize, size: [640, 640], } 13 | - {type: SanitizeBoundingBoxes, min_size: 1} 14 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 15 | - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True} 16 | collate_fn: 17 | type: BatchImageCollateFuncion 18 | scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 19 | shuffle: True 20 | num_workers: 4 21 | total_batch_size: 16 22 | 23 | val_dataloader: 24 | dataset: 25 | transforms: 26 | ops: 27 | - {type: Resize, size: [640, 640]} 28 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 29 | shuffle: False 30 | total_batch_size: 16 31 | num_workers: 8 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/include/optimizer.yml: -------------------------------------------------------------------------------- 1 | 2 | use_ema: True 3 | ema: 4 | type: ModelEMA 5 | decay: 0.9999 6 | warmups: 2000 7 | 8 | 9 | epoches: 72 10 | clip_max_norm: 0.1 11 | 12 | 13 | optimizer: 14 | type: AdamW 15 | params: 16 | - 17 | params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$' 18 | lr: 0.00001 19 | - 20 | params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$' 21 | weight_decay: 0. 22 | lr: 0.00001 23 | - 24 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 25 | weight_decay: 0. 26 | 27 | lr: 0.0001 28 | betas: [0.9, 0.999] 29 | weight_decay: 0.0001 30 | 31 | 32 | lr_scheduler: 33 | type: MultiStepLR 34 | milestones: [1000] 35 | gamma: 0.1 36 | 37 | 38 | lr_warmup_scheduler: 39 | type: LinearWarmup 40 | warmup_duration: 2000 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: RTDETRCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | use_focal_loss: True 9 | eval_spatial_size: [640, 640] # h w 10 | 11 | 12 | RTDETR: 13 | backbone: PResNet 14 | encoder: HybridEncoder 15 | decoder: RTDETRTransformer 16 | 17 | 18 | PResNet: 19 | depth: 50 20 | variant: d 21 | freeze_at: 0 22 | return_idx: [1, 2, 3] 23 | num_stages: 4 24 | freeze_norm: True 25 | pretrained: True 26 | 27 | 28 | HybridEncoder: 29 | in_channels: [512, 1024, 2048] 30 | feat_strides: [8, 16, 32] 31 | 32 | # intra 33 | hidden_dim: 256 34 | use_encoder_idx: [2] 35 | num_encoder_layers: 1 36 | nhead: 8 37 | dim_feedforward: 1024 38 | dropout: 0. 
39 | enc_act: 'gelu' 40 | 41 | # cross 42 | expansion: 1.0 43 | depth_mult: 1 44 | act: 'silu' 45 | 46 | version: v1 47 | 48 | RTDETRTransformer: 49 | feat_channels: [256, 256, 256] 50 | feat_strides: [8, 16, 32] 51 | hidden_dim: 256 52 | num_levels: 3 53 | 54 | num_layers: 6 55 | num_queries: 300 56 | 57 | num_denoising: 100 58 | label_noise_ratio: 0.5 59 | box_noise_scale: 1.0 # 1.0 0.4 60 | 61 | eval_idx: -1 62 | 63 | 64 | RTDETRPostProcessor: 65 | num_top_queries: 300 66 | 67 | 68 | RTDETRCriterion: 69 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 70 | losses: ['vfl', 'boxes', ] 71 | alpha: 0.75 72 | gamma: 2.0 73 | 74 | matcher: 75 | type: HungarianMatcher 76 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 77 | alpha: 0.25 78 | gamma: 2.0 79 | 80 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r101vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 101 16 | 17 | 18 | HybridEncoder: 19 | # intra 20 | hidden_dim: 384 21 | dim_feedforward: 2048 22 | 23 | 24 | RTDETRTransformer: 25 | feat_channels: [384, 384, 384] 26 | 27 | 28 | optimizer: 29 | type: AdamW 30 | params: 31 | - 32 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 33 | lr: 0.000001 34 | - 35 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 36 | weight_decay: 0. 37 | 38 | lr: 0.0001 39 | betas: [0.9, 0.999] 40 | weight_decay: 0.0001 41 | 42 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r18vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 18 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformer: 28 | num_layers: 3 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 37 | weight_decay: 0. 38 | lr: 0.00001 39 | - 40 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 
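# NOTE: the `params` strings in the optimizer groups above are regular expressions
# matched against model parameter names, forming per-group overrides (lower lr for the
# backbone, zero weight decay for norm/bn/bias); parameters that match no group fall
# back to the global lr / weight_decay given below.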
45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r34vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 34 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformer: 28 | num_layers: 4 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 37 | weight_decay: 0. 38 | lr: 0.00001 39 | - 40 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r50vd_6x_coco 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco 10 | 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | 16 | RTDETRTransformer: 17 | eval_idx: 2 # use 3th decoder layer to eval 18 | 19 | 20 | 21 | optimizer: 22 | type: AdamW 23 | params: 24 | - 25 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 26 | lr: 0.00001 27 | - 28 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 29 | weight_decay: 0. 
30 | 31 | lr: 0.0001 32 | betas: [0.9, 0.999] 33 | weight_decay: 0.0001 34 | 35 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/include/dataloader.yml: -------------------------------------------------------------------------------- 1 | 2 | train_dataloader: 3 | dataset: 4 | transforms: 5 | ops: 6 | - {type: RandomPhotometricDistort, p: 0.5} 7 | - {type: RandomZoomOut, fill: 0} 8 | - {type: RandomIoUCrop, p: 0.8} 9 | - {type: SanitizeBoundingBoxes, min_size: 1} 10 | - {type: RandomHorizontalFlip} 11 | - {type: Resize, size: [640, 640], } 12 | - {type: SanitizeBoundingBoxes, min_size: 1} 13 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 14 | - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True} 15 | policy: 16 | name: stop_epoch 17 | epoch: 71 # epoch in [71, ~) stop `ops` 18 | ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop'] 19 | 20 | collate_fn: 21 | type: BatchImageCollateFuncion 22 | scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 23 | stop_epoch: 71 # epoch in [71, ~) stop `multiscales` 24 | 25 | shuffle: True 26 | total_batch_size: 16 # total batch size equals to 16 (4 * 4) 27 | num_workers: 4 28 | 29 | 30 | val_dataloader: 31 | dataset: 32 | transforms: 33 | ops: 34 | - {type: Resize, size: [640, 640]} 35 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 36 | shuffle: False 37 | total_batch_size: 32 38 | num_workers: 4 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/include/optimizer.yml: -------------------------------------------------------------------------------- 1 | 2 | use_amp: True 3 | use_ema: True 4 | ema: 5 | type: ModelEMA 6 | decay: 0.9999 7 | warmups: 2000 8 | 9 | 10 | epoches: 72 11 | clip_max_norm: 0.1 12 | 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: '^(?=.*backbone)(?!.*norm).*$' 19 | lr: 0.00001 20 | - 21 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 22 | weight_decay: 0. 23 | 24 | lr: 0.0001 25 | betas: [0.9, 0.999] 26 | weight_decay: 0.0001 27 | 28 | 29 | lr_scheduler: 30 | type: MultiStepLR 31 | milestones: [1000] 32 | gamma: 0.1 33 | 34 | 35 | lr_warmup_scheduler: 36 | type: LinearWarmup 37 | warmup_duration: 2000 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/include/rtdetrv2_r50vd.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: RTDETRCriterionv2 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | use_focal_loss: True 9 | eval_spatial_size: [640, 640] # h w 10 | 11 | 12 | RTDETR: 13 | backbone: PResNet 14 | encoder: HybridEncoder 15 | decoder: RTDETRTransformerv2 16 | 17 | 18 | PResNet: 19 | depth: 50 20 | variant: d 21 | freeze_at: 0 22 | return_idx: [1, 2, 3] 23 | num_stages: 4 24 | freeze_norm: True 25 | pretrained: True 26 | 27 | 28 | HybridEncoder: 29 | in_channels: [512, 1024, 2048] 30 | feat_strides: [8, 16, 32] 31 | 32 | # intra 33 | hidden_dim: 256 34 | use_encoder_idx: [2] 35 | num_encoder_layers: 1 36 | nhead: 8 37 | dim_feedforward: 1024 38 | dropout: 0. 
39 | enc_act: 'gelu' 40 | 41 | # cross 42 | expansion: 1.0 43 | depth_mult: 1 44 | act: 'silu' 45 | 46 | 47 | RTDETRTransformerv2: 48 | feat_channels: [256, 256, 256] 49 | feat_strides: [8, 16, 32] 50 | hidden_dim: 256 51 | num_levels: 3 52 | 53 | num_layers: 6 54 | num_queries: 300 55 | 56 | num_denoising: 100 57 | label_noise_ratio: 0.5 58 | box_noise_scale: 1.0 # 1.0 0.4 59 | 60 | eval_idx: -1 61 | 62 | # NEW 63 | num_points: [4, 4, 4] # [3,3,3] [2,2,2] 64 | cross_attn_method: default # default, discrete 65 | query_select_method: default # default, agnostic 66 | 67 | 68 | RTDETRPostProcessor: 69 | num_top_queries: 300 70 | 71 | 72 | RTDETRCriterionv2: 73 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 74 | losses: ['vfl', 'boxes', ] 75 | alpha: 0.75 76 | gamma: 2.0 77 | 78 | matcher: 79 | type: HungarianMatcher 80 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 81 | alpha: 0.25 82 | gamma: 2.0 83 | 84 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_h_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_hgnetv2_h_6x_coco 11 | 12 | 13 | RTDETR: 14 | backbone: HGNetv2 15 | 16 | 17 | HGNetv2: 18 | name: 'H' 19 | return_idx: [1, 2, 3] 20 | freeze_at: 0 21 | freeze_norm: True 22 | pretrained: True 23 | 24 | 25 | HybridEncoder: 26 | # intra 27 | hidden_dim: 512 28 | dim_feedforward: 2048 29 | num_encoder_layers: 2 30 | 31 | 32 | RTDETRTransformerv2: 33 | feat_channels: [512, 512, 512] 34 | 35 | 36 | 37 | optimizer: 38 | type: AdamW 39 | params: 40 | - 41 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 42 | lr: 0.000005 43 | - 44 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 45 | weight_decay: 0. 46 | 47 | lr: 0.0001 48 | betas: [0.9, 0.999] 49 | weight_decay: 0.0001 50 | 51 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_l_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_hgnetv2_l_6x_coco 11 | 12 | 13 | RTDETR: 14 | backbone: HGNetv2 15 | 16 | 17 | HGNetv2: 18 | name: 'L' 19 | return_idx: [1, 2, 3] 20 | freeze_at: 0 21 | freeze_norm: True 22 | pretrained: True 23 | 24 | 25 | optimizer: 26 | type: AdamW 27 | params: 28 | - 29 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 30 | lr: 0.000005 31 | - 32 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 33 | weight_decay: 0. 
34 | 35 | lr: 0.0001 36 | betas: [0.9, 0.999] 37 | weight_decay: 0.0001 38 | 39 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_x_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_hgnetv2_x_6x_coco 11 | 12 | 13 | RTDETR: 14 | backbone: HGNetv2 15 | 16 | 17 | HGNetv2: 18 | name: 'X' 19 | return_idx: [1, 2, 3] 20 | freeze_at: 0 21 | freeze_norm: True 22 | pretrained: True 23 | 24 | 25 | 26 | HybridEncoder: 27 | # intra 28 | hidden_dim: 384 29 | dim_feedforward: 2048 30 | 31 | 32 | RTDETRTransformerv2: 33 | feat_channels: [384, 384, 384] 34 | 35 | 36 | 37 | optimizer: 38 | type: AdamW 39 | params: 40 | - 41 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 42 | lr: 0.000001 43 | - 44 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 45 | weight_decay: 0. 46 | 47 | lr: 0.0001 48 | betas: [0.9, 0.999] 49 | weight_decay: 0.0001 50 | 51 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r101vd_6x_coco 11 | 12 | 13 | PResNet: 14 | depth: 101 15 | 16 | 17 | HybridEncoder: 18 | # intra 19 | hidden_dim: 384 20 | dim_feedforward: 2048 21 | 22 | 23 | RTDETRTransformerv2: 24 | feat_channels: [384, 384, 384] 25 | 26 | 27 | optimizer: 28 | type: AdamW 29 | params: 30 | - 31 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 32 | lr: 0.000001 33 | - 34 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 35 | weight_decay: 0. 36 | 37 | lr: 0.0001 38 | betas: [0.9, 0.999] 39 | weight_decay: 0.0001 40 | 41 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | 29 | 30 | epoches: 120 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*(?:norm|bn)).*$' 37 | weight_decay: 0. 
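# NOTE: the train_dataloader override below follows the stop-epoch policy from
# include/dataloader.yml: from epoch 117 of the 120-epoch schedule the listed strong
# augmentations are skipped, and `scales: ~` appears to disable multi-scale collate
# entirely for this recipe.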
38 | 39 | 40 | train_dataloader: 41 | dataset: 42 | transforms: 43 | policy: 44 | epoch: 117 45 | collate_fn: 46 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_voc.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/voc_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_120e_voc 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | 29 | 30 | epoches: 120 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*(?:norm|bn)).*$' 37 | weight_decay: 0. 38 | 39 | train_dataloader: 40 | dataset: 41 | transforms: 42 | policy: 43 | epoch: 117 44 | collate_fn: 45 | scales: ~ 46 | total_batch_size: 32 47 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_dsp_3x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r18vd_120e_coco.pth 11 | 12 | output_dir: ./output/rtdetrv2_r18vd_dsp_3x_coco 13 | 14 | PResNet: 15 | depth: 18 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformerv2: 28 | num_layers: 3 29 | num_points: [4, 4, 4] 30 | cross_attn_method: discrete 31 | 32 | 33 | epoches: 36 34 | 35 | optimizer: 36 | type: AdamW 37 | params: 38 | - 39 | params: '^(?=.*(?:norm|bn)).*$' 40 | weight_decay: 0. 41 | 42 | 43 | train_dataloader: 44 | dataset: 45 | transforms: 46 | policy: 47 | epoch: 33 48 | collate_fn: 49 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp1_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_sp1_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | num_points: [1, 1, 1] 29 | 30 | 31 | epoches: 120 32 | 33 | optimizer: 34 | type: AdamW 35 | params: 36 | - 37 | params: '^(?=.*(?:norm|bn)).*$' 38 | weight_decay: 0. 
39 | 40 | 41 | train_dataloader: 42 | dataset: 43 | transforms: 44 | policy: 45 | epoch: 117 46 | collate_fn: 47 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp2_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_sp2_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | num_points: [2, 2, 2] 29 | 30 | 31 | epoches: 120 32 | 33 | optimizer: 34 | type: AdamW 35 | params: 36 | - 37 | params: '^(?=.*(?:norm|bn)).*$' 38 | weight_decay: 0. 39 | 40 | 41 | train_dataloader: 42 | dataset: 43 | transforms: 44 | policy: 45 | epoch: 117 46 | collate_fn: 47 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp3_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_sp3_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | num_points: [3, 3, 3] 29 | 30 | 31 | epoches: 120 32 | 33 | optimizer: 34 | type: AdamW 35 | params: 36 | - 37 | params: '^(?=.*(?:norm|bn)).*$' 38 | weight_decay: 0. 39 | 40 | 41 | train_dataloader: 42 | dataset: 43 | transforms: 44 | policy: 45 | epoch: 117 46 | collate_fn: 47 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r34vd_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r34vd_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 34 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 4 28 | 29 | 30 | epoches: 120 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 37 | lr: 0.00005 38 | - 39 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 40 | lr: 0.00005 41 | weight_decay: 0. 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 
45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | 50 | 51 | train_dataloader: 52 | dataset: 53 | transforms: 54 | policy: 55 | epoch: 117 56 | collate_fn: 57 | stop_epoch: 117 58 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r34vd_dsp_1x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r34vd_120e_coco_ema.pth 10 | 11 | output_dir: ./output/rtdetrv2_r34vd_dsp_1x_coco 12 | 13 | 14 | PResNet: 15 | depth: 34 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformerv2: 28 | num_layers: 4 29 | cross_attn_method: discrete 30 | 31 | 32 | epoches: 12 33 | 34 | optimizer: 35 | type: AdamW 36 | params: 37 | - 38 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 39 | lr: 0.00005 40 | - 41 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 42 | lr: 0.00005 43 | weight_decay: 0. 44 | - 45 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 46 | weight_decay: 0. 47 | 48 | lr: 0.0001 49 | betas: [0.9, 0.999] 50 | weight_decay: 0.0001 51 | 52 | 53 | train_dataloader: 54 | dataset: 55 | transforms: 56 | policy: 57 | epoch: 10 58 | collate_fn: 59 | stop_epoch: 10 60 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r50vd_6x_coco 11 | 12 | 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: '^(?=.*backbone)(?!.*norm).*$' 19 | lr: 0.00001 20 | - 21 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 22 | weight_decay: 0. 
23 | 24 | lr: 0.0001 25 | betas: [0.9, 0.999] 26 | weight_decay: 0.0001 27 | 28 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_dsp_1x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_6x_coco_ema.pth 11 | 12 | output_dir: ./output/rtdetrv2_r50vd_dsp_1x_coco 13 | 14 | 15 | RTDETRTransformerv2: 16 | cross_attn_method: discrete 17 | 18 | 19 | epoches: 12 20 | 21 | train_dataloader: 22 | dataset: 23 | transforms: 24 | policy: 25 | epoch: 10 26 | collate_fn: 27 | stop_epoch: 10 28 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_m_7x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetrv2_r50vd_m_6x_coco 10 | 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | 16 | RTDETRTransformerv2: 17 | eval_idx: 2 # use 3th decoder layer to eval 18 | 19 | 20 | epoches: 84 21 | 22 | optimizer: 23 | type: AdamW 24 | params: 25 | - 26 | params: '^(?=.*backbone)(?!.*norm).*$' 27 | lr: 0.00001 28 | - 29 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 30 | weight_decay: 0. 31 | 32 | lr: 0.0001 33 | betas: [0.9, 0.999] 34 | weight_decay: 0.0001 35 | 36 | 37 | train_dataloader: 38 | dataset: 39 | transforms: 40 | policy: 41 | epoch: 81 42 | collate_fn: 43 | stop_epoch: 81 44 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_m_dsp_3x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetrv2_r50vd_m_dsp_3x_coco 10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_m_7x_coco_ema.pth 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | 16 | RTDETRTransformerv2: 17 | eval_idx: 2 # use 3th decoder layer to eval 18 | cross_attn_method: discrete 19 | 20 | 21 | epoches: 36 22 | 23 | optimizer: 24 | type: AdamW 25 | params: 26 | - 27 | params: '^(?=.*backbone)(?!.*norm).*$' 28 | lr: 0.00001 29 | - 30 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 31 | weight_decay: 0. 
32 | 33 | lr: 0.0001 34 | betas: [0.9, 0.999] 35 | weight_decay: 0.0001 36 | 37 | 38 | train_dataloader: 39 | dataset: 40 | transforms: 41 | policy: 42 | epoch: 33 43 | collate_fn: 44 | stop_epoch: 33 45 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/runtime.yml: -------------------------------------------------------------------------------- 1 | 2 | print_freq: 100 3 | output_dir: './logs' 4 | checkpoint_freq: 1 5 | 6 | 7 | sync_bn: True 8 | find_unused_parameters: False 9 | 10 | 11 | use_amp: False 12 | scaler: 13 | type: GradScaler 14 | enabled: True 15 | 16 | 17 | use_ema: False 18 | ema: 19 | type: ModelEMA 20 | decay: 0.9999 21 | warmups: 2000 22 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/dataset/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ``` 4 | # configs/dataset/xxx.yml 5 | ln -s /path/to/dataset/ ./dataset/dataset_name 6 | ``` 7 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | tensorrt-container: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | image: rtdetr-v2:23.01 9 | volumes: 10 | - ./:/workspace 11 | runtime: nvidia 12 | environment: 13 | - NVIDIA_VISIBLE_DEVICES=all 14 | stdin_open: true 15 | tty: true 16 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/readme.md: -------------------------------------------------------------------------------- 1 | # Deployment 2 | 3 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/rtdetrv2_onnxruntime.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
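Example usage (file paths below are placeholders):
    python references/deploy/rtdetrv2_onnxruntime.py --onnx-file model.onnx --im-file image.jpg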
2 | """ 3 | 4 | import torch 5 | import torchvision.transforms as T 6 | 7 | import numpy as np 8 | import onnxruntime as ort 9 | from PIL import Image, ImageDraw 10 | 11 | 12 | def draw(images, labels, boxes, scores, thrh = 0.6): 13 | for i, im in enumerate(images): 14 | draw = ImageDraw.Draw(im) 15 | 16 | scr = scores[i] 17 | lab = labels[i][scr > thrh] 18 | box = boxes[i][scr > thrh] 19 | 20 | for b in box: 21 | draw.rectangle(list(b), outline='red',) 22 | draw.text((b[0], b[1]), text=str(lab[i].item()), fill='blue', ) 23 | 24 | im.save(f'results_{i}.jpg') 25 | 26 | 27 | def main(args, ): 28 | """main 29 | """ 30 | sess = ort.InferenceSession(args.onnx_file) 31 | print(ort.get_device()) 32 | 33 | im_pil = Image.open(args.im_file).convert('RGB') 34 | w, h = im_pil.size 35 | orig_size = torch.tensor([w, h])[None] 36 | 37 | transforms = T.Compose([ 38 | T.Resize((640, 640)), 39 | T.ToTensor(), 40 | ]) 41 | im_data = transforms(im_pil)[None] 42 | 43 | output = sess.run( 44 | # output_names=['labels', 'boxes', 'scores'], 45 | output_names=None, 46 | input_feed={'images': im_data.data.numpy(), "orig_target_sizes": orig_size.data.numpy()} 47 | ) 48 | 49 | labels, boxes, scores = output 50 | 51 | draw([im_pil], labels, boxes, scores) 52 | 53 | 54 | if __name__ == '__main__': 55 | import argparse 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--onnx-file', type=str, ) 58 | parser.add_argument('--im-file', type=str, ) 59 | # parser.add_argument('-d', '--device', type=str, default='cpu') 60 | args = parser.parse_args() 61 | main(args) 62 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/rtdetrv2_openvino.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | # please reference: https://github.com/guojin-yan/RT-DETR-OpenVINO -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/rtdetrv2_torch.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torchvision.transforms as T 7 | 8 | import numpy as np 9 | from PIL import Image, ImageDraw 10 | 11 | from src.core import YAMLConfig 12 | 13 | 14 | def draw(images, labels, boxes, scores, thrh = 0.6): 15 | for i, im in enumerate(images): 16 | draw = ImageDraw.Draw(im) 17 | 18 | scr = scores[i] 19 | lab = labels[i][scr > thrh] 20 | box = boxes[i][scr > thrh] 21 | scrs = scores[i][scr > thrh] 22 | 23 | for j,b in enumerate(box): 24 | draw.rectangle(list(b), outline='red',) 25 | draw.text((b[0], b[1]), text=f"{lab[j].item()} {round(scrs[j].item(),2)}", fill='blue', ) 26 | 27 | im.save(f'results_{i}.jpg') 28 | 29 | 30 | def main(args, ): 31 | """main 32 | """ 33 | cfg = YAMLConfig(args.config, resume=args.resume) 34 | 35 | if args.resume: 36 | checkpoint = torch.load(args.resume, map_location='cpu') 37 | if 'ema' in checkpoint: 38 | state = checkpoint['ema']['module'] 39 | else: 40 | state = checkpoint['model'] 41 | else: 42 | raise AttributeError('Only support resume to load model.state_dict by now.') 43 | 44 | # NOTE load train mode state -> convert to deploy mode 45 | cfg.model.load_state_dict(state) 46 | 47 | class Model(nn.Module): 48 | def __init__(self, ) -> None: 49 | super().__init__() 50 | self.model = cfg.model.deploy() 51 | self.postprocessor = cfg.postprocessor.deploy() 52 | 53 | def forward(self, images, orig_target_sizes): 54 | outputs = self.model(images) 55 | outputs = self.postprocessor(outputs, orig_target_sizes) 56 | return outputs 57 | 58 | model = Model().to(args.device) 59 | 60 | im_pil = Image.open(args.im_file).convert('RGB') 61 | w, h = im_pil.size 62 | orig_size = torch.tensor([w, h])[None].to(args.device) 63 | 64 | transforms = T.Compose([ 65 | T.Resize((640, 640)), 66 | T.ToTensor(), 67 | ]) 68 | im_data = transforms(im_pil)[None].to(args.device) 69 | 70 | output = model(im_data, orig_size) 71 | labels, boxes, scores = output 72 | 73 | draw([im_pil], labels, boxes, scores) 74 | 75 | 76 | if __name__ == '__main__': 77 | import argparse 78 | parser = argparse.ArgumentParser() 79 | parser.add_argument('-c', '--config', type=str, ) 80 | parser.add_argument('-r', '--resume', type=str, ) 81 | parser.add_argument('-f', '--im-file', type=str, ) 82 | parser.add_argument('-d', '--device', type=str, default='cpu') 83 | args = parser.parse_args() 84 | main(args) 85 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.1 2 | torchvision>=0.15.2 3 | faster-coco-eval>=1.6.5 4 | PyYAML 5 | tensorboard 6 | scipy 7 | pycocotools 8 | onnx 9 | onnxruntime-gpu 10 | tensorrt==8.5.2.2 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | # for register purpose 5 | from . import optim 6 | from . import data 7 | from . import nn 8 | from . import zoo -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | from .workspace import GLOBAL_CONFIG, register, create 5 | from .yaml_utils import * 6 | from ._config import BaseConfig 7 | from .yaml_config import YAMLConfig 8 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from .dataset import * 5 | from .transforms import * 6 | from .dataloader import * 7 | 8 | from ._misc import convert_to_tv_tensor 9 | 10 | 11 | 12 | 13 | # def set_epoch(self, epoch) -> None: 14 | # self.epoch = epoch 15 | # def _set_epoch_func(datasets): 16 | # """Add `set_epoch` for datasets 17 | # """ 18 | # from ..core import register 19 | # for ds in datasets: 20 | # register(ds)(set_epoch) 21 | # _set_epoch_func([CIFAR10, VOCDetection, CocoDetection]) -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/_misc.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import importlib.metadata 5 | from torch import Tensor 6 | 7 | if importlib.metadata.version('torchvision') == '0.15.2': 8 | import torchvision 9 | torchvision.disable_beta_transforms_warning() 10 | 11 | from torchvision.datapoints import BoundingBox as BoundingBoxes 12 | from torchvision.datapoints import BoundingBoxFormat, Mask, Image, Video 13 | from torchvision.transforms.v2 import SanitizeBoundingBox as SanitizeBoundingBoxes 14 | _boxes_keys = ['format', 'spatial_size'] 15 | 16 | elif '0.17' > importlib.metadata.version('torchvision') >= '0.16': 17 | import torchvision 18 | torchvision.disable_beta_transforms_warning() 19 | 20 | from torchvision.transforms.v2 import SanitizeBoundingBoxes 21 | from torchvision.tv_tensors import ( 22 | BoundingBoxes, BoundingBoxFormat, Mask, Image, Video) 23 | _boxes_keys = ['format', 'canvas_size'] 24 | 25 | elif importlib.metadata.version('torchvision') >= '0.17': 26 | import torchvision 27 | from torchvision.transforms.v2 import SanitizeBoundingBoxes 28 | from torchvision.tv_tensors import ( 29 | BoundingBoxes, BoundingBoxFormat, Mask, Image, Video) 30 | _boxes_keys = ['format', 'canvas_size'] 31 | 32 | else: 33 | raise RuntimeError('Please make sure torchvision version >= 0.15.2') 34 | 35 | 36 | 37 | def convert_to_tv_tensor(tensor: Tensor, key: str, box_format='xyxy', spatial_size=None) -> Tensor: 38 | """ 39 | Args: 40 | tensor (Tensor): input tensor 41 | key (str): transform to key 42 | 43 | Return: 44 | Dict[str, TV_Tensor] 45 | """ 46 | assert key in ('boxes', 'masks', ), "Only support 'boxes' and 'masks'" 47 | 48 | if key == 'boxes': 49 | box_format = getattr(BoundingBoxFormat, box_format.upper()) 50 | _kwargs = dict(zip(_boxes_keys, [box_format, spatial_size])) 51 | return BoundingBoxes(tensor, **_kwargs) 52 | 53 | if key == 'masks': 54 | return Mask(tensor) 55 | 56 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | # from ._dataset import DetDataset 5 | from .cifar_dataset import CIFAR10 6 | from .coco_dataset import CocoDetection 7 | from .coco_dataset import ( 8 | CocoDetection, 9 | mscoco_category2name, 10 | mscoco_category2label, 11 | mscoco_label2category, 12 | ) 13 | from .coco_eval import CocoEvaluator 14 | from .coco_utils import get_coco_api_from_dataset 15 | from .voc_detection import VOCDetection 16 | from .voc_eval import VOCEvaluator 17 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/_dataset.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torch.utils.data as data 6 | 7 | class DetDataset(data.Dataset): 8 | def __getitem__(self, index): 9 | img, target = self.load_item(index) 10 | if self.transforms is not None: 11 | img, target, _ = self.transforms(img, target, self) 12 | return img, target 13 | 14 | def load_item(self, index): 15 | raise NotImplementedError("Please implement this function to return item before `transforms`.") 16 | 17 | def set_epoch(self, epoch) -> None: 18 | self._epoch = epoch 19 | 20 | @property 21 | def epoch(self): 22 | return self._epoch if hasattr(self, '_epoch') else -1 23 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/cifar_dataset.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torchvision 6 | from typing import Optional, Callable 7 | 8 | from ...core import register 9 | 10 | @register() 11 | class CIFAR10(torchvision.datasets.CIFAR10): 12 | __inject__ = ['transform', 'target_transform'] 13 | 14 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None: 15 | super().__init__(root, train, transform, target_transform, download) 16 | 17 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/voc_detection.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | from sympy import im 5 | import torch 6 | import torchvision 7 | import torchvision.transforms.functional as TVF 8 | 9 | import os 10 | from PIL import Image 11 | from typing import Optional, Callable 12 | 13 | try: 14 | from defusedxml.ElementTree import parse as ET_parse 15 | except ImportError: 16 | from xml.etree.ElementTree import parse as ET_parse 17 | 18 | from ._dataset import DetDataset 19 | from .._misc import convert_to_tv_tensor 20 | from ...core import register 21 | 22 | @register() 23 | class VOCDetection(torchvision.datasets.VOCDetection, DetDataset): 24 | __inject__ = ['transforms', ] 25 | 26 | def __init__(self, root: str, ann_file: str = "trainval.txt", label_file: str = "label_list.txt", transforms: Optional[Callable] = None): 27 | 28 | with open(os.path.join(root, ann_file), 'r') as f: 29 | lines = [x.strip() for x in f.readlines()] 30 | lines = [x.split(' ') for x in lines] 31 | 32 | self.images = [os.path.join(root, lin[0]) for lin in lines] 33 | self.targets = [os.path.join(root, lin[1]) for lin in lines] 34 | assert len(self.images) == len(self.targets) 35 | 36 | with open(os.path.join(root + label_file), 'r') as f: 37 | labels = f.readlines() 38 | labels = [lab.strip() for lab in labels] 39 | 40 | self.transforms = transforms 41 | self.labels_map = {lab: i for i, lab in enumerate(labels)} 42 | 43 | def __getitem__(self, index: int): 44 | image, target = self.load_item(index) 45 | if self.transforms is not None: 46 | image, target, _ = self.transforms(image, target, self) 47 | # target["orig_size"] = torch.tensor(TVF.get_image_size(image)) 48 | return image, target 49 | 50 | def load_item(self, index: int): 51 | image = Image.open(self.images[index]).convert("RGB") 52 | target = self.parse_voc_xml(ET_parse(self.annotations[index]).getroot()) 53 | 54 | output = {} 55 | output["image_id"] = torch.tensor([index]) 56 | for k in ['area', 'boxes', 'labels', 'iscrowd']: 57 | output[k] = [] 58 | 59 | for blob in target['annotation']['object']: 60 | box = [float(v) for v in blob['bndbox'].values()] 61 | output["boxes"].append(box) 62 | output["labels"].append(blob['name']) 63 | output["area"].append((box[2] - box[0]) * (box[3] - box[1])) 64 | output["iscrowd"].append(0) 65 | 66 | w, h = image.size 67 | boxes = torch.tensor(output["boxes"]) if len(output["boxes"]) > 0 else torch.zeros(0, 4) 68 | output['boxes'] = convert_to_tv_tensor(boxes, 'boxes', box_format='xyxy', spatial_size=[h, w]) 69 | output['labels'] = torch.tensor([self.labels_map[lab] for lab in output["labels"]]) 70 | output['area'] = torch.tensor(output['area']) 71 | output["iscrowd"] = torch.tensor(output["iscrowd"]) 72 | output["orig_size"] = torch.tensor([w, h]) 73 | 74 | return image, output 75 | 76 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/voc_eval.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torchvision 6 | 7 | 8 | class VOCEvaluator(object): 9 | def __init__(self) -> None: 10 | pass -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | from ._transforms import ( 6 | EmptyTransform, 7 | RandomPhotometricDistort, 8 | RandomZoomOut, 9 | RandomIoUCrop, 10 | RandomHorizontalFlip, 11 | Resize, 12 | PadToSize, 13 | SanitizeBoundingBoxes, 14 | RandomCrop, 15 | Normalize, 16 | ConvertBoxes, 17 | ConvertPILImage, 18 | ) 19 | from .container import Compose 20 | from .mosaic import Mosaic 21 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/container.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | import torchvision 8 | torchvision.disable_beta_transforms_warning() 9 | import torchvision.transforms.v2 as T 10 | 11 | from typing import Any, Dict, List, Optional 12 | 13 | from ._transforms import EmptyTransform 14 | from ...core import register, GLOBAL_CONFIG 15 | 16 | 17 | @register() 18 | class Compose(T.Compose): 19 | def __init__(self, ops, policy=None) -> None: 20 | transforms = [] 21 | if ops is not None: 22 | for op in ops: 23 | if isinstance(op, dict): 24 | name = op.pop('type') 25 | transfom = getattr(GLOBAL_CONFIG[name]['_pymodule'], GLOBAL_CONFIG[name]['_name'])(**op) 26 | transforms.append(transfom) 27 | op['type'] = name 28 | 29 | elif isinstance(op, nn.Module): 30 | transforms.append(op) 31 | 32 | else: 33 | raise ValueError('') 34 | else: 35 | transforms =[EmptyTransform(), ] 36 | 37 | super().__init__(transforms=transforms) 38 | 39 | if policy is None: 40 | policy = {'name': 'default'} 41 | 42 | self.policy = policy 43 | self.global_samples = 0 44 | 45 | def forward(self, *inputs: Any) -> Any: 46 | return self.get_forward(self.policy['name'])(*inputs) 47 | 48 | def get_forward(self, name): 49 | forwards = { 50 | 'default': self.default_forward, 51 | 'stop_epoch': self.stop_epoch_forward, 52 | 'stop_sample': self.stop_sample_forward, 53 | } 54 | return forwards[name] 55 | 56 | def default_forward(self, *inputs: Any) -> Any: 57 | sample = inputs if len(inputs) > 1 else inputs[0] 58 | for transform in self.transforms: 59 | sample = transform(sample) 60 | return sample 61 | 62 | def stop_epoch_forward(self, *inputs: Any): 63 | sample = inputs if len(inputs) > 1 else inputs[0] 64 | dataset = sample[-1] 65 | 66 | cur_epoch = dataset.epoch 67 | policy_ops = self.policy['ops'] 68 | policy_epoch = self.policy['epoch'] 69 | 70 | for transform in self.transforms: 71 | if type(transform).__name__ in policy_ops and cur_epoch >= policy_epoch: 72 | pass 73 | else: 74 | sample = transform(sample) 75 | 76 | return sample 77 | 78 | 79 | def stop_sample_forward(self, *inputs: Any): 80 | sample = inputs if len(inputs) > 1 else inputs[0] 81 | dataset = sample[-1] 82 | 83 | cur_epoch = dataset.epoch 84 | policy_ops = self.policy['ops'] 85 | policy_sample = self.policy['sample'] 86 | 87 | for transform in self.transforms: 88 | if type(transform).__name__ in policy_ops and self.global_samples >= policy_sample: 89 | pass 90 | else: 91 | sample = transform(sample) 92 | 93 | self.global_samples += 1 94 | 95 | return sample 96 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/mosaic.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torchvision 6 | torchvision.disable_beta_transforms_warning() 7 | import torchvision.transforms.v2 as T 8 | import torchvision.transforms.v2.functional as F 9 | 10 | import random 11 | from PIL import Image 12 | 13 | from .._misc import convert_to_tv_tensor 14 | from ...core import register 15 | 16 | 17 | @register() 18 | class Mosaic(T.Transform): 19 | def __init__(self, size, max_size=None, ) -> None: 20 | super().__init__() 21 | self.resize = T.Resize(size=size, max_size=max_size) 22 | self.crop = T.RandomCrop(size=max_size if max_size else size) 23 | 24 | # TODO add arg `output_size` for affine` 25 | # self.random_perspective = T.RandomPerspective(distortion_scale=0.5, p=1., ) 26 | self.random_affine = T.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.5, 1.5), fill=114) 27 | 28 | def forward(self, *inputs): 29 | inputs = inputs if len(inputs) > 1 else inputs[0] 30 | image, target, dataset = inputs 31 | 32 | images = [] 33 | targets = [] 34 | indices = random.choices(range(len(dataset)), k=3) 35 | for i in indices: 36 | image, target = dataset.load_item(i) 37 | image, target = self.resize(image, target) 38 | images.append(image) 39 | targets.append(target) 40 | 41 | h, w = F.get_spatial_size(images[0]) 42 | offset = [[0, 0], [w, 0], [0, h], [w, h]] 43 | image = Image.new(mode=images[0].mode, size=(w * 2, h * 2), color=0) 44 | for i, im in enumerate(images): 45 | image.paste(im, offset[i]) 46 | 47 | offset = torch.tensor([[0, 0], [w, 0], [0, h], [w, h]]).repeat(1, 2) 48 | target = {} 49 | for k in targets[0]: 50 | if k == 'boxes': 51 | v = [t[k] + offset[i] for i, t in enumerate(targets)] 52 | else: 53 | v = [t[k] for t in targets] 54 | 55 | if isinstance(v[0], torch.Tensor): 56 | v = torch.cat(v, dim=0) 57 | 58 | target[k] = v 59 | 60 | if 'boxes' in target: 61 | # target['boxes'] = target['boxes'].clamp(0, 640 * 2 - 1) 62 | w, h = image.size 63 | target['boxes'] = convert_to_tv_tensor(target['boxes'], 'boxes', box_format='xyxy', spatial_size=[h, w]) 64 | 65 | if 'masks' in target: 66 | target['masks'] = convert_to_tv_tensor(target['masks'], 'masks') 67 | 68 | image, target = self.random_affine(image, target) 69 | # image, target = self.resize(image, target) 70 | image, target = self.crop(image, target) 71 | 72 | return image, target, dataset 73 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/presets.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from .logger import * 5 | from .visualizer import * 6 | from .dist_utils import setup_seed, setup_print 7 | from .profiler_utils import stats 8 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/lazy_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/util/lazy_loader.py 3 | """ 4 | 5 | 6 | import types 7 | import importlib 8 | 9 | class LazyLoader(types.ModuleType): 10 | """Lazily import a module, mainly to avoid pulling in large dependencies. 
11 | 
12 |     `paddle`, and `ffmpeg` are examples of modules that are large and not always
13 |     needed, and this allows them to only be loaded when they are used.
14 |     """
15 | 
16 |     # The lint error here is incorrect.
17 |     def __init__(self, local_name, parent_module_globals, name, warning=None):
18 |         self._local_name = local_name
19 |         self._parent_module_globals = parent_module_globals
20 |         self._warning = warning
21 | 
22 |         # These members allow doctest to correctly process this module member without
23 |         # triggering self._load(). self._load() mutates parent_module_globals and
24 |         # triggers a "dict mutated during iteration" error from doctest.py.
25 |         # - for from_module()
26 |         self.__module__ = name.rsplit(".", 1)[0]
27 |         # - for is_routine()
28 |         self.__wrapped__ = None
29 | 
30 |         super(LazyLoader, self).__init__(name)
31 | 
32 |     def _load(self):
33 |         """Load the module and insert it into the parent's globals."""
34 |         # Import the target module and insert it into the parent's namespace
35 |         module = importlib.import_module(self.__name__)
36 |         self._parent_module_globals[self._local_name] = module
37 | 
38 |         # Emit a warning if one was specified
39 |         if self._warning:
40 |             # logging.warning(self._warning)
41 |             # Make sure to only warn once.
42 |             self._warning = None
43 | 
44 |         # Update this object's dict so that if someone keeps a reference to the
45 |         # LazyLoader, lookups are efficient (__getattr__ is only called on lookups
46 |         # that fail).
47 |         self.__dict__.update(module.__dict__)
48 | 
49 |         return module
50 | 
51 |     def __getattr__(self, item):
52 |         module = self._load()
53 |         return getattr(module, item)
54 | 
55 |     def __repr__(self):
56 |         # Careful not to trigger _load, since repr may be called in very
57 |         # sensitive places.
58 |         return f"<LazyLoader {self.__name__}>"
59 | 
60 |     def __dir__(self):
61 |         module = self._load()
62 |         return dir(module)
63 | 
64 | 
65 | # import paddle.nn as nn
66 | # nn = LazyLoader("nn", globals(), "paddle.nn")
67 | 
68 | # class M(nn.Layer):
69 | #     def __init__(self) -> None:
70 | #         super().__init__()
71 | 
-------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/profiler_utils.py: --------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """ 3 | 4 | import re 5 | import torch 6 | import torch.nn as nn 7 | from torch import Tensor 8 | 9 | from typing import List 10 | 11 | def stats( 12 | model: nn.Module, 13 | data: Tensor=None, 14 | input_shape: List=[1, 3, 640, 640], 15 | device: str='cpu', 16 | verbose=False) -> str: 17 | 18 | is_training = model.training 19 | 20 | model.train() 21 | num_params = sum([p.numel() for p in model.parameters() if p.requires_grad]) 22 | 23 | model.eval() 24 | model = model.to(device) 25 | 26 | if data is None: 27 | data = torch.rand(*input_shape, device=device) 28 | 29 | def trace_handler(prof): 30 | print(prof.key_averages().table( 31 | sort_by="self_cuda_time_total", row_limit=-1)) 32 | 33 | num_active = 2 34 | with torch.profiler.profile( 35 | activities=[ 36 | torch.profiler.ProfilerActivity.CPU, 37 | torch.profiler.ProfilerActivity.CUDA, 38 | ], 39 | schedule=torch.profiler.schedule( 40 | wait=1, 41 | warmup=1, 42 | active=num_active, 43 | repeat=1 44 | ), 45 | # on_trace_ready=trace_handler, 46 | # on_trace_ready=torch.profiler.tensorboard_trace_handler('./log') 47 | # with_modules=True, 48 | with_flops=True, 49 | ) as p: 50 | for _ in range(5): 51 | _ = model(data) 52 | p.step() 53 | 54 | if is_training: 55 | model.train() 56 | 57 | info = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=-1) 58 | num_flops = sum([float(v.strip()) for v in re.findall('(\d+.?\d+ *\n)', info)]) / num_active 59 | 60 | if verbose: 61 | # print(info) 62 | print(f'Total number of trainable parameters: {num_params}') 63 | print(f'Total number of flops: {int(num_flops)}M with {input_shape}') 64 | 65 | return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info} 66 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/visualizer.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torch.utils.data 6 | 7 | import torchvision 8 | torchvision.disable_beta_transforms_warning() 9 | 10 | import PIL 11 | 12 | __all__ = ['show_sample'] 13 | 14 | def show_sample(sample): 15 | """for coco dataset/dataloader 16 | """ 17 | import matplotlib.pyplot as plt 18 | from torchvision.transforms.v2 import functional as F 19 | from torchvision.utils import draw_bounding_boxes 20 | 21 | image, target = sample 22 | if isinstance(image, PIL.Image.Image): 23 | image = F.to_image_tensor(image) 24 | 25 | image = F.convert_dtype(image, torch.uint8) 26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3) 27 | 28 | fig, ax = plt.subplots() 29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy()) 30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) 31 | fig.tight_layout() 32 | fig.show() 33 | plt.show() 34 | 35 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | from .arch import * 6 | from .criterion import * 7 | from .postprocessor import * 8 | 9 | # 10 | from .backbone import * 11 | 12 | 13 | from .backbone import ( 14 | get_activation, 15 | FrozenBatchNorm2d, 16 | freeze_batch_norm2d, 17 | ) -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/arch/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | from .classification import Classification, ClassHead 6 | from .yolo import YOLO -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/arch/classification.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from ...core import register 9 | 10 | 11 | __all__ = ['Classification', 'ClassHead'] 12 | 13 | 14 | @register() 15 | class Classification(torch.nn.Module): 16 | __inject__ = ['backbone', 'head'] 17 | 18 | def __init__(self, backbone: nn.Module, head: nn.Module=None): 19 | super().__init__() 20 | 21 | self.backbone = backbone 22 | self.head = head 23 | 24 | def forward(self, x): 25 | x = self.backbone(x) 26 | 27 | if self.head is not None: 28 | x = self.head(x) 29 | 30 | return x 31 | 32 | 33 | @register() 34 | class ClassHead(nn.Module): 35 | def __init__(self, hidden_dim, num_classes): 36 | super().__init__() 37 | self.pool = nn.AdaptiveAvgPool2d(1) 38 | self.proj = nn.Linear(hidden_dim, num_classes) 39 | 40 | def forward(self, x): 41 | x = x[0] if isinstance(x, (list, tuple)) else x 42 | x = self.pool(x) 43 | x = x.reshape(x.shape[0], -1) 44 | x = self.proj(x) 45 | return x 46 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/arch/yolo.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | 6 | from ...core import register 7 | 8 | 9 | __all__ = ['YOLO', ] 10 | 11 | 12 | @register() 13 | class YOLO(torch.nn.Module): 14 | __inject__ = ['backbone', 'neck', 'head', ] 15 | 16 | def __init__(self, backbone: torch.nn.Module, neck, head): 17 | super().__init__() 18 | self.backbone = backbone 19 | self.neck = neck 20 | self.head = head 21 | 22 | def forward(self, x, **kwargs): 23 | x = self.backbone(x) 24 | x = self.neck(x) 25 | x = self.head(x) 26 | return x 27 | 28 | def deploy(self, ): 29 | self.eval() 30 | for m in self.modules(): 31 | if m is not self and hasattr(m, 'deploy'): 32 | m.deploy() 33 | return self 34 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | from .common import ( 5 | get_activation, 6 | FrozenBatchNorm2d, 7 | freeze_batch_norm2d, 8 | ) 9 | from .presnet import PResNet 10 | from .test_resnet import MResNet 11 | 12 | from .timm_model import TimmModel 13 | from .torchvision_model import TorchVisionModel 14 | 15 | from .csp_resnet import CSPResNet 16 | from .csp_darknet import CSPDarkNet, CSPPAN 17 | 18 | from .hgnetv2 import HGNetv2 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/test_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from collections import OrderedDict 6 | 7 | 8 | from ...core import register 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | expansion = 1 13 | 14 | def __init__(self, in_planes, planes, stride=1): 15 | super(BasicBlock, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False), 27 | nn.BatchNorm2d(self.expansion*planes) 28 | ) 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | out += self.shortcut(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | 38 | class _ResNet(nn.Module): 39 | def __init__(self, block, num_blocks, num_classes=10): 40 | super().__init__() 41 | self.in_planes = 64 42 | 43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(64) 45 | 46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 50 | 51 | self.linear = nn.Linear(512 * block.expansion, num_classes) 52 | 53 | def _make_layer(self, block, planes, num_blocks, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for stride in strides: 57 | layers.append(block(self.in_planes, planes, stride)) 58 | self.in_planes = planes * block.expansion 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | @register() 74 | class MResNet(nn.Module): 75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None: 76 | super().__init__() 77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes) 78 | 79 | def forward(self, x): 80 | return self.model(x) 81 | 82 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/timm_model.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | 3 | https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055#0583 4 | """ 5 | 6 | import torch 7 | from torchvision.models.feature_extraction import get_graph_node_names, create_feature_extractor 8 | 9 | from .utils import IntermediateLayerGetter 10 | from ...core import register 11 | 12 | 13 | @register() 14 | class TimmModel(torch.nn.Module): 15 | def __init__(self, \ 16 | name, 17 | return_layers, 18 | pretrained=False, 19 | exportable=True, 20 | features_only=True, 21 | **kwargs) -> None: 22 | 23 | super().__init__() 24 | 25 | import timm 26 | model = timm.create_model( 27 | name, 28 | pretrained=pretrained, 29 | exportable=exportable, 30 | features_only=features_only, 31 | **kwargs 32 | ) 33 | # nodes, _ = get_graph_node_names(model) 34 | # print(nodes) 35 | # features = {'': ''} 36 | # model = create_feature_extractor(model, return_nodes=features) 37 | 38 | assert set(return_layers).issubset(model.feature_info.module_name()), \ 39 | f'return_layers should be a subset of {model.feature_info.module_name()}' 40 | 41 | # self.model = model 42 | self.model = IntermediateLayerGetter(model, return_layers) 43 | 44 | return_idx = [model.feature_info.module_name().index(name) for name in return_layers] 45 | self.strides = [model.feature_info.reduction()[i] for i in return_idx] 46 | self.channels = [model.feature_info.channels()[i] for i in return_idx] 47 | self.return_idx = return_idx 48 | self.return_layers = return_layers 49 | 50 | def forward(self, x: torch.Tensor): 51 | outputs = self.model(x) 52 | # outputs = [outputs[i] for i in self.return_idx] 53 | return outputs 54 | 55 | 56 | if __name__ == '__main__': 57 | 58 | model = TimmModel(name='resnet34', return_layers=['layer2', 'layer3']) 59 | data = torch.rand(1, 3, 640, 640) 60 | outputs = model(data) 61 | 62 | for output in outputs: 63 | print(output.shape) 64 | 65 | """ 66 | model: 67 | type: TimmModel 68 | name: resnet34 69 | return_layers: ['layer2', 'layer4'] 70 | """ 71 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/torchvision_model.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torchvision 6 | 7 | from ...core import register 8 | from .utils import IntermediateLayerGetter 9 | 10 | __all__ = ['TorchVisionModel'] 11 | 12 | @register() 13 | class TorchVisionModel(torch.nn.Module): 14 | def __init__(self, name, return_layers, weights=None, **kwargs) -> None: 15 | super().__init__() 16 | 17 | if weights is not None: 18 | weights = getattr(torchvision.models.get_model_weights(name), weights) 19 | 20 | model = torchvision.models.get_model(name, weights=weights, **kwargs) 21 | 22 | # TODO hard code. 
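        # Note (assumption about torchvision layouts): models such as MobileNet or
        # EfficientNet wrap their stages in a `features` Sequential, while e.g.
        # ResNet registers layer1..layer4 at the top level, hence the branch below.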
23 | if hasattr(model, 'features'): 24 | model = IntermediateLayerGetter(model.features, return_layers) 25 | else: 26 | model = IntermediateLayerGetter(model, return_layers) 27 | 28 | self.model = model 29 | 30 | def forward(self, x): 31 | return self.model(x) 32 | 33 | 34 | # TorchVisionModel('swin_t', return_layers=['5', '7']) 35 | # TorchVisionModel('resnet34', return_layers=['layer2','layer3', 'layer4']) 36 | 37 | """ 38 | TorchVisionModel: 39 | name: swin_t 40 | return_layers: ['5', '7'] 41 | weights: DEFAULT 42 | 43 | 44 | model: 45 | type: TorchVisionModel 46 | name: resnet34 47 | return_layers: ['layer2','layer3', 'layer4'] 48 | weights: DEFAULT 49 | """ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py 3 | 4 | Copyright(c) 2023 lyuwenyu. All Rights Reserved. 5 | """ 6 | 7 | from collections import OrderedDict 8 | from typing import Dict, List 9 | 10 | 11 | import torch.nn as nn 12 | 13 | 14 | class IntermediateLayerGetter(nn.ModuleDict): 15 | """ 16 | Module wrapper that returns intermediate layers from a model 17 | 18 | It has a strong assumption that the modules have been registered 19 | into the model in the same order as they are used. 20 | This means that one should **not** reuse the same nn.Module 21 | twice in the forward if you want this to work. 22 | 23 | Additionally, it is only able to query submodules that are directly 24 | assigned to the model. So if `model` is passed, `model.feature1` can 25 | be returned, but not `model.feature1.layer2`. 26 | """ 27 | 28 | _version = 3 29 | 30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None: 31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]): 32 | raise ValueError("return_layers are not present in model. {}"\ 33 | .format([name for name, _ in model.named_children()])) 34 | orig_return_layers = return_layers 35 | return_layers = {str(k): str(k) for k in return_layers} 36 | layers = OrderedDict() 37 | for name, module in model.named_children(): 38 | layers[name] = module 39 | if name in return_layers: 40 | del return_layers[name] 41 | if not return_layers: 42 | break 43 | 44 | super().__init__(layers) 45 | self.return_layers = orig_return_layers 46 | 47 | def forward(self, x): 48 | outputs = [] 49 | for name, module in self.items(): 50 | x = module(x) 51 | if name in self.return_layers: 52 | outputs.append(x) 53 | 54 | return outputs 55 | 56 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/criterion/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torch.nn as nn 6 | from ...core import register 7 | 8 | from .det_criterion import DetCriterion 9 | 10 | CrossEntropyLoss = register()(nn.CrossEntropyLoss) 11 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | from .nms_postprocessor import DetNMSPostProcessor -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/box_revert.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torchvision 6 | from torch import Tensor 7 | from enum import Enum 8 | 9 | 10 | class BoxProcessFormat(Enum): 11 | """Box process format 12 | 13 | Available formats are 14 | * ``RESIZE`` 15 | * ``RESIZE_KEEP_RATIO`` 16 | * ``RESIZE_KEEP_RATIO_PADDING`` 17 | """ 18 | RESIZE = 1 19 | RESIZE_KEEP_RATIO = 2 20 | RESIZE_KEEP_RATIO_PADDING = 3 21 | 22 | 23 | def box_revert( 24 | boxes: Tensor, 25 | orig_sizes: Tensor=None, 26 | eval_sizes: Tensor=None, 27 | inpt_sizes: Tensor=None, 28 | inpt_padding: Tensor=None, 29 | normalized: bool=True, 30 | in_fmt: str='cxcywh', 31 | out_fmt: str='xyxy', 32 | process_fmt=BoxProcessFormat.RESIZE, 33 | ) -> Tensor: 34 | """ 35 | Args: 36 | boxes(Tensor), [N, :, 4], (x1, y1, x2, y2), pred boxes. 37 | inpt_sizes(Tensor), [N, 2], (w, h). input sizes. 38 | orig_sizes(Tensor), [N, 2], (w, h). origin sizes. 39 | inpt_padding (Tensor), [N, 2], (w_pad, h_pad, ...). 40 | (inpt_sizes + inpt_padding) == eval_sizes 41 | """ 42 | assert in_fmt in ('cxcywh', 'xyxy'), '' 43 | 44 | if normalized and eval_sizes is not None: 45 | boxes = boxes * eval_sizes.repeat(1, 2).unsqueeze(1) 46 | 47 | if inpt_padding is not None: 48 | if in_fmt == 'xyxy': 49 | boxes -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1) 50 | elif in_fmt == 'cxcywh': 51 | boxes[..., :2] -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1) 52 | 53 | if orig_sizes is not None: 54 | orig_sizes = orig_sizes.repeat(1, 2).unsqueeze(1) 55 | if inpt_sizes is not None: 56 | inpt_sizes = inpt_sizes.repeat(1, 2).unsqueeze(1) 57 | boxes = boxes * (orig_sizes / inpt_sizes) 58 | else: 59 | boxes = boxes * orig_sizes 60 | 61 | boxes = torchvision.ops.box_convert(boxes, in_fmt=in_fmt, out_fmt=out_fmt) 62 | return boxes 63 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/detr_postprocessor.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import torchvision 9 | 10 | 11 | __all__ = ['DetDETRPostProcessor'] 12 | 13 | from .box_revert import box_revert 14 | from .box_revert import BoxProcessFormat 15 | 16 | def mod(a, b): 17 | out = a - a // b * b 18 | return out 19 | 20 | class DetDETRPostProcessor(nn.Module): 21 | def __init__( 22 | self, 23 | num_classes=80, 24 | use_focal_loss=True, 25 | num_top_queries=300, 26 | box_process_format=BoxProcessFormat.RESIZE, 27 | ) -> None: 28 | super().__init__() 29 | self.use_focal_loss = use_focal_loss 30 | self.num_top_queries = num_top_queries 31 | self.num_classes = int(num_classes) 32 | self.box_process_format = box_process_format 33 | self.deploy_mode = False 34 | 35 | def extra_repr(self) -> str: 36 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}' 37 | 38 | def forward(self, outputs, **kwargs): 39 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] 40 | 41 | if self.use_focal_loss: 42 | scores = F.sigmoid(logits) 43 | scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1) 44 | labels = index % self.num_classes 45 | # labels = mod(index, self.num_classes) # for tensorrt 46 | index = index // self.num_classes 47 | boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1])) 48 | 49 | else: 50 | scores = F.softmax(logits)[:, :, :-1] 51 | scores, labels = scores.max(dim=-1) 52 | if scores.shape[1] > self.num_top_queries: 53 | scores, index = torch.topk(scores, self.num_top_queries, dim=-1) 54 | labels = torch.gather(labels, dim=1, index=index) 55 | boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1])) 56 | 57 | if kwargs is not None: 58 | boxes = box_revert( 59 | boxes, 60 | in_fmt='cxcywh', 61 | out_fmt='xyxy', 62 | process_fmt=self.box_process_format, 63 | normalized=True, 64 | **kwargs, 65 | ) 66 | 67 | # TODO for onnx export 68 | if self.deploy_mode: 69 | return labels, boxes, scores 70 | 71 | results = [] 72 | for lab, box, sco in zip(labels, boxes, scores): 73 | result = dict(labels=lab, boxes=box, scores=sco) 74 | results.append(result) 75 | 76 | return results 77 | 78 | def deploy(self, ): 79 | self.eval() 80 | self.deploy_mode = True 81 | return self 82 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/nms_postprocessor.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | import torch.distributed 7 | import torchvision 8 | from torch import Tensor 9 | 10 | from ...core import register 11 | 12 | from typing import Dict 13 | 14 | 15 | __all__ = ['DetNMSPostProcessor', ] 16 | 17 | 18 | @register() 19 | class DetNMSPostProcessor(torch.nn.Module): 20 | def __init__(self, \ 21 | iou_threshold=0.7, 22 | score_threshold=0.01, 23 | keep_topk=300, 24 | box_fmt='cxcywh', 25 | logit_fmt='sigmoid') -> None: 26 | super().__init__() 27 | self.iou_threshold = iou_threshold 28 | self.score_threshold = score_threshold 29 | self.keep_topk = keep_topk 30 | self.box_fmt = box_fmt.lower() 31 | self.logit_fmt = logit_fmt.lower() 32 | self.logit_func = getattr(F, self.logit_fmt, None) 33 | self.deploy_mode = False 34 | 35 | def forward(self, outputs: Dict[str, Tensor], orig_target_sizes: Tensor): 36 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] 37 | pred_boxes = torchvision.ops.box_convert(boxes, in_fmt=self.box_fmt, out_fmt='xyxy') 38 | pred_boxes *= orig_target_sizes.repeat(1, 2).unsqueeze(1) 39 | 40 | values, pred_labels = torch.max(logits, dim=-1) 41 | 42 | if self.logit_func: 43 | pred_scores = self.logit_func(values) 44 | else: 45 | pred_scores = values 46 | 47 | # TODO for onnx export 48 | if self.deploy_mode: 49 | blobs = { 50 | 'pred_labels': pred_labels, 51 | 'pred_boxes': pred_boxes, 52 | 'pred_scores': pred_scores 53 | } 54 | return blobs 55 | 56 | results = [] 57 | for i in range(logits.shape[0]): 58 | score_keep = pred_scores[i] > self.score_threshold 59 | pred_box = pred_boxes[i][score_keep] 60 | pred_label = pred_labels[i][score_keep] 61 | pred_score = pred_scores[i][score_keep] 62 | 63 | keep = torchvision.ops.batched_nms(pred_box, pred_score, pred_label, self.iou_threshold) 64 | keep = keep[:self.keep_topk] 65 | 66 | blob = { 67 | 'labels': pred_label[keep], 68 | 'boxes': pred_box[keep], 69 | 'scores': pred_score[keep], 70 | } 71 | 72 | results.append(blob) 73 | 74 | return results 75 | 76 | def deploy(self, ): 77 | self.eval() 78 | self.deploy_mode = True 79 | return self 80 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from .ema import * 5 | from .optim import * 6 | from .amp import * 7 | from .warmup import * -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/amp.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torch.cuda.amp as amp 6 | 7 | from ..core import register 8 | 9 | 10 | __all__ = ['GradScaler'] 11 | 12 | GradScaler = register()(amp.grad_scaler.GradScaler) 13 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/optim.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | import torch.optim as optim 6 | import torch.optim.lr_scheduler as lr_scheduler 7 | 8 | from ..core import register 9 | 10 | 11 | __all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR'] 12 | 13 | 14 | 15 | SGD = register()(optim.SGD) 16 | Adam = register()(optim.Adam) 17 | AdamW = register()(optim.AdamW) 18 | 19 | 20 | MultiStepLR = register()(lr_scheduler.MultiStepLR) 21 | CosineAnnealingLR = register()(lr_scheduler.CosineAnnealingLR) 22 | OneCycleLR = register()(lr_scheduler.OneCycleLR) 23 | LambdaLR = register()(lr_scheduler.LambdaLR) 24 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/warmup.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from torch.optim.lr_scheduler import LRScheduler 5 | 6 | from ..core import register 7 | 8 | 9 | class Warmup(object): 10 | def __init__(self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int=-1) -> None: 11 | self.lr_scheduler = lr_scheduler 12 | self.warmup_end_values = [pg['lr'] for pg in lr_scheduler.optimizer.param_groups] 13 | self.last_step = last_step 14 | self.warmup_duration = warmup_duration 15 | self.step() 16 | 17 | def state_dict(self): 18 | return {k: v for k, v in self.__dict__.items() if k != 'lr_scheduler'} 19 | 20 | def load_state_dict(self, state_dict): 21 | self.__dict__.update(state_dict) 22 | 23 | def get_warmup_factor(self, step, **kwargs): 24 | raise NotImplementedError 25 | 26 | def step(self, ): 27 | self.last_step += 1 28 | if self.last_step >= self.warmup_duration: 29 | return 30 | factor = self.get_warmup_factor(self.last_step) 31 | for i, pg in enumerate(self.lr_scheduler.optimizer.param_groups): 32 | pg['lr'] = factor * self.warmup_end_values[i] 33 | 34 | def finished(self, ): 35 | if self.last_step >= self.warmup_duration: 36 | return True 37 | return False 38 | 39 | 40 | @register() 41 | class LinearWarmup(Warmup): 42 | def __init__(self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int = -1) -> None: 43 | super().__init__(lr_scheduler, warmup_duration, last_step) 44 | 45 | def get_warmup_factor(self, step): 46 | return min(1.0, (step + 1) / self.warmup_duration) 47 | 48 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/solver/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from ._solver import BaseSolver 5 | from .clas_solver import ClasSolver 6 | from .det_solver import DetSolver 7 | 8 | 9 | 10 | from typing import Dict 11 | 12 | TASKS :Dict[str, BaseSolver] = { 13 | 'classification': ClasSolver, 14 | 'detection': DetSolver, 15 | } -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/solver/clas_engine.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from ..misc import (MetricLogger, SmoothedValue, reduce_dict) 8 | 9 | 10 | def train_one_epoch(model: nn.Module, criterion: nn.Module, dataloader, optimizer, ema, epoch, device): 11 | """ 12 | """ 13 | model.train() 14 | 15 | metric_logger = MetricLogger(delimiter=" ") 16 | metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}')) 17 | print_freq = 100 18 | header = 'Epoch: [{}]'.format(epoch) 19 | 20 | for imgs, labels in metric_logger.log_every(dataloader, print_freq, header): 21 | imgs = imgs.to(device) 22 | labels = labels.to(device) 23 | 24 | preds = model(imgs) 25 | loss: torch.Tensor = criterion(preds, labels) 26 | 27 | optimizer.zero_grad() 28 | loss.backward() 29 | optimizer.step() 30 | 31 | if ema is not None: 32 | ema.update(model) 33 | 34 | loss_reduced_values = {k: v.item() for k, v in reduce_dict({'loss': loss}).items()} 35 | metric_logger.update(**loss_reduced_values) 36 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 37 | 38 | metric_logger.synchronize_between_processes() 39 | print("Averaged stats:", metric_logger) 40 | 41 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 42 | return stats 43 | 44 | 45 | 46 | @torch.no_grad() 47 | def evaluate(model, criterion, dataloader, device): 48 | model.eval() 49 | 50 | metric_logger = MetricLogger(delimiter=" ") 51 | # metric_logger.add_meter('acc', SmoothedValue(window_size=1, fmt='{global_avg:.4f}')) 52 | # metric_logger.add_meter('loss', SmoothedValue(window_size=1, fmt='{value:.2f}')) 53 | metric_logger.add_meter('acc', SmoothedValue(window_size=1)) 54 | metric_logger.add_meter('loss', SmoothedValue(window_size=1)) 55 | 56 | header = 'Test:' 57 | for imgs, labels in metric_logger.log_every(dataloader, 10, header): 58 | imgs, labels = imgs.to(device), labels.to(device) 59 | preds = model(imgs) 60 | 61 | acc = (preds.argmax(dim=-1) == labels).sum() / preds.shape[0] 62 | loss = criterion(preds, labels) 63 | 64 | dict_reduced = reduce_dict({'acc': acc, 'loss': loss}) 65 | reduced_values = {k: v.item() for k, v in dict_reduced.items()} 66 | metric_logger.update(**reduced_values) 67 | 68 | metric_logger.synchronize_between_processes() 69 | print("Averaged stats:", metric_logger) 70 | 71 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 72 | return stats 73 | 74 | 75 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/solver/clas_solver.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import time 5 | import json 6 | import datetime 7 | from pathlib import Path 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from ..misc import dist_utils 13 | from ._solver import BaseSolver 14 | from .clas_engine import train_one_epoch, evaluate 15 | 16 | 17 | class ClasSolver(BaseSolver): 18 | 19 | def fit(self, ): 20 | print("Start training") 21 | self.train() 22 | args = self.cfg 23 | 24 | n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad) 25 | print('Number of params:', n_parameters) 26 | 27 | output_dir = Path(args.output_dir) 28 | output_dir.mkdir(exist_ok=True) 29 | 30 | start_time = time.time() 31 | start_epoch = self.last_epoch + 1 32 | for epoch in range(start_epoch, args.epoches): 33 | 34 | if dist_utils.is_dist_available_and_initialized(): 35 | self.train_dataloader.sampler.set_epoch(epoch) 36 | 37 | train_stats = train_one_epoch(self.model, 38 | self.criterion, 39 | self.train_dataloader, 40 | self.optimizer, 41 | self.ema, 42 | epoch=epoch, 43 | device=self.device) 44 | self.lr_scheduler.step() 45 | self.last_epoch += 1 46 | 47 | if output_dir: 48 | checkpoint_paths = [output_dir / 'checkpoint.pth'] 49 | # extra checkpoint before LR drop and every 100 epochs 50 | if (epoch + 1) % args.checkpoint_freq == 0: 51 | checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth') 52 | for checkpoint_path in checkpoint_paths: 53 | dist_utils.save_on_master(self.state_dict(epoch), checkpoint_path) 54 | 55 | module = self.ema.module if self.ema else self.model 56 | test_stats = evaluate(module, self.criterion, self.val_dataloader, self.device) 57 | 58 | log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, 59 | **{f'test_{k}': v for k, v in test_stats.items()}, 60 | 'epoch': epoch, 61 | 'n_parameters': n_parameters} 62 | 63 | if output_dir and dist_utils.is_main_process(): 64 | with (output_dir / "log.txt").open("a") as f: 65 | f.write(json.dumps(log_stats) + "\n") 66 | 67 | total_time = time.time() - start_time 68 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 69 | print('Training time {}'.format(total_time_str)) 70 | 71 | 72 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | from . import rtdetr 6 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | from .rtdetr import RTDETR 6 | from .matcher import HungarianMatcher 7 | from .hybrid_encoder import HybridEncoder 8 | from .rtdetr_decoder import RTDETRTransformer 9 | from .rtdetr_criterion import RTDETRCriterion 10 | from .rtdetr_postprocessor import RTDETRPostProcessor 11 | 12 | # v2 13 | from .rtdetrv2_decoder import RTDETRTransformerv2 14 | from .rtdetrv2_criterion import RTDETRCriterionv2 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/box_ops.py: -------------------------------------------------------------------------------- 1 | """ 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py 4 | """ 5 | 6 | import torch 7 | from torch import Tensor 8 | from torchvision.ops.boxes import box_area 9 | 10 | 11 | def box_cxcywh_to_xyxy(x: Tensor) -> Tensor: 12 | x_c, y_c, w, h = x.unbind(-1) 13 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 14 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 15 | return torch.stack(b, dim=-1) 16 | 17 | 18 | def box_xyxy_to_cxcywh(x: Tensor) -> Tensor: 19 | x0, y0, x1, y1 = x.unbind(-1) 20 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 21 | (x1 - x0), (y1 - y0)] 22 | return torch.stack(b, dim=-1) 23 | 24 | 25 | # modified from torchvision to also return the union 26 | def box_iou(boxes1: Tensor, boxes2: Tensor): 27 | area1 = box_area(boxes1) 28 | area2 = box_area(boxes2) 29 | 30 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 31 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 32 | 33 | wh = (rb - lt).clamp(min=0) # [N,M,2] 34 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 35 | 36 | union = area1[:, None] + area2 - inter 37 | 38 | iou = inter / union 39 | return iou, union 40 | 41 | 42 | def generalized_box_iou(boxes1, boxes2): 43 | """ 44 | Generalized IoU from https://giou.stanford.edu/ 45 | 46 | The boxes should be in [x0, y0, x1, y1] format 47 | 48 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 49 | and M = len(boxes2) 50 | """ 51 | # degenerate boxes gives inf / nan results 52 | # so do an early check 53 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 54 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 55 | iou, union = box_iou(boxes1, boxes2) 56 | 57 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 58 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 59 | 60 | wh = (rb - lt).clamp(min=0) # [N,M,2] 61 | area = wh[:, :, 0] * wh[:, :, 1] 62 | 63 | return iou - (area - union) / area 64 | 65 | 66 | def masks_to_boxes(masks): 67 | """Compute the bounding boxes around the provided masks 68 | 69 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 70 | 71 | Returns a [N, 4] tensors, with the boxes in xyxy format 72 | """ 73 | if masks.numel() == 0: 74 | return torch.zeros((0, 4), device=masks.device) 75 | 76 | h, w = masks.shape[-2:] 77 | 78 | y = torch.arange(0, h, dtype=torch.float) 79 | x = torch.arange(0, w, dtype=torch.float) 80 | y, x = torch.meshgrid(y, x) 81 | 82 | x_mask = (masks * x.unsqueeze(0)) 83 | x_max = x_mask.flatten(1).max(-1)[0] 84 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 85 | 86 | y_mask = (masks * y.unsqueeze(0)) 87 | y_max = y_mask.flatten(1).max(-1)[0] 88 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 89 | 90 | return torch.stack([x_min, y_min, x_max, y_max], 1) -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/conver_params.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | 6 | def main(args) -> None: 7 | import cvperception 8 | from cvperception.core import load_config, merge_config, create 9 | cfg = load_config(args.config) 10 | model: torch.nn.Module = create(cfg['model'], merge_config(cfg)) 11 | 12 | if args.version == 1: 13 | state = model.state_dict() 14 | keys = [k for k in state.keys() if 'num_batches_tracked' not in k] 15 | 16 | elif args.version == 2: 17 | state = model.state_dict() 18 | ignore_keys = ['anchors', 'valid_mask', 'num_points_scale'] 19 | keys = [k for k in state.keys() if 'num_batches_tracked' not in k] 20 | keys = [k for k in keys if not any([x in k for x in ignore_keys])] 21 | 22 | import paddle 23 | p_state = paddle.load(args.pdparams) 24 | pkeys = list(p_state.keys()) 25 | 26 | assert len(keys) == len(pkeys), f'{len(keys)}, {len(pkeys)}' 27 | 28 | new_state = {} 29 | for i, k in enumerate(keys): 30 | pp = p_state[pkeys[i]] 31 | pp = torch.tensor(pp.numpy()) 32 | 33 | if 'denoising_class_embed' in k: 34 | new_state[k] = torch.concat([pp, torch.zeros(1, pp.shape[-1])], dim=0) 35 | continue 36 | 37 | tp = state[k] 38 | if len(tp.shape) == 2: 39 | new_state[k] = pp.T 40 | elif len(tp.shape) == 1: 41 | new_state[k] = pp 42 | else: 43 | assert tp.shape == pp.shape, f'{k}, {pp.shape}, {tp.shape}' 44 | new_state[k] = pp 45 | 46 | assert len(new_state) == len(p_state), '' 47 | 48 | # checkpoint = {'ema': {'module': new_state, }} 49 | # torch.save(checkpoint, args.output_file) 50 | 51 | model.load_state_dict(new_state, strict=False) 52 | 53 | checkpoint = {'ema': {'module': model.state_dict(), }} 54 | torch.save(checkpoint, args.output_file) 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | import argparse 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('-c', '--config', type=str, ) 62 | parser.add_argument('-p', '--pdparams', type=str, ) 63 | parser.add_argument('-o', '--output_file', type=str, ) 64 | parser.add_argument('-v', '--version', type=int, default=1) 65 | 66 | args = parser.parse_args() 67 | main(args) 68 | 69 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -p rtdetr_r18vd_dec3_6x_coco.pdparams -o rtdetr_r18vd_dec3_6x_coco_new.pth 70 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -p rtdetr_r18vd_5x_coco_objects365.pdparams -o rtdetr_r18vd_5x_coco_objects365_new.pth 71 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetrv2/rtdetrv2_r50vd_120e_coco.yml -p rtdetr_r50vd_1x_objects365.pdparams -o rtdetrv2_r50vd_1x_objects365_new.pth -v 2 72 | 73 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import random 9 | import numpy as np 10 | from typing import List 11 | 12 | from ...core import register 13 | 14 | 15 | __all__ = ['RTDETR', ] 16 | 17 | 18 | @register() 19 | class RTDETR(nn.Module): 20 | __inject__ = ['backbone', 'encoder', 'decoder', ] 21 | 22 | def __init__(self, \ 23 | backbone: nn.Module, 24 | encoder: nn.Module, 25 | decoder: nn.Module, 26 | ): 27 | super().__init__() 28 | self.backbone = backbone 29 | self.decoder = decoder 30 | self.encoder = encoder 31 | 32 | def forward(self, x, targets=None): 33 | x = self.backbone(x) 34 | x = self.encoder(x) 35 | x = self.decoder(x, targets) 36 | 37 | return x 38 | 39 | def deploy(self, ): 40 | self.eval() 41 | for m in self.modules(): 42 | if hasattr(m, 'convert_to_deploy'): 43 | m.convert_to_deploy() 44 | return self 45 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/tools/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | Train/test script examples 4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master-port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &` 5 | - `-r path/to/checkpoint` 6 | - `--amp` 7 | - `--test-only` 8 | 9 | 10 | Export script examples 11 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check` 12 | 13 | 14 | Gpu do not release memory 15 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9` 16 | 17 | 18 | Save all logs 19 | - Appending `&> train.log 2>&1 &` or `&> train.log 2>&1` 20 | 21 | 22 | Tensorboard 23 | - `--summary-dir=/path/to/summary/dir` or `-u summary_dir=/path/to/summary/dir` 24 | - `tensorboard --host=ip --port=8989 --logdir=/path/to/summary/` -------------------------------------------------------------------------------- /rtdetrv2_pytorch/tools/run_profile.py: -------------------------------------------------------------------------------- 1 | 2 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch import Tensor 8 | 9 | import re 10 | import os 11 | import sys 12 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) 13 | from src.core import YAMLConfig, yaml_utils 14 | from src.solver import TASKS 15 | 16 | from typing import Dict, List, Optional, Any 17 | 18 | 19 | __all__ = ["profile_stats"] 20 | 21 | def profile_stats( 22 | model: nn.Module, 23 | data: Optional[Tensor]=None, 24 | shape: List[int]=[1, 3, 640, 640], 25 | verbose: bool=False 26 | ) -> Dict[str, Any]: 27 | 28 | is_training = model.training 29 | 30 | model.train() 31 | num_params = sum([p.numel() for p in model.parameters() if p.requires_grad]) 32 | 33 | model.eval() 34 | 35 | if data is None: 36 | dtype = next(model.parameters()).dtype 37 | device = next(model.parameters()).device 38 | data = torch.rand(*shape, dtype=dtype, device=device) 39 | print(device) 40 | 41 | def trace_handler(prof): 42 | print(prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1)) 43 | 44 | wait = 0 45 | warmup = 1 46 | active = 1 47 | repeat = 1 48 | skip_first = 0 49 | with torch.profiler.profile( 50 | activities=[ 51 | torch.profiler.ProfilerActivity.CPU, 52 | torch.profiler.ProfilerActivity.CUDA, 53 | ], 54 | schedule=torch.profiler.schedule( 55 | wait=wait, 56 | warmup=warmup, 57 | active=active, 58 | repeat=repeat, 59 | skip_first=skip_first, 60 | ), 61 | with_flops=True, 62 | ) as p: 63 | n_step = skip_first + (wait + warmup + active) * repeat 64 | for _ in range(n_step): 65 | _ = model(data) 66 | p.step() 67 | 68 | if is_training: 69 | model.train() 70 | 71 | info = p.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1) 72 | num_flops = sum([float(v.strip()) for v in re.findall('(\d+.?\d+ *\n)', info)]) / active 73 | 74 | if verbose: 75 | print(info) 76 | print(f'Total number of trainable parameters: {num_params}') 77 | print(f'Total number of flops: {int(num_flops)}M with {shape}') 78 | 79 | return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info} 80 | 81 | 82 | 83 | if __name__ == "__main__": 84 | import argparse 85 | parser = argparse.ArgumentParser() 86 | parser.add_argument('-c', '--config', type=str, required=True) 87 | parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',) 88 | args = parser.parse_args() 89 | 90 | cfg = YAMLConfig(args.config, device=args.device) 91 | model = cfg.model.to(args.device) 92 | 93 | profile_stats(model, verbose=True) 94 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/tools/train.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import os 5 | import sys 6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | 8 | import argparse 9 | 10 | from src.misc import dist_utils 11 | from src.core import YAMLConfig, yaml_utils 12 | from src.solver import TASKS 13 | 14 | 15 | def main(args, ) -> None: 16 | """main 17 | """ 18 | dist_utils.setup_distributed(args.print_rank, args.print_method, seed=args.seed) 19 | 20 | assert not all([args.tuning, args.resume]), \ 21 | 'Only support from_scrach or resume or tuning at one time' 22 | 23 | update_dict = yaml_utils.parse_cli(args.update) 24 | update_dict.update({k: v for k, v in args.__dict__.items() \ 25 | if k not in ['update', ] and v is not None}) 26 | 27 | cfg = YAMLConfig(args.config, **update_dict) 28 | print('cfg: ', cfg.__dict__) 29 | 30 | solver = TASKS[cfg.yaml_cfg['task']](cfg) 31 | 32 | if args.test_only: 33 | solver.val() 34 | else: 35 | solver.fit() 36 | 37 | dist_utils.cleanup() 38 | 39 | 40 | if __name__ == '__main__': 41 | 42 | parser = argparse.ArgumentParser() 43 | 44 | # priority 0 45 | parser.add_argument('-c', '--config', type=str, required=True) 46 | parser.add_argument('-r', '--resume', type=str, help='resume from checkpoint') 47 | parser.add_argument('-t', '--tuning', type=str, help='tuning from checkpoint') 48 | parser.add_argument('-d', '--device', type=str, help='device',) 49 | parser.add_argument('--seed', type=int, help='exp reproducibility') 50 | parser.add_argument('--use-amp', action='store_true', help='auto mixed precision training') 51 | parser.add_argument('--output-dir', type=str, help='output directoy') 52 | parser.add_argument('--summary-dir', type=str, help='tensorboard summry') 53 | parser.add_argument('--test-only', action='store_true', default=False,) 54 | 55 | # priority 1 56 | parser.add_argument('-u', '--update', nargs='+', help='update yaml config') 57 | 58 | # env 59 | parser.add_argument('--print-method', type=str, default='builtin', help='print method') 60 | parser.add_argument('--print-rank', type=int, default=0, help='print rank id') 61 | 62 | parser.add_argument('--local-rank', type=int, help='local rank id') 63 | args = parser.parse_args() 64 | 65 | main(args) 66 | --------------------------------------------------------------------------------