├── .github
│   ├── FUNDING.yml
│   └── ISSUE_TEMPLATE
│       └── bug_report.md
├── .gitignore
├── LICENSE
├── README.md
├── README_cn.md
├── benchmark
│   ├── README.md
│   ├── dataset.py
│   ├── trtexec.md
│   ├── trtinfer.py
│   ├── utils.py
│   └── yolov8_onnx.py
├── hubconf.py
├── rtdetr_paddle
│   ├── README.md
│   ├── README_cn.md
│   ├── configs
│   │   ├── datasets
│   │   │   ├── coco_detection.yml
│   │   │   └── voc.yml
│   │   ├── rtdetr
│   │   │   ├── _base_
│   │   │   │   ├── optimizer_6x.yml
│   │   │   │   ├── rtdetr_r50vd.yml
│   │   │   │   └── rtdetr_reader.yml
│   │   │   ├── rtdetr_hgnetv2_l_6x_coco.yml
│   │   │   ├── rtdetr_hgnetv2_x_6x_coco.yml
│   │   │   ├── rtdetr_r101vd_6x_coco.yml
│   │   │   ├── rtdetr_r18vd_6x_coco.yml
│   │   │   ├── rtdetr_r34vd_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_6x_coco.yml
│   │   │   └── rtdetr_r50vd_m_6x_coco.yml
│   │   └── runtime.yml
│   ├── dataset
│   │   ├── coco
│   │   │   └── download_coco.py
│   │   └── voc
│   │       ├── create_list.py
│   │       ├── download_voc.py
│   │       └── label_list.txt
│   ├── ppdet
│   │   ├── __init__.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── config
│   │   │   │   ├── __init__.py
│   │   │   │   ├── schema.py
│   │   │   │   └── yaml_helpers.py
│   │   │   └── workspace.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── reader.py
│   │   │   ├── shm_utils.py
│   │   │   ├── source
│   │   │   │   ├── __init__.py
│   │   │   │   ├── category.py
│   │   │   │   ├── coco.py
│   │   │   │   ├── dataset.py
│   │   │   │   └── voc.py
│   │   │   ├── transform
│   │   │   │   ├── __init__.py
│   │   │   │   ├── batch_operators.py
│   │   │   │   ├── op_helper.py
│   │   │   │   └── operators.py
│   │   │   └── utils.py
│   │   ├── engine
│   │   │   ├── __init__.py
│   │   │   ├── callbacks.py
│   │   │   ├── env.py
│   │   │   ├── export_utils.py
│   │   │   └── trainer.py
│   │   ├── metrics
│   │   │   ├── __init__.py
│   │   │   ├── coco_utils.py
│   │   │   ├── json_results.py
│   │   │   ├── keypoint_metrics.py
│   │   │   ├── map_utils.py
│   │   │   ├── mcmot_metrics.py
│   │   │   ├── metrics.py
│   │   │   ├── mot_metrics.py
│   │   │   ├── munkres.py
│   │   │   ├── pose3d_metrics.py
│   │   │   └── widerface_utils.py
│   │   ├── modeling
│   │   │   ├── __init__.py
│   │   │   ├── architectures
│   │   │   │   ├── __init__.py
│   │   │   │   ├── detr.py
│   │   │   │   └── meta_arch.py
│   │   │   ├── backbones
│   │   │   │   ├── __init__.py
│   │   │   │   ├── convnext.py
│   │   │   │   ├── csp_darknet.py
│   │   │   │   ├── cspresnet.py
│   │   │   │   ├── darknet.py
│   │   │   │   ├── focalnet.py
│   │   │   │   ├── hgnet_v2.py
│   │   │   │   ├── lcnet.py
│   │   │   │   ├── mobilenet_v1.py
│   │   │   │   ├── mobilenet_v3.py
│   │   │   │   ├── mobileone.py
│   │   │   │   ├── name_adapter.py
│   │   │   │   ├── resnet.py
│   │   │   │   ├── shufflenet_v2.py
│   │   │   │   ├── swin_transformer.py
│   │   │   │   ├── trans_encoder.py
│   │   │   │   ├── transformer_utils.py
│   │   │   │   ├── vision_transformer.py
│   │   │   │   └── vit_mae.py
│   │   │   ├── bbox_utils.py
│   │   │   ├── cls_utils.py
│   │   │   ├── heads
│   │   │   │   ├── __init__.py
│   │   │   │   └── detr_head.py
│   │   │   ├── initializer.py
│   │   │   ├── keypoint_utils.py
│   │   │   ├── layers.py
│   │   │   ├── losses
│   │   │   │   ├── __init__.py
│   │   │   │   ├── detr_loss.py
│   │   │   │   ├── focal_loss.py
│   │   │   │   ├── gfocal_loss.py
│   │   │   │   ├── iou_loss.py
│   │   │   │   ├── smooth_l1_loss.py
│   │   │   │   └── varifocal_loss.py
│   │   │   ├── ops.py
│   │   │   ├── post_process.py
│   │   │   ├── shape_spec.py
│   │   │   └── transformers
│   │   │       ├── __init__.py
│   │   │       ├── deformable_transformer.py
│   │   │       ├── detr_transformer.py
│   │   │       ├── dino_transformer.py
│   │   │       ├── ext_op
│   │   │       │   ├── README.md
│   │   │       │   ├── ms_deformable_attn_op.cc
│   │   │       │   ├── ms_deformable_attn_op.cu
│   │   │       │   ├── setup_ms_deformable_attn_op.py
│   │   │       │   └── test_ms_deformable_attn_op.py
│   │   │       ├── hybrid_encoder.py
│   │   │       ├── matchers.py
│   │   │       ├── position_encoding.py
│   │   │       ├── rtdetr_transformer.py
│   │   │       └── utils.py
│   │   ├── optimizer
│   │   │   ├── __init__.py
│   │   │   ├── ema.py
│   │   │   ├── optimizer.py
│   │   │   └── utils.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── cam_utils.py
│   │   │   ├── check.py
│   │   │   ├── checkpoint.py
│   │   │   ├── cli.py
│   │   │   ├── colormap.py
│   │   │   ├── download.py
│   │   │   ├── fuse_utils.py
│   │   │   ├── logger.py
│   │   │   ├── profiler.py
│   │   │   ├── stats.py
│   │   │   ├── visualizer.py
│   │   │   └── voc_utils.py
│   │   └── version.py
│   ├── requirements.txt
│   └── tools
│       ├── eval.py
│       ├── export_model.py
│       ├── infer.py
│       ├── slice_image.py
│       ├── train.py
│       └── x2coco.py
├── rtdetr_pytorch
│   ├── README.md
│   ├── configs
│   │   ├── dataset
│   │   │   └── coco_detection.yml
│   │   ├── rtdetr
│   │   │   ├── include
│   │   │   │   ├── dataloader.yml
│   │   │   │   ├── dataloader_regnet.yml
│   │   │   │   ├── optimizer.yml
│   │   │   │   ├── optimizer_regnet.yml
│   │   │   │   ├── rtdetr_dla34.yml
│   │   │   │   ├── rtdetr_r50vd.yml
│   │   │   │   └── rtdetr_regnet.yml
│   │   │   ├── rtdetr_dla34_6x_coco.yml
│   │   │   ├── rtdetr_r101vd_6x_coco.yml
│   │   │   ├── rtdetr_r18vd_6x_coco.yml
│   │   │   ├── rtdetr_r34vd_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_m_6x_coco.yml
│   │   │   └── rtdetr_regnet_6x_coco.yml
│   │   └── runtime.yml
│   ├── requirements.txt
│   ├── src
│   │   ├── __init__.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   ├── config.py
│   │   │   ├── yaml_config.py
│   │   │   └── yaml_utils.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── cifar10
│   │   │   │   └── __init__.py
│   │   │   ├── coco
│   │   │   │   ├── __init__.py
│   │   │   │   ├── coco_dataset.py
│   │   │   │   ├── coco_eval.py
│   │   │   │   └── coco_utils.py
│   │   │   ├── dataloader.py
│   │   │   ├── functional.py
│   │   │   └── transforms.py
│   │   ├── misc
│   │   │   ├── __init__.py
│   │   │   ├── dist.py
│   │   │   ├── logger.py
│   │   │   └── visualizer.py
│   │   ├── nn
│   │   │   ├── __init__.py
│   │   │   ├── arch
│   │   │   │   ├── __init__.py
│   │   │   │   └── classification.py
│   │   │   ├── backbone
│   │   │   │   ├── __init__.py
│   │   │   │   ├── common.py
│   │   │   │   ├── dla.py
│   │   │   │   ├── presnet.py
│   │   │   │   ├── regnet.py
│   │   │   │   ├── test_resnet.py
│   │   │   │   └── utils.py
│   │   │   └── criterion
│   │   │       ├── __init__.py
│   │   │       └── utils.py
│   │   ├── optim
│   │   │   ├── __init__.py
│   │   │   ├── amp.py
│   │   │   ├── ema.py
│   │   │   └── optim.py
│   │   ├── solver
│   │   │   ├── __init__.py
│   │   │   ├── det_engine.py
│   │   │   ├── det_solver.py
│   │   │   └── solver.py
│   │   └── zoo
│   │       ├── __init__.py
│   │       └── rtdetr
│   │           ├── __init__.py
│   │           ├── box_ops.py
│   │           ├── denoising.py
│   │           ├── hybrid_encoder.py
│   │           ├── matcher.py
│   │           ├── rtdetr.py
│   │           ├── rtdetr_criterion.py
│   │           ├── rtdetr_decoder.py
│   │           ├── rtdetr_postprocessor.py
│   │           └── utils.py
│   └── tools
│       ├── README.md
│       ├── export_onnx.py
│       ├── infer.py
│       └── train.py
├── rtdetrv2_paddle
│   └── readme.md
└── rtdetrv2_pytorch
    ├── Dockerfile
    ├── README.md
    ├── configs
    │   ├── dataset
    │   │   ├── coco_detection.yml
    │   │   └── voc_detection.yml
    │   ├── rtdetr
    │   │   ├── include
    │   │   │   ├── dataloader.yml
    │   │   │   ├── optimizer.yml
    │   │   │   └── rtdetr_r50vd.yml
    │   │   ├── readme.md
    │   │   ├── rtdetr_r101vd_6x_coco.yml
    │   │   ├── rtdetr_r18vd_6x_coco.yml
    │   │   ├── rtdetr_r34vd_6x_coco.yml
    │   │   ├── rtdetr_r50vd_6x_coco.yml
    │   │   └── rtdetr_r50vd_m_6x_coco.yml
    │   ├── rtdetrv2
    │   │   ├── include
    │   │   │   ├── dataloader.yml
    │   │   │   ├── optimizer.yml
    │   │   │   └── rtdetrv2_r50vd.yml
    │   │   ├── rtdetrv2_hgnetv2_h_6x_coco.yml
    │   │   ├── rtdetrv2_hgnetv2_l_6x_coco.yml
    │   │   ├── rtdetrv2_hgnetv2_x_6x_coco.yml
    │   │   ├── rtdetrv2_r101vd_6x_coco.yml
    │   │   ├── rtdetrv2_r18vd_120e_coco.yml
    │   │   ├── rtdetrv2_r18vd_120e_voc.yml
    │   │   ├── rtdetrv2_r18vd_dsp_3x_coco.yml
    │   │   ├── rtdetrv2_r18vd_sp1_120e_coco.yml
    │   │   ├── rtdetrv2_r18vd_sp2_120e_coco.yml
    │   │   ├── rtdetrv2_r18vd_sp3_120e_coco.yml
    │   │   ├── rtdetrv2_r34vd_120e_coco.yml
    │   │   ├── rtdetrv2_r34vd_dsp_1x_coco.yml
    │   │   ├── rtdetrv2_r50vd_6x_coco.yml
    │   │   ├── rtdetrv2_r50vd_dsp_1x_coco.yml
    │   │   ├── rtdetrv2_r50vd_m_7x_coco.yml
    │   │   └── rtdetrv2_r50vd_m_dsp_3x_coco.yml
    │   └── runtime.yml
    ├── dataset
    │   └── readme.md
    ├── docker-compose.yml
    ├── references
    │   └── deploy
    │       ├── readme.md
    │       ├── rtdetrv2_onnxruntime.py
    │       ├── rtdetrv2_openvino.py
    │       ├── rtdetrv2_tensorrt.py
    │       └── rtdetrv2_torch.py
    ├── requirements.txt
    ├── src
    │   ├── __init__.py
    │   ├── core
    │   │   ├── __init__.py
    │   │   ├── _config.py
    │   │   ├── workspace.py
    │   │   ├── yaml_config.py
    │   │   └── yaml_utils.py
    │   ├── data
    │   │   ├── __init__.py
    │   │   ├── _misc.py
    │   │   ├── dataloader.py
    │   │   ├── dataset
    │   │   │   ├── __init__.py
    │   │   │   ├── _dataset.py
    │   │   │   ├── cifar_dataset.py
    │   │   │   ├── coco_dataset.py
    │   │   │   ├── coco_eval.py
    │   │   │   ├── coco_utils.py
    │   │   │   ├── voc_detection.py
    │   │   │   └── voc_eval.py
    │   │   └── transforms
    │   │       ├── __init__.py
    │   │       ├── _transforms.py
    │   │       ├── container.py
    │   │       ├── functional.py
    │   │       ├── mosaic.py
    │   │       └── presets.py
    │   ├── misc
    │   │   ├── __init__.py
    │   │   ├── box_ops.py
    │   │   ├── dist_utils.py
    │   │   ├── lazy_loader.py
    │   │   ├── logger.py
    │   │   ├── profiler_utils.py
    │   │   └── visualizer.py
    │   ├── nn
    │   │   ├── __init__.py
    │   │   ├── arch
    │   │   │   ├── __init__.py
    │   │   │   ├── classification.py
    │   │   │   └── yolo.py
    │   │   ├── backbone
    │   │   │   ├── __init__.py
    │   │   │   ├── common.py
    │   │   │   ├── csp_darknet.py
    │   │   │   ├── csp_resnet.py
    │   │   │   ├── hgnetv2.py
    │   │   │   ├── presnet.py
    │   │   │   ├── test_resnet.py
    │   │   │   ├── timm_model.py
    │   │   │   ├── torchvision_model.py
    │   │   │   └── utils.py
    │   │   ├── criterion
    │   │   │   ├── __init__.py
    │   │   │   └── det_criterion.py
    │   │   └── postprocessor
    │   │       ├── __init__.py
    │   │       ├── box_revert.py
    │   │       ├── detr_postprocessor.py
    │   │       └── nms_postprocessor.py
    │   ├── optim
    │   │   ├── __init__.py
    │   │   ├── amp.py
    │   │   ├── ema.py
    │   │   ├── optim.py
    │   │   └── warmup.py
    │   ├── solver
    │   │   ├── __init__.py
    │   │   ├── _solver.py
    │   │   ├── clas_engine.py
    │   │   ├── clas_solver.py
    │   │   ├── det_engine.py
    │   │   └── det_solver.py
    │   └── zoo
    │       ├── __init__.py
    │       └── rtdetr
    │           ├── __init__.py
    │           ├── box_ops.py
    │           ├── conver_params.py
    │           ├── denoising.py
    │           ├── hybrid_encoder.py
    │           ├── matcher.py
    │           ├── rtdetr.py
    │           ├── rtdetr_criterion.py
    │           ├── rtdetr_decoder.py
    │           ├── rtdetr_postprocessor.py
    │           ├── rtdetrv2_criterion.py
    │           ├── rtdetrv2_decoder.py
    │           └── utils.py
    └── tools
        ├── README.md
        ├── export_onnx.py
        ├── export_trt.py
        ├── run_profile.py
        └── train.py
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: ['https://github.com/lyuwenyu/cvperception/assets/17582080/2b4bfcd5-5c0f-45fd-badf-3f6e5b0249ac']  # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: lyuwenyu
7 |
8 | ---
9 |
10 | **Star RTDETR**
11 | Please click **star** on the RTDETR homepage first to support this project.
12 | Star RTDETR to help more people discover this project.
13 |
14 | ---
15 |
16 | **Describe the bug**
17 | A clear and concise description of what the bug is.
18 | If applicable, add screenshots to help explain your problem.
19 |
20 | **To Reproduce**
21 | Steps to reproduce the behavior.
22 |
--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # Code and tools used for the speed benchmarks in the paper
2 | 
3 | 
4 | ## Benchmarking the speed of the YOLO series [in progress]
5 | Using [yolov8](https://github.com/ultralytics/ultralytics) as an example
6 | 
7 | 
8 | 1. Export to ONNX
9 | 
10 | Run the `export_onnx` function in `yolov8_onnx.py`; the added code mainly handles converting the output format
11 | 
12 | 
13 | 
14 | 
15 | 2. Insert NMS
16 | 
17 | Use the `yolo_insert_nms` function in `utils.py`; after exporting the ONNX model, inspect its structure with [Netron](https://netron.app/).
18 | 
19 | 
20 | 
21 | 
22 | 3. Convert to TensorRT
23 | 
24 | Convert using the script in `trtexec.md`, or with the Python code in `utils.py`
25 | ```bash
26 | # trtexec -h
27 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16
28 | ```
29 | 
30 | 
31 | 
32 | 
33 | 4. Timing with trtexec
34 | 
35 | Use the script in `trtexec.md` with the `--buildOnly` flag removed
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 5. Profiling (optional)
43 | 
44 | Prepend the following command to step 4
45 | ```bash
46 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms
47 | ```
48 | The result can be visualized and analyzed with nsys
49 | 
50 | 
51 | 
52 | 
53 | 
54 | 
55 | 6. Timing or deployment in Python
56 | 
57 | To measure the model's average speed on the COCO val set, run inference with the code in `trtinfer.py`
58 | 
59 | 
60 | 
--------------------------------------------------------------------------------
/benchmark/trtexec.md:
--------------------------------------------------------------------------------
1 |
2 | ```bash
3 | # build tensorrt engine
4 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16
5 |
6 | # using dynamic shapes
7 | # --explicitBatch --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640 --shapes=image:8x3x640x640
8 |
9 | # timeline
10 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
11 |
12 | # https://forums.developer.nvidia.com/t/about-loadinputs-in-trtexec/218880
13 | ```
14 |
--------------------------------------------------------------------------------
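
Note: the `input_tensor.bin` consumed by `--loadInputs` above can be produced with `to_binary_data` from `benchmark/utils.py`. A minimal sketch (the image path `sample.jpg` is a placeholder):

```python
# Dump a preprocessed 640x640 image as raw float32 CHW bytes, the layout
# trtexec expects for --loadInputs='image:input_tensor.bin'.
from utils import to_binary_data  # benchmark/utils.py

to_binary_data('sample.jpg', size=(640, 640), output_name='input_tensor.bin')
```
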
/benchmark/utils.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import time
5 | import contextlib
6 | import numpy as np
7 | from PIL import Image
8 | from collections import OrderedDict
9 |
10 | import onnx
11 | import torch
12 | import onnx_graphsurgeon
13 |
14 |
15 | def to_binary_data(path, size=(640, 640), output_name='input_tensor.bin'):
16 |     '''--loadInputs='image:input_tensor.bin'
17 |     '''
18 |     im = Image.open(path).resize(size)
19 |     data = np.asarray(im, dtype=np.float32).transpose(2, 0, 1)[None] / 255.
20 |     data.tofile(output_name)
21 | 
22 | 
23 | def yolo_insert_nms(path, score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300, simplify=False):
24 |     '''
25 |     http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/api/onnxops/onnx__EfficientNMS_TRT.html
26 |     https://huggingface.co/spaces/muttalib1326/Punjabi_Character_Detection/blob/3dd1e17054c64e5f6b2254278f96cfa2bf418cd4/utils/add_nms.py
27 |     '''
28 |     onnx_model = onnx.load(path)
29 | 
30 |     if simplify:
31 |         from onnxsim import simplify
32 |         onnx_model, _ = simplify(onnx_model, overwrite_input_shapes={'image': [1, 3, 640, 640]})
33 | 
34 |     graph = onnx_graphsurgeon.import_onnx(onnx_model)
35 |     graph.toposort()
36 |     graph.fold_constants()
37 |     graph.cleanup()
38 | 
39 |     topk = max_output_boxes
40 |     attrs = OrderedDict(plugin_version='1',
41 |                         background_class=-1,
42 |                         max_output_boxes=topk,
43 |                         score_threshold=score_threshold,
44 |                         iou_threshold=iou_threshold,
45 |                         score_activation=False,
46 |                         box_coding=0, )
47 | 
48 |     outputs = [onnx_graphsurgeon.Variable('num_dets', np.int32, [-1, 1]),
49 |                onnx_graphsurgeon.Variable('det_boxes', np.float32, [-1, topk, 4]),
50 |                onnx_graphsurgeon.Variable('det_scores', np.float32, [-1, topk]),
51 |                onnx_graphsurgeon.Variable('det_classes', np.int32, [-1, topk])]
52 | 
53 |     graph.layer(op='EfficientNMS_TRT',
54 |                 name="batched_nms",
55 |                 inputs=[graph.outputs[0],
56 |                         graph.outputs[1]],
57 |                 outputs=outputs,
58 |                 attrs=attrs, )
59 | 
60 |     graph.outputs = outputs
61 |     graph.cleanup().toposort()
62 | 
63 |     onnx.save(onnx_graphsurgeon.export_onnx(graph), path.replace('.onnx', '_w_nms.onnx'))
64 | 
65 | 
66 | class TimeProfiler(contextlib.ContextDecorator):
67 |     def __init__(self, ):
68 |         self.total = 0
69 | 
70 |     def __enter__(self, ):
71 |         self.start = self.time()
72 |         return self
73 | 
74 |     def __exit__(self, type, value, traceback):
75 |         self.total += self.time() - self.start
76 | 
77 |     def reset(self, ):
78 |         self.total = 0
79 | 
80 |     def time(self, ):
81 |         if torch.cuda.is_available():
82 |             torch.cuda.synchronize()
83 |         return time.time()
84 |
--------------------------------------------------------------------------------
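
A minimal usage sketch for `TimeProfiler` above: it accumulates wall-clock time across `with` blocks, synchronizing CUDA before each timestamp so GPU kernels are fully counted. The `model` and input here are placeholders:

```python
import torch
from utils import TimeProfiler  # benchmark/utils.py

profiler = TimeProfiler()
model = torch.nn.Identity()      # placeholder for a real network
x = torch.rand(1, 3, 640, 640)

n = 100
for _ in range(n):
    with profiler:
        _ = model(x)
print(f'avg latency: {profiler.total / n * 1000:.3f} ms')
```
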
/benchmark/yolov8_onnx.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import torch
5 | import torchvision
6 |
7 | import numpy as np
8 | import onnxruntime as ort
9 |
10 | from utils import yolo_insert_nms
11 |
12 | class YOLOv8(torch.nn.Module):
13 |     def __init__(self, name) -> None:
14 |         super().__init__()
15 |         from ultralytics import YOLO
16 |         # Load a model
17 |         # build a new model from scratch
18 |         # model = YOLO(f'{name}.yaml')
19 | 
20 |         # load a pretrained model (recommended for training)
21 |         model = YOLO(f'{name}.pt')
22 |         self.model = model.model
23 | 
24 |     def forward(self, x):
25 |         '''https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py#L216
26 |         '''
27 |         pred: torch.Tensor = self.model(x)[0]  # n 84 8400,
28 |         pred = pred.permute(0, 2, 1)
29 |         boxes, scores = pred.split([4, 80], dim=-1)
30 |         boxes = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
31 | 
32 |         return boxes, scores
33 | 
34 | 
35 | 
36 | def export_onnx(name='yolov8n'):
37 |     '''export onnx
38 |     '''
39 |     m = YOLOv8(name)
40 | 
41 |     x = torch.rand(1, 3, 640, 640)
42 |     dynamic_axes = {
43 |         'image': {0: '-1'}
44 |     }
45 |     torch.onnx.export(m, x, f'{name}.onnx',
46 |                       input_names=['image'],
47 |                       output_names=['boxes', 'scores'],
48 |                       opset_version=13,
49 |                       dynamic_axes=dynamic_axes)
50 | 
51 |     data = np.random.rand(1, 3, 640, 640).astype(np.float32)
52 |     sess = ort.InferenceSession(f'{name}.onnx')
53 |     _ = sess.run(output_names=None, input_feed={'image': data})
54 | 
55 | 
56 | if __name__ == '__main__':
57 | 
58 |     import argparse
59 |     parser = argparse.ArgumentParser()
60 |     parser.add_argument('--name', type=str, default='yolov8l')
61 |     parser.add_argument('--score_threshold', type=float, default=0.001)
62 |     parser.add_argument('--iou_threshold', type=float, default=0.7)
63 |     parser.add_argument('--max_output_boxes', type=int, default=300)
64 |     args = parser.parse_args()
65 | 
66 |     export_onnx(name=args.name)
67 | 
68 |     yolo_insert_nms(path=f'{args.name}.onnx',
69 |                     score_threshold=args.score_threshold,
70 |                     iou_threshold=args.iou_threshold,
71 |                     max_output_boxes=args.max_output_boxes, )
72 |
73 |
--------------------------------------------------------------------------------
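
The script above can be run end to end with, e.g., `python yolov8_onnx.py --name yolov8l`: it exports `yolov8l.onnx`, sanity-checks it with ONNX Runtime, and then writes the NMS-fused model (`yolov8l_w_nms.onnx`) consumed by the trtexec commands in `trtexec.md`.
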
/rtdetr_paddle/configs/datasets/coco_detection.yml:
--------------------------------------------------------------------------------
1 | metric: COCO
2 | num_classes: 80
3 | 
4 | TrainDataset:
5 |   name: COCODataSet
6 |   image_dir: train2017
7 |   anno_path: annotations/instances_train2017.json
8 |   dataset_dir: dataset/coco
9 |   data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
10 | 
11 | EvalDataset:
12 |   name: COCODataSet
13 |   image_dir: val2017
14 |   anno_path: annotations/instances_val2017.json
15 |   dataset_dir: dataset/coco
16 |   allow_empty: true
17 | 
18 | TestDataset:
19 |   name: ImageFolder
20 |   anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
21 |   dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/datasets/voc.yml:
--------------------------------------------------------------------------------
1 | metric: VOC
2 | map_type: 11point
3 | num_classes: 20
4 | 
5 | TrainDataset:
6 |   name: VOCDataSet
7 |   dataset_dir: dataset/voc
8 |   anno_path: trainval.txt
9 |   label_list: label_list.txt
10 |   data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
11 | 
12 | EvalDataset:
13 |   name: VOCDataSet
14 |   dataset_dir: dataset/voc
15 |   anno_path: test.txt
16 |   label_list: label_list.txt
17 |   data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
18 | 
19 | TestDataset:
20 |   name: ImageFolder
21 |   anno_path: dataset/voc/label_list.txt
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/optimizer_6x.yml:
--------------------------------------------------------------------------------
1 | epoch: 72
2 | 
3 | LearningRate:
4 |   base_lr: 0.0001
5 |   schedulers:
6 |   - !PiecewiseDecay
7 |     gamma: 1.0
8 |     milestones: [100]
9 |     use_warmup: true
10 |   - !LinearWarmup
11 |     start_factor: 0.001
12 |     steps: 2000
13 | 
14 | OptimizerBuilder:
15 |   clip_grad_by_norm: 0.1
16 |   regularizer: false
17 |   optimizer:
18 |     type: AdamW
19 |     weight_decay: 0.0001
20 |
--------------------------------------------------------------------------------
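
Note that the single milestone at epoch 100 lies beyond the 72-epoch schedule and `gamma` is 1.0, so the piecewise decay never actually lowers the rate: the learning rate warms up linearly over the first 2000 steps from `0.001 × base_lr` (1e-7) to `base_lr` (1e-4) and then stays flat for the rest of training.
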
/rtdetr_paddle/configs/rtdetr/_base_/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | architecture: DETR
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: True
5 | ema_decay: 0.9999
6 | ema_decay_type: "exponential"
7 | ema_filter_no_grad: True
8 | hidden_dim: 256
9 | use_focal_loss: True
10 | eval_size: [640, 640] # h, w
11 | 
12 | 
13 | DETR:
14 |   backbone: ResNet
15 |   neck: HybridEncoder
16 |   transformer: RTDETRTransformer
17 |   detr_head: DINOHead
18 |   post_process: DETRPostProcess
19 | 
20 | ResNet:
21 |   # index 0 stands for res2
22 |   depth: 50
23 |   variant: d
24 |   norm_type: bn
25 |   freeze_at: 0
26 |   return_idx: [1, 2, 3]
27 |   lr_mult_list: [0.1, 0.1, 0.1, 0.1]
28 |   num_stages: 4
29 |   freeze_stem_only: True
30 | 
31 | HybridEncoder:
32 |   hidden_dim: 256
33 |   use_encoder_idx: [2]
34 |   num_encoder_layers: 1
35 |   encoder_layer:
36 |     name: TransformerLayer
37 |     d_model: 256
38 |     nhead: 8
39 |     dim_feedforward: 1024
40 |     dropout: 0.
41 |     activation: 'gelu'
42 |   expansion: 1.0
43 | 
44 | 
45 | RTDETRTransformer:
46 |   num_queries: 300
47 |   position_embed_type: sine
48 |   feat_strides: [8, 16, 32]
49 |   num_levels: 3
50 |   nhead: 8
51 |   num_decoder_layers: 6
52 |   dim_feedforward: 1024
53 |   dropout: 0.0
54 |   activation: relu
55 |   num_denoising: 100
56 |   label_noise_ratio: 0.5
57 |   box_noise_scale: 1.0
58 |   learnt_init_query: False
59 | 
60 | DINOHead:
61 |   loss:
62 |     name: DINOLoss
63 |     loss_coeff: {class: 1, bbox: 5, giou: 2}
64 |     aux_loss: True
65 |     use_vfl: True
66 |     matcher:
67 |       name: HungarianMatcher
68 |       matcher_coeff: {class: 2, bbox: 5, giou: 2}
69 | 
70 | DETRPostProcess:
71 |   num_top_queries: 300
72 |
--------------------------------------------------------------------------------
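
This base config wires the RT-DETR pipeline together: the `DETR` architecture takes ResNet-50-vd features from the last three stages (`return_idx: [1, 2, 3]`, strides 8/16/32), passes them through the `HybridEncoder`, whose single transformer encoder layer is applied only to the stride-32 level (`use_encoder_idx: [2]`), and decodes 300 queries with the 6-layer `RTDETRTransformer`; `DINOHead` supplies the denoising-style training losses with Hungarian matching, and `DETRPostProcess` keeps the top 300 queries.
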
/rtdetr_paddle/configs/rtdetr/_base_/rtdetr_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 4
2 | TrainReader:
3 |   sample_transforms:
4 |     - Decode: {}
5 |     - RandomDistort: {prob: 0.8}
6 |     - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
7 |     - RandomCrop: {prob: 0.8}
8 |     - RandomFlip: {}
9 |   batch_transforms:
10 |     - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
11 |     - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
12 |     - NormalizeBox: {}
13 |     - BboxXYXY2XYWH: {}
14 |     - Permute: {}
15 |   batch_size: 4
16 |   shuffle: true
17 |   drop_last: true
18 |   collate_batch: false
19 |   use_shared_memory: false
20 | 
21 | 
22 | EvalReader:
23 |   sample_transforms:
24 |     - Decode: {}
25 |     - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} # target_size: (h, w)
26 |     - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
27 |     - Permute: {}
28 |   batch_size: 4
29 |   shuffle: false
30 |   drop_last: false
31 | 
32 | 
33 | TestReader:
34 |   inputs_def:
35 |     image_shape: [3, 640, 640]
36 |   sample_transforms:
37 |     - Decode: {}
38 |     - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
39 |     - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
40 |     - Permute: {}
41 |   batch_size: 1
42 |   shuffle: false
43 |   drop_last: false
44 |
--------------------------------------------------------------------------------
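
In this reader, `BatchRandomResize` provides multi-scale training (each batch is resized to a random size between 480 and 800, with 640 listed three times so it is sampled more often), while evaluation and test fix the input at 640×640. With zero mean, unit std and `norm_type: none`, `NormalizeImage` effectively applies only pixel-value scaling rather than mean/std normalization, and `Permute` moves images to CHW layout.
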
/rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
11 | find_unused_parameters: True
12 | log_iter: 200
13 | 
14 | 
15 | DETR:
16 |   backbone: PPHGNetV2
17 | 
18 | PPHGNetV2:
19 |   arch: 'L'
20 |   return_idx: [1, 2, 3]
21 |   freeze_stem_only: True
22 |   freeze_at: 0
23 |   freeze_norm: True
24 |   lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
25 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_hgnetv2_x_6x_coco/model_final
10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams
11 | find_unused_parameters: True
12 | log_iter: 200
13 | 
14 | 
15 | 
16 | DETR:
17 |   backbone: PPHGNetV2
18 | 
19 | 
20 | PPHGNetV2:
21 |   arch: 'X'
22 |   return_idx: [1, 2, 3]
23 |   freeze_stem_only: True
24 |   freeze_at: 0
25 |   freeze_norm: True
26 |   lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]
27 | 
28 | 
29 | HybridEncoder:
30 |   hidden_dim: 384
31 |   use_encoder_idx: [2]
32 |   num_encoder_layers: 1
33 |   encoder_layer:
34 |     name: TransformerLayer
35 |     d_model: 384
36 |     nhead: 8
37 |     dim_feedforward: 2048
38 |     dropout: 0.
39 |     activation: 'gelu'
40 |   expansion: 1.0
41 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_r101vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 | 
13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
14 | 
15 | ResNet:
16 |   # index 0 stands for res2
17 |   depth: 101
18 |   variant: d
19 |   norm_type: bn
20 |   freeze_at: 0
21 |   return_idx: [1, 2, 3]
22 |   lr_mult_list: [0.01, 0.01, 0.01, 0.01]
23 |   num_stages: 4
24 |   freeze_stem_only: True
25 | 
26 | HybridEncoder:
27 |   hidden_dim: 384
28 |   use_encoder_idx: [2]
29 |   num_encoder_layers: 1
30 |   encoder_layer:
31 |     name: TransformerLayer
32 |     d_model: 384
33 |     nhead: 8
34 |     dim_feedforward: 2048
35 |     dropout: 0.
36 |     activation: 'gelu'
37 |   expansion: 1.0
38 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_r18vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 | 
13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
14 | ResNet:
15 |   depth: 18
16 |   variant: d
17 |   return_idx: [1, 2, 3]
18 |   freeze_at: -1
19 |   freeze_norm: false
20 |   norm_decay: 0.
21 | 
22 | HybridEncoder:
23 |   hidden_dim: 256
24 |   use_encoder_idx: [2]
25 |   num_encoder_layers: 1
26 |   encoder_layer:
27 |     name: TransformerLayer
28 |     d_model: 256
29 |     nhead: 8
30 |     dim_feedforward: 1024
31 |     dropout: 0.
32 |     activation: 'gelu'
33 |   expansion: 0.5
34 |   depth_mult: 1.0
35 | 
36 | RTDETRTransformer:
37 |   eval_idx: -1
38 |   num_decoder_layers: 3
39 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_r34vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 | 
13 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams
14 | ResNet:
15 |   depth: 34
16 |   variant: d
17 |   return_idx: [1, 2, 3]
18 |   freeze_at: -1
19 |   freeze_norm: false
20 |   norm_decay: 0.
21 | 
22 | HybridEncoder:
23 |   hidden_dim: 256
24 |   use_encoder_idx: [2]
25 |   num_encoder_layers: 1
26 |   encoder_layer:
27 |     name: TransformerLayer
28 |     d_model: 256
29 |     nhead: 8
30 |     dim_feedforward: 1024
31 |     dropout: 0.
32 |     activation: 'gelu'
33 |   expansion: 0.5
34 |   depth_mult: 1.0
35 | 
36 | RTDETRTransformer:
37 |   eval_idx: -1
38 |   num_decoder_layers: 4
39 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_r50vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 |   '../datasets/coco_detection.yml',
3 |   '../runtime.yml',
4 |   '_base_/optimizer_6x.yml',
5 |   '_base_/rtdetr_r50vd.yml',
6 |   '_base_/rtdetr_reader.yml',
7 | ]
8 | 
9 | weights: output/rtdetr_r50vd_m_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 | 
13 | HybridEncoder:
14 |   hidden_dim: 256
15 |   use_encoder_idx: [2]
16 |   num_encoder_layers: 1
17 |   encoder_layer:
18 |     name: TransformerLayer
19 |     d_model: 256
20 |     nhead: 8
21 |     dim_feedforward: 1024
22 |     dropout: 0.
23 |     activation: 'gelu'
24 |   expansion: 0.5
25 |   depth_mult: 1.0
26 | 
27 | RTDETRTransformer:
28 |   eval_idx: 2 # use the 3rd decoder layer for eval
29 |
--------------------------------------------------------------------------------
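
The `m` (faster) variant above trades accuracy for speed in two ways visible in the config: the encoder blocks are slimmed with `expansion: 0.5`, and `eval_idx: 2` exits at the 3rd of the base model's 6 decoder layers at inference time, so the remaining layers contribute only auxiliary supervision during training.
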
/rtdetr_paddle/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | use_gpu: true
2 | use_xpu: false
3 | use_mlu: false
4 | use_npu: false
5 | log_iter: 20
6 | save_dir: output
7 | snapshot_epoch: 1
8 | print_flops: false
9 | print_params: false
10 |
11 | # Exporting the model
12 | export:
13 |   post_process: True  # Whether post-processing is included in the network when exporting the model.
14 |   nms: True  # Whether NMS is included in the network when exporting the model.
15 |   benchmark: False  # Used for benchmarking model performance; if set to `True`, post-processing and NMS will not be exported.
16 |   fuse_conv_bn: False
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/coco/download_coco.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 |     sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'coco')
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/create_list.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 |     sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import create_voc_list
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | create_voc_list(voc_path)
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/download_voc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 |     sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'voc')
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/label_list.txt:
--------------------------------------------------------------------------------
1 | aeroplane
2 | bicycle
3 | bird
4 | boat
5 | bottle
6 | bus
7 | car
8 | cat
9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import (core, data, engine, modeling, optimizer, metrics, utils)
16 |
17 |
18 | try:
19 |     from .version import full_version as __version__
20 |     from .version import commit as __git_commit__
21 | except ImportError:
22 |     import sys
23 |     sys.stderr.write("Warning: import ppdet from source directory " \
24 |                      "without installing, run 'python setup.py install' to " \
25 |                      "install ppdet first\n")
26 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import config
16 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import source
16 | from . import transform
17 | from . import reader
18 |
19 | from .source import *
20 | from .transform import *
21 | from .reader import *
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/shm_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 |
17 | SIZE_UNIT = ['K', 'M', 'G', 'T']
18 | SHM_QUERY_CMD = 'df -h'
19 | SHM_KEY = 'shm'
20 | SHM_DEFAULT_MOUNT = '/dev/shm'
21 |
22 | # [ shared memory size check ]
23 | # In detection models, image/target data occupies a lot of memory and
24 | # will use a large amount of shared memory in a multi-process DataLoader,
25 | # so we use the following code to get the shared memory size and perform
26 | # a size check that disables shared memory use if its size is insufficient.
27 | # The shared memory size is obtained as follows:
28 | # 1. use `df -h` to get all mount info
29 | # 2. pick the spaces whose mount info contains 'shm'
30 | # 3. if there is only one 'shm' space, return its size
31 | # 4. if there are multiple 'shm' spaces, try to find the default mount
32 | #    directory '/dev/shm' (Linux-like systems); otherwise return the
33 | #    biggest space size.
34 | 
35 | 
36 | def _parse_size_in_M(size_str):
37 |     if size_str[-1] == 'B':
38 |         num, unit = size_str[:-2], size_str[-2]
39 |     else:
40 |         num, unit = size_str[:-1], size_str[-1]
41 |     assert unit in SIZE_UNIT, \
42 |         "unknown shm size unit {}".format(unit)
43 |     return float(num) * \
44 |         (1024 ** (SIZE_UNIT.index(unit) - 1))
45 | 
46 | 
47 | def _get_shared_memory_size_in_M():
48 |     try:
49 |         df_infos = os.popen(SHM_QUERY_CMD).readlines()
50 |     except Exception:
51 |         return None
52 |     else:
53 |         shm_infos = []
54 |         for df_info in df_infos:
55 |             info = df_info.strip()
56 |             if info.find(SHM_KEY) >= 0:
57 |                 shm_infos.append(info.split())
58 | 
59 |         if len(shm_infos) == 0:
60 |             return None
61 |         elif len(shm_infos) == 1:
62 |             return _parse_size_in_M(shm_infos[0][3])
63 |         else:
64 |             default_mount_infos = [
65 |                 si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
66 |             ]
67 |             if default_mount_infos:
68 |                 return _parse_size_in_M(default_mount_infos[0][3])
69 |             else:
70 |                 return max([_parse_size_in_M(si[3]) for si in shm_infos])
71 |
--------------------------------------------------------------------------------
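
Worked examples of the unit arithmetic in `_parse_size_in_M` above (sizes are returned in megabytes, so `SIZE_UNIT.index(unit) - 1` maps K→1/1024, M→1, G→1024):

```python
from shm_utils import _parse_size_in_M  # ppdet/data/shm_utils.py

print(_parse_size_in_M('512M'))   # 512.0
print(_parse_size_in_M('64G'))    # 65536.0  (64 * 1024)
print(_parse_size_in_M('1.5GB'))  # 1536.0   (trailing 'B' is stripped)
```
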
/rtdetr_paddle/ppdet/data/source/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .coco import *
16 | from .voc import *
17 | from .category import *
18 | from .dataset import ImageFolder
19 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/transform/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import operators
16 | from . import batch_operators
17 |
18 |
19 | from .operators import *
20 | from .batch_operators import *
21 |
22 |
23 | __all__ = []
24 | __all__ += registered_ops
25 |
26 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import numbers
16 | import numpy as np
17 |
18 | try:
19 |     from collections.abc import Sequence, Mapping
20 | except ImportError:
21 |     from collections import Sequence, Mapping
22 | 
23 | 
24 | def default_collate_fn(batch):
25 |     """
26 |     Default batch collating function for :code:`paddle.io.DataLoader`.
27 |     It takes the input data as a list of samples; each element in the
28 |     list is the data of one sample, and sample data may be composed of
29 |     a list, dictionary, string, number or numpy array. This
30 |     function parses the input data recursively and stacks number,
31 |     numpy array and paddle.Tensor data into batches, e.g. for the
32 |     following input data:
33 |     [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
34 |      {'image': np.array(shape=[3, 224, 224]), 'label': 3},
35 |      {'image': np.array(shape=[3, 224, 224]), 'label': 4},
36 |      {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
37 | 
38 | 
39 |     this default collate function zips each number and numpy array
40 |     field together and stacks each field as the batch field as follows:
41 |     {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
42 |     Args:
43 |         batch(list of sample data): batch should be a list of sample data.
44 | 
45 |     Returns:
46 |         Batched data: each number, numpy array and paddle.Tensor in the
47 |         input data, batched.
48 |     """
49 |     sample = batch[0]
50 |     if isinstance(sample, np.ndarray):
51 |         batch = np.stack(batch, axis=0)
52 |         return batch
53 |     elif isinstance(sample, numbers.Number):
54 |         batch = np.array(batch)
55 |         return batch
56 |     elif isinstance(sample, (str, bytes)):
57 |         return batch
58 |     elif isinstance(sample, Mapping):
59 |         return {
60 |             key: default_collate_fn([d[key] for d in batch])
61 |             for key in sample
62 |         }
63 |     elif isinstance(sample, Sequence):
64 |         sample_fields_num = len(sample)
65 |         if not all(len(sample) == sample_fields_num for sample in iter(batch)):
66 |             raise RuntimeError(
67 |                 "fields number not same among samples in a batch")
68 |         return [default_collate_fn(fields) for fields in zip(*batch)]
69 | 
70 |     raise TypeError("batch data can only contain: tensor, numpy.ndarray, "
71 |                     "dict, list, number, but got {}".format(type(sample)))
72 |
--------------------------------------------------------------------------------
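
A minimal sketch of `default_collate_fn` on the dict samples from the docstring above:

```python
import numpy as np
from ppdet.data.utils import default_collate_fn

batch = [{'image': np.zeros((3, 224, 224)), 'label': 1},
         {'image': np.ones((3, 224, 224)), 'label': 3}]
out = default_collate_fn(batch)
print(out['image'].shape)  # (2, 3, 224, 224)
print(out['label'])        # [1 3]
```
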
/rtdetr_paddle/ppdet/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import trainer
16 | from .trainer import *
17 |
18 | from . import callbacks
19 | from .callbacks import *
20 |
21 | from . import env
22 | from .env import *
23 |
24 | __all__ = trainer.__all__ \
25 |     + callbacks.__all__ \
26 |     + env.__all__
27 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/engine/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import random
21 | import numpy as np
22 |
23 | import paddle
24 | from paddle.distributed import fleet
25 |
26 | __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
27 |
28 |
29 | def init_fleet_env(find_unused_parameters=False):
30 |     strategy = fleet.DistributedStrategy()
31 |     strategy.find_unused_parameters = find_unused_parameters
32 |     fleet.init(is_collective=True, strategy=strategy)
33 | 
34 | 
35 | def init_parallel_env():
36 |     env = os.environ
37 |     dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
38 |     if dist:
39 |         trainer_id = int(env['PADDLE_TRAINER_ID'])
40 |         local_seed = (99 + trainer_id)
41 |         random.seed(local_seed)
42 |         np.random.seed(local_seed)
43 | 
44 |     paddle.distributed.init_parallel_env()
45 | 
46 | 
47 | def set_random_seed(seed):
48 |     paddle.seed(seed)
49 |     random.seed(seed)
50 |     np.random.seed(seed)
51 |
--------------------------------------------------------------------------------
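
Note the split of responsibilities above: `set_random_seed` seeds Paddle, Python's `random` and NumPy identically for reproducibility, while `init_parallel_env` gives each trainer its own `99 + trainer_id` seed for `random`/NumPy so that, e.g., data augmentation differs across workers in distributed training.
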
/rtdetr_paddle/ppdet/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import metrics
16 |
17 | from .metrics import *
18 | from .pose3d_metrics import *
19 |
20 | from . import mot_metrics
21 | from .mot_metrics import *
22 | __all__ = metrics.__all__ + mot_metrics.__all__
23 | 
24 | from . import mcmot_metrics
25 | from .mcmot_metrics import *
26 | __all__ += mcmot_metrics.__all__
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import warnings
16 | warnings.filterwarnings(
17 |     action='ignore', category=DeprecationWarning, module='ops')
18 |
19 |
20 | from .ops import *
21 | from .backbones import *
22 | from .heads import *
23 | from .losses import *
24 | from .architectures import *
25 | from .post_process import *
26 | from .layers import *
27 | from .transformers import *
28 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .meta_arch import *
16 | from .detr import *
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .resnet import *
16 | from .darknet import *
17 | from .mobilenet_v1 import *
18 | from .mobilenet_v3 import *
19 | from .shufflenet_v2 import *
20 | from .swin_transformer import *
21 | from .lcnet import *
22 | from .cspresnet import *
23 | from .csp_darknet import *
24 | from .convnext import *
25 | from .vision_transformer import *
26 | from .mobileone import *
27 | from .trans_encoder import *
28 | from .focalnet import *
29 | from .vit_mae import *
30 | from .hgnet_v2 import *
31 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/name_adapter.py:
--------------------------------------------------------------------------------
1 | class NameAdapter(object):
2 |     """Fix the backbones variable names for pretrained weight"""
3 | 
4 |     def __init__(self, model):
5 |         super(NameAdapter, self).__init__()
6 |         self.model = model
7 | 
8 |     @property
9 |     def model_type(self):
10 |         return getattr(self.model, '_model_type', '')
11 | 
12 |     @property
13 |     def variant(self):
14 |         return getattr(self.model, 'variant', '')
15 | 
16 |     def fix_conv_norm_name(self, name):
17 |         if name == "conv1":
18 |             bn_name = "bn_" + name
19 |         else:
20 |             bn_name = "bn" + name[3:]
21 |         # the naming rule is same as pretrained weight
22 |         if self.model_type == 'SEResNeXt':
23 |             bn_name = name + "_bn"
24 |         return bn_name
25 | 
26 |     def fix_shortcut_name(self, name):
27 |         if self.model_type == 'SEResNeXt':
28 |             name = 'conv' + name + '_prj'
29 |         return name
30 | 
31 |     def fix_bottleneck_name(self, name):
32 |         if self.model_type == 'SEResNeXt':
33 |             conv_name1 = 'conv' + name + '_x1'
34 |             conv_name2 = 'conv' + name + '_x2'
35 |             conv_name3 = 'conv' + name + '_x3'
36 |             shortcut_name = name
37 |         else:
38 |             conv_name1 = name + "_branch2a"
39 |             conv_name2 = name + "_branch2b"
40 |             conv_name3 = name + "_branch2c"
41 |             shortcut_name = name + "_branch1"
42 |         return conv_name1, conv_name2, conv_name3, shortcut_name
43 | 
44 |     def fix_basicblock_name(self, name):
45 |         if self.model_type == 'SEResNeXt':
46 |             conv_name1 = 'conv' + name + '_x1'
47 |             conv_name2 = 'conv' + name + '_x2'
48 |             shortcut_name = name
49 |         else:
50 |             conv_name1 = name + "_branch2a"
51 |             conv_name2 = name + "_branch2b"
52 |             shortcut_name = name + "_branch1"
53 |         return conv_name1, conv_name2, shortcut_name
54 | 
55 |     def fix_layer_warp_name(self, stage_num, count, i):
56 |         name = 'res' + str(stage_num)
57 |         if count > 10 and stage_num == 4:
58 |             if i == 0:
59 |                 conv_name = name + "a"
60 |             else:
61 |                 conv_name = name + "b" + str(i)
62 |         else:
63 |             conv_name = name + chr(ord("a") + i)
64 |         if self.model_type == 'SEResNeXt':
65 |             conv_name = str(stage_num + 2) + '_' + str(i + 1)
66 |         return conv_name
67 | 
68 |     def fix_c1_stage_name(self):
69 |         return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
70 |
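A minimal usage sketch of how the adapter maps block names to pretrained-weight names (`_FakeBackbone` is a hypothetical stand-in for a real backbone that exposes `_model_type`, as `ResNet` in resnet.py does):

    class _FakeBackbone:  # hypothetical stand-in, not part of the repo
        _model_type = 'SEResNeXt'

    na = NameAdapter(_FakeBackbone())
    na.fix_bottleneck_name('2a')  # ('conv2a_x1', 'conv2a_x2', 'conv2a_x3', '2a')
    na.fix_shortcut_name('2a')    # 'conv2a_prj'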
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/cls_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | def _get_class_default_kwargs(cls, *args, **kwargs):
17 | """
18 |     Get the default arguments of a class's __init__ as a dict; if args or
19 |     kwargs are given, they override the corresponding defaults
20 | """
21 | varnames = cls.__init__.__code__.co_varnames
22 | argcount = cls.__init__.__code__.co_argcount
23 | keys = varnames[:argcount]
24 | assert keys[0] == 'self'
25 | keys = keys[1:]
26 |
27 | values = list(cls.__init__.__defaults__)
28 | assert len(values) == len(keys)
29 |
30 | if len(args) > 0:
31 | for i, arg in enumerate(args):
32 | values[i] = arg
33 |
34 | default_kwargs = dict(zip(keys, values))
35 |
36 | if len(kwargs) > 0:
37 | for k, v in kwargs.items():
38 | default_kwargs[k] = v
39 |
40 | return default_kwargs
41 |
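For illustration, the helper's behavior on a toy class (`Toy` is hypothetical):

    class Toy:
        def __init__(self, a=1, b='x'):
            pass

    _get_class_default_kwargs(Toy)         # {'a': 1, 'b': 'x'}
    _get_class_default_kwargs(Toy, 2)      # positional override -> {'a': 2, 'b': 'x'}
    _get_class_default_kwargs(Toy, b='y')  # keyword override    -> {'a': 1, 'b': 'y'}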
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .detr_head import *
16 |
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .iou_loss import *
16 | from .gfocal_loss import *
17 | from .detr_loss import *
18 | from .focal_loss import *
19 | from .smooth_l1_loss import *
20 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.nn as nn
21 | import paddle.nn.functional as F
22 | from ppdet.core.workspace import register
23 |
24 | __all__ = ['SmoothL1Loss']
25 |
26 | @register
27 | class SmoothL1Loss(nn.Layer):
28 | """Smooth L1 Loss.
29 | Args:
30 |         beta (float): controls the smooth region; the loss reduces to L1 loss when beta=0.0
31 |         loss_weight (float): the final loss is multiplied by this factor
32 | """
33 | def __init__(self,
34 | beta=1.0,
35 | loss_weight=1.0):
36 | super(SmoothL1Loss, self).__init__()
37 | assert beta >= 0
38 | self.beta = beta
39 | self.loss_weight = loss_weight
40 |
41 | def forward(self, pred, target, reduction='none'):
42 | """forward function, based on fvcore.
43 | Args:
44 | pred (Tensor): prediction tensor
45 |             target (Tensor): target tensor; its shape must match pred.shape
46 |             reduction (str): reduction mode, one of ('none', 'sum', 'mean')
47 | """
48 | assert reduction in ('none', 'sum', 'mean')
49 | target = target.detach()
50 | if self.beta < 1e-5:
51 | loss = paddle.abs(pred - target)
52 | else:
53 | n = paddle.abs(pred - target)
54 | cond = n < self.beta
55 | loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta)
56 | if reduction == 'mean':
57 | loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
58 | elif reduction == 'sum':
59 | loss = loss.sum()
60 | return loss * self.loss_weight
61 |
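A minimal usage sketch, assuming a working Paddle install (the toy tensors are illustrative only):

    import paddle

    loss_fn = SmoothL1Loss(beta=1.0, loss_weight=1.0)
    pred = paddle.to_tensor([[0.5, 0.5, 1.0, 1.0]])
    target = paddle.to_tensor([[0.0, 0.0, 1.0, 1.0]])
    loss = loss_fn(pred, target, reduction='mean')  # all |diffs| < beta, so the quadratic branch applies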
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/shape_spec.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # The code is based on:
16 | # https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/shape_spec.py
17 |
18 | from collections import namedtuple
19 |
20 |
21 | class ShapeSpec(
22 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
23 | def __new__(cls, channels=None, height=None, width=None, stride=None):
24 | return super(ShapeSpec, cls).__new__(cls, channels, height, width,
25 | stride)
26 |
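Usage is straightforward; any field left unset defaults to None:

    spec = ShapeSpec(channels=256, stride=8)
    spec.channels, spec.height, spec.width, spec.stride  # (256, None, None, 8)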
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .utils import *
16 | from .matchers import *
17 | from .position_encoding import *
18 | from .rtdetr_transformer import *
19 | from .dino_transformer import *
20 | from .hybrid_encoder import *
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/setup_ms_deformable_attn_op.py:
--------------------------------------------------------------------------------
1 | from paddle.utils.cpp_extension import CUDAExtension, setup
2 |
3 | if __name__ == "__main__":
4 | setup(
5 | name='deformable_detr_ops',
6 | ext_modules=CUDAExtension(
7 | sources=['ms_deformable_attn_op.cc', 'ms_deformable_attn_op.cu']))
8 |
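A hedged note on usage: this custom op is typically compiled in place from this directory, e.g. `python setup_ms_deformable_attn_op.py install`, which assumes a CUDA toolkit matching the installed Paddle build (see the README alongside it for the exact steps).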
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import optimizer
16 | from . import ema
17 |
18 | from .optimizer import *
19 | from .ema import *
20 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/optimizer/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.nn as nn
20 |
21 | from typing import List
22 |
23 |
24 | def get_bn_running_state_names(model: nn.Layer) -> List[str]:
25 |     """Get the full names of all BN running states (running mean and variance).
26 |     """
27 | names = []
28 | for n, m in model.named_sublayers():
29 | if isinstance(m, (nn.BatchNorm2D, nn.SyncBatchNorm)):
30 | assert hasattr(m, '_mean'), f'assert {m} has _mean'
31 | assert hasattr(m, '_variance'), f'assert {m} has _variance'
32 | running_mean = f'{n}._mean'
33 | running_var = f'{n}._variance'
34 | names.extend([running_mean, running_var])
35 |
36 | return names
37 |
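A small sketch of what the helper returns (the name prefixes depend on how the model is composed):

    import paddle.nn as nn

    m = nn.Sequential(nn.Conv2D(3, 8, 3), nn.BatchNorm2D(8))
    get_bn_running_state_names(m)  # ['1._mean', '1._variance']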
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/colormap.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | from __future__ import unicode_literals
19 |
20 | import numpy as np
21 |
22 |
23 | def colormap(rgb=False):
24 | """
25 | Get colormap
26 |
27 | The code of this function is copied from https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/colormap.py
28 | """
29 | color_list = np.array([
30 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494,
31 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078,
32 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000,
33 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000,
34 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667,
35 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000,
36 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000,
37 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000,
38 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500,
39 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667,
40 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333,
41 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000,
42 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333,
43 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000,
44 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000,
45 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167,
46 | 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000,
47 | 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000,
48 | 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000,
49 | 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000,
50 | 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833,
51 | 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286,
52 | 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714,
53 | 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000
54 | ]).astype(np.float32)
55 | color_list = color_list.reshape((-1, 3)) * 255
56 | if not rgb:
57 | color_list = color_list[:, ::-1]
58 | return color_list.astype('int32')
59 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | import os
17 | import sys
18 |
19 | import paddle.distributed as dist
20 |
21 | __all__ = ['setup_logger']
22 |
23 | logger_initialized = []
24 |
25 |
26 | def setup_logger(name="ppdet", output=None):
27 | """
28 | Initialize logger and set its verbosity level to INFO.
29 | Args:
30 |         output (str): a file name or a directory in which to save logs. If None,
31 |             no log file is saved. If it ends with ".txt" or ".log", it is treated
32 |             as a file name; otherwise logs are saved to `output/log.txt`.
33 | name (str): the root module name of this logger
34 |
35 | Returns:
36 | logging.Logger: a logger
37 | """
38 | logger = logging.getLogger(name)
39 | if name in logger_initialized:
40 | return logger
41 |
42 | logger.setLevel(logging.INFO)
43 | logger.propagate = False
44 |
45 | formatter = logging.Formatter(
46 | "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
47 | datefmt="%m/%d %H:%M:%S")
48 | # stdout logging: master only
49 | local_rank = dist.get_rank()
50 | if local_rank == 0:
51 | ch = logging.StreamHandler(stream=sys.stdout)
52 | ch.setLevel(logging.DEBUG)
53 | ch.setFormatter(formatter)
54 | logger.addHandler(ch)
55 |
56 | # file logging: all workers
57 | if output is not None:
58 | if output.endswith(".txt") or output.endswith(".log"):
59 | filename = output
60 | else:
61 | filename = os.path.join(output, "log.txt")
62 | if local_rank > 0:
63 | filename = filename + ".rank{}".format(local_rank)
64 |         os.makedirs(os.path.dirname(filename), exist_ok=True)
65 | fh = logging.FileHandler(filename, mode='a')
66 | fh.setLevel(logging.DEBUG)
67 | fh.setFormatter(logging.Formatter())
68 | logger.addHandler(fh)
69 | logger_initialized.append(name)
70 | return logger
71 |
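Typical usage, assuming an `output` directory (stdout logging happens only on rank 0; every worker writes its own log file):

    logger = setup_logger('ppdet', output='./output')  # file lands in ./output/log.txt
    logger.info('training started')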
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/stats.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import collections
16 | import numpy as np
17 |
18 | __all__ = ['SmoothedValue', 'TrainingStats']
19 |
20 |
21 | class SmoothedValue(object):
22 | """Track a series of values and provide access to smoothed values over a
23 | window or the global series average.
24 | """
25 |
26 | def __init__(self, window_size=20, fmt=None):
27 | if fmt is None:
28 | fmt = "{median:.4f} ({avg:.4f})"
29 | self.deque = collections.deque(maxlen=window_size)
30 | self.fmt = fmt
31 | self.total = 0.
32 | self.count = 0
33 |
34 | def update(self, value, n=1):
35 | self.deque.append(value)
36 | self.count += n
37 | self.total += value * n
38 |
39 | @property
40 | def median(self):
41 | return np.median(self.deque)
42 |
43 | @property
44 | def avg(self):
45 | return np.mean(self.deque)
46 |
47 | @property
48 | def max(self):
49 | return np.max(self.deque)
50 |
51 | @property
52 | def value(self):
53 | return self.deque[-1]
54 |
55 | @property
56 | def global_avg(self):
57 | return self.total / self.count
58 |
59 | def __str__(self):
60 | return self.fmt.format(
61 | median=self.median, avg=self.avg, max=self.max, value=self.value)
62 |
63 |
64 | class TrainingStats(object):
65 | def __init__(self, window_size, delimiter=' '):
66 | self.meters = None
67 | self.window_size = window_size
68 | self.delimiter = delimiter
69 |
70 | def update(self, stats):
71 | if self.meters is None:
72 | self.meters = {
73 | k: SmoothedValue(self.window_size)
74 | for k in stats.keys()
75 | }
76 | for k, v in self.meters.items():
77 | v.update(float(stats[k]))
78 |
79 | def get(self, extras=None):
80 | stats = collections.OrderedDict()
81 | if extras:
82 | for k, v in extras.items():
83 | stats[k] = v
84 | for k, v in self.meters.items():
85 | stats[k] = format(v.median, '.6f')
86 |
87 | return stats
88 |
89 | def log(self, extras=None):
90 | d = self.get(extras)
91 | strs = []
92 | for k, v in d.items():
93 | strs.append("{}: {}".format(k, str(v)))
94 | return self.delimiter.join(strs)
95 |
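A short usage sketch with made-up loss values; `log` reports the windowed median of each meter:

    stats = TrainingStats(window_size=20)
    for step in range(100):
        stats.update({'loss': 1.0 / (step + 1)})
    print(stats.log(extras={'epoch': 0}))  # -> "epoch: 0 loss: 0.011050"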
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/voc_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import os.path as osp
21 | import re
22 | import random
23 |
24 | __all__ = ['create_list']
25 |
26 |
27 | def create_list(devkit_dir, years, output_dir):
28 | """
29 |     create the following lists:
30 | 1. trainval.txt
31 | 2. test.txt
32 | """
33 | trainval_list = []
34 | test_list = []
35 | for year in years:
36 | trainval, test = _walk_voc_dir(devkit_dir, year, output_dir)
37 | trainval_list.extend(trainval)
38 | test_list.extend(test)
39 |
40 | random.shuffle(trainval_list)
41 | with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
42 | for item in trainval_list:
43 | ftrainval.write(item[0] + ' ' + item[1] + '\n')
44 |
45 |     with open(osp.join(output_dir, 'test.txt'), 'w') as ftest:
46 |         for item in test_list:
47 |             ftest.write(item[0] + ' ' + item[1] + '\n')
50 |
51 |
52 | def _get_voc_dir(devkit_dir, year, subdir):
53 |     return osp.join(devkit_dir, 'VOC' + year, subdir)
54 |
55 |
56 | def _walk_voc_dir(devkit_dir, year, output_dir):
57 | filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main')
58 | annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations')
59 | img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages')
60 | trainval_list = []
61 | test_list = []
62 | added = set()
63 |
64 | for _, _, files in os.walk(filelist_dir):
65 | for fname in files:
66 | img_ann_list = []
67 | if re.match(r'[a-z]+_trainval\.txt', fname):
68 | img_ann_list = trainval_list
69 | elif re.match(r'[a-z]+_test\.txt', fname):
70 | img_ann_list = test_list
71 | else:
72 | continue
73 | fpath = osp.join(filelist_dir, fname)
74 |             with open(fpath) as flist:
75 |                 for line in flist:
76 |                     name_prefix = line.strip().split()[0]
77 |                     if name_prefix in added:
78 |                         continue
79 |                     added.add(name_prefix)
80 |                     ann_path = osp.join(
81 |                         osp.relpath(annotation_dir, output_dir),
82 |                         name_prefix + '.xml')
83 |                     img_path = osp.join(
84 |                         osp.relpath(img_dir, output_dir), name_prefix + '.jpg')
85 |                     img_ann_list.append((img_path, ann_path))
86 |
87 |     return trainval_list, test_list
88 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/version.py:
--------------------------------------------------------------------------------
1 | # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
2 | #
3 | full_version = '2.4.0'
4 | commit = '87ed5ba91eaeb332e8e5c3f4e7d5b1d765c75644'
5 |
--------------------------------------------------------------------------------
/rtdetr_paddle/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle-gpu==2.4.2
2 | tqdm
3 | typeguard
4 | visualdl>=2.2.0
5 | opencv-python <= 4.6.0
6 | PyYAML
7 | shapely
8 | scipy
9 | terminaltables
10 | Cython
11 | pycocotools
12 | setuptools
13 |
--------------------------------------------------------------------------------
/rtdetr_paddle/tools/slice_image.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import argparse
16 | from tqdm import tqdm
17 |
18 |
19 | def slice_data(image_dir, dataset_json_path, output_dir, slice_size,
20 | overlap_ratio):
21 |     try:
22 |         from sahi.scripts.slice_coco import slice
23 |     except Exception as e:
24 |         raise RuntimeError(
25 |             'Unable to use sahi to slice images; please install it first, e.g. '
26 |             '`pip install sahi`, see https://github.com/obss/sahi') from e
27 | tqdm.write(
28 | f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}")
29 | slice(
30 | image_dir=image_dir,
31 | dataset_json_path=dataset_json_path,
32 | output_dir=output_dir,
33 | slice_size=slice_size,
34 | overlap_ratio=overlap_ratio, )
35 |
36 |
37 | def main():
38 | parser = argparse.ArgumentParser()
39 | parser.add_argument(
40 | '--image_dir', type=str, default=None, help="The image folder path.")
41 | parser.add_argument(
42 | '--json_path', type=str, default=None, help="Dataset json path.")
43 | parser.add_argument(
44 | '--output_dir', type=str, default=None, help="Output dir.")
45 | parser.add_argument(
46 | '--slice_size', type=int, default=500, help="slice_size")
47 | parser.add_argument(
48 | '--overlap_ratio', type=float, default=0.25, help="overlap_ratio")
49 | args = parser.parse_args()
50 |
51 | slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size,
52 | args.overlap_ratio)
53 |
54 |
55 | if __name__ == "__main__":
56 | main()
57 |
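An illustrative invocation (all paths are placeholders):

    python tools/slice_image.py --image_dir dataset/coco/train2017 \
        --json_path dataset/coco/annotations/instances_train2017.json \
        --output_dir output/coco_sliced --slice_size 640 --overlap_ratio 0.25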
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/dataset/coco_detection.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | num_classes: 80
4 | remap_mscoco_category: True
5 |
6 | train_dataloader:
7 | type: DataLoader
8 | dataset:
9 | type: CocoDetection
10 | img_folder: ./dataset/coco/train2017/
11 | ann_file: ./dataset/coco/annotations/instances_train2017.json
12 | transforms:
13 | type: Compose
14 | ops: ~
15 | shuffle: True
16 | batch_size: 8
17 | num_workers: 4
18 | drop_last: True
19 |
20 |
21 | val_dataloader:
22 | type: DataLoader
23 | dataset:
24 | type: CocoDetection
25 | img_folder: ./dataset/coco/val2017/
26 | ann_file: ./dataset/coco/annotations/instances_val2017.json
27 | transforms:
28 | type: Compose
29 | ops: ~
30 |
31 | shuffle: False
32 | batch_size: 8
33 | num_workers: 4
34 | drop_last: False
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/dataloader.yml:
--------------------------------------------------------------------------------
1 | # num_classes: 91
2 | # remap_mscoco_category: True
3 |
4 | train_dataloader:
5 | dataset:
6 | return_masks: False
7 | transforms:
8 | ops:
9 | - {type: RandomPhotometricDistort, p: 0.5}
10 | - {type: RandomZoomOut, fill: 0}
11 | - {type: RandomIoUCrop, p: 0.8}
12 | - {type: SanitizeBoundingBox, min_size: 1}
13 | - {type: RandomHorizontalFlip}
14 | - {type: Resize, size: [640, 640], }
15 | # - {type: Resize, size: 639, max_size: 640}
16 | # - {type: PadToSize, spatial_size: 640}
17 | - {type: ToImageTensor}
18 | - {type: ConvertDtype}
19 | - {type: SanitizeBoundingBox, min_size: 1}
20 | - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True}
21 | shuffle: True
22 | batch_size: 4
23 | num_workers: 4
24 | collate_fn: default_collate_fn
25 |
26 |
27 | val_dataloader:
28 | dataset:
29 | transforms:
30 | ops:
31 | # - {type: Resize, size: 639, max_size: 640}
32 | # - {type: PadToSize, spatial_size: 640}
33 | - {type: Resize, size: [640, 640]}
34 | - {type: ToImageTensor}
35 | - {type: ConvertDtype}
36 | shuffle: False
37 | batch_size: 8
38 | num_workers: 4
39 | collate_fn: default_collate_fn
40 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/dataloader_regnet.yml:
--------------------------------------------------------------------------------
1 | # num_classes: 91
2 | # remap_mscoco_category: True
3 |
4 | train_dataloader:
5 | dataset:
6 | return_masks: False
7 | transforms:
8 | ops:
9 | - {type: RandomPhotometricDistort, p: 0.5}
10 | - {type: RandomZoomOut, fill: 0}
11 | - {type: RandomIoUCrop, p: 0.8}
12 | - {type: SanitizeBoundingBox, min_size: 1}
13 | - {type: RandomHorizontalFlip}
14 | - {type: Resize, size: [640, 640], }
15 | # - {type: Resize, size: 639, max_size: 640}
16 | # - {type: PadToSize, spatial_size: 640}
17 | - {type: ToImageTensor}
18 | - {type: ConvertDtype}
19 | - {type: SanitizeBoundingBox, min_size: 1}
20 | - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True}
21 | shuffle: True
22 | batch_size: 8
23 | num_workers: 2
24 | collate_fn: default_collate_fn
25 |
26 |
27 | val_dataloader:
28 | dataset:
29 | transforms:
30 | ops:
31 | # - {type: Resize, size: 639, max_size: 640}
32 | # - {type: PadToSize, spatial_size: 640}
33 | - {type: Resize, size: [640, 640]}
34 | - {type: ToImageTensor}
35 | - {type: ConvertDtype}
36 | shuffle: False
37 | batch_size: 8
38 | num_workers: 2
39 | collate_fn: default_collate_fn
40 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/optimizer.yml:
--------------------------------------------------------------------------------
1 |
2 | use_ema: True
3 | ema:
4 | type: ModelEMA
5 | decay: 0.9999
6 | warmups: 2000
7 |
8 |
9 | find_unused_parameters: True
10 |
11 | epoches: 72
12 | clip_max_norm: 0.1
13 |
14 | optimizer:
15 | type: AdamW
16 | params:
17 | -
18 | params: 'backbone'
19 | lr: 0.00001
20 | -
21 | params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$'
22 | weight_decay: 0.
23 | -
24 | params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$'
25 | weight_decay: 0.
26 |
27 | lr: 0.0001
28 | betas: [0.9, 0.999]
29 | weight_decay: 0.0001
30 |
31 |
32 | lr_scheduler:
33 | type: MultiStepLR
34 | milestones: [1000]
35 | gamma: 0.1
36 |
37 |
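The `params` entries above are regular expressions; a minimal sketch of the matching semantics, assuming the trainer tests them with `re.match` against `named_parameters()` keys (the parameter names below are illustrative), with unmatched parameters falling through to the top-level `lr`/`weight_decay`:

    import re

    patterns = [
        'backbone',
        '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$',
        '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$',
    ]
    for name in ['backbone.conv1.weight', 'encoder.norm1.weight', 'decoder.layers.0.bias']:
        group = next((p for p in patterns if re.match(p, name)), 'default')
        print(name, '->', group)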
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/optimizer_regnet.yml:
--------------------------------------------------------------------------------
1 |
2 | use_ema: True
3 | ema:
4 | type: ModelEMA
5 | decay: 0.9999
6 | warmups: 2000
7 |
8 |
9 | find_unused_parameters: True
10 |
11 | epoches: 72
12 | clip_max_norm: 0.1
13 |
14 | optimizer:
15 | type: AdamW
16 | params:
17 | -
18 | params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$'
19 | weight_decay: 0.
20 | -
21 | params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$'
22 | weight_decay: 0.
23 |
24 | lr: 0.0001
25 | betas: [0.9, 0.999]
26 | weight_decay: 0.0001
27 |
28 |
29 | lr_scheduler:
30 | type: MultiStepLR
31 | milestones: [1000]
32 | gamma: 0.1
33 |
34 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/rtdetr_dla34.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: SetCriterion
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | RTDETR:
9 | backbone: DLANet
10 | encoder: HybridEncoder
11 | decoder: RTDETRTransformer
12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
13 |
14 | DLANet:
15 | dla: dla34
16 | pretrained: True
17 | return_idx: [1, 2, 3]
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | feat_strides: [8, 16, 32]
23 |
24 | # intra
25 | hidden_dim: 256
26 | use_encoder_idx: [2]
27 | num_encoder_layers: 1
28 | nhead: 8
29 | dim_feedforward: 1024
30 | dropout: 0.
31 | enc_act: 'gelu'
32 | pe_temperature: 10000
33 |
34 | # cross
35 | expansion: 1.0
36 | depth_mult: 1
37 | act: 'silu'
38 |
39 | # eval
40 | eval_spatial_size: [640, 640]
41 |
42 |
43 | RTDETRTransformer:
44 | feat_channels: [256, 256, 256]
45 | feat_strides: [8, 16, 32]
46 | hidden_dim: 256
47 | num_levels: 3
48 |
49 | num_queries: 300
50 |
51 | num_decoder_layers: 6
52 | num_denoising: 100
53 |
54 | eval_idx: -1
55 | eval_spatial_size: [640, 640]
56 |
57 |
58 | use_focal_loss: True
59 |
60 | RTDETRPostProcessor:
61 | num_top_queries: 300
62 |
63 |
64 | SetCriterion:
65 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
66 | losses: ['vfl', 'boxes', ]
67 | alpha: 0.75
68 | gamma: 2.0
69 |
70 | matcher:
71 | type: HungarianMatcher
72 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
73 | # use_focal_loss: True
74 | alpha: 0.25
75 | gamma: 2.0
76 |
77 |
78 |
79 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: SetCriterion
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | RTDETR:
9 | backbone: PResNet
10 | encoder: HybridEncoder
11 | decoder: RTDETRTransformer
12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
13 |
14 | PResNet:
15 | depth: 50
16 | variant: d
17 | freeze_at: 0
18 | return_idx: [1, 2, 3]
19 | num_stages: 4
20 | freeze_norm: True
21 | pretrained: True
22 |
23 | HybridEncoder:
24 | in_channels: [512, 1024, 2048]
25 | feat_strides: [8, 16, 32]
26 |
27 | # intra
28 | hidden_dim: 256
29 | use_encoder_idx: [2]
30 | num_encoder_layers: 1
31 | nhead: 8
32 | dim_feedforward: 1024
33 | dropout: 0.
34 | enc_act: 'gelu'
35 | pe_temperature: 10000
36 |
37 | # cross
38 | expansion: 1.0
39 | depth_mult: 1
40 | act: 'silu'
41 |
42 | # eval
43 | eval_spatial_size: [640, 640]
44 |
45 |
46 | RTDETRTransformer:
47 | feat_channels: [256, 256, 256]
48 | feat_strides: [8, 16, 32]
49 | hidden_dim: 256
50 | num_levels: 3
51 |
52 | num_queries: 300
53 |
54 | num_decoder_layers: 6
55 | num_denoising: 100
56 |
57 | eval_idx: -1
58 | eval_spatial_size: [640, 640]
59 |
60 |
61 | use_focal_loss: True
62 |
63 | RTDETRPostProcessor:
64 | num_top_queries: 300
65 |
66 |
67 | SetCriterion:
68 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
69 | losses: ['vfl', 'boxes', ]
70 | alpha: 0.75
71 | gamma: 2.0
72 |
73 | matcher:
74 | type: HungarianMatcher
75 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
76 | # use_focal_loss: True
77 | alpha: 0.25
78 | gamma: 2.0
79 |
80 |
81 |
82 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/rtdetr_regnet.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: SetCriterion
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | RTDETR:
9 | backbone: RegNet
10 | encoder: HybridEncoder
11 | decoder: RTDETRTransformer
12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
13 |
14 |
15 | RegNet:
16 | return_idx: [1, 2, 3]
17 | configuration: RegNetConfig()
18 |
19 | HybridEncoder:
20 | in_channels: [192, 512, 1088]
21 | feat_strides: [8, 16, 32]
22 |
23 | # intra
24 | hidden_dim: 256
25 | use_encoder_idx: [2]
26 | num_encoder_layers: 1
27 | nhead: 8
28 | dim_feedforward: 1024
29 | dropout: 0.
30 | enc_act: 'gelu'
31 | pe_temperature: 10000
32 |
33 | # cross
34 | expansion: 1.0
35 | depth_mult: 1
36 | act: 'silu'
37 |
38 | # eval
39 | eval_spatial_size: [640, 640]
40 |
41 |
42 | RTDETRTransformer:
43 | feat_channels: [256, 256, 256]
44 | feat_strides: [8, 16, 32]
45 | hidden_dim: 256
46 | num_levels: 3
47 |
48 | num_queries: 300
49 |
50 | num_decoder_layers: 6
51 | num_denoising: 100
52 |
53 | eval_idx: -1
54 | eval_spatial_size: [640, 640]
55 |
56 |
57 | use_focal_loss: True
58 |
59 | RTDETRPostProcessor:
60 | num_top_queries: 300
61 |
62 |
63 | SetCriterion:
64 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
65 | losses: ['vfl', 'boxes', ]
66 | alpha: 0.75
67 | gamma: 2.0
68 |
69 | matcher:
70 | type: HungarianMatcher
71 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
72 | # use_focal_loss: True
73 | alpha: 0.25
74 | gamma: 2.0
75 |
76 |
77 |
78 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_dla34_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_dla34.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_dla34_6x_coco
10 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | PResNet:
10 | depth: 101
11 |
12 |
13 | HybridEncoder:
14 | # intra
15 | hidden_dim: 384
16 | dim_feedforward: 2048
17 |
18 |
19 | RTDETRTransformer:
20 | feat_channels: [384, 384, 384]
21 |
22 |
23 | optimizer:
24 | type: AdamW
25 | params:
26 | -
27 | params: 'backbone'
28 | lr: 0.000001
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r18vd_6x_coco
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 | HybridEncoder:
20 | in_channels: [128, 256, 512]
21 | hidden_dim: 256
22 | expansion: 0.5
23 |
24 |
25 | RTDETRTransformer:
26 | eval_idx: -1
27 | num_decoder_layers: 3
28 | num_denoising: 100
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*norm).*$'
37 | lr: 0.00001
38 | weight_decay: 0.
39 | -
40 | params: '^(?=.*backbone)(?!.*norm).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
50 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r34vd_6x_coco
12 |
13 |
14 | PResNet:
15 | depth: 34
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformer:
28 | num_decoder_layers: 4
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*norm|bn).*$'
37 | weight_decay: 0.
38 | lr: 0.00001
39 | -
40 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_6x_coco
10 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco
10 |
11 |
12 | HybridEncoder:
13 | expansion: 0.5
14 |
15 | RTDETRTransformer:
16 |   eval_idx: 2  # use the 3rd decoder layer for eval
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_regnet_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader_regnet.yml',
5 | './include/optimizer_regnet.yml',
6 | './include/rtdetr_regnet.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_regnet_6x_coco
10 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | sync_bn: True
2 | find_unused_parameters: False
3 |
4 |
5 | use_amp: False
6 |
7 | scaler:
8 | type: GradScaler
9 | enabled: True
10 |
11 |
12 | use_ema: False
13 | ema:
14 | type: ModelEMA
15 | decay: 0.9999
16 | warmups: 2000
17 |
18 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==2.0.1
2 | torchvision==0.15.2
3 | onnx==1.14.0
4 | onnxruntime==1.15.1
5 | pycocotools
6 | PyYAML
7 | scipy
8 | transformers
9 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from . import data
3 | from . import nn
4 | from . import optim
5 | from . import zoo
6 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/core/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | # from .yaml_utils import register, create, load_config, merge_config, merge_dict
5 | from .yaml_utils import *
6 | from .config import BaseConfig
7 | from .yaml_config import YAMLConfig
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .coco import *
3 | from .cifar10 import CIFAR10
4 |
5 | from .dataloader import *
6 | from .transforms import *
7 |
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/cifar10/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import torchvision
3 | from typing import Optional, Callable
4 |
5 | from src.core import register
6 |
7 |
8 | @register
9 | class CIFAR10(torchvision.datasets.CIFAR10):
10 | __inject__ = ['transform', 'target_transform']
11 |
12 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None:
13 | super().__init__(root, train, transform, target_transform, download)
14 |
15 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/coco/__init__.py:
--------------------------------------------------------------------------------
1 | from .coco_dataset import (
2 | CocoDetection,
3 | mscoco_category2label,
4 | mscoco_label2category,
5 | mscoco_category2name,
6 | )
7 | from .coco_eval import *
8 |
9 | from .coco_utils import get_coco_api_from_dataset
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data as data
3 |
4 | from src.core import register
5 |
6 |
7 | __all__ = ['DataLoader']
8 |
9 |
10 | @register
11 | class DataLoader(data.DataLoader):
12 | __inject__ = ['dataset', 'collate_fn']
13 |
14 | def __repr__(self) -> str:
15 | format_string = self.__class__.__name__ + "("
16 | for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']:
17 | format_string += "\n"
18 | format_string += " {0}: {1}".format(n, getattr(self, n))
19 | format_string += "\n)"
20 | return format_string
21 |
22 |
23 |
24 | @register
25 | def default_collate_fn(items):
26 | '''default collate_fn
27 | '''
28 | return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items]
29 |
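The collate function stacks the images into one batch tensor and keeps the targets as a list of dicts, e.g.:

    import torch

    items = [(torch.zeros(3, 640, 640), {'labels': torch.tensor([1])}),
             (torch.ones(3, 640, 640), {'labels': torch.tensor([2])})]
    images, targets = default_collate_fn(items)
    images.shape  # torch.Size([2, 3, 640, 640])
    len(targets)  # 2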
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/misc/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .logger import *
3 | from .visualizer import *
4 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/misc/visualizer.py:
--------------------------------------------------------------------------------
1 | """"by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.utils.data
6 |
7 | import torchvision
8 | torchvision.disable_beta_transforms_warning()
9 |
10 | import PIL
11 |
12 | __all__ = ['show_sample']
13 |
14 | def show_sample(sample):
15 | """for coco dataset/dataloader
16 | """
17 | import matplotlib.pyplot as plt
18 | from torchvision.transforms.v2 import functional as F
19 | from torchvision.utils import draw_bounding_boxes
20 |
21 | image, target = sample
22 | if isinstance(image, PIL.Image.Image):
23 | image = F.to_image_tensor(image)
24 |
25 | image = F.convert_dtype(image, torch.uint8)
26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)
27 |
28 | fig, ax = plt.subplots()
29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy())
30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
31 | fig.tight_layout()
32 | fig.show()
33 | plt.show()
34 |
35 |
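A self-contained sketch with a dummy sample (boxes are xyxy pixel coordinates, as `draw_bounding_boxes` expects; the values are illustrative):

    import torch

    sample = (torch.zeros(3, 320, 320, dtype=torch.uint8),
              {'boxes': torch.tensor([[20., 20., 120., 160.]])})
    show_sample(sample)  # opens a matplotlib window with one yellow box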
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .arch import *
3 | from .criterion import *
4 |
5 | #
6 | from .backbone import *
7 |
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/arch/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification import *
2 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/arch/classification.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from src.core import register
5 |
6 |
7 | __all__ = ['Classification', 'ClassHead']
8 |
9 |
10 | @register
11 | class Classification(nn.Module):
12 | __inject__ = ['backbone', 'head']
13 |
14 | def __init__(self, backbone: nn.Module, head: nn.Module=None):
15 | super().__init__()
16 |
17 | self.backbone = backbone
18 | self.head = head
19 |
20 | def forward(self, x):
21 | x = self.backbone(x)
22 |
23 | if self.head is not None:
24 | x = self.head(x)
25 |
26 | return x
27 |
28 |
29 | @register
30 | class ClassHead(nn.Module):
31 | def __init__(self, hidden_dim, num_classes):
32 | super().__init__()
33 | self.pool = nn.AdaptiveAvgPool2d(1)
34 | self.proj = nn.Linear(hidden_dim, num_classes)
35 |
36 | def forward(self, x):
37 | x = x[0] if isinstance(x, (list, tuple)) else x
38 | x = self.pool(x)
39 | x = x.reshape(x.shape[0], -1)
40 | x = self.proj(x)
41 | return x
42 |
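A quick sketch wiring the pieces together, here with the toy `MResNet` from test_resnet.py, which already ends in a classifier, so no extra head is attached (`ClassHead` would instead follow a backbone that returns feature maps):

    import torch

    model = Classification(backbone=MResNet(num_classes=10), head=None)
    logits = model(torch.randn(2, 3, 32, 32))  # shape [2, 10]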
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .presnet import *
3 | from .test_resnet import *
4 | from .regnet import *
5 | from .common import *
6 | from .dla import *
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/regnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from transformers import RegNetModel
4 |
5 |
6 | from src.core import register
7 |
8 | __all__ = ['RegNet']
9 |
10 | @register
11 | class RegNet(nn.Module):
12 |     def __init__(self, configuration, return_idx=[0, 1, 2, 3]):
13 |         super(RegNet, self).__init__()
14 |         # `configuration` is accepted for config compatibility but is currently
15 |         # unused: pretrained facebook/regnet-y-040 weights are always loaded
16 |         self.model = RegNetModel.from_pretrained("facebook/regnet-y-040")
17 |         self.return_idx = return_idx
18 |
19 |     def forward(self, x):
20 |         outputs = self.model(x, output_hidden_states=True)
21 |         # hidden_states holds the stem output followed by the four stage outputs;
22 |         # [2:5] keeps the last three stages, matching return_idx [1, 2, 3]
23 |         x = outputs.hidden_states[2:5]
24 |         return x
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/test_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from collections import OrderedDict
6 |
7 |
8 | from src.core import register
9 |
10 |
11 | class BasicBlock(nn.Module):
12 | expansion = 1
13 |
14 | def __init__(self, in_planes, planes, stride=1):
15 | super(BasicBlock, self).__init__()
16 |
17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 |
20 |         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
21 | self.bn2 = nn.BatchNorm2d(planes)
22 |
23 | self.shortcut = nn.Sequential()
24 | if stride != 1 or in_planes != self.expansion*planes:
25 | self.shortcut = nn.Sequential(
26 |                 nn.Conv2d(in_planes, self.expansion*planes, kernel_size=1, stride=stride, bias=False),
27 | nn.BatchNorm2d(self.expansion*planes)
28 | )
29 | def forward(self, x):
30 | out = F.relu(self.bn1(self.conv1(x)))
31 | out = self.bn2(self.conv2(out))
32 | out += self.shortcut(x)
33 | out = F.relu(out)
34 | return out
35 |
36 |
37 |
38 | class _ResNet(nn.Module):
39 | def __init__(self, block, num_blocks, num_classes=10):
40 | super().__init__()
41 | self.in_planes = 64
42 |
43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
44 | self.bn1 = nn.BatchNorm2d(64)
45 |
46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
50 |
51 | self.linear = nn.Linear(512 * block.expansion, num_classes)
52 |
53 | def _make_layer(self, block, planes, num_blocks, stride):
54 | strides = [stride] + [1]*(num_blocks-1)
55 | layers = []
56 | for stride in strides:
57 | layers.append(block(self.in_planes, planes, stride))
58 | self.in_planes = planes * block.expansion
59 | return nn.Sequential(*layers)
60 |
61 | def forward(self, x):
62 | out = F.relu(self.bn1(self.conv1(x)))
63 | out = self.layer1(out)
64 | out = self.layer2(out)
65 | out = self.layer3(out)
66 | out = self.layer4(out)
67 | out = F.avg_pool2d(out, 4)
68 | out = out.view(out.size(0), -1)
69 | out = self.linear(out)
70 | return out
71 |
72 |
73 | @register
74 | class MResNet(nn.Module):
75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
76 | super().__init__()
77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes)
78 |
79 | def forward(self, x):
80 | return self.model(x)
81 |
82 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py
3 |
4 | by lyuwenyu
5 | """
6 |
7 | from collections import OrderedDict
8 | from typing import Dict, List
9 |
10 |
11 | import torch.nn as nn
12 |
13 |
14 | class IntermediateLayerGetter(nn.ModuleDict):
15 | """
16 | Module wrapper that returns intermediate layers from a model
17 |
18 | It has a strong assumption that the modules have been registered
19 | into the model in the same order as they are used.
20 | This means that one should **not** reuse the same nn.Module
21 | twice in the forward if you want this to work.
22 |
23 | Additionally, it is only able to query submodules that are directly
24 | assigned to the model. So if `model` is passed, `model.feature1` can
25 | be returned, but not `model.feature1.layer2`.
26 | """
27 |
28 | _version = 3
29 |
30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]):
32 | raise ValueError("return_layers are not present in model. {}"\
33 | .format([name for name, _ in model.named_children()]))
34 | orig_return_layers = return_layers
35 | return_layers = {str(k): str(k) for k in return_layers}
36 | layers = OrderedDict()
37 | for name, module in model.named_children():
38 | layers[name] = module
39 | if name in return_layers:
40 | del return_layers[name]
41 | if not return_layers:
42 | break
43 |
44 | super().__init__(layers)
45 | self.return_layers = orig_return_layers
46 |
47 | def forward(self, x):
48 | # out = OrderedDict()
49 | outputs = []
50 | for name, module in self.items():
51 | x = module(x)
52 | if name in self.return_layers:
53 | # out_name = self.return_layers[name]
54 | # out[out_name] = x
55 | outputs.append(x)
56 |
57 | return outputs
58 |
59 |
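A minimal sketch with a toy backbone (layer names are illustrative); outputs come back in registration order:

    import torch
    import torch.nn as nn
    from collections import OrderedDict

    backbone = nn.Sequential(OrderedDict([
        ('stem', nn.Conv2d(3, 8, 3, padding=1)),
        ('layer1', nn.Conv2d(8, 16, 3, stride=2, padding=1)),
        ('layer2', nn.Conv2d(16, 32, 3, stride=2, padding=1)),
    ]))
    getter = IntermediateLayerGetter(backbone, return_layers=['layer1', 'layer2'])
    feats = getter(torch.randn(1, 3, 64, 64))
    [f.shape for f in feats]  # [1, 16, 32, 32] and [1, 32, 16, 16]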
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/criterion/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import torch.nn as nn
3 | from src.core import register
4 |
5 | CrossEntropyLoss = register(nn.CrossEntropyLoss)
6 |
7 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/criterion/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 |
4 |
5 |
6 | def format_target(targets):
7 | '''
8 | Args:
9 | targets (List[Dict]),
10 | Return:
11 | tensor (Tensor), [im_id, label, bbox,]
12 | '''
13 | outputs = []
14 | for i, tgt in enumerate(targets):
15 | boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh')
16 | labels = tgt['labels'].reshape(-1, 1)
17 | im_ids = torch.ones_like(labels) * i
18 | outputs.append(torch.cat([im_ids, labels, boxes], dim=1))
19 |
20 | return torch.cat(outputs, dim=0)
21 |
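A worked example (labels are float here because `torch.cat` requires a single dtype across the concatenated pieces):

    import torch

    targets = [{'boxes': torch.tensor([[0., 0., 10., 10.]]),
                'labels': torch.tensor([3.])}]
    format_target(targets)
    # tensor([[ 0.,  3.,  5.,  5., 10., 10.]])  -> [im_id, label, cx, cy, w, h]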
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .ema import *
3 | from .optim import *
4 | from .amp import *
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/amp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.cuda.amp as amp
4 |
5 |
6 | from src.core import register
7 | import src.misc.dist as dist
8 |
9 |
10 | __all__ = ['GradScaler']
11 |
12 | GradScaler = register(amp.grad_scaler.GradScaler)
13 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/optim.py:
--------------------------------------------------------------------------------
1 |
2 | import torch
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | import torch.optim.lr_scheduler as lr_scheduler
6 |
7 | from src.core import register
8 |
9 |
10 | __all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR']
11 |
12 |
13 |
14 | SGD = register(optim.SGD)
15 | Adam = register(optim.Adam)
16 | AdamW = register(optim.AdamW)
17 |
18 |
19 | MultiStepLR = register(lr_scheduler.MultiStepLR)
20 | CosineAnnealingLR = register(lr_scheduler.CosineAnnealingLR)
21 | OneCycleLR = register(lr_scheduler.OneCycleLR)
22 | LambdaLR = register(lr_scheduler.LambdaLR)
23 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/solver/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | from .solver import BaseSolver
5 | from .det_solver import DetSolver
6 |
7 |
8 | from typing import Dict, Type
9 |
10 | TASKS: Dict[str, Type[BaseSolver]] = {
11 | 'detection': DetSolver,
12 | }
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .rtdetr import *
3 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 |
5 | from .rtdetr import *
6 |
7 | from .hybrid_encoder import *
8 | from .rtdetr_decoder import *
9 | from .rtdetr_postprocessor import *
10 | from .rtdetr_criterion import *
11 |
12 | from .matcher import *
13 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/box_ops.py:
--------------------------------------------------------------------------------
1 | '''
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py
4 | '''
5 |
6 | import torch
7 | from torchvision.ops.boxes import box_area
8 |
9 |
10 | def box_cxcywh_to_xyxy(x):
11 | x_c, y_c, w, h = x.unbind(-1)
12 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
13 | (x_c + 0.5 * w), (y_c + 0.5 * h)]
14 | return torch.stack(b, dim=-1)
15 |
16 |
17 | def box_xyxy_to_cxcywh(x):
18 | x0, y0, x1, y1 = x.unbind(-1)
19 | b = [(x0 + x1) / 2, (y0 + y1) / 2,
20 | (x1 - x0), (y1 - y0)]
21 | return torch.stack(b, dim=-1)
22 |
23 |
24 | # modified from torchvision to also return the union
25 | def box_iou(boxes1, boxes2):
26 | area1 = box_area(boxes1)
27 | area2 = box_area(boxes2)
28 |
29 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
30 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
31 |
32 | wh = (rb - lt).clamp(min=0) # [N,M,2]
33 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
34 |
35 | union = area1[:, None] + area2 - inter
36 |
37 | iou = inter / union
38 | return iou, union
39 |
40 |
41 | def generalized_box_iou(boxes1, boxes2):
42 | """
43 | Generalized IoU from https://giou.stanford.edu/
44 |
45 | The boxes should be in [x0, y0, x1, y1] format
46 |
47 | Returns a [N, M] pairwise matrix, where N = len(boxes1)
48 | and M = len(boxes2)
49 | """
50 | # degenerate boxes gives inf / nan results
51 | # so do an early check
52 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
53 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
54 | iou, union = box_iou(boxes1, boxes2)
55 |
56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
58 |
59 | wh = (rb - lt).clamp(min=0) # [N,M,2]
60 | area = wh[:, :, 0] * wh[:, :, 1]
61 |
62 | return iou - (area - union) / area
63 |
64 |
65 | def masks_to_boxes(masks):
66 | """Compute the bounding boxes around the provided masks
67 |
68 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
69 |
70 | Returns a [N, 4] tensors, with the boxes in xyxy format
71 | """
72 | if masks.numel() == 0:
73 | return torch.zeros((0, 4), device=masks.device)
74 |
75 | h, w = masks.shape[-2:]
76 |
77 | y = torch.arange(0, h, dtype=torch.float)
78 | x = torch.arange(0, w, dtype=torch.float)
79 |     y, x = torch.meshgrid(y, x, indexing='ij')
80 |
81 | x_mask = (masks * x.unsqueeze(0))
82 | x_max = x_mask.flatten(1).max(-1)[0]
83 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
84 |
85 | y_mask = (masks * y.unsqueeze(0))
86 | y_max = y_mask.flatten(1).max(-1)[0]
87 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
88 |
89 | return torch.stack([x_min, y_min, x_max, y_max], 1)
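
A quick numeric check of `box_iou` and `generalized_box_iou` with two arbitrary boxes:

```
import torch

b1 = torch.tensor([[0., 0., 10., 10.]])
b2 = torch.tensor([[5., 5., 15., 15.]])

iou, union = box_iou(b1, b2)        # inter 25, union 175 -> iou ~= 0.143
giou = generalized_box_iou(b1, b2)  # 0.143 - (225 - 175) / 225 ~= -0.079
```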
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/rtdetr.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import random
9 | import numpy as np
10 |
11 | from src.core import register
12 |
13 |
14 | __all__ = ['RTDETR', ]
15 |
16 |
17 | @register
18 | class RTDETR(nn.Module):
19 | __inject__ = ['backbone', 'encoder', 'decoder', ]
20 |
21 | def __init__(self, backbone: nn.Module, encoder, decoder, multi_scale=None):
22 | super().__init__()
23 | self.backbone = backbone
24 | self.decoder = decoder
25 | self.encoder = encoder
26 | self.multi_scale = multi_scale
27 |
28 | def forward(self, x, targets=None):
29 | if self.multi_scale and self.training:
30 | sz = np.random.choice(self.multi_scale)
31 | x = F.interpolate(x, size=[sz, sz])
32 |
33 | x = self.backbone(x)
34 | x = self.encoder(x)
35 | x = self.decoder(x, targets)
36 |
37 | return x
38 |
39 | def deploy(self, ):
40 | self.eval()
41 | for m in self.modules():
42 | if hasattr(m, 'convert_to_deploy'):
43 | m.convert_to_deploy()
44 | return self
45 |
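
A minimal sketch of the multi-scale branch in `RTDETR.forward`, using stand-in modules (placeholders, not the real encoder/decoder):

```
import torch
import torch.nn as nn

class _PassThrough(nn.Module):
    def forward(self, x, targets=None):
        return x

model = RTDETR(backbone=nn.Identity(), encoder=nn.Identity(),
               decoder=_PassThrough(), multi_scale=[480, 640, 800])
model.train()
out = model(torch.randn(1, 3, 640, 640))
print(out.shape[-1] in (480, 640, 800))  # True: input resized to a random training scale
```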
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import torchvision
9 |
10 | from src.core import register
11 |
12 |
13 | __all__ = ['RTDETRPostProcessor']
14 |
15 |
16 | @register
17 | class RTDETRPostProcessor(nn.Module):
18 | __share__ = ['num_classes', 'use_focal_loss', 'num_top_queries', 'remap_mscoco_category']
19 |
20 | def __init__(self, num_classes=80, use_focal_loss=True, num_top_queries=300, remap_mscoco_category=False) -> None:
21 | super().__init__()
22 | self.use_focal_loss = use_focal_loss
23 | self.num_top_queries = num_top_queries
24 | self.num_classes = num_classes
25 | self.remap_mscoco_category = remap_mscoco_category
26 | self.deploy_mode = False
27 |
28 | def extra_repr(self) -> str:
29 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}'
30 |
31 | # def forward(self, outputs, orig_target_sizes):
32 | def forward(self, outputs, orig_target_sizes):
33 |
34 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes']
35 | # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
36 |
37 | bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
38 | bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
39 |
40 | if self.use_focal_loss:
41 | scores = F.sigmoid(logits)
42 |             scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
43 | labels = index % self.num_classes
44 | index = index // self.num_classes
45 | boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]))
46 |
47 | else:
48 |             scores = F.softmax(logits, dim=-1)[:, :, :-1]
49 | scores, labels = scores.max(dim=-1)
50 | boxes = bbox_pred
51 | if scores.shape[1] > self.num_top_queries:
52 | scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
53 | labels = torch.gather(labels, dim=1, index=index)
54 | boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
55 |
56 | # TODO for onnx export
57 | if self.deploy_mode:
58 | return labels, boxes, scores
59 |
60 | # TODO
61 | if self.remap_mscoco_category:
62 | from ...data.coco import mscoco_label2category
63 | labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
64 | .to(boxes.device).reshape(labels.shape)
65 |
66 | results = []
67 | for lab, box, sco in zip(labels, boxes, scores):
68 | result = dict(labels=lab, boxes=box, scores=sco)
69 | results.append(result)
70 |
71 | return results
72 |
73 |
74 | def deploy(self, ):
75 | self.eval()
76 | self.deploy_mode = True
77 | return self
78 |
79 | @property
80 | def iou_types(self, ):
81 | return ('bbox', )
82 |
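
The focal-loss branch above ranks all query-class pairs jointly; this standalone snippet shows how the flattened top-k indices decode back into class and query ids:

```
import torch

num_classes = 3
scores = torch.rand(1, 5, num_classes)                  # [batch, queries, classes]
topk, index = torch.topk(scores.flatten(1), 4, dim=-1)  # over queries * classes
labels  = index % num_classes   # class id of each selected pair
queries = index // num_classes  # query id, used to gather the matching boxes
```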
--------------------------------------------------------------------------------
/rtdetr_pytorch/tools/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Train/test script examples
4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master-port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &`
5 | - `-r path/to/checkpoint`
6 | - `--amp`
7 | - `--test-only`
8 |
9 |
10 | Tuning script examples
11 | - `torchrun --master_port=8844 --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -t https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth`
12 |
13 |
14 | Export script examples
15 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check`
16 |
17 |
18 | GPU does not release memory
19 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9`
20 |
21 |
22 | Save all logs
23 | - Append `&> train.log 2>&1 &` (run in background) or `&> train.log 2>&1` (run in foreground)
24 |
25 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/tools/train.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import os
5 | import sys
6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
7 | import argparse
8 |
9 | import src.misc.dist as dist
10 | from src.core import YAMLConfig
11 | from src.solver import TASKS
12 |
13 |
14 | def main(args, ) -> None:
15 | '''main
16 | '''
17 | dist.init_distributed()
18 | if args.seed is not None:
19 | dist.set_seed(args.seed)
20 |
21 |     assert not all([args.tuning, args.resume]), \
22 |         'Only support from_scratch, resume, or tuning one at a time'
23 |
24 | cfg = YAMLConfig(
25 | args.config,
26 | resume=args.resume,
27 | use_amp=args.amp,
28 | tuning=args.tuning
29 | )
30 |
31 | solver = TASKS[cfg.yaml_cfg['task']](cfg)
32 |
33 | if args.test_only:
34 | solver.val()
35 | else:
36 | solver.fit()
37 |
38 |
39 | if __name__ == '__main__':
40 |
41 | parser = argparse.ArgumentParser()
42 | parser.add_argument('--config', '-c', type=str, )
43 | parser.add_argument('--resume', '-r', type=str, )
44 | parser.add_argument('--tuning', '-t', type=str, )
45 | parser.add_argument('--test-only', action='store_true', default=False,)
46 | parser.add_argument('--amp', action='store_true', default=False,)
47 | parser.add_argument('--seed', type=int, help='seed',)
48 | args = parser.parse_args()
49 |
50 | main(args)
51 |
--------------------------------------------------------------------------------
/rtdetrv2_paddle/readme.md:
--------------------------------------------------------------------------------
1 | see https://github.com/PaddlePaddle/PaddleDetection
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/Dockerfile:
--------------------------------------------------------------------------------
1 | # tensorrt:23.01-py3 (8.5.2.2)
2 | FROM nvcr.io/nvidia/tensorrt:23.01-py3
3 |
4 | WORKDIR /workspace
5 |
6 | COPY requirements.txt .
7 |
8 | RUN pip install --upgrade pip && \
9 | pip install -r requirements.txt
10 |
11 | CMD ["/bin/bash"]
12 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/dataset/coco_detection.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | evaluator:
4 | type: CocoEvaluator
5 | iou_types: ['bbox', ]
6 |
7 | # num_classes: 365
8 | # remap_mscoco_category: False
9 |
10 | # num_classes: 91
11 | # remap_mscoco_category: False
12 |
13 | num_classes: 80
14 | remap_mscoco_category: True
15 |
16 |
17 | train_dataloader:
18 | type: DataLoader
19 | dataset:
20 | type: CocoDetection
21 | img_folder: ./dataset/coco/train2017/
22 | ann_file: ./dataset/coco/annotations/instances_train2017.json
23 | return_masks: False
24 | transforms:
25 | type: Compose
26 | ops: ~
27 | shuffle: True
28 | num_workers: 4
29 | drop_last: True
30 | collate_fn:
31 | type: BatchImageCollateFuncion
32 |
33 |
34 | val_dataloader:
35 | type: DataLoader
36 | dataset:
37 | type: CocoDetection
38 | img_folder: ./dataset/coco/val2017/
39 | ann_file: ./dataset/coco/annotations/instances_val2017.json
40 | return_masks: False
41 | transforms:
42 | type: Compose
43 | ops: ~
44 | shuffle: False
45 | num_workers: 4
46 | drop_last: False
47 | collate_fn:
48 | type: BatchImageCollateFuncion
49 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/dataset/voc_detection.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | evaluator:
4 | type: CocoEvaluator
5 | iou_types: ['bbox', ]
6 |
7 | num_classes: 20
8 |
9 | train_dataloader:
10 | type: DataLoader
11 | dataset:
12 | type: VOCDetection
13 | root: ./dataset/voc/
14 | ann_file: trainval.txt
15 | label_file: label_list.txt
16 | transforms:
17 | type: Compose
18 | ops: ~
19 | shuffle: True
20 | num_workers: 4
21 | drop_last: True
22 | collate_fn:
23 | type: BatchImageCollateFuncion
24 |
25 |
26 | val_dataloader:
27 | type: DataLoader
28 | dataset:
29 | type: VOCDetection
30 | root: ./dataset/voc/
31 | ann_file: test.txt
32 | label_file: label_list.txt
33 | transforms:
34 | type: Compose
35 | ops: ~
36 | shuffle: False
37 | num_workers: 4
38 | drop_last: False
39 | collate_fn:
40 | type: BatchImageCollateFuncion
41 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml:
--------------------------------------------------------------------------------
1 |
2 | train_dataloader:
3 | dataset:
4 | return_masks: False
5 | transforms:
6 | ops:
7 | - {type: RandomPhotometricDistort, p: 0.5}
8 | - {type: RandomZoomOut, fill: 0}
9 | - {type: RandomIoUCrop, p: 0.8}
10 | - {type: SanitizeBoundingBoxes, min_size: 1}
11 | - {type: RandomHorizontalFlip}
12 | - {type: Resize, size: [640, 640], }
13 | - {type: SanitizeBoundingBoxes, min_size: 1}
14 | - {type: ConvertPILImage, dtype: 'float32', scale: True}
15 | - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
16 | collate_fn:
17 | type: BatchImageCollateFuncion
18 | scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
19 | shuffle: True
20 | num_workers: 4
21 | total_batch_size: 16
22 |
23 | val_dataloader:
24 | dataset:
25 | transforms:
26 | ops:
27 | - {type: Resize, size: [640, 640]}
28 | - {type: ConvertPILImage, dtype: 'float32', scale: True}
29 | shuffle: False
30 | total_batch_size: 16
31 | num_workers: 8
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/include/optimizer.yml:
--------------------------------------------------------------------------------
1 |
2 | use_ema: True
3 | ema:
4 | type: ModelEMA
5 | decay: 0.9999
6 | warmups: 2000
7 |
8 |
9 | epoches: 72
10 | clip_max_norm: 0.1
11 |
12 |
13 | optimizer:
14 | type: AdamW
15 | params:
16 | -
17 | params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
18 | lr: 0.00001
19 | -
20 | params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
21 | weight_decay: 0.
22 | lr: 0.00001
23 | -
24 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
25 | weight_decay: 0.
26 |
27 | lr: 0.0001
28 | betas: [0.9, 0.999]
29 | weight_decay: 0.0001
30 |
31 |
32 | lr_scheduler:
33 | type: MultiStepLR
34 | milestones: [1000]
35 | gamma: 0.1
36 |
37 |
38 | lr_warmup_scheduler:
39 | type: LinearWarmup
40 | warmup_duration: 2000
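
The `params` entries above are regular expressions matched against parameter names; a standalone sketch of that matching (the toy module names are illustrative, the repo's solver does the actual grouping):

```
import re
import torch.nn as nn

model = nn.ModuleDict({
    'backbone': nn.ModuleDict({'conv': nn.Conv2d(3, 8, 3), 'norm': nn.BatchNorm2d(8)}),
    'decoder':  nn.ModuleDict({'proj': nn.Linear(8, 8),    'norm': nn.LayerNorm(8)}),
})
pat = re.compile(r'^(?=.*backbone)(?=.*(?:norm|bn)).*$')
print([n for n, _ in model.named_parameters() if pat.match(n)])
# ['backbone.norm.weight', 'backbone.norm.bias']
```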
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: RTDETRCriterion
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | use_focal_loss: True
9 | eval_spatial_size: [640, 640] # h w
10 |
11 |
12 | RTDETR:
13 | backbone: PResNet
14 | encoder: HybridEncoder
15 | decoder: RTDETRTransformer
16 |
17 |
18 | PResNet:
19 | depth: 50
20 | variant: d
21 | freeze_at: 0
22 | return_idx: [1, 2, 3]
23 | num_stages: 4
24 | freeze_norm: True
25 | pretrained: True
26 |
27 |
28 | HybridEncoder:
29 | in_channels: [512, 1024, 2048]
30 | feat_strides: [8, 16, 32]
31 |
32 | # intra
33 | hidden_dim: 256
34 | use_encoder_idx: [2]
35 | num_encoder_layers: 1
36 | nhead: 8
37 | dim_feedforward: 1024
38 | dropout: 0.
39 | enc_act: 'gelu'
40 |
41 | # cross
42 | expansion: 1.0
43 | depth_mult: 1
44 | act: 'silu'
45 |
46 | version: v1
47 |
48 | RTDETRTransformer:
49 | feat_channels: [256, 256, 256]
50 | feat_strides: [8, 16, 32]
51 | hidden_dim: 256
52 | num_levels: 3
53 |
54 | num_layers: 6
55 | num_queries: 300
56 |
57 | num_denoising: 100
58 | label_noise_ratio: 0.5
59 | box_noise_scale: 1.0 # 1.0 0.4
60 |
61 | eval_idx: -1
62 |
63 |
64 | RTDETRPostProcessor:
65 | num_top_queries: 300
66 |
67 |
68 | RTDETRCriterion:
69 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
70 | losses: ['vfl', 'boxes', ]
71 | alpha: 0.75
72 | gamma: 2.0
73 |
74 | matcher:
75 | type: HungarianMatcher
76 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
77 | alpha: 0.25
78 | gamma: 2.0
79 |
80 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r101vd_6x_coco
12 |
13 |
14 | PResNet:
15 | depth: 101
16 |
17 |
18 | HybridEncoder:
19 | # intra
20 | hidden_dim: 384
21 | dim_feedforward: 2048
22 |
23 |
24 | RTDETRTransformer:
25 | feat_channels: [384, 384, 384]
26 |
27 |
28 | optimizer:
29 | type: AdamW
30 | params:
31 | -
32 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
33 | lr: 0.000001
34 | -
35 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
36 | weight_decay: 0.
37 |
38 | lr: 0.0001
39 | betas: [0.9, 0.999]
40 | weight_decay: 0.0001
41 |
42 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r18vd_6x_coco
12 |
13 |
14 | PResNet:
15 | depth: 18
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformer:
28 | num_layers: 3
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*norm|bn).*$'
37 | weight_decay: 0.
38 | lr: 0.00001
39 | -
40 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r34vd_6x_coco
12 |
13 |
14 | PResNet:
15 | depth: 34
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformer:
28 | num_layers: 4
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*norm|bn).*$'
37 | weight_decay: 0.
38 | lr: 0.00001
39 | -
40 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r50vd_6x_coco
12 |
13 |
14 |
15 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco
10 |
11 |
12 | HybridEncoder:
13 | expansion: 0.5
14 |
15 |
16 | RTDETRTransformer:
17 |   eval_idx: 2 # use the 3rd decoder layer for eval
18 |
19 |
20 |
21 | optimizer:
22 | type: AdamW
23 | params:
24 | -
25 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
26 | lr: 0.00001
27 | -
28 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
29 | weight_decay: 0.
30 |
31 | lr: 0.0001
32 | betas: [0.9, 0.999]
33 | weight_decay: 0.0001
34 |
35 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/include/dataloader.yml:
--------------------------------------------------------------------------------
1 |
2 | train_dataloader:
3 | dataset:
4 | transforms:
5 | ops:
6 | - {type: RandomPhotometricDistort, p: 0.5}
7 | - {type: RandomZoomOut, fill: 0}
8 | - {type: RandomIoUCrop, p: 0.8}
9 | - {type: SanitizeBoundingBoxes, min_size: 1}
10 | - {type: RandomHorizontalFlip}
11 | - {type: Resize, size: [640, 640], }
12 | - {type: SanitizeBoundingBoxes, min_size: 1}
13 | - {type: ConvertPILImage, dtype: 'float32', scale: True}
14 | - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
15 |       policy:
16 |         name: stop_epoch
17 |         epoch: 71 # stop the `ops` listed below from epoch 71 onward
18 |         ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop']
19 |
20 | collate_fn:
21 | type: BatchImageCollateFuncion
22 | scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
23 |     stop_epoch: 71 # stop multi-scale sampling from epoch 71 onward
24 |
25 | shuffle: True
26 |   total_batch_size: 16 # total batch size across devices, e.g. 4 GPUs x 4 images each
27 | num_workers: 4
28 |
29 |
30 | val_dataloader:
31 | dataset:
32 | transforms:
33 | ops:
34 | - {type: Resize, size: [640, 640]}
35 | - {type: ConvertPILImage, dtype: 'float32', scale: True}
36 | shuffle: False
37 | total_batch_size: 32
38 | num_workers: 4
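
A sketch of the `stop_epoch` idea shared by `policy` and the collate function above: past the cutoff epoch the listed augmentations are simply skipped (illustrative only; the repo's transforms implement the real policy):

```
def apply_ops(sample, ops, epoch, stop_epoch=71,
              stop_names=('RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop')):
    # apply `ops` in order, dropping the heavy augmentations once epoch >= stop_epoch
    for op in ops:
        if epoch >= stop_epoch and type(op).__name__ in stop_names:
            continue
        sample = op(sample)
    return sample
```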
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/include/optimizer.yml:
--------------------------------------------------------------------------------
1 |
2 | use_amp: True
3 | use_ema: True
4 | ema:
5 | type: ModelEMA
6 | decay: 0.9999
7 | warmups: 2000
8 |
9 |
10 | epoches: 72
11 | clip_max_norm: 0.1
12 |
13 |
14 | optimizer:
15 | type: AdamW
16 | params:
17 | -
18 | params: '^(?=.*backbone)(?!.*norm).*$'
19 | lr: 0.00001
20 | -
21 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
22 | weight_decay: 0.
23 |
24 | lr: 0.0001
25 | betas: [0.9, 0.999]
26 | weight_decay: 0.0001
27 |
28 |
29 | lr_scheduler:
30 | type: MultiStepLR
31 | milestones: [1000]
32 | gamma: 0.1
33 |
34 |
35 | lr_warmup_scheduler:
36 | type: LinearWarmup
37 | warmup_duration: 2000
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/include/rtdetrv2_r50vd.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: RTDETRCriterionv2
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | use_focal_loss: True
9 | eval_spatial_size: [640, 640] # h w
10 |
11 |
12 | RTDETR:
13 | backbone: PResNet
14 | encoder: HybridEncoder
15 | decoder: RTDETRTransformerv2
16 |
17 |
18 | PResNet:
19 | depth: 50
20 | variant: d
21 | freeze_at: 0
22 | return_idx: [1, 2, 3]
23 | num_stages: 4
24 | freeze_norm: True
25 | pretrained: True
26 |
27 |
28 | HybridEncoder:
29 | in_channels: [512, 1024, 2048]
30 | feat_strides: [8, 16, 32]
31 |
32 | # intra
33 | hidden_dim: 256
34 | use_encoder_idx: [2]
35 | num_encoder_layers: 1
36 | nhead: 8
37 | dim_feedforward: 1024
38 | dropout: 0.
39 | enc_act: 'gelu'
40 |
41 | # cross
42 | expansion: 1.0
43 | depth_mult: 1
44 | act: 'silu'
45 |
46 |
47 | RTDETRTransformerv2:
48 | feat_channels: [256, 256, 256]
49 | feat_strides: [8, 16, 32]
50 | hidden_dim: 256
51 | num_levels: 3
52 |
53 | num_layers: 6
54 | num_queries: 300
55 |
56 | num_denoising: 100
57 | label_noise_ratio: 0.5
58 | box_noise_scale: 1.0 # 1.0 0.4
59 |
60 | eval_idx: -1
61 |
62 | # NEW
63 | num_points: [4, 4, 4] # [3,3,3] [2,2,2]
64 | cross_attn_method: default # default, discrete
65 | query_select_method: default # default, agnostic
66 |
67 |
68 | RTDETRPostProcessor:
69 | num_top_queries: 300
70 |
71 |
72 | RTDETRCriterionv2:
73 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
74 | losses: ['vfl', 'boxes', ]
75 | alpha: 0.75
76 | gamma: 2.0
77 |
78 | matcher:
79 | type: HungarianMatcher
80 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
81 | alpha: 0.25
82 | gamma: 2.0
83 |
84 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_h_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_hgnetv2_h_6x_coco
11 |
12 |
13 | RTDETR:
14 | backbone: HGNetv2
15 |
16 |
17 | HGNetv2:
18 | name: 'H'
19 | return_idx: [1, 2, 3]
20 | freeze_at: 0
21 | freeze_norm: True
22 | pretrained: True
23 |
24 |
25 | HybridEncoder:
26 | # intra
27 | hidden_dim: 512
28 | dim_feedforward: 2048
29 | num_encoder_layers: 2
30 |
31 |
32 | RTDETRTransformerv2:
33 | feat_channels: [512, 512, 512]
34 |
35 |
36 |
37 | optimizer:
38 | type: AdamW
39 | params:
40 | -
41 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
42 | lr: 0.000005
43 | -
44 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
45 | weight_decay: 0.
46 |
47 | lr: 0.0001
48 | betas: [0.9, 0.999]
49 | weight_decay: 0.0001
50 |
51 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_l_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_hgnetv2_l_6x_coco
11 |
12 |
13 | RTDETR:
14 | backbone: HGNetv2
15 |
16 |
17 | HGNetv2:
18 | name: 'L'
19 | return_idx: [1, 2, 3]
20 | freeze_at: 0
21 | freeze_norm: True
22 | pretrained: True
23 |
24 |
25 | optimizer:
26 | type: AdamW
27 | params:
28 | -
29 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
30 | lr: 0.000005
31 | -
32 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
33 | weight_decay: 0.
34 |
35 | lr: 0.0001
36 | betas: [0.9, 0.999]
37 | weight_decay: 0.0001
38 |
39 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_x_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_hgnetv2_x_6x_coco
11 |
12 |
13 | RTDETR:
14 | backbone: HGNetv2
15 |
16 |
17 | HGNetv2:
18 | name: 'X'
19 | return_idx: [1, 2, 3]
20 | freeze_at: 0
21 | freeze_norm: True
22 | pretrained: True
23 |
24 |
25 |
26 | HybridEncoder:
27 | # intra
28 | hidden_dim: 384
29 | dim_feedforward: 2048
30 |
31 |
32 | RTDETRTransformerv2:
33 | feat_channels: [384, 384, 384]
34 |
35 |
36 |
37 | optimizer:
38 | type: AdamW
39 | params:
40 | -
41 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
42 | lr: 0.000001
43 | -
44 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
45 | weight_decay: 0.
46 |
47 | lr: 0.0001
48 | betas: [0.9, 0.999]
49 | weight_decay: 0.0001
50 |
51 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r101vd_6x_coco
11 |
12 |
13 | PResNet:
14 | depth: 101
15 |
16 |
17 | HybridEncoder:
18 | # intra
19 | hidden_dim: 384
20 | dim_feedforward: 2048
21 |
22 |
23 | RTDETRTransformerv2:
24 | feat_channels: [384, 384, 384]
25 |
26 |
27 | optimizer:
28 | type: AdamW
29 | params:
30 | -
31 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
32 | lr: 0.000001
33 | -
34 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
35 | weight_decay: 0.
36 |
37 | lr: 0.0001
38 | betas: [0.9, 0.999]
39 | weight_decay: 0.0001
40 |
41 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r18vd_120e_coco
11 |
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | hidden_dim: 256
23 | expansion: 0.5
24 |
25 |
26 | RTDETRTransformerv2:
27 | num_layers: 3
28 |
29 |
30 | epoches: 120
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*(?:norm|bn)).*$'
37 | weight_decay: 0.
38 |
39 |
40 | train_dataloader:
41 | dataset:
42 | transforms:
43 | policy:
44 | epoch: 117
45 | collate_fn:
46 | scales: ~
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_voc.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/voc_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r18vd_120e_voc
11 |
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | hidden_dim: 256
23 | expansion: 0.5
24 |
25 |
26 | RTDETRTransformerv2:
27 | num_layers: 3
28 |
29 |
30 | epoches: 120
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*(?:norm|bn)).*$'
37 | weight_decay: 0.
38 |
39 | train_dataloader:
40 | dataset:
41 | transforms:
42 | policy:
43 | epoch: 117
44 | collate_fn:
45 | scales: ~
46 | total_batch_size: 32
47 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_dsp_3x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r18vd_120e_coco.pth
11 |
12 | output_dir: ./output/rtdetrv2_r18vd_dsp_3x_coco
13 |
14 | PResNet:
15 | depth: 18
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformerv2:
28 | num_layers: 3
29 | num_points: [4, 4, 4]
30 | cross_attn_method: discrete
31 |
32 |
33 | epoches: 36
34 |
35 | optimizer:
36 | type: AdamW
37 | params:
38 | -
39 | params: '^(?=.*(?:norm|bn)).*$'
40 | weight_decay: 0.
41 |
42 |
43 | train_dataloader:
44 | dataset:
45 | transforms:
46 | policy:
47 | epoch: 33
48 | collate_fn:
49 | scales: ~
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp1_120e_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r18vd_sp1_120e_coco
11 |
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | hidden_dim: 256
23 | expansion: 0.5
24 |
25 |
26 | RTDETRTransformerv2:
27 | num_layers: 3
28 | num_points: [1, 1, 1]
29 |
30 |
31 | epoches: 120
32 |
33 | optimizer:
34 | type: AdamW
35 | params:
36 | -
37 | params: '^(?=.*(?:norm|bn)).*$'
38 | weight_decay: 0.
39 |
40 |
41 | train_dataloader:
42 | dataset:
43 | transforms:
44 | policy:
45 | epoch: 117
46 | collate_fn:
47 | scales: ~
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp2_120e_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r18vd_sp2_120e_coco
11 |
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | hidden_dim: 256
23 | expansion: 0.5
24 |
25 |
26 | RTDETRTransformerv2:
27 | num_layers: 3
28 | num_points: [2, 2, 2]
29 |
30 |
31 | epoches: 120
32 |
33 | optimizer:
34 | type: AdamW
35 | params:
36 | -
37 | params: '^(?=.*(?:norm|bn)).*$'
38 | weight_decay: 0.
39 |
40 |
41 | train_dataloader:
42 | dataset:
43 | transforms:
44 | policy:
45 | epoch: 117
46 | collate_fn:
47 | scales: ~
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp3_120e_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r18vd_sp3_120e_coco
11 |
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | hidden_dim: 256
23 | expansion: 0.5
24 |
25 |
26 | RTDETRTransformerv2:
27 | num_layers: 3
28 | num_points: [3, 3, 3]
29 |
30 |
31 | epoches: 120
32 |
33 | optimizer:
34 | type: AdamW
35 | params:
36 | -
37 | params: '^(?=.*(?:norm|bn)).*$'
38 | weight_decay: 0.
39 |
40 |
41 | train_dataloader:
42 | dataset:
43 | transforms:
44 | policy:
45 | epoch: 117
46 | collate_fn:
47 | scales: ~
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r34vd_120e_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r34vd_120e_coco
11 |
12 |
13 | PResNet:
14 | depth: 34
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 |
20 | HybridEncoder:
21 | in_channels: [128, 256, 512]
22 | hidden_dim: 256
23 | expansion: 0.5
24 |
25 |
26 | RTDETRTransformerv2:
27 | num_layers: 4
28 |
29 |
30 | epoches: 120
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
37 | lr: 0.00005
38 | -
39 | params: '^(?=.*backbone)(?=.*norm|bn).*$'
40 | lr: 0.00005
41 | weight_decay: 0.
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
50 |
51 | train_dataloader:
52 | dataset:
53 | transforms:
54 | policy:
55 | epoch: 117
56 | collate_fn:
57 | stop_epoch: 117
58 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r34vd_dsp_1x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r34vd_120e_coco_ema.pth
10 |
11 | output_dir: ./output/rtdetrv2_r34vd_dsp_1x_coco
12 |
13 |
14 | PResNet:
15 | depth: 34
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformerv2:
28 | num_layers: 4
29 | cross_attn_method: discrete
30 |
31 |
32 | epoches: 12
33 |
34 | optimizer:
35 | type: AdamW
36 | params:
37 | -
38 | params: '^(?=.*backbone)(?!.*norm|bn).*$'
39 | lr: 0.00005
40 | -
41 | params: '^(?=.*backbone)(?=.*norm|bn).*$'
42 | lr: 0.00005
43 | weight_decay: 0.
44 | -
45 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
46 | weight_decay: 0.
47 |
48 | lr: 0.0001
49 | betas: [0.9, 0.999]
50 | weight_decay: 0.0001
51 |
52 |
53 | train_dataloader:
54 | dataset:
55 | transforms:
56 | policy:
57 | epoch: 10
58 | collate_fn:
59 | stop_epoch: 10
60 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | output_dir: ./output/rtdetrv2_r50vd_6x_coco
11 |
12 |
13 |
14 | optimizer:
15 | type: AdamW
16 | params:
17 | -
18 | params: '^(?=.*backbone)(?!.*norm).*$'
19 | lr: 0.00001
20 | -
21 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
22 | weight_decay: 0.
23 |
24 | lr: 0.0001
25 | betas: [0.9, 0.999]
26 | weight_decay: 0.0001
27 |
28 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_dsp_1x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 |
10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_6x_coco_ema.pth
11 |
12 | output_dir: ./output/rtdetrv2_r50vd_dsp_1x_coco
13 |
14 |
15 | RTDETRTransformerv2:
16 | cross_attn_method: discrete
17 |
18 |
19 | epoches: 12
20 |
21 | train_dataloader:
22 | dataset:
23 | transforms:
24 | policy:
25 | epoch: 10
26 | collate_fn:
27 | stop_epoch: 10
28 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_m_7x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetrv2_r50vd_m_7x_coco
10 |
11 |
12 | HybridEncoder:
13 | expansion: 0.5
14 |
15 |
16 | RTDETRTransformerv2:
17 |   eval_idx: 2 # use the 3rd decoder layer for eval
18 |
19 |
20 | epoches: 84
21 |
22 | optimizer:
23 | type: AdamW
24 | params:
25 | -
26 | params: '^(?=.*backbone)(?!.*norm).*$'
27 | lr: 0.00001
28 | -
29 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
30 | weight_decay: 0.
31 |
32 | lr: 0.0001
33 | betas: [0.9, 0.999]
34 | weight_decay: 0.0001
35 |
36 |
37 | train_dataloader:
38 | dataset:
39 | transforms:
40 | policy:
41 | epoch: 81
42 | collate_fn:
43 | stop_epoch: 81
44 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_m_dsp_3x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetrv2_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetrv2_r50vd_m_dsp_3x_coco
10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_m_7x_coco_ema.pth
11 |
12 | HybridEncoder:
13 | expansion: 0.5
14 |
15 |
16 | RTDETRTransformerv2:
17 |   eval_idx: 2 # use the 3rd decoder layer for eval
18 | cross_attn_method: discrete
19 |
20 |
21 | epoches: 36
22 |
23 | optimizer:
24 | type: AdamW
25 | params:
26 | -
27 | params: '^(?=.*backbone)(?!.*norm).*$'
28 | lr: 0.00001
29 | -
30 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
31 | weight_decay: 0.
32 |
33 | lr: 0.0001
34 | betas: [0.9, 0.999]
35 | weight_decay: 0.0001
36 |
37 |
38 | train_dataloader:
39 | dataset:
40 | transforms:
41 | policy:
42 | epoch: 33
43 | collate_fn:
44 | stop_epoch: 33
45 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/configs/runtime.yml:
--------------------------------------------------------------------------------
1 |
2 | print_freq: 100
3 | output_dir: './logs'
4 | checkpoint_freq: 1
5 |
6 |
7 | sync_bn: True
8 | find_unused_parameters: False
9 |
10 |
11 | use_amp: False
12 | scaler:
13 | type: GradScaler
14 | enabled: True
15 |
16 |
17 | use_ema: False
18 | ema:
19 | type: ModelEMA
20 | decay: 0.9999
21 | warmups: 2000
22 |
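
One common reading of `decay` plus `warmups` is an exponentially ramped decay, so early steps track the raw weights closely; this is an assumption, check `src/optim/ema.py` for the exact schedule:

```
import math

def effective_decay(step, decay=0.9999, warmups=2000):
    # assumed ramp: rises from 0 toward `decay` as training proceeds
    return decay * (1 - math.exp(-step / warmups))
```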
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/dataset/readme.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | ```
4 | # symlink data to the paths referenced in configs/dataset/xxx.yml
5 | ln -s /path/to/dataset/ ./dataset/dataset_name
6 | ```
7 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/docker-compose.yml:
--------------------------------------------------------------------------------
1 | version: "3.9"
2 |
3 | services:
4 | tensorrt-container:
5 | build:
6 | context: .
7 | dockerfile: Dockerfile
8 | image: rtdetr-v2:23.01
9 | volumes:
10 | - ./:/workspace
11 | runtime: nvidia
12 | environment:
13 | - NVIDIA_VISIBLE_DEVICES=all
14 | stdin_open: true
15 | tty: true
16 |
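
Standard Compose usage for the service above; the repo is mounted at /workspace as configured:

```
docker compose build
docker compose run --rm tensorrt-container
```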
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/references/deploy/readme.md:
--------------------------------------------------------------------------------
1 | # Deployment
2 |
3 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/references/deploy/rtdetrv2_onnxruntime.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torchvision.transforms as T
6 |
7 | import numpy as np
8 | import onnxruntime as ort
9 | from PIL import Image, ImageDraw
10 |
11 |
12 | def draw(images, labels, boxes, scores, thrh = 0.6):
13 | for i, im in enumerate(images):
14 | draw = ImageDraw.Draw(im)
15 |
16 | scr = scores[i]
17 | lab = labels[i][scr > thrh]
18 | box = boxes[i][scr > thrh]
19 |
20 |         for j, b in enumerate(box):
21 |             draw.rectangle(list(b), outline='red',)
22 |             draw.text((b[0], b[1]), text=str(lab[j].item()), fill='blue', )
23 |
24 | im.save(f'results_{i}.jpg')
25 |
26 |
27 | def main(args, ):
28 | """main
29 | """
30 | sess = ort.InferenceSession(args.onnx_file)
31 | print(ort.get_device())
32 |
33 | im_pil = Image.open(args.im_file).convert('RGB')
34 | w, h = im_pil.size
35 | orig_size = torch.tensor([w, h])[None]
36 |
37 | transforms = T.Compose([
38 | T.Resize((640, 640)),
39 | T.ToTensor(),
40 | ])
41 | im_data = transforms(im_pil)[None]
42 |
43 | output = sess.run(
44 | # output_names=['labels', 'boxes', 'scores'],
45 | output_names=None,
46 | input_feed={'images': im_data.data.numpy(), "orig_target_sizes": orig_size.data.numpy()}
47 | )
48 |
49 | labels, boxes, scores = output
50 |
51 | draw([im_pil], labels, boxes, scores)
52 |
53 |
54 | if __name__ == '__main__':
55 | import argparse
56 | parser = argparse.ArgumentParser()
57 | parser.add_argument('--onnx-file', type=str, )
58 | parser.add_argument('--im-file', type=str, )
59 | # parser.add_argument('-d', '--device', type=str, default='cpu')
60 | args = parser.parse_args()
61 | main(args)
62 |
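
Invocation follows from the argparse flags above; the placeholder paths are yours to fill (the .onnx file comes from the export tooling):

```
python references/deploy/rtdetrv2_onnxruntime.py --onnx-file <model.onnx> --im-file <image.jpg>
```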
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/references/deploy/rtdetrv2_openvino.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | # please reference: https://github.com/guojin-yan/RT-DETR-OpenVINO
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/references/deploy/rtdetrv2_torch.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torchvision.transforms as T
7 |
8 | import numpy as np
9 | from PIL import Image, ImageDraw
10 |
11 | from src.core import YAMLConfig
12 |
13 |
14 | def draw(images, labels, boxes, scores, thrh = 0.6):
15 | for i, im in enumerate(images):
16 | draw = ImageDraw.Draw(im)
17 |
18 | scr = scores[i]
19 | lab = labels[i][scr > thrh]
20 | box = boxes[i][scr > thrh]
21 | scrs = scores[i][scr > thrh]
22 |
23 | for j,b in enumerate(box):
24 | draw.rectangle(list(b), outline='red',)
25 | draw.text((b[0], b[1]), text=f"{lab[j].item()} {round(scrs[j].item(),2)}", fill='blue', )
26 |
27 | im.save(f'results_{i}.jpg')
28 |
29 |
30 | def main(args, ):
31 | """main
32 | """
33 | cfg = YAMLConfig(args.config, resume=args.resume)
34 |
35 | if args.resume:
36 | checkpoint = torch.load(args.resume, map_location='cpu')
37 | if 'ema' in checkpoint:
38 | state = checkpoint['ema']['module']
39 | else:
40 | state = checkpoint['model']
41 | else:
42 |         raise AttributeError('Only support loading model.state_dict via --resume for now.')
43 |
44 | # NOTE load train mode state -> convert to deploy mode
45 | cfg.model.load_state_dict(state)
46 |
47 | class Model(nn.Module):
48 | def __init__(self, ) -> None:
49 | super().__init__()
50 | self.model = cfg.model.deploy()
51 | self.postprocessor = cfg.postprocessor.deploy()
52 |
53 | def forward(self, images, orig_target_sizes):
54 | outputs = self.model(images)
55 | outputs = self.postprocessor(outputs, orig_target_sizes)
56 | return outputs
57 |
58 | model = Model().to(args.device)
59 |
60 | im_pil = Image.open(args.im_file).convert('RGB')
61 | w, h = im_pil.size
62 | orig_size = torch.tensor([w, h])[None].to(args.device)
63 |
64 | transforms = T.Compose([
65 | T.Resize((640, 640)),
66 | T.ToTensor(),
67 | ])
68 | im_data = transforms(im_pil)[None].to(args.device)
69 |
70 | output = model(im_data, orig_size)
71 | labels, boxes, scores = output
72 |
73 | draw([im_pil], labels, boxes, scores)
74 |
75 |
76 | if __name__ == '__main__':
77 | import argparse
78 | parser = argparse.ArgumentParser()
79 | parser.add_argument('-c', '--config', type=str, )
80 | parser.add_argument('-r', '--resume', type=str, )
81 | parser.add_argument('-f', '--im-file', type=str, )
82 | parser.add_argument('-d', '--device', type=str, default='cpu')
83 | args = parser.parse_args()
84 | main(args)
85 |
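
Invocation follows from the argparse flags above, with placeholder paths:

```
python references/deploy/rtdetrv2_torch.py -c <config.yml> -r <checkpoint.pth> -f <image.jpg> -d cuda
```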
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | torch>=2.0.1
2 | torchvision>=0.15.2
3 | faster-coco-eval>=1.6.5
4 | PyYAML
5 | tensorboard
6 | scipy
7 | pycocotools
8 | onnx
9 | onnxruntime-gpu
10 | tensorrt==8.5.2.2
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | # for register purpose
5 | from . import optim
6 | from . import data
7 | from . import nn
8 | from . import zoo
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/core/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from .workspace import GLOBAL_CONFIG, register, create
5 | from .yaml_utils import *
6 | from ._config import BaseConfig
7 | from .yaml_config import YAMLConfig
8 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from .dataset import *
5 | from .transforms import *
6 | from .dataloader import *
7 |
8 | from ._misc import convert_to_tv_tensor
9 |
10 |
11 |
12 |
13 | # def set_epoch(self, epoch) -> None:
14 | # self.epoch = epoch
15 | # def _set_epoch_func(datasets):
16 | # """Add `set_epoch` for datasets
17 | # """
18 | # from ..core import register
19 | # for ds in datasets:
20 | # register(ds)(set_epoch)
21 | # _set_epoch_func([CIFAR10, VOCDetection, CocoDetection])
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/_misc.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import importlib.metadata
5 | from torch import Tensor
6 |
7 | if importlib.metadata.version('torchvision') == '0.15.2':
8 | import torchvision
9 | torchvision.disable_beta_transforms_warning()
10 |
11 | from torchvision.datapoints import BoundingBox as BoundingBoxes
12 | from torchvision.datapoints import BoundingBoxFormat, Mask, Image, Video
13 | from torchvision.transforms.v2 import SanitizeBoundingBox as SanitizeBoundingBoxes
14 | _boxes_keys = ['format', 'spatial_size']
15 |
16 | elif '0.17' > importlib.metadata.version('torchvision') >= '0.16':
17 | import torchvision
18 | torchvision.disable_beta_transforms_warning()
19 |
20 | from torchvision.transforms.v2 import SanitizeBoundingBoxes
21 | from torchvision.tv_tensors import (
22 | BoundingBoxes, BoundingBoxFormat, Mask, Image, Video)
23 | _boxes_keys = ['format', 'canvas_size']
24 |
25 | elif importlib.metadata.version('torchvision') >= '0.17':
26 | import torchvision
27 | from torchvision.transforms.v2 import SanitizeBoundingBoxes
28 | from torchvision.tv_tensors import (
29 | BoundingBoxes, BoundingBoxFormat, Mask, Image, Video)
30 | _boxes_keys = ['format', 'canvas_size']
31 |
32 | else:
33 | raise RuntimeError('Please make sure torchvision version >= 0.15.2')
34 |
35 |
36 |
37 | def convert_to_tv_tensor(tensor: Tensor, key: str, box_format='xyxy', spatial_size=None) -> Tensor:
38 | """
39 | Args:
40 | tensor (Tensor): input tensor
41 | key (str): transform to key
42 |
43 |     Return:
44 |         Tensor: a torchvision tv_tensor (BoundingBoxes or Mask)
45 | """
46 | assert key in ('boxes', 'masks', ), "Only support 'boxes' and 'masks'"
47 |
48 | if key == 'boxes':
49 | box_format = getattr(BoundingBoxFormat, box_format.upper())
50 | _kwargs = dict(zip(_boxes_keys, [box_format, spatial_size]))
51 | return BoundingBoxes(tensor, **_kwargs)
52 |
53 | if key == 'masks':
54 | return Mask(tensor)
55 |
56 |
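
A usage sketch for `convert_to_tv_tensor` with arbitrary values:

```
import torch

boxes = torch.tensor([[10., 10., 50., 50.]])
tv_boxes = convert_to_tv_tensor(boxes, 'boxes', box_format='xyxy', spatial_size=(480, 640))
# a torchvision BoundingBoxes carrying format and canvas/spatial-size metadata
```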
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | # from ._dataset import DetDataset
5 | from .cifar_dataset import CIFAR10
6 | from .coco_dataset import CocoDetection
7 | from .coco_dataset import (
8 | CocoDetection,
9 | mscoco_category2name,
10 | mscoco_category2label,
11 | mscoco_label2category,
12 | )
13 | from .coco_eval import CocoEvaluator
14 | from .coco_utils import get_coco_api_from_dataset
15 | from .voc_detection import VOCDetection
16 | from .voc_eval import VOCEvaluator
17 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/dataset/_dataset.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.utils.data as data
6 |
7 | class DetDataset(data.Dataset):
8 | def __getitem__(self, index):
9 | img, target = self.load_item(index)
10 | if self.transforms is not None:
11 | img, target, _ = self.transforms(img, target, self)
12 | return img, target
13 |
14 | def load_item(self, index):
15 | raise NotImplementedError("Please implement this function to return item before `transforms`.")
16 |
17 | def set_epoch(self, epoch) -> None:
18 | self._epoch = epoch
19 |
20 | @property
21 | def epoch(self):
22 | return self._epoch if hasattr(self, '_epoch') else -1
23 |
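
A minimal subclass showing the contract `DetDataset` expects of its children (toy data, illustrative only):

```
class ToyDetDataset(DetDataset):
    def __init__(self, items, transforms=None):
        self.items = items            # list of (img, target) pairs
        self.transforms = transforms

    def __len__(self):
        return len(self.items)

    def load_item(self, index):
        # return the raw (img, target) pair; __getitem__ applies transforms
        return self.items[index]
```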
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/dataset/cifar_dataset.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | import torchvision
6 | from typing import Optional, Callable
7 |
8 | from ...core import register
9 |
10 | @register()
11 | class CIFAR10(torchvision.datasets.CIFAR10):
12 | __inject__ = ['transform', 'target_transform']
13 |
14 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None:
15 | super().__init__(root, train, transform, target_transform, download)
16 |
17 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/dataset/voc_detection.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torchvision
6 | import torchvision.transforms.functional as TVF
7 |
8 |
9 | import os
10 | from PIL import Image
11 | from typing import Optional, Callable
12 |
13 | try:
14 | from defusedxml.ElementTree import parse as ET_parse
15 | except ImportError:
16 | from xml.etree.ElementTree import parse as ET_parse
17 |
18 | from ._dataset import DetDataset
19 | from .._misc import convert_to_tv_tensor
20 | from ...core import register
21 |
22 | @register()
23 | class VOCDetection(torchvision.datasets.VOCDetection, DetDataset):
24 | __inject__ = ['transforms', ]
25 |
26 | def __init__(self, root: str, ann_file: str = "trainval.txt", label_file: str = "label_list.txt", transforms: Optional[Callable] = None):
27 |
28 | with open(os.path.join(root, ann_file), 'r') as f:
29 | lines = [x.strip() for x in f.readlines()]
30 | lines = [x.split(' ') for x in lines]
31 |
32 | self.images = [os.path.join(root, lin[0]) for lin in lines]
33 | self.targets = [os.path.join(root, lin[1]) for lin in lines]
34 | assert len(self.images) == len(self.targets)
35 |
36 | with open(os.path.join(root, label_file), 'r') as f:
37 | labels = f.readlines()
38 | labels = [lab.strip() for lab in labels]
39 |
40 | self.transforms = transforms
41 | self.labels_map = {lab: i for i, lab in enumerate(labels)}
42 |
43 | def __getitem__(self, index: int):
44 | image, target = self.load_item(index)
45 | if self.transforms is not None:
46 | image, target, _ = self.transforms(image, target, self)
47 | # target["orig_size"] = torch.tensor(TVF.get_image_size(image))
48 | return image, target
49 |
50 | def load_item(self, index: int):
51 | image = Image.open(self.images[index]).convert("RGB")
52 | target = self.parse_voc_xml(ET_parse(self.targets[index]).getroot())
53 |
54 | output = {}
55 | output["image_id"] = torch.tensor([index])
56 | for k in ['area', 'boxes', 'labels', 'iscrowd']:
57 | output[k] = []
58 |
59 | for blob in target['annotation']['object']:
60 | box = [float(v) for v in blob['bndbox'].values()]  # VOC bndbox order: xmin, ymin, xmax, ymax
61 | output["boxes"].append(box)
62 | output["labels"].append(blob['name'])
63 | output["area"].append((box[2] - box[0]) * (box[3] - box[1]))
64 | output["iscrowd"].append(0)
65 |
66 | w, h = image.size
67 | boxes = torch.tensor(output["boxes"]) if len(output["boxes"]) > 0 else torch.zeros(0, 4)
68 | output['boxes'] = convert_to_tv_tensor(boxes, 'boxes', box_format='xyxy', spatial_size=[h, w])
69 | output['labels'] = torch.tensor([self.labels_map[lab] for lab in output["labels"]])
70 | output['area'] = torch.tensor(output['area'])
71 | output["iscrowd"] = torch.tensor(output["iscrowd"])
72 | output["orig_size"] = torch.tensor([w, h])
73 |
74 | return image, output
75 |
76 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/dataset/voc_eval.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torchvision
6 |
7 |
8 | class VOCEvaluator(object):
9 | def __init__(self) -> None:
10 | pass
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | from ._transforms import (
6 | EmptyTransform,
7 | RandomPhotometricDistort,
8 | RandomZoomOut,
9 | RandomIoUCrop,
10 | RandomHorizontalFlip,
11 | Resize,
12 | PadToSize,
13 | SanitizeBoundingBoxes,
14 | RandomCrop,
15 | Normalize,
16 | ConvertBoxes,
17 | ConvertPILImage,
18 | )
19 | from .container import Compose
20 | from .mosaic import Mosaic
21 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/transforms/container.py:
--------------------------------------------------------------------------------
1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | import torchvision
8 | torchvision.disable_beta_transforms_warning()
9 | import torchvision.transforms.v2 as T
10 |
11 | from typing import Any, Dict, List, Optional
12 |
13 | from ._transforms import EmptyTransform
14 | from ...core import register, GLOBAL_CONFIG
15 |
16 |
17 | @register()
18 | class Compose(T.Compose):
19 | def __init__(self, ops, policy=None) -> None:
20 | transforms = []
21 | if ops is not None:
22 | for op in ops:
23 | if isinstance(op, dict):
24 | name = op.pop('type')
25 | transform = getattr(GLOBAL_CONFIG[name]['_pymodule'], GLOBAL_CONFIG[name]['_name'])(**op)
26 | transforms.append(transform)
27 | op['type'] = name
28 |
29 | elif isinstance(op, nn.Module):
30 | transforms.append(op)
31 |
32 | else:
33 | raise ValueError(f'unsupported transform op: {op}')
34 | else:
35 | transforms = [EmptyTransform(), ]
36 |
37 | super().__init__(transforms=transforms)
38 |
39 | if policy is None:
40 | policy = {'name': 'default'}
41 |
42 | self.policy = policy
43 | self.global_samples = 0
44 |
45 | def forward(self, *inputs: Any) -> Any:
46 | return self.get_forward(self.policy['name'])(*inputs)
47 |
48 | def get_forward(self, name):
49 | forwards = {
50 | 'default': self.default_forward,
51 | 'stop_epoch': self.stop_epoch_forward,
52 | 'stop_sample': self.stop_sample_forward,
53 | }
54 | return forwards[name]
55 |
56 | def default_forward(self, *inputs: Any) -> Any:
57 | sample = inputs if len(inputs) > 1 else inputs[0]
58 | for transform in self.transforms:
59 | sample = transform(sample)
60 | return sample
61 |
62 | def stop_epoch_forward(self, *inputs: Any):
63 | sample = inputs if len(inputs) > 1 else inputs[0]
64 | dataset = sample[-1]
65 |
66 | cur_epoch = dataset.epoch
67 | policy_ops = self.policy['ops']
68 | policy_epoch = self.policy['epoch']
69 |
70 | for transform in self.transforms:
71 | # skip the listed ops once the policy epoch is reached
72 | skip = type(transform).__name__ in policy_ops and cur_epoch >= policy_epoch
73 | if not skip:
74 | sample = transform(sample)
75 |
76 | return sample
77 |
78 |
79 | def stop_sample_forward(self, *inputs: Any):
80 | sample = inputs if len(inputs) > 1 else inputs[0]
81 | dataset = sample[-1]
82 |
83 | cur_epoch = dataset.epoch
84 | policy_ops = self.policy['ops']
85 | policy_sample = self.policy['sample']
86 |
87 | for transform in self.transforms:
88 | # skip the listed ops once enough samples have been seen
89 | skip = type(transform).__name__ in policy_ops and self.global_samples >= policy_sample
90 | if not skip:
91 | sample = transform(sample)
92 |
93 | self.global_samples += 1
94 |
95 | return sample
96 |
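For reference, a hypothetical policy dict matching what `stop_epoch_forward` reads (the op names and epoch value are made up):

```python
# apply Mosaic and RandomPhotometricDistort only while dataset.epoch < 71
policy = {
    'name': 'stop_epoch',
    'epoch': 71,
    'ops': ['Mosaic', 'RandomPhotometricDistort'],
}
```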
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/transforms/mosaic.py:
--------------------------------------------------------------------------------
1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torchvision
6 | torchvision.disable_beta_transforms_warning()
7 | import torchvision.transforms.v2 as T
8 | import torchvision.transforms.v2.functional as F
9 |
10 | import random
11 | from PIL import Image
12 |
13 | from .._misc import convert_to_tv_tensor
14 | from ...core import register
15 |
16 |
17 | @register()
18 | class Mosaic(T.Transform):
19 | def __init__(self, size, max_size=None, ) -> None:
20 | super().__init__()
21 | self.resize = T.Resize(size=size, max_size=max_size)
22 | self.crop = T.RandomCrop(size=max_size if max_size else size)
23 |
24 | # TODO add arg `output_size` for affine`
25 | # self.random_perspective = T.RandomPerspective(distortion_scale=0.5, p=1., )
26 | self.random_affine = T.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.5, 1.5), fill=114)
27 |
28 | def forward(self, *inputs):
29 | inputs = inputs if len(inputs) > 1 else inputs[0]
30 | image, target, dataset = inputs
31 |
32 | # build a 2x2 grid from the current sample plus three random ones
33 | image, target = self.resize(image, target)
34 | images, targets = [image], [target]
35 | for i in random.choices(range(len(dataset)), k=3):
36 | image, target = dataset.load_item(i)
37 | image, target = self.resize(image, target)
38 | images.append(image)
39 | targets.append(target)
40 |
41 | h, w = F.get_spatial_size(images[0])
42 | offset = [[0, 0], [w, 0], [0, h], [w, h]]
43 | image = Image.new(mode=images[0].mode, size=(w * 2, h * 2), color=0)
44 | for i, im in enumerate(images):
45 | image.paste(im, offset[i])
46 |
47 | offset = torch.tensor([[0, 0], [w, 0], [0, h], [w, h]]).repeat(1, 2)
48 | target = {}
49 | for k in targets[0]:
50 | if k == 'boxes':
51 | v = [t[k] + offset[i] for i, t in enumerate(targets)]
52 | else:
53 | v = [t[k] for t in targets]
54 |
55 | if isinstance(v[0], torch.Tensor):
56 | v = torch.cat(v, dim=0)
57 |
58 | target[k] = v
59 |
60 | if 'boxes' in target:
61 | # target['boxes'] = target['boxes'].clamp(0, 640 * 2 - 1)
62 | w, h = image.size
63 | target['boxes'] = convert_to_tv_tensor(target['boxes'], 'boxes', box_format='xyxy', spatial_size=[h, w])
64 |
65 | if 'masks' in target:
66 | target['masks'] = convert_to_tv_tensor(target['masks'], 'masks')
67 |
68 | image, target = self.random_affine(image, target)
69 | # image, target = self.resize(image, target)
70 | image, target = self.crop(image, target)
71 |
72 | return image, target, dataset
73 |
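The `offset` tensor above shifts each quadrant's xyxy boxes by `(x_off, y_off, x_off, y_off)`. A quick check (illustrative sizes):

```python
import torch

h, w = 320, 320
offset = torch.tensor([[0, 0], [w, 0], [0, h], [w, h]]).repeat(1, 2)  # [4, 4]
print(offset[2])  # tensor([  0, 320,   0, 320]) -> bottom-left quadrant shift
```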
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/data/transforms/presets.py:
--------------------------------------------------------------------------------
1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/misc/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from .logger import *
5 | from .visualizer import *
6 | from .dist_utils import setup_seed, setup_print
7 | from .profiler_utils import stats
8 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/misc/lazy_loader.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/util/lazy_loader.py
3 | """
4 |
5 |
6 | import types
7 | import importlib
8 |
9 | class LazyLoader(types.ModuleType):
10 | """Lazily import a module, mainly to avoid pulling in large dependencies.
11 |
12 | `paddle` and `ffmpeg` are examples of modules that are large and not always
13 | needed, and this allows them to only be loaded when they are used.
14 | """
15 |
16 | # The lint error here is incorrect.
17 | def __init__(self, local_name, parent_module_globals, name, warning=None):
18 | self._local_name = local_name
19 | self._parent_module_globals = parent_module_globals
20 | self._warning = warning
21 |
22 | # These members allow doctest to process this module member correctly
23 | # without triggering self._load(). self._load() mutates parent_module_globals
24 | # and triggers a "dict mutated during iteration" error from doctest.py.
25 | # - for from_module()
26 | self.__module__ = name.rsplit(".", 1)[0]
27 | # - for is_routine()
28 | self.__wrapped__ = None
29 |
30 | super(LazyLoader, self).__init__(name)
31 |
32 | def _load(self):
33 | """Load the module and insert it into the parent's globals."""
34 | # Import the target module and insert it into the parent's namespace
35 | module = importlib.import_module(self.__name__)
36 | self._parent_module_globals[self._local_name] = module
37 |
38 | # Emit a warning if one was specified
39 | if self._warning:
40 | # logging.warning(self._warning)
41 | # Make sure to only warn once.
42 | self._warning = None
43 |
44 | # Update this object's dict so that if someone keeps a reference to the
45 | # LazyLoader, lookups are efficient (__getattr__ is only called on lookups
46 | # that fail).
47 | self.__dict__.update(module.__dict__)
48 |
49 | return module
50 |
51 | def __getattr__(self, item):
52 | module = self._load()
53 | return getattr(module, item)
54 |
55 | def __repr__(self):
56 | # Be careful not to trigger _load here, since repr may be called in
57 | # very sensitive places.
58 | return f"<LazyLoader {self.__name__}>"
59 |
60 | def __dir__(self):
61 | module = self._load()
62 | return dir(module)
63 |
64 |
65 | # import paddle.nn as nn
66 | # nn = LazyLoader("nn", globals(), "paddle.nn")
67 |
68 | # class M(nn.Layer):
69 | # def __init__(self) -> None:
70 | # super().__init__()
71 |
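A runnable sketch of the pattern the commented-out lines hint at; `json` stands in for a heavy optional dependency such as `paddle`:

```python
# nothing is imported yet; the name is just a LazyLoader proxy
json = LazyLoader('json', globals(), 'json')

# first attribute access triggers _load(), which swaps the real module
# into this namespace so later lookups are direct
print(json.dumps({'lazy': True}))
```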
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/misc/profiler_utils.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import re
5 | import torch
6 | import torch.nn as nn
7 | from torch import Tensor
8 |
9 | from typing import List, Optional
10 |
11 | def stats(
12 | model: nn.Module,
13 | data: Optional[Tensor] = None,
14 | input_shape: List = [1, 3, 640, 640],
15 | device: str = 'cpu',
16 | verbose: bool = False) -> dict:
17 |
18 | is_training = model.training
19 |
20 | model.train()
21 | num_params = sum([p.numel() for p in model.parameters() if p.requires_grad])
22 |
23 | model.eval()
24 | model = model.to(device)
25 |
26 | if data is None:
27 | data = torch.rand(*input_shape, device=device)
28 |
29 | def trace_handler(prof):
30 | print(prof.key_averages().table(
31 | sort_by="self_cuda_time_total", row_limit=-1))
32 |
33 | num_active = 2
34 | with torch.profiler.profile(
35 | activities=[
36 | torch.profiler.ProfilerActivity.CPU,
37 | torch.profiler.ProfilerActivity.CUDA,
38 | ],
39 | schedule=torch.profiler.schedule(
40 | wait=1,
41 | warmup=1,
42 | active=num_active,
43 | repeat=1
44 | ),
45 | # on_trace_ready=trace_handler,
46 | # on_trace_ready=torch.profiler.tensorboard_trace_handler('./log')
47 | # with_modules=True,
48 | with_flops=True,
49 | ) as p:
50 | for _ in range(5):
51 | _ = model(data)
52 | p.step()
53 |
54 | if is_training:
55 | model.train()
56 |
57 | info = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=-1)
58 | num_flops = sum([float(v.strip()) for v in re.findall(r'(\d+\.?\d+ *\n)', info)]) / num_active
59 |
60 | if verbose:
61 | # print(info)
62 | print(f'Total number of trainable parameters: {num_params}')
63 | print(f'Total number of flops: {int(num_flops)}M with {input_shape}')
64 |
65 | return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
66 |
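A usage sketch, assuming a CUDA-capable machine since the profiler schedule requests CUDA activity (the model choice is illustrative):

```python
import torchvision

model = torchvision.models.resnet18()
out = stats(model, input_shape=[1, 3, 224, 224], device='cuda', verbose=True)
print(out['n_parameters'])  # ~11.7M for resnet18
```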
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/misc/visualizer.py:
--------------------------------------------------------------------------------
1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.utils.data
6 |
7 | import torchvision
8 | torchvision.disable_beta_transforms_warning()
9 |
10 | import PIL
11 |
12 | __all__ = ['show_sample']
13 |
14 | def show_sample(sample):
15 | """for coco dataset/dataloader
16 | """
17 | import matplotlib.pyplot as plt
18 | from torchvision.transforms.v2 import functional as F
19 | from torchvision.utils import draw_bounding_boxes
20 |
21 | image, target = sample
22 | if isinstance(image, PIL.Image.Image):
23 | image = F.to_image_tensor(image)
24 |
25 | image = F.convert_dtype(image, torch.uint8)
26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)
27 |
28 | fig, ax = plt.subplots()
29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy())
30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
31 | fig.tight_layout()
32 | fig.show()
33 | plt.show()
34 |
35 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | from .arch import *
6 | from .criterion import *
7 | from .postprocessor import *
8 |
9 | #
10 | from .backbone import *
11 |
12 |
13 | from .backbone import (
14 | get_activation,
15 | FrozenBatchNorm2d,
16 | freeze_batch_norm2d,
17 | )
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/arch/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | from .classification import Classification, ClassHead
6 | from .yolo import YOLO
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/arch/classification.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | from ...core import register
9 |
10 |
11 | __all__ = ['Classification', 'ClassHead']
12 |
13 |
14 | @register()
15 | class Classification(torch.nn.Module):
16 | __inject__ = ['backbone', 'head']
17 |
18 | def __init__(self, backbone: nn.Module, head: nn.Module=None):
19 | super().__init__()
20 |
21 | self.backbone = backbone
22 | self.head = head
23 |
24 | def forward(self, x):
25 | x = self.backbone(x)
26 |
27 | if self.head is not None:
28 | x = self.head(x)
29 |
30 | return x
31 |
32 |
33 | @register()
34 | class ClassHead(nn.Module):
35 | def __init__(self, hidden_dim, num_classes):
36 | super().__init__()
37 | self.pool = nn.AdaptiveAvgPool2d(1)
38 | self.proj = nn.Linear(hidden_dim, num_classes)
39 |
40 | def forward(self, x):
41 | x = x[0] if isinstance(x, (list, tuple)) else x
42 | x = self.pool(x)
43 | x = x.reshape(x.shape[0], -1)
44 | x = self.proj(x)
45 | return x
46 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/arch/yolo.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 |
6 | from ...core import register
7 |
8 |
9 | __all__ = ['YOLO', ]
10 |
11 |
12 | @register()
13 | class YOLO(torch.nn.Module):
14 | __inject__ = ['backbone', 'neck', 'head', ]
15 |
16 | def __init__(self, backbone: torch.nn.Module, neck, head):
17 | super().__init__()
18 | self.backbone = backbone
19 | self.neck = neck
20 | self.head = head
21 |
22 | def forward(self, x, **kwargs):
23 | x = self.backbone(x)
24 | x = self.neck(x)
25 | x = self.head(x)
26 | return x
27 |
28 | def deploy(self, ):
29 | self.eval()
30 | for m in self.modules():
31 | if m is not self and hasattr(m, 'deploy'):
32 | m.deploy()
33 | return self
34 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from .common import (
5 | get_activation,
6 | FrozenBatchNorm2d,
7 | freeze_batch_norm2d,
8 | )
9 | from .presnet import PResNet
10 | from .test_resnet import MResNet
11 |
12 | from .timm_model import TimmModel
13 | from .torchvision_model import TorchVisionModel
14 |
15 | from .csp_resnet import CSPResNet
16 | from .csp_darknet import CSPDarkNet, CSPPAN
17 |
18 | from .hgnetv2 import HGNetv2
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/backbone/test_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from collections import OrderedDict
6 |
7 |
8 | from ...core import register
9 |
10 |
11 | class BasicBlock(nn.Module):
12 | expansion = 1
13 |
14 | def __init__(self, in_planes, planes, stride=1):
15 | super(BasicBlock, self).__init__()
16 |
17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 |
20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
21 | self.bn2 = nn.BatchNorm2d(planes)
22 |
23 | self.shortcut = nn.Sequential()
24 | if stride != 1 or in_planes != self.expansion*planes:
25 | self.shortcut = nn.Sequential(
26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False),
27 | nn.BatchNorm2d(self.expansion*planes)
28 | )
29 | def forward(self, x):
30 | out = F.relu(self.bn1(self.conv1(x)))
31 | out = self.bn2(self.conv2(out))
32 | out += self.shortcut(x)
33 | out = F.relu(out)
34 | return out
35 |
36 |
37 |
38 | class _ResNet(nn.Module):
39 | def __init__(self, block, num_blocks, num_classes=10):
40 | super().__init__()
41 | self.in_planes = 64
42 |
43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
44 | self.bn1 = nn.BatchNorm2d(64)
45 |
46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
50 |
51 | self.linear = nn.Linear(512 * block.expansion, num_classes)
52 |
53 | def _make_layer(self, block, planes, num_blocks, stride):
54 | strides = [stride] + [1]*(num_blocks-1)
55 | layers = []
56 | for stride in strides:
57 | layers.append(block(self.in_planes, planes, stride))
58 | self.in_planes = planes * block.expansion
59 | return nn.Sequential(*layers)
60 |
61 | def forward(self, x):
62 | out = F.relu(self.bn1(self.conv1(x)))
63 | out = self.layer1(out)
64 | out = self.layer2(out)
65 | out = self.layer3(out)
66 | out = self.layer4(out)
67 | out = F.avg_pool2d(out, 4)
68 | out = out.view(out.size(0), -1)
69 | out = self.linear(out)
70 | return out
71 |
72 |
73 | @register()
74 | class MResNet(nn.Module):
75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
76 | super().__init__()
77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes)
78 |
79 | def forward(self, x):
80 | return self.model(x)
81 |
82 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/backbone/timm_model.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 |
3 | https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055#0583
4 | """
5 |
6 | import torch
7 | from torchvision.models.feature_extraction import get_graph_node_names, create_feature_extractor
8 |
9 | from .utils import IntermediateLayerGetter
10 | from ...core import register
11 |
12 |
13 | @register()
14 | class TimmModel(torch.nn.Module):
15 | def __init__(self, \
16 | name,
17 | return_layers,
18 | pretrained=False,
19 | exportable=True,
20 | features_only=True,
21 | **kwargs) -> None:
22 |
23 | super().__init__()
24 |
25 | import timm
26 | model = timm.create_model(
27 | name,
28 | pretrained=pretrained,
29 | exportable=exportable,
30 | features_only=features_only,
31 | **kwargs
32 | )
33 | # nodes, _ = get_graph_node_names(model)
34 | # print(nodes)
35 | # features = {'': ''}
36 | # model = create_feature_extractor(model, return_nodes=features)
37 |
38 | assert set(return_layers).issubset(model.feature_info.module_name()), \
39 | f'return_layers should be a subset of {model.feature_info.module_name()}'
40 |
41 | # self.model = model
42 | self.model = IntermediateLayerGetter(model, return_layers)
43 |
44 | return_idx = [model.feature_info.module_name().index(name) for name in return_layers]
45 | self.strides = [model.feature_info.reduction()[i] for i in return_idx]
46 | self.channels = [model.feature_info.channels()[i] for i in return_idx]
47 | self.return_idx = return_idx
48 | self.return_layers = return_layers
49 |
50 | def forward(self, x: torch.Tensor):
51 | outputs = self.model(x)
52 | # outputs = [outputs[i] for i in self.return_idx]
53 | return outputs
54 |
55 |
56 | if __name__ == '__main__':
57 |
58 | model = TimmModel(name='resnet34', return_layers=['layer2', 'layer3'])
59 | data = torch.rand(1, 3, 640, 640)
60 | outputs = model(data)
61 |
62 | for output in outputs:
63 | print(output.shape)
64 |
65 | """
66 | model:
67 | type: TimmModel
68 | name: resnet34
69 | return_layers: ['layer2', 'layer4']
70 | """
71 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/backbone/torchvision_model.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torchvision
6 |
7 | from ...core import register
8 | from .utils import IntermediateLayerGetter
9 |
10 | __all__ = ['TorchVisionModel']
11 |
12 | @register()
13 | class TorchVisionModel(torch.nn.Module):
14 | def __init__(self, name, return_layers, weights=None, **kwargs) -> None:
15 | super().__init__()
16 |
17 | if weights is not None:
18 | weights = getattr(torchvision.models.get_model_weights(name), weights)
19 |
20 | model = torchvision.models.get_model(name, weights=weights, **kwargs)
21 |
22 | # TODO hard code.
23 | if hasattr(model, 'features'):
24 | model = IntermediateLayerGetter(model.features, return_layers)
25 | else:
26 | model = IntermediateLayerGetter(model, return_layers)
27 |
28 | self.model = model
29 |
30 | def forward(self, x):
31 | return self.model(x)
32 |
33 |
34 | # TorchVisionModel('swin_t', return_layers=['5', '7'])
35 | # TorchVisionModel('resnet34', return_layers=['layer2','layer3', 'layer4'])
36 |
37 | """
38 | TorchVisionModel:
39 | name: swin_t
40 | return_layers: ['5', '7']
41 | weights: DEFAULT
42 |
43 |
44 | model:
45 | type: TorchVisionModel
46 | name: resnet34
47 | return_layers: ['layer2','layer3', 'layer4']
48 | weights: DEFAULT
49 | """
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/backbone/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py
3 |
4 | Copyright(c) 2023 lyuwenyu. All Rights Reserved.
5 | """
6 |
7 | from collections import OrderedDict
8 | from typing import Dict, List
9 |
10 |
11 | import torch.nn as nn
12 |
13 |
14 | class IntermediateLayerGetter(nn.ModuleDict):
15 | """
16 | Module wrapper that returns intermediate layers from a model
17 |
18 | It has a strong assumption that the modules have been registered
19 | into the model in the same order as they are used.
20 | This means that one should **not** reuse the same nn.Module
21 | twice in the forward if you want this to work.
22 |
23 | Additionally, it is only able to query submodules that are directly
24 | assigned to the model. So if `model` is passed, `model.feature1` can
25 | be returned, but not `model.feature1.layer2`.
26 | """
27 |
28 | _version = 3
29 |
30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]):
32 | raise ValueError("return_layers are not present in model. {}"\
33 | .format([name for name, _ in model.named_children()]))
34 | orig_return_layers = return_layers
35 | return_layers = {str(k): str(k) for k in return_layers}
36 | layers = OrderedDict()
37 | for name, module in model.named_children():
38 | layers[name] = module
39 | if name in return_layers:
40 | del return_layers[name]
41 | if not return_layers:
42 | break
43 |
44 | super().__init__(layers)
45 | self.return_layers = orig_return_layers
46 |
47 | def forward(self, x):
48 | outputs = []
49 | for name, module in self.items():
50 | x = module(x)
51 | if name in self.return_layers:
52 | outputs.append(x)
53 |
54 | return outputs
55 |
56 |
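A sketch with a torchvision resnet (illustrative); note the getter stops registering children once the last requested layer is seen, so `avgpool` and `fc` are never run:

```python
import torch
import torchvision

model = torchvision.models.resnet18()
getter = IntermediateLayerGetter(model, return_layers=['layer2', 'layer3'])
feats = getter(torch.rand(1, 3, 224, 224))
print([f.shape for f in feats])  # [1, 128, 28, 28] and [1, 256, 14, 14]
```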
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/criterion/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | import torch.nn as nn
6 | from ...core import register
7 |
8 | from .det_criterion import DetCriterion
9 |
10 | CrossEntropyLoss = register()(nn.CrossEntropyLoss)
11 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/postprocessor/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | from .nms_postprocessor import DetNMSPostProcessor
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/postprocessor/box_revert.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torchvision
6 | from torch import Tensor
7 | from enum import Enum
8 |
9 |
10 | class BoxProcessFormat(Enum):
11 | """Box process format
12 |
13 | Available formats are
14 | * ``RESIZE``
15 | * ``RESIZE_KEEP_RATIO``
16 | * ``RESIZE_KEEP_RATIO_PADDING``
17 | """
18 | RESIZE = 1
19 | RESIZE_KEEP_RATIO = 2
20 | RESIZE_KEEP_RATIO_PADDING = 3
21 |
22 |
23 | def box_revert(
24 | boxes: Tensor,
25 | orig_sizes: Tensor=None,
26 | eval_sizes: Tensor=None,
27 | inpt_sizes: Tensor=None,
28 | inpt_padding: Tensor=None,
29 | normalized: bool=True,
30 | in_fmt: str='cxcywh',
31 | out_fmt: str='xyxy',
32 | process_fmt=BoxProcessFormat.RESIZE,
33 | ) -> Tensor:
34 | """
35 | Args:
36 | boxes(Tensor), [N, :, 4], (x1, y1, x2, y2), pred boxes.
37 | inpt_sizes(Tensor), [N, 2], (w, h). input sizes.
38 | orig_sizes(Tensor), [N, 2], (w, h). origin sizes.
39 | inpt_padding (Tensor), [N, 2], (w_pad, h_pad, ...).
40 | (inpt_sizes + inpt_padding) == eval_sizes
41 | """
42 | assert in_fmt in ('cxcywh', 'xyxy'), f'unsupported in_fmt: {in_fmt}'
43 |
44 | if normalized and eval_sizes is not None:
45 | boxes = boxes * eval_sizes.repeat(1, 2).unsqueeze(1)
46 |
47 | if inpt_padding is not None:
48 | if in_fmt == 'xyxy':
49 | boxes -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1)
50 | elif in_fmt == 'cxcywh':
51 | boxes[..., :2] -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1)
52 |
53 | if orig_sizes is not None:
54 | orig_sizes = orig_sizes.repeat(1, 2).unsqueeze(1)
55 | if inpt_sizes is not None:
56 | inpt_sizes = inpt_sizes.repeat(1, 2).unsqueeze(1)
57 | boxes = boxes * (orig_sizes / inpt_sizes)
58 | else:
59 | boxes = boxes * orig_sizes
60 |
61 | boxes = torchvision.ops.box_convert(boxes, in_fmt=in_fmt, out_fmt=out_fmt)
62 | return boxes
63 |
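A worked call for the plain `RESIZE` case (all sizes below are made up): normalized cxcywh predictions at a 640x640 eval size are scaled back to a 1280x960 original image, then converted to xyxy:

```python
import torch

pred = torch.tensor([[[0.5, 0.5, 0.2, 0.4]]])  # [N=1, queries=1, 4], normalized cxcywh
boxes = box_revert(
    pred,
    orig_sizes=torch.tensor([[1280, 960]]),  # (w, h) of the original image
    eval_sizes=torch.tensor([[640, 640]]),   # (w, h) the model ran at
    inpt_sizes=torch.tensor([[640, 640]]),
    normalized=True, in_fmt='cxcywh', out_fmt='xyxy',
)
print(boxes)  # tensor([[[512., 288., 768., 672.]]])
```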
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/postprocessor/detr_postprocessor.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import torchvision
9 |
10 |
11 | __all__ = ['DetDETRPostProcessor']
12 |
13 | from .box_revert import box_revert
14 | from .box_revert import BoxProcessFormat
15 |
16 | def mod(a, b):
17 | out = a - a // b * b  # equivalent to a % b, written without the Mod op for TensorRT
18 | return out
19 |
20 | class DetDETRPostProcessor(nn.Module):
21 | def __init__(
22 | self,
23 | num_classes=80,
24 | use_focal_loss=True,
25 | num_top_queries=300,
26 | box_process_format=BoxProcessFormat.RESIZE,
27 | ) -> None:
28 | super().__init__()
29 | self.use_focal_loss = use_focal_loss
30 | self.num_top_queries = num_top_queries
31 | self.num_classes = int(num_classes)
32 | self.box_process_format = box_process_format
33 | self.deploy_mode = False
34 |
35 | def extra_repr(self) -> str:
36 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}'
37 |
38 | def forward(self, outputs, **kwargs):
39 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes']
40 |
41 | if self.use_focal_loss:
42 | scores = F.sigmoid(logits)
43 | scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
44 | labels = index % self.num_classes
45 | # labels = mod(index, self.num_classes) # for tensorrt
46 | index = index // self.num_classes
47 | boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
48 |
49 | else:
50 | scores = F.softmax(logits, dim=-1)[:, :, :-1]
51 | scores, labels = scores.max(dim=-1)
52 | if scores.shape[1] > self.num_top_queries:
53 | scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
54 | labels = torch.gather(labels, dim=1, index=index)
55 | boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
56 |
57 | if kwargs is not None:  # always true for **kwargs; boxes are always reverted to xyxy
58 | boxes = box_revert(
59 | boxes,
60 | in_fmt='cxcywh',
61 | out_fmt='xyxy',
62 | process_fmt=self.box_process_format,
63 | normalized=True,
64 | **kwargs,
65 | )
66 |
67 | # TODO for onnx export
68 | if self.deploy_mode:
69 | return labels, boxes, scores
70 |
71 | results = []
72 | for lab, box, sco in zip(labels, boxes, scores):
73 | result = dict(labels=lab, boxes=box, scores=sco)
74 | results.append(result)
75 |
76 | return results
77 |
78 | def deploy(self, ):
79 | self.eval()
80 | self.deploy_mode = True
81 | return self
82 |
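The focal-loss branch above flattens the `[B, Q, C]` scores before `topk`, so a flat index encodes both query and class as `index = q * C + c`; hence `index % C` recovers the class and `index // C` the query. A quick check (shapes are made up):

```python
import torch

B, Q, C = 1, 4, 3
scores = torch.rand(B, Q, C)
vals, idx = torch.topk(scores.flatten(1), k=2, dim=-1)
print(idx % C)   # class ids
print(idx // C)  # query ids, used to gather the matching boxes
```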
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/nn/postprocessor/nms_postprocessor.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.nn.functional as F
6 | import torch.distributed
7 | import torchvision
8 | from torch import Tensor
9 |
10 | from ...core import register
11 |
12 | from typing import Dict
13 |
14 |
15 | __all__ = ['DetNMSPostProcessor', ]
16 |
17 |
18 | @register()
19 | class DetNMSPostProcessor(torch.nn.Module):
20 | def __init__(self, \
21 | iou_threshold=0.7,
22 | score_threshold=0.01,
23 | keep_topk=300,
24 | box_fmt='cxcywh',
25 | logit_fmt='sigmoid') -> None:
26 | super().__init__()
27 | self.iou_threshold = iou_threshold
28 | self.score_threshold = score_threshold
29 | self.keep_topk = keep_topk
30 | self.box_fmt = box_fmt.lower()
31 | self.logit_fmt = logit_fmt.lower()
32 | self.logit_func = getattr(F, self.logit_fmt, None)
33 | self.deploy_mode = False
34 |
35 | def forward(self, outputs: Dict[str, Tensor], orig_target_sizes: Tensor):
36 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes']
37 | pred_boxes = torchvision.ops.box_convert(boxes, in_fmt=self.box_fmt, out_fmt='xyxy')
38 | pred_boxes *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
39 |
40 | values, pred_labels = torch.max(logits, dim=-1)
41 |
42 | if self.logit_func:
43 | pred_scores = self.logit_func(values)
44 | else:
45 | pred_scores = values
46 |
47 | # TODO for onnx export
48 | if self.deploy_mode:
49 | blobs = {
50 | 'pred_labels': pred_labels,
51 | 'pred_boxes': pred_boxes,
52 | 'pred_scores': pred_scores
53 | }
54 | return blobs
55 |
56 | results = []
57 | for i in range(logits.shape[0]):
58 | score_keep = pred_scores[i] > self.score_threshold
59 | pred_box = pred_boxes[i][score_keep]
60 | pred_label = pred_labels[i][score_keep]
61 | pred_score = pred_scores[i][score_keep]
62 |
63 | keep = torchvision.ops.batched_nms(pred_box, pred_score, pred_label, self.iou_threshold)
64 | keep = keep[:self.keep_topk]
65 |
66 | blob = {
67 | 'labels': pred_label[keep],
68 | 'boxes': pred_box[keep],
69 | 'scores': pred_score[keep],
70 | }
71 |
72 | results.append(blob)
73 |
74 | return results
75 |
76 | def deploy(self, ):
77 | self.eval()
78 | self.deploy_mode = True
79 | return self
80 |
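A smoke-test sketch with random predictions (all shapes and sizes below are made up):

```python
import torch

post = DetNMSPostProcessor(iou_threshold=0.7, score_threshold=0.01)
outputs = {
    'pred_logits': torch.randn(2, 300, 80),  # [batch, queries, classes]
    'pred_boxes': torch.rand(2, 300, 4),     # normalized cxcywh
}
orig_sizes = torch.tensor([[640, 480], [800, 600]])  # (w, h) per image
results = post(outputs, orig_sizes)
print(results[0]['boxes'].shape, results[0]['scores'].shape)
```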
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/optim/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from .ema import *
5 | from .optim import *
6 | from .amp import *
7 | from .warmup import *
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/optim/amp.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | import torch.cuda.amp as amp
6 |
7 | from ..core import register
8 |
9 |
10 | __all__ = ['GradScaler']
11 |
12 | GradScaler = register()(amp.grad_scaler.GradScaler)
13 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/optim/optim.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | import torch.optim as optim
6 | import torch.optim.lr_scheduler as lr_scheduler
7 |
8 | from ..core import register
9 |
10 |
11 | __all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR']
12 |
13 |
14 |
15 | SGD = register()(optim.SGD)
16 | Adam = register()(optim.Adam)
17 | AdamW = register()(optim.AdamW)
18 |
19 |
20 | MultiStepLR = register()(lr_scheduler.MultiStepLR)
21 | CosineAnnealingLR = register()(lr_scheduler.CosineAnnealingLR)
22 | OneCycleLR = register()(lr_scheduler.OneCycleLR)
23 | LambdaLR = register()(lr_scheduler.LambdaLR)
24 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/optim/warmup.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from torch.optim.lr_scheduler import LRScheduler
5 |
6 | from ..core import register
7 |
8 |
9 | class Warmup(object):
10 | def __init__(self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int=-1) -> None:
11 | self.lr_scheduler = lr_scheduler
12 | self.warmup_end_values = [pg['lr'] for pg in lr_scheduler.optimizer.param_groups]
13 | self.last_step = last_step
14 | self.warmup_duration = warmup_duration
15 | self.step()
16 |
17 | def state_dict(self):
18 | return {k: v for k, v in self.__dict__.items() if k != 'lr_scheduler'}
19 |
20 | def load_state_dict(self, state_dict):
21 | self.__dict__.update(state_dict)
22 |
23 | def get_warmup_factor(self, step, **kwargs):
24 | raise NotImplementedError
25 |
26 | def step(self, ):
27 | self.last_step += 1
28 | if self.last_step >= self.warmup_duration:
29 | return
30 | factor = self.get_warmup_factor(self.last_step)
31 | for i, pg in enumerate(self.lr_scheduler.optimizer.param_groups):
32 | pg['lr'] = factor * self.warmup_end_values[i]
33 |
34 | def finished(self, ):
35 | if self.last_step >= self.warmup_duration:
36 | return True
37 | return False
38 |
39 |
40 | @register()
41 | class LinearWarmup(Warmup):
42 | def __init__(self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int = -1) -> None:
43 | super().__init__(lr_scheduler, warmup_duration, last_step)
44 |
45 | def get_warmup_factor(self, step):
46 | return min(1.0, (step + 1) / self.warmup_duration)
47 |
48 |
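A worked example of the ramp (optimizer and values are made up): with `warmup_duration=5` the factor is `(step + 1) / 5`, so the learning rate climbs 0.02 -> 0.1 and then holds while the wrapped scheduler takes over:

```python
import torch

net = torch.nn.Linear(2, 2)
opt = torch.optim.SGD(net.parameters(), lr=0.1)
sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones=[100])
warmup = LinearWarmup(sched, warmup_duration=5)  # __init__ already takes step 0

for step in range(6):
    print(step, opt.param_groups[0]['lr'])  # 0.02, 0.04, 0.06, 0.08, 0.1, 0.1
    warmup.step()
```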
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/solver/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | from ._solver import BaseSolver
5 | from .clas_solver import ClasSolver
6 | from .det_solver import DetSolver
7 |
8 |
9 |
10 | from typing import Dict, Type
11 |
12 | TASKS: Dict[str, Type[BaseSolver]] = {
13 | 'classification': ClasSolver,
14 | 'detection': DetSolver,
15 | }
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/solver/clas_engine.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | from ..misc import (MetricLogger, SmoothedValue, reduce_dict)
8 |
9 |
10 | def train_one_epoch(model: nn.Module, criterion: nn.Module, dataloader, optimizer, ema, epoch, device):
11 | """
12 | """
13 | model.train()
14 |
15 | metric_logger = MetricLogger(delimiter=" ")
16 | metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}'))
17 | print_freq = 100
18 | header = 'Epoch: [{}]'.format(epoch)
19 |
20 | for imgs, labels in metric_logger.log_every(dataloader, print_freq, header):
21 | imgs = imgs.to(device)
22 | labels = labels.to(device)
23 |
24 | preds = model(imgs)
25 | loss: torch.Tensor = criterion(preds, labels)
26 |
27 | optimizer.zero_grad()
28 | loss.backward()
29 | optimizer.step()
30 |
31 | if ema is not None:
32 | ema.update(model)
33 |
34 | loss_reduced_values = {k: v.item() for k, v in reduce_dict({'loss': loss}).items()}
35 | metric_logger.update(**loss_reduced_values)
36 | metric_logger.update(lr=optimizer.param_groups[0]["lr"])
37 |
38 | metric_logger.synchronize_between_processes()
39 | print("Averaged stats:", metric_logger)
40 |
41 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
42 | return stats
43 |
44 |
45 |
46 | @torch.no_grad()
47 | def evaluate(model, criterion, dataloader, device):
48 | model.eval()
49 |
50 | metric_logger = MetricLogger(delimiter=" ")
51 | # metric_logger.add_meter('acc', SmoothedValue(window_size=1, fmt='{global_avg:.4f}'))
52 | # metric_logger.add_meter('loss', SmoothedValue(window_size=1, fmt='{value:.2f}'))
53 | metric_logger.add_meter('acc', SmoothedValue(window_size=1))
54 | metric_logger.add_meter('loss', SmoothedValue(window_size=1))
55 |
56 | header = 'Test:'
57 | for imgs, labels in metric_logger.log_every(dataloader, 10, header):
58 | imgs, labels = imgs.to(device), labels.to(device)
59 | preds = model(imgs)
60 |
61 | acc = (preds.argmax(dim=-1) == labels).sum() / preds.shape[0]
62 | loss = criterion(preds, labels)
63 |
64 | dict_reduced = reduce_dict({'acc': acc, 'loss': loss})
65 | reduced_values = {k: v.item() for k, v in dict_reduced.items()}
66 | metric_logger.update(**reduced_values)
67 |
68 | metric_logger.synchronize_between_processes()
69 | print("Averaged stats:", metric_logger)
70 |
71 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()}
72 | return stats
73 |
74 |
75 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/solver/clas_solver.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import time
5 | import json
6 | import datetime
7 | from pathlib import Path
8 |
9 | import torch
10 | import torch.nn as nn
11 |
12 | from ..misc import dist_utils
13 | from ._solver import BaseSolver
14 | from .clas_engine import train_one_epoch, evaluate
15 |
16 |
17 | class ClasSolver(BaseSolver):
18 |
19 | def fit(self, ):
20 | print("Start training")
21 | self.train()
22 | args = self.cfg
23 |
24 | n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
25 | print('Number of params:', n_parameters)
26 |
27 | output_dir = Path(args.output_dir)
28 | output_dir.mkdir(exist_ok=True)
29 |
30 | start_time = time.time()
31 | start_epoch = self.last_epoch + 1
32 | for epoch in range(start_epoch, args.epoches):
33 |
34 | if dist_utils.is_dist_available_and_initialized():
35 | self.train_dataloader.sampler.set_epoch(epoch)
36 |
37 | train_stats = train_one_epoch(self.model,
38 | self.criterion,
39 | self.train_dataloader,
40 | self.optimizer,
41 | self.ema,
42 | epoch=epoch,
43 | device=self.device)
44 | self.lr_scheduler.step()
45 | self.last_epoch += 1
46 |
47 | if output_dir:
48 | checkpoint_paths = [output_dir / 'checkpoint.pth']
49 | # extra checkpoint before LR drop and every 100 epochs
50 | if (epoch + 1) % args.checkpoint_freq == 0:
51 | checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
52 | for checkpoint_path in checkpoint_paths:
53 | dist_utils.save_on_master(self.state_dict(epoch), checkpoint_path)
54 |
55 | module = self.ema.module if self.ema else self.model
56 | test_stats = evaluate(module, self.criterion, self.val_dataloader, self.device)
57 |
58 | log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
59 | **{f'test_{k}': v for k, v in test_stats.items()},
60 | 'epoch': epoch,
61 | 'n_parameters': n_parameters}
62 |
63 | if output_dir and dist_utils.is_main_process():
64 | with (output_dir / "log.txt").open("a") as f:
65 | f.write(json.dumps(log_stats) + "\n")
66 |
67 | total_time = time.time() - start_time
68 | total_time_str = str(datetime.timedelta(seconds=int(total_time)))
69 | print('Training time {}'.format(total_time_str))
70 |
71 |
72 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/zoo/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | from . import rtdetr
6 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/zoo/rtdetr/__init__.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 |
5 | from .rtdetr import RTDETR
6 | from .matcher import HungarianMatcher
7 | from .hybrid_encoder import HybridEncoder
8 | from .rtdetr_decoder import RTDETRTransformer
9 | from .rtdetr_criterion import RTDETRCriterion
10 | from .rtdetr_postprocessor import RTDETRPostProcessor
11 |
12 | # v2
13 | from .rtdetrv2_decoder import RTDETRTransformerv2
14 | from .rtdetrv2_criterion import RTDETRCriterionv2
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/zoo/rtdetr/box_ops.py:
--------------------------------------------------------------------------------
1 | """
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py
4 | """
5 |
6 | import torch
7 | from torch import Tensor
8 | from torchvision.ops.boxes import box_area
9 |
10 |
11 | def box_cxcywh_to_xyxy(x: Tensor) -> Tensor:
12 | x_c, y_c, w, h = x.unbind(-1)
13 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
14 | (x_c + 0.5 * w), (y_c + 0.5 * h)]
15 | return torch.stack(b, dim=-1)
16 |
17 |
18 | def box_xyxy_to_cxcywh(x: Tensor) -> Tensor:
19 | x0, y0, x1, y1 = x.unbind(-1)
20 | b = [(x0 + x1) / 2, (y0 + y1) / 2,
21 | (x1 - x0), (y1 - y0)]
22 | return torch.stack(b, dim=-1)
23 |
24 |
25 | # modified from torchvision to also return the union
26 | def box_iou(boxes1: Tensor, boxes2: Tensor):
27 | area1 = box_area(boxes1)
28 | area2 = box_area(boxes2)
29 |
30 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
31 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
32 |
33 | wh = (rb - lt).clamp(min=0) # [N,M,2]
34 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
35 |
36 | union = area1[:, None] + area2 - inter
37 |
38 | iou = inter / union
39 | return iou, union
40 |
41 |
42 | def generalized_box_iou(boxes1, boxes2):
43 | """
44 | Generalized IoU from https://giou.stanford.edu/
45 |
46 | The boxes should be in [x0, y0, x1, y1] format
47 |
48 | Returns a [N, M] pairwise matrix, where N = len(boxes1)
49 | and M = len(boxes2)
50 | """
51 | # degenerate boxes gives inf / nan results
52 | # so do an early check
53 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
54 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
55 | iou, union = box_iou(boxes1, boxes2)
56 |
57 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
58 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
59 |
60 | wh = (rb - lt).clamp(min=0) # [N,M,2]
61 | area = wh[:, :, 0] * wh[:, :, 1]
62 |
63 | return iou - (area - union) / area
64 |
65 |
66 | def masks_to_boxes(masks):
67 | """Compute the bounding boxes around the provided masks
68 |
69 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
70 |
71 | Returns a [N, 4] tensors, with the boxes in xyxy format
72 | """
73 | if masks.numel() == 0:
74 | return torch.zeros((0, 4), device=masks.device)
75 |
76 | h, w = masks.shape[-2:]
77 |
78 | y = torch.arange(0, h, dtype=torch.float)
79 | x = torch.arange(0, w, dtype=torch.float)
80 | y, x = torch.meshgrid(y, x, indexing='ij')
81 |
82 | x_mask = (masks * x.unsqueeze(0))
83 | x_max = x_mask.flatten(1).max(-1)[0]
84 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
85 |
86 | y_mask = (masks * y.unsqueeze(0))
87 | y_max = y_mask.flatten(1).max(-1)[0]
88 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
89 |
90 | return torch.stack([x_min, y_min, x_max, y_max], 1)
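A quick numeric check of `generalized_box_iou` on two hand-picked boxes: the intersection is 1 and the union 7, so IoU = 1/7; the enclosing box has area 9, giving GIoU = 1/7 - (9 - 7)/9, about -0.079:

```python
import torch

b1 = torch.tensor([[0., 0., 2., 2.]])
b2 = torch.tensor([[1., 1., 3., 3.]])
print(generalized_box_iou(b1, b2))  # tensor([[-0.0794]])
```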
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/zoo/rtdetr/conver_params.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 |
6 | def main(args) -> None:
7 | import cvperception
8 | from cvperception.core import load_config, merge_config, create
9 | cfg = load_config(args.config)
10 | model: torch.nn.Module = create(cfg['model'], merge_config(cfg))
11 |
12 | if args.version == 1:
13 | state = model.state_dict()
14 | keys = [k for k in state.keys() if 'num_batches_tracked' not in k]
15 |
16 | elif args.version == 2:
17 | state = model.state_dict()
18 | ignore_keys = ['anchors', 'valid_mask', 'num_points_scale']
19 | keys = [k for k in state.keys() if 'num_batches_tracked' not in k]
20 | keys = [k for k in keys if not any([x in k for x in ignore_keys])]
21 |
22 | import paddle
23 | p_state = paddle.load(args.pdparams)
24 | pkeys = list(p_state.keys())
25 |
26 | assert len(keys) == len(pkeys), f'{len(keys)}, {len(pkeys)}'
27 |
28 | new_state = {}
29 | for i, k in enumerate(keys):
30 | pp = p_state[pkeys[i]]
31 | pp = torch.tensor(pp.numpy())
32 |
33 | if 'denoising_class_embed' in k:
34 | new_state[k] = torch.concat([pp, torch.zeros(1, pp.shape[-1])], dim=0)
35 | continue
36 |
37 | tp = state[k]
38 | if len(tp.shape) == 2:  # 2-D weights use paddle's [in, out] layout; torch expects [out, in]
39 | new_state[k] = pp.T
40 | elif len(tp.shape) == 1:
41 | new_state[k] = pp
42 | else:
43 | assert tp.shape == pp.shape, f'{k}, {pp.shape}, {tp.shape}'
44 | new_state[k] = pp
45 |
46 | assert len(new_state) == len(p_state), f'{len(new_state)}, {len(p_state)}'
47 |
48 | # checkpoint = {'ema': {'module': new_state, }}
49 | # torch.save(checkpoint, args.output_file)
50 |
51 | model.load_state_dict(new_state, strict=False)
52 |
53 | checkpoint = {'ema': {'module': model.state_dict(), }}
54 | torch.save(checkpoint, args.output_file)
55 |
56 |
57 |
58 | if __name__ == '__main__':
59 | import argparse
60 | parser = argparse.ArgumentParser()
61 | parser.add_argument('-c', '--config', type=str, )
62 | parser.add_argument('-p', '--pdparams', type=str, )
63 | parser.add_argument('-o', '--output_file', type=str, )
64 | parser.add_argument('-v', '--version', type=int, default=1)
65 |
66 | args = parser.parse_args()
67 | main(args)
68 |
69 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -p rtdetr_r18vd_dec3_6x_coco.pdparams -o rtdetr_r18vd_dec3_6x_coco_new.pth
70 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -p rtdetr_r18vd_5x_coco_objects365.pdparams -o rtdetr_r18vd_5x_coco_objects365_new.pth
71 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetrv2/rtdetrv2_r50vd_120e_coco.yml -p rtdetr_r50vd_1x_objects365.pdparams -o rtdetrv2_r50vd_1x_objects365_new.pth -v 2
72 |
73 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import random
9 | import numpy as np
10 | from typing import List
11 |
12 | from ...core import register
13 |
14 |
15 | __all__ = ['RTDETR', ]
16 |
17 |
18 | @register()
19 | class RTDETR(nn.Module):
20 | __inject__ = ['backbone', 'encoder', 'decoder', ]
21 |
22 | def __init__(self, \
23 | backbone: nn.Module,
24 | encoder: nn.Module,
25 | decoder: nn.Module,
26 | ):
27 | super().__init__()
28 | self.backbone = backbone
29 | self.decoder = decoder
30 | self.encoder = encoder
31 |
32 | def forward(self, x, targets=None):
33 | x = self.backbone(x)
34 | x = self.encoder(x)
35 | x = self.decoder(x, targets)
36 |
37 | return x
38 |
39 | def deploy(self, ):
40 | self.eval()
41 | for m in self.modules():
42 | if hasattr(m, 'convert_to_deploy'):
43 | m.convert_to_deploy()
44 | return self
45 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/tools/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Train/test script examples
4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master-port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &`
5 | - `-r path/to/checkpoint`
6 | - `--amp`
7 | - `--test-only`
8 |
9 |
10 | Export script examples
11 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check`
12 |
13 |
14 | GPU does not release memory
15 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9`
16 |
17 |
18 | Save all logs
19 | - Append `&> train.log 2>&1 &` (background) or `&> train.log 2>&1` to the command
20 |
21 |
22 | Tensorboard
23 | - `--summary-dir=/path/to/summary/dir` or `-u summary_dir=/path/to/summary/dir`
24 | - `tensorboard --host=ip --port=8989 --logdir=/path/to/summary/`
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/tools/run_profile.py:
--------------------------------------------------------------------------------
1 |
2 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
3 | """
4 |
5 | import torch
6 | import torch.nn as nn
7 | from torch import Tensor
8 |
9 | import re
10 | import os
11 | import sys
12 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
13 | from src.core import YAMLConfig, yaml_utils
14 | from src.solver import TASKS
15 |
16 | from typing import Dict, List, Optional, Any
17 |
18 |
19 | __all__ = ["profile_stats"]
20 |
21 | def profile_stats(
22 | model: nn.Module,
23 | data: Optional[Tensor]=None,
24 | shape: List[int]=[1, 3, 640, 640],
25 | verbose: bool=False
26 | ) -> Dict[str, Any]:
27 |
28 | is_training = model.training
29 |
30 | model.train()
31 | num_params = sum([p.numel() for p in model.parameters() if p.requires_grad])
32 |
33 | model.eval()
34 |
35 | if data is None:
36 | dtype = next(model.parameters()).dtype
37 | device = next(model.parameters()).device
38 | data = torch.rand(*shape, dtype=dtype, device=device)
39 | print(device)
40 |
41 | def trace_handler(prof):
42 | print(prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1))
43 |
44 | wait = 0
45 | warmup = 1
46 | active = 1
47 | repeat = 1
48 | skip_first = 0
49 | with torch.profiler.profile(
50 | activities=[
51 | torch.profiler.ProfilerActivity.CPU,
52 | torch.profiler.ProfilerActivity.CUDA,
53 | ],
54 | schedule=torch.profiler.schedule(
55 | wait=wait,
56 | warmup=warmup,
57 | active=active,
58 | repeat=repeat,
59 | skip_first=skip_first,
60 | ),
61 | with_flops=True,
62 | ) as p:
63 | n_step = skip_first + (wait + warmup + active) * repeat
64 | for _ in range(n_step):
65 | _ = model(data)
66 | p.step()
67 |
68 | if is_training:
69 | model.train()
70 |
71 | info = p.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1)
72 | num_flops = sum([float(v.strip()) for v in re.findall(r'(\d+\.?\d+ *\n)', info)]) / active
73 |
74 | if verbose:
75 | print(info)
76 | print(f'Total number of trainable parameters: {num_params}')
77 | print(f'Total number of flops: {int(num_flops)}M with {shape}')
78 |
79 | return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info}
80 |
81 |
82 |
83 | if __name__ == "__main__":
84 | import argparse
85 | parser = argparse.ArgumentParser()
86 | parser.add_argument('-c', '--config', type=str, required=True)
87 | parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',)
88 | args = parser.parse_args()
89 |
90 | cfg = YAMLConfig(args.config, device=args.device)
91 | model = cfg.model.to(args.device)
92 |
93 | profile_stats(model, verbose=True)
94 |
--------------------------------------------------------------------------------
/rtdetrv2_pytorch/tools/train.py:
--------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """
3 |
4 | import os
5 | import sys
6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
7 |
8 | import argparse
9 |
10 | from src.misc import dist_utils
11 | from src.core import YAMLConfig, yaml_utils
12 | from src.solver import TASKS
13 |
14 |
15 | def main(args, ) -> None:
16 | """main
17 | """
18 | dist_utils.setup_distributed(args.print_rank, args.print_method, seed=args.seed)
19 |
20 | assert not all([args.tuning, args.resume]), \
21 | 'Only support one of from_scratch, resume, or tuning at a time'
22 |
23 | update_dict = yaml_utils.parse_cli(args.update)
24 | update_dict.update({k: v for k, v in args.__dict__.items() \
25 | if k not in ['update', ] and v is not None})
26 |
27 | cfg = YAMLConfig(args.config, **update_dict)
28 | print('cfg: ', cfg.__dict__)
29 |
30 | solver = TASKS[cfg.yaml_cfg['task']](cfg)
31 |
32 | if args.test_only:
33 | solver.val()
34 | else:
35 | solver.fit()
36 |
37 | dist_utils.cleanup()
38 |
39 |
40 | if __name__ == '__main__':
41 |
42 | parser = argparse.ArgumentParser()
43 |
44 | # priority 0
45 | parser.add_argument('-c', '--config', type=str, required=True)
46 | parser.add_argument('-r', '--resume', type=str, help='resume from checkpoint')
47 | parser.add_argument('-t', '--tuning', type=str, help='tuning from checkpoint')
48 | parser.add_argument('-d', '--device', type=str, help='device',)
49 | parser.add_argument('--seed', type=int, help='exp reproducibility')
50 | parser.add_argument('--use-amp', action='store_true', help='auto mixed precision training')
51 | parser.add_argument('--output-dir', type=str, help='output directory')
52 | parser.add_argument('--summary-dir', type=str, help='tensorboard summary')
53 | parser.add_argument('--test-only', action='store_true', default=False,)
54 |
55 | # priority 1
56 | parser.add_argument('-u', '--update', nargs='+', help='update yaml config')
57 |
58 | # env
59 | parser.add_argument('--print-method', type=str, default='builtin', help='print method')
60 | parser.add_argument('--print-rank', type=int, default=0, help='print rank id')
61 |
62 | parser.add_argument('--local-rank', type=int, help='local rank id')
63 | args = parser.parse_args()
64 |
65 | main(args)
66 |
--------------------------------------------------------------------------------