├── .github ├── FUNDING.yml └── ISSUE_TEMPLATE │ └── bug_report.md ├── .gitignore ├── LICENSE ├── README.md ├── README_cn.md ├── benchmark ├── README.md ├── dataset.py ├── trtexec.md ├── trtinfer.py ├── utils.py └── yolov8_onnx.py ├── hubconf.py ├── rtdetr_paddle ├── README.md ├── README_cn.md ├── configs │ ├── datasets │ │ ├── coco_detection.yml │ │ └── voc.yml │ ├── rtdetr │ │ ├── _base_ │ │ │ ├── optimizer_6x.yml │ │ │ ├── rtdetr_r50vd.yml │ │ │ └── rtdetr_reader.yml │ │ ├── rtdetr_hgnetv2_l_6x_coco.yml │ │ ├── rtdetr_hgnetv2_x_6x_coco.yml │ │ ├── rtdetr_r101vd_6x_coco.yml │ │ ├── rtdetr_r18vd_6x_coco.yml │ │ ├── rtdetr_r34vd_6x_coco.yml │ │ ├── rtdetr_r50vd_6x_coco.yml │ │ └── rtdetr_r50vd_m_6x_coco.yml │ └── runtime.yml ├── dataset │ ├── coco │ │ └── download_coco.py │ └── voc │ │ ├── create_list.py │ │ ├── download_voc.py │ │ └── label_list.txt ├── ppdet │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── config │ │ │ ├── __init__.py │ │ │ ├── schema.py │ │ │ └── yaml_helpers.py │ │ └── workspace.py │ ├── data │ │ ├── __init__.py │ │ ├── reader.py │ │ ├── shm_utils.py │ │ ├── source │ │ │ ├── __init__.py │ │ │ ├── category.py │ │ │ ├── coco.py │ │ │ ├── dataset.py │ │ │ └── voc.py │ │ ├── transform │ │ │ ├── __init__.py │ │ │ ├── batch_operators.py │ │ │ ├── op_helper.py │ │ │ └── operators.py │ │ └── utils.py │ ├── engine │ │ ├── __init__.py │ │ ├── callbacks.py │ │ ├── env.py │ │ ├── export_utils.py │ │ └── trainer.py │ ├── metrics │ │ ├── __init__.py │ │ ├── coco_utils.py │ │ ├── json_results.py │ │ ├── keypoint_metrics.py │ │ ├── map_utils.py │ │ ├── mcmot_metrics.py │ │ ├── metrics.py │ │ ├── mot_metrics.py │ │ ├── munkres.py │ │ ├── pose3d_metrics.py │ │ └── widerface_utils.py │ ├── modeling │ │ ├── __init__.py │ │ ├── architectures │ │ │ ├── __init__.py │ │ │ ├── detr.py │ │ │ └── meta_arch.py │ │ ├── backbones │ │ │ ├── __init__.py │ │ │ ├── convnext.py │ │ │ ├── csp_darknet.py │ │ │ ├── cspresnet.py │ │ │ ├── darknet.py │ │ │ ├── focalnet.py │ │ │ ├── hgnet_v2.py │ │ │ ├── lcnet.py │ │ │ ├── mobilenet_v1.py │ │ │ ├── mobilenet_v3.py │ │ │ ├── mobileone.py │ │ │ ├── name_adapter.py │ │ │ ├── resnet.py │ │ │ ├── shufflenet_v2.py │ │ │ ├── swin_transformer.py │ │ │ ├── trans_encoder.py │ │ │ ├── transformer_utils.py │ │ │ ├── vision_transformer.py │ │ │ └── vit_mae.py │ │ ├── bbox_utils.py │ │ ├── cls_utils.py │ │ ├── heads │ │ │ ├── __init__.py │ │ │ └── detr_head.py │ │ ├── initializer.py │ │ ├── keypoint_utils.py │ │ ├── layers.py │ │ ├── losses │ │ │ ├── __init__.py │ │ │ ├── detr_loss.py │ │ │ ├── focal_loss.py │ │ │ ├── gfocal_loss.py │ │ │ ├── iou_loss.py │ │ │ ├── smooth_l1_loss.py │ │ │ └── varifocal_loss.py │ │ ├── ops.py │ │ ├── post_process.py │ │ ├── shape_spec.py │ │ └── transformers │ │ │ ├── __init__.py │ │ │ ├── deformable_transformer.py │ │ │ ├── detr_transformer.py │ │ │ ├── dino_transformer.py │ │ │ ├── ext_op │ │ │ ├── README.md │ │ │ ├── ms_deformable_attn_op.cc │ │ │ ├── ms_deformable_attn_op.cu │ │ │ ├── setup_ms_deformable_attn_op.py │ │ │ └── test_ms_deformable_attn_op.py │ │ │ ├── hybrid_encoder.py │ │ │ ├── matchers.py │ │ │ ├── position_encoding.py │ │ │ ├── rtdetr_transformer.py │ │ │ └── utils.py │ ├── optimizer │ │ ├── __init__.py │ │ ├── ema.py │ │ ├── optimizer.py │ │ └── utils.py │ ├── utils │ │ ├── __init__.py │ │ ├── cam_utils.py │ │ ├── check.py │ │ ├── checkpoint.py │ │ ├── cli.py │ │ ├── colormap.py │ │ ├── download.py │ │ ├── fuse_utils.py │ │ ├── logger.py │ │ ├── profiler.py │ │ ├── stats.py │ │ ├── visualizer.py │ │ └── 
voc_utils.py │ └── version.py ├── requirements.txt └── tools │ ├── eval.py │ ├── export_model.py │ ├── infer.py │ ├── slice_image.py │ ├── train.py │ └── x2coco.py ├── rtdetr_pytorch ├── README.md ├── configs │ ├── dataset │ │ └── coco_detection.yml │ ├── rtdetr │ │ ├── include │ │ │ ├── dataloader.yml │ │ │ ├── dataloader_regnet.yml │ │ │ ├── optimizer.yml │ │ │ ├── optimizer_regnet.yml │ │ │ ├── rtdetr_dla34.yml │ │ │ ├── rtdetr_r50vd.yml │ │ │ └── rtdetr_regnet.yml │ │ ├── rtdetr_dla34_6x_coco.yml │ │ ├── rtdetr_r101vd_6x_coco.yml │ │ ├── rtdetr_r18vd_6x_coco.yml │ │ ├── rtdetr_r34vd_6x_coco.yml │ │ ├── rtdetr_r50vd_6x_coco.yml │ │ ├── rtdetr_r50vd_m_6x_coco.yml │ │ └── rtdetr_regnet_6x_coco.yml │ └── runtime.yml ├── requirements.txt ├── src │ ├── __init__.py │ ├── core │ │ ├── __init__.py │ │ ├── config.py │ │ ├── yaml_config.py │ │ └── yaml_utils.py │ ├── data │ │ ├── __init__.py │ │ ├── cifar10 │ │ │ └── __init__.py │ │ ├── coco │ │ │ ├── __init__.py │ │ │ ├── coco_dataset.py │ │ │ ├── coco_eval.py │ │ │ └── coco_utils.py │ │ ├── dataloader.py │ │ ├── functional.py │ │ └── transforms.py │ ├── misc │ │ ├── __init__.py │ │ ├── dist.py │ │ ├── logger.py │ │ └── visualizer.py │ ├── nn │ │ ├── __init__.py │ │ ├── arch │ │ │ ├── __init__.py │ │ │ └── classification.py │ │ ├── backbone │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── dla.py │ │ │ ├── presnet.py │ │ │ ├── regnet.py │ │ │ ├── test_resnet.py │ │ │ └── utils.py │ │ └── criterion │ │ │ ├── __init__.py │ │ │ └── utils.py │ ├── optim │ │ ├── __init__.py │ │ ├── amp.py │ │ ├── ema.py │ │ └── optim.py │ ├── solver │ │ ├── __init__.py │ │ ├── det_engine.py │ │ ├── det_solver.py │ │ └── solver.py │ └── zoo │ │ ├── __init__.py │ │ └── rtdetr │ │ ├── __init__.py │ │ ├── box_ops.py │ │ ├── denoising.py │ │ ├── hybrid_encoder.py │ │ ├── matcher.py │ │ ├── rtdetr.py │ │ ├── rtdetr_criterion.py │ │ ├── rtdetr_decoder.py │ │ ├── rtdetr_postprocessor.py │ │ └── utils.py └── tools │ ├── README.md │ ├── export_onnx.py │ ├── infer.py │ └── train.py ├── rtdetrv2_paddle └── readme.md └── rtdetrv2_pytorch ├── Dockerfile ├── README.md ├── configs ├── dataset │ ├── coco_detection.yml │ └── voc_detection.yml ├── rtdetr │ ├── include │ │ ├── dataloader.yml │ │ ├── optimizer.yml │ │ └── rtdetr_r50vd.yml │ ├── readme.md │ ├── rtdetr_r101vd_6x_coco.yml │ ├── rtdetr_r18vd_6x_coco.yml │ ├── rtdetr_r34vd_6x_coco.yml │ ├── rtdetr_r50vd_6x_coco.yml │ └── rtdetr_r50vd_m_6x_coco.yml ├── rtdetrv2 │ ├── include │ │ ├── dataloader.yml │ │ ├── optimizer.yml │ │ └── rtdetrv2_r50vd.yml │ ├── rtdetrv2_hgnetv2_h_6x_coco.yml │ ├── rtdetrv2_hgnetv2_l_6x_coco.yml │ ├── rtdetrv2_hgnetv2_x_6x_coco.yml │ ├── rtdetrv2_r101vd_6x_coco.yml │ ├── rtdetrv2_r18vd_120e_coco.yml │ ├── rtdetrv2_r18vd_120e_voc.yml │ ├── rtdetrv2_r18vd_dsp_3x_coco.yml │ ├── rtdetrv2_r18vd_sp1_120e_coco.yml │ ├── rtdetrv2_r18vd_sp2_120e_coco.yml │ ├── rtdetrv2_r18vd_sp3_120e_coco.yml │ ├── rtdetrv2_r34vd_120e_coco.yml │ ├── rtdetrv2_r34vd_dsp_1x_coco.yml │ ├── rtdetrv2_r50vd_6x_coco.yml │ ├── rtdetrv2_r50vd_dsp_1x_coco.yml │ ├── rtdetrv2_r50vd_m_7x_coco.yml │ └── rtdetrv2_r50vd_m_dsp_3x_coco.yml └── runtime.yml ├── dataset └── readme.md ├── docker-compose.yml ├── references └── deploy │ ├── readme.md │ ├── rtdetrv2_onnxruntime.py │ ├── rtdetrv2_openvino.py │ ├── rtdetrv2_tensorrt.py │ └── rtdetrv2_torch.py ├── requirements.txt ├── src ├── __init__.py ├── core │ ├── __init__.py │ ├── _config.py │ ├── workspace.py │ ├── yaml_config.py │ └── yaml_utils.py ├── data │ ├── __init__.py │ ├── _misc.py │ ├── 
dataloader.py │ ├── dataset │ │ ├── __init__.py │ │ ├── _dataset.py │ │ ├── cifar_dataset.py │ │ ├── coco_dataset.py │ │ ├── coco_eval.py │ │ ├── coco_utils.py │ │ ├── voc_detection.py │ │ └── voc_eval.py │ └── transforms │ │ ├── __init__.py │ │ ├── _transforms.py │ │ ├── container.py │ │ ├── functional.py │ │ ├── mosaic.py │ │ └── presets.py ├── misc │ ├── __init__.py │ ├── box_ops.py │ ├── dist_utils.py │ ├── lazy_loader.py │ ├── logger.py │ ├── profiler_utils.py │ └── visualizer.py ├── nn │ ├── __init__.py │ ├── arch │ │ ├── __init__.py │ │ ├── classification.py │ │ └── yolo.py │ ├── backbone │ │ ├── __init__.py │ │ ├── common.py │ │ ├── csp_darknet.py │ │ ├── csp_resnet.py │ │ ├── hgnetv2.py │ │ ├── presnet.py │ │ ├── test_resnet.py │ │ ├── timm_model.py │ │ ├── torchvision_model.py │ │ └── utils.py │ ├── criterion │ │ ├── __init__.py │ │ └── det_criterion.py │ └── postprocessor │ │ ├── __init__.py │ │ ├── box_revert.py │ │ ├── detr_postprocessor.py │ │ └── nms_postprocessor.py ├── optim │ ├── __init__.py │ ├── amp.py │ ├── ema.py │ ├── optim.py │ └── warmup.py ├── solver │ ├── __init__.py │ ├── _solver.py │ ├── clas_engine.py │ ├── clas_solver.py │ ├── det_engine.py │ └── det_solver.py └── zoo │ ├── __init__.py │ └── rtdetr │ ├── __init__.py │ ├── box_ops.py │ ├── conver_params.py │ ├── denoising.py │ ├── hybrid_encoder.py │ ├── matcher.py │ ├── rtdetr.py │ ├── rtdetr_criterion.py │ ├── rtdetr_decoder.py │ ├── rtdetr_postprocessor.py │ ├── rtdetrv2_criterion.py │ ├── rtdetrv2_decoder.py │ └── utils.py └── tools ├── README.md ├── export_onnx.py ├── export_trt.py ├── run_profile.py └── train.py /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry 13 | custom: ['https://github.com/lyuwenyu/cvperception/assets/17582080/2b4bfcd5-5c0f-45fd-badf-3f6e5b0249ac'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: lyuwenyu 7 | 8 | --- 9 | 10 | **Star RTDETR** 11 | Please click **star** on the RTDETR homepage first to support this project. 12 | Star RTDETR to help more people discover this project. 13 | 14 | --- 15 | 16 | **Describe the bug** 17 | A clear and concise description of what the bug is. 18 | If applicable, add screenshots to help explain your problem. 19 | 20 | **To Reproduce** 21 | Steps to reproduce the behavior.
22 | -------------------------------------------------------------------------------- /benchmark/README.md: -------------------------------------------------------------------------------- 1 | # Some of the code and tools used for speed benchmarking in the paper 2 | 3 | 4 | ## Benchmarking the speed of the YOLO series [in progress] 5 | Using [yolov8](https://github.com/ultralytics/ultralytics) as an example 6 | 7 |
8 | 1. Export to ONNX 9 | 10 | Run the `export_onnx` function in `yolov8_onnx.py`; the newly added code mainly handles converting the output format 11 |
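For reference, steps 1 and 2 can be driven end to end from the command line; the flags below are taken from the argparse setup at the bottom of `yolov8_onnx.py`:

```bash
python yolov8_onnx.py --name yolov8l --score_threshold 0.001 --iou_threshold 0.7 --max_output_boxes 300
```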
12 | 13 | 14 |
15 | 2. Insert NMS 16 | 17 | Use the `yolo_insert_nms` function in `utils.py`; after exporting the ONNX model, inspect its structure with [Netron](https://netron.app/). 18 |
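A minimal sketch of calling it directly, with the default thresholds declared in `utils.py` (note that the current implementation always writes `yolo_w_nms.onnx`, regardless of the input file name):

```python
from utils import yolo_insert_nms

# appends an EfficientNMS_TRT plugin node after the exported boxes/scores outputs
yolo_insert_nms('yolov8l.onnx', score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300)
```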
19 | 20 | 21 |
22 | 3. Convert to TensorRT 23 | 24 | Convert with the script in `trtexec.md`, or with the Python code in `utils.py` 25 | ```bash 26 | # trtexec -h 27 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16 28 | ``` 29 |
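The TensorRT-building Python code referenced above is not part of the `utils.py` shown in this snapshot, so the following is only a minimal sketch using the standard TensorRT 8.x Python API (file names follow the trtexec command above):

```python
import tensorrt as trt

logger = trt.Logger(trt.Logger.WARNING)
trt.init_libnvinfer_plugins(logger, '')  # registers built-in plugins such as EfficientNMS_TRT

builder = trt.Builder(logger)
network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
parser = trt.OnnxParser(network, logger)

with open('yolov8l_w_nms.onnx', 'rb') as f:
    if not parser.parse(f.read()):  # parse the ONNX graph, including the NMS node
        raise RuntimeError(parser.get_error(0))

config = builder.create_builder_config()
config.set_flag(trt.BuilderFlag.FP16)  # mirrors trtexec's --fp16
engine = builder.build_serialized_network(network, config)

with open('yolov8l_w_nms.engine', 'wb') as f:
    f.write(engine)
```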
30 | 31 | 32 |
33 | 4. Benchmark with trtexec 34 | 35 | Use the script in `trtexec.md`, with the `--buildOnly` flag removed 36 | 37 |
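Concretely, this means loading the prebuilt engine instead of rebuilding it, e.g. (flags as in `trtexec.md`):

```bash
trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
```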
38 | 39 | 40 | 41 |
42 | 5. Profiling analysis (optional) 43 | 44 | Prepend the following command to the command from step 4 45 | ```bash 46 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms 47 | ``` 48 | The captured timeline can then be visualized and analyzed with nsys 49 | 50 | 51 |
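Combined with the step-4 command, the full profiling invocation (also listed in `trtexec.md`) is:

```bash
nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms \
    trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
```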
52 | 53 | 54 |
55 | 6. Benchmark or deploy in Python 56 | 57 | To measure the model's average speed on the COCO val dataset, run inference with the code in `trtinfer.py` 58 | 59 |
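As a rough sketch of how the average latency can be measured with the `TimeProfiler` helper from `utils.py` (`images` and `run_engine` are hypothetical placeholders here; the actual inference loop lives in `trtinfer.py`):

```python
from utils import TimeProfiler

profiler = TimeProfiler()
for image in images:   # e.g. preprocessed COCO val images
    with profiler:     # synchronizes CUDA before and after timing
        outputs = run_engine(image)

print('average latency (ms):', profiler.total / len(images) * 1000)
```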
60 | -------------------------------------------------------------------------------- /benchmark/trtexec.md: -------------------------------------------------------------------------------- 1 | 2 | ```bash 3 | # build tensorrt engine 4 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16 5 | 6 | # using dynamic shapes 7 | # --explicitBatch --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640 --shapes=image:8x3x640x640 8 | 9 | # timeline 10 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin' 11 | 12 | # https://forums.developer.nvidia.com/t/about-loadinputs-in-trtexec/218880 13 | ``` 14 | -------------------------------------------------------------------------------- /benchmark/utils.py: -------------------------------------------------------------------------------- 1 | '''by lyuwenyu 2 | ''' 3 | 4 | import time 5 | import contextlib 6 | import numpy as np 7 | from PIL import Image 8 | from collections import OrderedDict 9 | 10 | import onnx 11 | import torch 12 | import onnx_graphsurgeon 13 | 14 | 15 | def to_binary_data(path, size=(640, 640), output_name='input_tensor.bin'): 16 | '''--loadInputs='image:input_tensor.bin' 17 | ''' 18 | im = Image.open(path).resize(size) 19 | data = np.asarray(im, dtype=np.float32).transpose(2, 0, 1)[None] / 255. 20 | data.tofile(output_name) 21 | 22 | 23 | def yolo_insert_nms(path, score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300, simplify=False): 24 | ''' 25 | http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/api/onnxops/onnx__EfficientNMS_TRT.html 26 | https://huggingface.co/spaces/muttalib1326/Punjabi_Character_Detection/blob/3dd1e17054c64e5f6b2254278f96cfa2bf418cd4/utils/add_nms.py 27 | ''' 28 | onnx_model = onnx.load(path) 29 | 30 | if simplify: 31 | from onnxsim import simplify 32 | onnx_model, _ = simplify(onnx_model, overwrite_input_shapes={'image': [1, 3, 640, 640]}) 33 | 34 | graph = onnx_graphsurgeon.import_onnx(onnx_model) 35 | graph.toposort() 36 | graph.fold_constants() 37 | graph.cleanup() 38 | 39 | topk = max_output_boxes 40 | attrs = OrderedDict(plugin_version='1', 41 | background_class=-1, 42 | max_output_boxes=topk, 43 | score_threshold=score_threshold, 44 | iou_threshold=iou_threshold, 45 | score_activation=False, 46 | box_coding=0, ) 47 | 48 | outputs = [onnx_graphsurgeon.Variable('num_dets', np.int32, [-1, 1]), 49 | onnx_graphsurgeon.Variable('det_boxes', np.float32, [-1, topk, 4]), 50 | onnx_graphsurgeon.Variable('det_scores', np.float32, [-1, topk]), 51 | onnx_graphsurgeon.Variable('det_classes', np.int32, [-1, topk])] 52 | 53 | graph.layer(op='EfficientNMS_TRT', 54 | name="batched_nms", 55 | inputs=[graph.outputs[0], 56 | graph.outputs[1]], 57 | outputs=outputs, 58 | attrs=attrs, ) 59 | 60 | graph.outputs = outputs 61 | graph.cleanup().toposort() 62 | 63 | onnx.save(onnx_graphsurgeon.export_onnx(graph), f'yolo_w_nms.onnx') 64 | 65 | 66 | class TimeProfiler(contextlib.ContextDecorator): 67 | def __init__(self, ): 68 | self.total = 0 69 | 70 | def __enter__(self, ): 71 | self.start = self.time() 72 | return self 73 | 74 | def __exit__(self, type, value, traceback): 75 | self.total += self.time() - self.start 76 | 77 | def reset(self, ): 78 | self.total = 0 79 | 80 | def time(self, ): 81 | if torch.cuda.is_available(): 82 | torch.cuda.synchronize() 83 | return time.time() 84 | 
-------------------------------------------------------------------------------- /benchmark/yolov8_onnx.py: -------------------------------------------------------------------------------- 1 | '''by lyuwenyu 2 | ''' 3 | 4 | import torch 5 | import torchvision 6 | 7 | import numpy as np 8 | import onnxruntime as ort 9 | 10 | from utils import yolo_insert_nms 11 | 12 | class YOLOv8(torch.nn.Module): 13 | def __init__(self, name) -> None: 14 | super().__init__() 15 | from ultralytics import YOLO 16 | # Load a model 17 | # build a new model from scratch 18 | # model = YOLO(f'{name}.yaml') 19 | 20 | # load a pretrained model (recommended for training) 21 | model = YOLO(f'{name}.pt') 22 | self.model = model.model 23 | 24 | def forward(self, x): 25 | '''https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py#L216 26 | ''' 27 | pred: torch.Tensor = self.model(x)[0] # n 84 8400, 28 | pred = pred.permute(0, 2, 1) 29 | boxes, scores = pred.split([4, 80], dim=-1) 30 | boxes = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy') 31 | 32 | return boxes, scores 33 | 34 | 35 | 36 | def export_onnx(name='yolov8n'): 37 | '''export onnx 38 | ''' 39 | m = YOLOv8(name) 40 | 41 | x = torch.rand(1, 3, 640, 640) 42 | dynamic_axes = { 43 | 'image': {0: '-1'} 44 | } 45 | torch.onnx.export(m, x, f'{name}.onnx', 46 | input_names=['image'], 47 | output_names=['boxes', 'scores'], 48 | opset_version=13, 49 | dynamic_axes=dynamic_axes) 50 | 51 | data = np.random.rand(1, 3, 640, 640).astype(np.float32) 52 | sess = ort.InferenceSession(f'{name}.onnx') 53 | _ = sess.run(output_names=None, input_feed={'image': data}) 54 | 55 | 56 | if __name__ == '__main__': 57 | 58 | import argparse 59 | parser = argparse.ArgumentParser() 60 | parser.add_argument('--name', type=str, default='yolov8l') 61 | parser.add_argument('--score_threshold', type=float, default=0.001) 62 | parser.add_argument('--iou_threshold', type=float, default=0.7) 63 | parser.add_argument('--max_output_boxes', type=int, default=300) 64 | args = parser.parse_args() 65 | 66 | export_onnx(name=args.name) 67 | 68 | yolo_insert_nms(path=f'{args.name}.onnx', 69 | score_threshold=args.score_threshold, 70 | iou_threshold=args.iou_threshold, 71 | max_output_boxes=args.max_output_boxes, ) 72 | 73 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/datasets/coco_detection.yml: -------------------------------------------------------------------------------- 1 | metric: COCO 2 | num_classes: 80 3 | 4 | TrainDataset: 5 | name: COCODataSet 6 | image_dir: train2017 7 | anno_path: annotations/instances_train2017.json 8 | dataset_dir: dataset/coco 9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd'] 10 | 11 | EvalDataset: 12 | name: COCODataSet 13 | image_dir: val2017 14 | anno_path: annotations/instances_val2017.json 15 | dataset_dir: dataset/coco 16 | allow_empty: true 17 | 18 | TestDataset: 19 | name: ImageFolder 20 | anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt) 21 | dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path' 22 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/datasets/voc.yml: -------------------------------------------------------------------------------- 1 | metric: VOC 2 | map_type: 11point 3 | num_classes: 20 4 | 5 | TrainDataset: 6 | name: VOCDataSet 7 | dataset_dir: dataset/voc 8 | anno_path: trainval.txt 9 | label_list: label_list.txt 
10 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 11 | 12 | EvalDataset: 13 | name: VOCDataSet 14 | dataset_dir: dataset/voc 15 | anno_path: test.txt 16 | label_list: label_list.txt 17 | data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult'] 18 | 19 | TestDataset: 20 | name: ImageFolder 21 | anno_path: dataset/voc/label_list.txt 22 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/_base_/optimizer_6x.yml: -------------------------------------------------------------------------------- 1 | epoch: 72 2 | 3 | LearningRate: 4 | base_lr: 0.0001 5 | schedulers: 6 | - !PiecewiseDecay 7 | gamma: 1.0 8 | milestones: [100] 9 | use_warmup: true 10 | - !LinearWarmup 11 | start_factor: 0.001 12 | steps: 2000 13 | 14 | OptimizerBuilder: 15 | clip_grad_by_norm: 0.1 16 | regularizer: false 17 | optimizer: 18 | type: AdamW 19 | weight_decay: 0.0001 20 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/_base_/rtdetr_r50vd.yml: -------------------------------------------------------------------------------- 1 | architecture: DETR 2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams 3 | norm_type: sync_bn 4 | use_ema: True 5 | ema_decay: 0.9999 6 | ema_decay_type: "exponential" 7 | ema_filter_no_grad: True 8 | hidden_dim: 256 9 | use_focal_loss: True 10 | eval_size: [640, 640] # h, w 11 | 12 | 13 | DETR: 14 | backbone: ResNet 15 | neck: HybridEncoder 16 | transformer: RTDETRTransformer 17 | detr_head: DINOHead 18 | post_process: DETRPostProcess 19 | 20 | ResNet: 21 | # index 0 stands for res2 22 | depth: 50 23 | variant: d 24 | norm_type: bn 25 | freeze_at: 0 26 | return_idx: [1, 2, 3] 27 | lr_mult_list: [0.1, 0.1, 0.1, 0.1] 28 | num_stages: 4 29 | freeze_stem_only: True 30 | 31 | HybridEncoder: 32 | hidden_dim: 256 33 | use_encoder_idx: [2] 34 | num_encoder_layers: 1 35 | encoder_layer: 36 | name: TransformerLayer 37 | d_model: 256 38 | nhead: 8 39 | dim_feedforward: 1024 40 | dropout: 0. 
41 | activation: 'gelu' 42 | expansion: 1.0 43 | 44 | 45 | RTDETRTransformer: 46 | num_queries: 300 47 | position_embed_type: sine 48 | feat_strides: [8, 16, 32] 49 | num_levels: 3 50 | nhead: 8 51 | num_decoder_layers: 6 52 | dim_feedforward: 1024 53 | dropout: 0.0 54 | activation: relu 55 | num_denoising: 100 56 | label_noise_ratio: 0.5 57 | box_noise_scale: 1.0 58 | learnt_init_query: False 59 | 60 | DINOHead: 61 | loss: 62 | name: DINOLoss 63 | loss_coeff: {class: 1, bbox: 5, giou: 2} 64 | aux_loss: True 65 | use_vfl: True 66 | matcher: 67 | name: HungarianMatcher 68 | matcher_coeff: {class: 2, bbox: 5, giou: 2} 69 | 70 | DETRPostProcess: 71 | num_top_queries: 300 72 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/_base_/rtdetr_reader.yml: -------------------------------------------------------------------------------- 1 | worker_num: 4 2 | TrainReader: 3 | sample_transforms: 4 | - Decode: {} 5 | - RandomDistort: {prob: 0.8} 6 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]} 7 | - RandomCrop: {prob: 0.8} 8 | - RandomFlip: {} 9 | batch_transforms: 10 | - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False} 11 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} 12 | - NormalizeBox: {} 13 | - BboxXYXY2XYWH: {} 14 | - Permute: {} 15 | batch_size: 4 16 | shuffle: true 17 | drop_last: true 18 | collate_batch: false 19 | use_shared_memory: false 20 | 21 | 22 | EvalReader: 23 | sample_transforms: 24 | - Decode: {} 25 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} # target_size: (h, w) 26 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} 27 | - Permute: {} 28 | batch_size: 4 29 | shuffle: false 30 | drop_last: false 31 | 32 | 33 | TestReader: 34 | inputs_def: 35 | image_shape: [3, 640, 640] 36 | sample_transforms: 37 | - Decode: {} 38 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} 39 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none} 40 | - Permute: {} 41 | batch_size: 1 42 | shuffle: false 43 | drop_last: false 44 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_hgnetv2_l_6x_coco/model_final 10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams 11 | find_unused_parameters: True 12 | log_iter: 200 13 | 14 | 15 | DETR: 16 | backbone: PPHGNetV2 17 | 18 | PPHGNetV2: 19 | arch: 'L' 20 | return_idx: [1, 2, 3] 21 | freeze_stem_only: True 22 | freeze_at: 0 23 | freeze_norm: True 24 | lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05] 25 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: 
output/rtdetr_hgnetv2_x_6x_coco/model_final 10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams 11 | find_unused_parameters: True 12 | log_iter: 200 13 | 14 | 15 | 16 | DETR: 17 | backbone: PPHGNetV2 18 | 19 | 20 | PPHGNetV2: 21 | arch: 'X' 22 | return_idx: [1, 2, 3] 23 | freeze_stem_only: True 24 | freeze_at: 0 25 | freeze_norm: True 26 | lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01] 27 | 28 | 29 | HybridEncoder: 30 | hidden_dim: 384 31 | use_encoder_idx: [2] 32 | num_encoder_layers: 1 33 | encoder_layer: 34 | name: TransformerLayer 35 | d_model: 384 36 | nhead: 8 37 | dim_feedforward: 2048 38 | dropout: 0. 39 | activation: 'gelu' 40 | expansion: 1.0 41 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r101vd_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams 14 | 15 | ResNet: 16 | # index 0 stands for res2 17 | depth: 101 18 | variant: d 19 | norm_type: bn 20 | freeze_at: 0 21 | return_idx: [1, 2, 3] 22 | lr_mult_list: [0.01, 0.01, 0.01, 0.01] 23 | num_stages: 4 24 | freeze_stem_only: True 25 | 26 | HybridEncoder: 27 | hidden_dim: 384 28 | use_encoder_idx: [2] 29 | num_encoder_layers: 1 30 | encoder_layer: 31 | name: TransformerLayer 32 | d_model: 384 33 | nhead: 8 34 | dim_feedforward: 2048 35 | dropout: 0. 36 | activation: 'gelu' 37 | expansion: 1.0 38 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r18vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r18_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams 14 | ResNet: 15 | depth: 18 16 | variant: d 17 | return_idx: [1, 2, 3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 21 | 22 | HybridEncoder: 23 | hidden_dim: 256 24 | use_encoder_idx: [2] 25 | num_encoder_layers: 1 26 | encoder_layer: 27 | name: TransformerLayer 28 | d_model: 256 29 | nhead: 8 30 | dim_feedforward: 1024 31 | dropout: 0.
32 | activation: 'gelu' 33 | expansion: 0.5 34 | depth_mult: 1.0 35 | 36 | RTDETRTransformer: 37 | eval_idx: -1 38 | num_decoder_layers: 3 39 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r34vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r34vd_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams 14 | ResNet: 15 | depth: 34 16 | variant: d 17 | return_idx: [1, 2, 3] 18 | freeze_at: -1 19 | freeze_norm: false 20 | norm_decay: 0. 21 | 22 | HybridEncoder: 23 | hidden_dim: 256 24 | use_encoder_idx: [2] 25 | num_encoder_layers: 1 26 | encoder_layer: 27 | name: TransformerLayer 28 | d_model: 256 29 | nhead: 8 30 | dim_feedforward: 1024 31 | dropout: 0. 32 | activation: 'gelu' 33 | expansion: 0.5 34 | depth_mult: 1.0 35 | 36 | RTDETRTransformer: 37 | eval_idx: -1 38 | num_decoder_layers: 4 39 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r50vd_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml: -------------------------------------------------------------------------------- 1 | _BASE_: [ 2 | '../datasets/coco_detection.yml', 3 | '../runtime.yml', 4 | '_base_/optimizer_6x.yml', 5 | '_base_/rtdetr_r50vd.yml', 6 | '_base_/rtdetr_reader.yml', 7 | ] 8 | 9 | weights: output/rtdetr_r50vd_m_6x_coco/model_final 10 | find_unused_parameters: True 11 | log_iter: 200 12 | 13 | HybridEncoder: 14 | hidden_dim: 256 15 | use_encoder_idx: [2] 16 | num_encoder_layers: 1 17 | encoder_layer: 18 | name: TransformerLayer 19 | d_model: 256 20 | nhead: 8 21 | dim_feedforward: 1024 22 | dropout: 0. 23 | activation: 'gelu' 24 | expansion: 0.5 25 | depth_mult: 1.0 26 | 27 | RTDETRTransformer: 28 | eval_idx: 2 # use the 3rd decoder layer for eval 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/configs/runtime.yml: -------------------------------------------------------------------------------- 1 | use_gpu: true 2 | use_xpu: false 3 | use_mlu: false 4 | use_npu: false 5 | log_iter: 20 6 | save_dir: output 7 | snapshot_epoch: 1 8 | print_flops: false 9 | print_params: false 10 | 11 | # Exporting the model 12 | export: 13 | post_process: True # Whether post-processing is included in the network when exporting the model. 14 | nms: True # Whether NMS is included in the network when exporting the model. 15 | benchmark: False # Used to test model performance; if set `True`, post-processing and NMS will not be exported.
16 | fuse_conv_bn: False 17 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/coco/download_coco.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import download_dataset 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | download_dataset(download_path, 'coco') 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/voc/create_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import create_voc_list 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | create_voc_list(voc_path) 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/voc/download_voc.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import sys 16 | import os.path as osp 17 | import logging 18 | # add python path of PaddleDetection to sys.path 19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3))) 20 | if parent_path not in sys.path: 21 | sys.path.append(parent_path) 22 | 23 | from ppdet.utils.download import download_dataset 24 | 25 | logging.basicConfig(level=logging.INFO) 26 | 27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0] 28 | download_dataset(download_path, 'voc') 29 | -------------------------------------------------------------------------------- /rtdetr_paddle/dataset/voc/label_list.txt: -------------------------------------------------------------------------------- 1 | aeroplane 2 | bicycle 3 | bird 4 | boat 5 | bottle 6 | bus 7 | car 8 | cat 9 | chair 10 | cow 11 | diningtable 12 | dog 13 | horse 14 | motorbike 15 | person 16 | pottedplant 17 | sheep 18 | sofa 19 | train 20 | tvmonitor 21 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import (core, data, engine, modeling, optimizer, metrics, utils) 16 | 17 | 18 | try: 19 | from .version import full_version as __version__ 20 | from .version import commit as __git_commit__ 21 | except ImportError: 22 | import sys 23 | sys.stderr.write("Warning: import ppdet from source directory " \ 24 | "without installing, run 'python setup.py install' to " \ 25 | "install ppdet firstly\n") 26 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import config 16 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/core/config/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import source 16 | from . import transform 17 | from . import reader 18 | 19 | from .source import * 20 | from .transform import * 21 | from .reader import * 22 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/shm_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import os 16 | 17 | SIZE_UNIT = ['K', 'M', 'G', 'T'] 18 | SHM_QUERY_CMD = 'df -h' 19 | SHM_KEY = 'shm' 20 | SHM_DEFAULT_MOUNT = '/dev/shm' 21 | 22 | # [ shared memory size check ] 23 | # In detection models, image/target data occupy a lot of memory and can 24 | # use up shared memory in a multi-process DataLoader, so we use the 25 | # following code to get the shared memory size and perform a size check 26 | # that disables shared memory use if its size is not enough. 27 | # The shared memory size is obtained as follows: 28 | # 1. use `df -h` to get all mount info 29 | # 2. pick up the spaces whose mount info contains 'shm' 30 | # 3. if there is only one 'shm' space, return its size 31 | # 4. if there are multiple 'shm' spaces, try to find the default mount 32 | # directory '/dev/shm' of Linux-like systems; otherwise return the 33 | # biggest space size.
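# Worked example (hypothetical `df -h` row, added for illustration):
#   Filesystem   Size  Used  Avail  Use%  Mounted on
#   tmpfs         64G     0    64G    0%  /dev/shm
# Step 2 keeps this row because it contains 'shm'; its 'Avail' column
# (index 3 after split(), '64G' here) is what _parse_size_in_M below
# converts to MB, via 1024 ** (SIZE_UNIT.index('G') - 1) == 1024.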
34 | 35 | 36 | def _parse_size_in_M(size_str): 37 | if size_str[-1] == 'B': 38 | num, unit = size_str[:-2], size_str[-2] 39 | else: 40 | num, unit = size_str[:-1], size_str[-1] 41 | assert unit in SIZE_UNIT, \ 42 | "unknown shm size unit {}".format(unit) 43 | return float(num) * \ 44 | (1024 ** (SIZE_UNIT.index(unit) - 1)) 45 | 46 | 47 | def _get_shared_memory_size_in_M(): 48 | try: 49 | df_infos = os.popen(SHM_QUERY_CMD).readlines() 50 | except: 51 | return None 52 | else: 53 | shm_infos = [] 54 | for df_info in df_infos: 55 | info = df_info.strip() 56 | if info.find(SHM_KEY) >= 0: 57 | shm_infos.append(info.split()) 58 | 59 | if len(shm_infos) == 0: 60 | return None 61 | elif len(shm_infos) == 1: 62 | return _parse_size_in_M(shm_infos[0][3]) 63 | else: 64 | default_mount_infos = [ 65 | si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT 66 | ] 67 | if default_mount_infos: 68 | return _parse_size_in_M(default_mount_infos[0][3]) 69 | else: 70 | return max([_parse_size_in_M(si[3]) for si in shm_infos]) 71 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/source/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .coco import * 16 | from .voc import * 17 | from .category import * 18 | from .dataset import ImageFolder 19 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/transform/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import operators 16 | from . import batch_operators 17 | 18 | 19 | from .operators import * 20 | from .batch_operators import * 21 | 22 | 23 | __all__ = [] 24 | __all__ += registered_ops 25 | 26 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/data/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import numbers 16 | import numpy as np 17 | 18 | try: 19 | from collections.abc import Sequence, Mapping 20 | except: 21 | from collections import Sequence, Mapping 22 | 23 | 24 | def default_collate_fn(batch): 25 | """ 26 | Default batch collating function for :code:`paddle.io.DataLoader`. 27 | It gets the input data as a list of samples, where each element in 28 | the list is the data of one sample, and sample data should be composed 29 | of list, dictionary, string, number or numpy array. This 30 | function parses the input data recursively and stacks number, 31 | numpy array and paddle.Tensor data as batch data. e.g. for the 32 | following input data: 33 | [{'image': np.array(shape=[3, 224, 224]), 'label': 1}, 34 | {'image': np.array(shape=[3, 224, 224]), 'label': 3}, 35 | {'image': np.array(shape=[3, 224, 224]), 'label': 4}, 36 | {'image': np.array(shape=[3, 224, 224]), 'label': 5},] 37 | 38 | 39 | This default collate function zips each number and numpy array 40 | field together and stacks each field as the batch field as follows: 41 | {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])} 42 | Args: 43 | batch(list of sample data): batch should be a list of sample data. 44 | 45 | Returns: 46 | Batched data: batched each number, numpy array and paddle.Tensor 47 | in input data. 48 | """ 49 | sample = batch[0] 50 | if isinstance(sample, np.ndarray): 51 | batch = np.stack(batch, axis=0) 52 | return batch 53 | elif isinstance(sample, numbers.Number): 54 | batch = np.array(batch) 55 | return batch 56 | elif isinstance(sample, (str, bytes)): 57 | return batch 58 | elif isinstance(sample, Mapping): 59 | return { 60 | key: default_collate_fn([d[key] for d in batch]) 61 | for key in sample 62 | } 63 | elif isinstance(sample, Sequence): 64 | sample_fields_num = len(sample) 65 | if not all(len(sample) == sample_fields_num for sample in iter(batch)): 66 | raise RuntimeError( 67 | "fields number not the same among samples in a batch") 68 | return [default_collate_fn(fields) for fields in zip(*batch)] 69 | 70 | raise TypeError("batch data can only contain: tensor, numpy.ndarray, " 71 | "dict, list, number, but got {}".format(type(sample))) 72 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/engine/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .
import trainer 16 | from .trainer import * 17 | 18 | from . import callbacks 19 | from .callbacks import * 20 | 21 | from . import env 22 | from .env import * 23 | 24 | __all__ = trainer.__all__ \ 25 | + callbacks.__all__ \ 26 | + env.__all__ 27 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/engine/env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | import random 21 | import numpy as np 22 | 23 | import paddle 24 | from paddle.distributed import fleet 25 | 26 | __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env'] 27 | 28 | 29 | def init_fleet_env(find_unused_parameters=False): 30 | strategy = fleet.DistributedStrategy() 31 | strategy.find_unused_parameters = find_unused_parameters 32 | fleet.init(is_collective=True, strategy=strategy) 33 | 34 | 35 | def init_parallel_env(): 36 | env = os.environ 37 | dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env 38 | if dist: 39 | trainer_id = int(env['PADDLE_TRAINER_ID']) 40 | local_seed = (99 + trainer_id) 41 | random.seed(local_seed) 42 | np.random.seed(local_seed) 43 | 44 | paddle.distributed.init_parallel_env() 45 | 46 | 47 | def set_random_seed(seed): 48 | paddle.seed(seed) 49 | random.seed(seed) 50 | np.random.seed(seed) 51 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import metrics 16 | 17 | from .metrics import * 18 | from .pose3d_metrics import * 19 | 20 | from . import mot_metrics 21 | from .mot_metrics import * 22 | __all__ = metrics.__all__ + mot_metrics.__all__ 23 | 24 | from . 
import mcmot_metrics 25 | from .mcmot_metrics import * 26 | __all__ += mcmot_metrics.__all__  # extend, so mot_metrics entries are kept -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import warnings 16 | warnings.filterwarnings( 17 | action='ignore', category=DeprecationWarning, module='ops') 18 | 19 | 20 | from .ops import * 21 | from .backbones import * 22 | from .heads import * 23 | from .losses import * 24 | from .architectures import * 25 | from .post_process import * 26 | from .layers import * 27 | from .transformers import * 28 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/architectures/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .meta_arch import * 16 | from .detr import * 17 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/backbones/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
14 | 15 | from .resnet import * 16 | from .darknet import * 17 | from .mobilenet_v1 import * 18 | from .mobilenet_v3 import * 19 | from .shufflenet_v2 import * 20 | from .swin_transformer import * 21 | from .lcnet import * 22 | from .cspresnet import * 23 | from .csp_darknet import * 24 | from .convnext import * 25 | from .vision_transformer import * 26 | from .mobileone import * 27 | from .trans_encoder import * 28 | from .focalnet import * 29 | from .vit_mae import * 30 | from .hgnet_v2 import * 31 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/backbones/name_adapter.py: -------------------------------------------------------------------------------- 1 | class NameAdapter(object): 2 | """Fix the backbones variable names for pretrained weight""" 3 | 4 | def __init__(self, model): 5 | super(NameAdapter, self).__init__() 6 | self.model = model 7 | 8 | @property 9 | def model_type(self): 10 | return getattr(self.model, '_model_type', '') 11 | 12 | @property 13 | def variant(self): 14 | return getattr(self.model, 'variant', '') 15 | 16 | def fix_conv_norm_name(self, name): 17 | if name == "conv1": 18 | bn_name = "bn_" + name 19 | else: 20 | bn_name = "bn" + name[3:] 21 | # the naming rule is same as pretrained weight 22 | if self.model_type == 'SEResNeXt': 23 | bn_name = name + "_bn" 24 | return bn_name 25 | 26 | def fix_shortcut_name(self, name): 27 | if self.model_type == 'SEResNeXt': 28 | name = 'conv' + name + '_prj' 29 | return name 30 | 31 | def fix_bottleneck_name(self, name): 32 | if self.model_type == 'SEResNeXt': 33 | conv_name1 = 'conv' + name + '_x1' 34 | conv_name2 = 'conv' + name + '_x2' 35 | conv_name3 = 'conv' + name + '_x3' 36 | shortcut_name = name 37 | else: 38 | conv_name1 = name + "_branch2a" 39 | conv_name2 = name + "_branch2b" 40 | conv_name3 = name + "_branch2c" 41 | shortcut_name = name + "_branch1" 42 | return conv_name1, conv_name2, conv_name3, shortcut_name 43 | 44 | def fix_basicblock_name(self, name): 45 | if self.model_type == 'SEResNeXt': 46 | conv_name1 = 'conv' + name + '_x1' 47 | conv_name2 = 'conv' + name + '_x2' 48 | shortcut_name = name 49 | else: 50 | conv_name1 = name + "_branch2a" 51 | conv_name2 = name + "_branch2b" 52 | shortcut_name = name + "_branch1" 53 | return conv_name1, conv_name2, shortcut_name 54 | 55 | def fix_layer_warp_name(self, stage_num, count, i): 56 | name = 'res' + str(stage_num) 57 | if count > 10 and stage_num == 4: 58 | if i == 0: 59 | conv_name = name + "a" 60 | else: 61 | conv_name = name + "b" + str(i) 62 | else: 63 | conv_name = name + chr(ord("a") + i) 64 | if self.model_type == 'SEResNeXt': 65 | conv_name = str(stage_num + 2) + '_' + str(i + 1) 66 | return conv_name 67 | 68 | def fix_c1_stage_name(self): 69 | return "res_conv1" if self.model_type == 'ResNeXt' else "conv1" 70 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/cls_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | def _get_class_default_kwargs(cls, *args, **kwargs): 17 | """ 18 | Get the default __init__ arguments of a class as a dict; if args or 19 | kwargs are specified, they override the corresponding defaults 20 | """ 21 | varnames = cls.__init__.__code__.co_varnames 22 | argcount = cls.__init__.__code__.co_argcount 23 | keys = varnames[:argcount] 24 | assert keys[0] == 'self' 25 | keys = keys[1:] 26 | 27 | values = list(cls.__init__.__defaults__) 28 | assert len(values) == len(keys) 29 | 30 | if len(args) > 0: 31 | for i, arg in enumerate(args): 32 | values[i] = arg 33 | 34 | default_kwargs = dict(zip(keys, values)) 35 | 36 | if len(kwargs) > 0: 37 | for k, v in kwargs.items(): 38 | default_kwargs[k] = v 39 | 40 | return default_kwargs 41 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/heads/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .detr_head import * 16 | 17 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/losses/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .iou_loss import * 16 | from .gfocal_loss import * 17 | from .detr_loss import * 18 | from .focal_loss import * 19 | from .smooth_l1_loss import * 20 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/losses/smooth_l1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import paddle 20 | import paddle.nn as nn 21 | import paddle.nn.functional as F 22 | from ppdet.core.workspace import register 23 | 24 | __all__ = ['SmoothL1Loss'] 25 | 26 | @register 27 | class SmoothL1Loss(nn.Layer): 28 | """Smooth L1 Loss. 29 | Args: 30 | beta (float): the size of the smooth (quadratic) region; the loss degenerates to L1 when beta=0.0 31 | loss_weight (float): the final loss is multiplied by this factor 32 | """ 33 | def __init__(self, 34 | beta=1.0, 35 | loss_weight=1.0): 36 | super(SmoothL1Loss, self).__init__() 37 | assert beta >= 0 38 | self.beta = beta 39 | self.loss_weight = loss_weight 40 | 41 | def forward(self, pred, target, reduction='none'): 42 | """Forward function, based on fvcore. 43 | Args: 44 | pred (Tensor): prediction tensor 45 | target (Tensor): target tensor, pred.shape must be the same as target.shape 46 | reduction (str): the way to reduce loss, one of (none, sum, mean) 47 | """ 48 | assert reduction in ('none', 'sum', 'mean') 49 | target = target.detach() 50 | if self.beta < 1e-5: 51 | loss = paddle.abs(pred - target) 52 | else: 53 | n = paddle.abs(pred - target) 54 | cond = n < self.beta 55 | loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta) 56 | if reduction == 'mean': 57 | loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum() 58 | elif reduction == 'sum': 59 | loss = loss.sum() 60 | return loss * self.loss_weight 61 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/shape_spec.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
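For reference, `SmoothL1Loss.forward` above is the usual Huber-style piecewise form: quadratic (0.5 * n^2 / beta) where n = |pred - target| is below beta, linear (n - 0.5 * beta) elsewhere, falling back to plain L1 when beta is effectively zero. A framework-free NumPy sketch (not part of the repo) that mirrors the branch logic:

```python
import numpy as np

def smooth_l1(pred, target, beta=1.0):
    # elementwise |pred - target|: quadratic near zero, linear in the tails
    n = np.abs(pred - target)
    if beta < 1e-5:  # degenerates to plain L1, matching the paddle code above
        return n
    return np.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)

print(smooth_l1(np.array([0.2, 2.0]), np.array([0.0, 0.0])))  # [0.02 1.5 ]
```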
14 | 15 | # The code is based on: 16 | # https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/shape_spec.py 17 | 18 | from collections import namedtuple 19 | 20 | 21 | class ShapeSpec( 22 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])): 23 | def __new__(cls, channels=None, height=None, width=None, stride=None): 24 | return super(ShapeSpec, cls).__new__(cls, channels, height, width, 25 | stride) 26 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/transformers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from .utils import * 16 | from .matchers import * 17 | from .position_encoding import * 18 | from .rtdetr_transformer import * 19 | from .dino_transformer import * 20 | from .hybrid_encoder import * -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/modeling/transformers/ext_op/setup_ms_deformable_attn_op.py: -------------------------------------------------------------------------------- 1 | from paddle.utils.cpp_extension import CUDAExtension, setup 2 | 3 | if __name__ == "__main__": 4 | setup( 5 | name='deformable_detr_ops', 6 | ext_modules=CUDAExtension( 7 | sources=['ms_deformable_attn_op.cc', 'ms_deformable_attn_op.cu'])) 8 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from . import optimizer 16 | from . import ema 17 | 18 | from .optimizer import * 19 | from .ema import * 20 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/optimizer/utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import paddle.nn as nn 20 | 21 | from typing import List 22 | 23 | 24 | def get_bn_running_state_names(model: nn.Layer) -> List[str]: 25 | """Get all bn state full names including running mean and variance 26 | """ 27 | names = [] 28 | for n, m in model.named_sublayers(): 29 | if isinstance(m, (nn.BatchNorm2D, nn.SyncBatchNorm)): 30 | assert hasattr(m, '_mean'), f'assert {m} has _mean' 31 | assert hasattr(m, '_variance'), f'assert {m} has _variance' 32 | running_mean = f'{n}._mean' 33 | running_var = f'{n}._variance' 34 | names.extend([running_mean, running_var]) 35 | 36 | return names 37 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/colormap.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
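`get_bn_running_state_names` above matters because batch-norm running statistics live in the state dict but are not trainable parameters, so utilities such as EMA or weight-decay grouping may want to skip them. A small sketch of one possible use; the exclusion step is an assumption for illustration, not code from this repo:

```python
import paddle.nn as nn
from ppdet.optimizer.utils import get_bn_running_state_names

model = nn.Sequential(('conv', nn.Conv2D(3, 8, 3)), ('bn', nn.BatchNorm2D(8)))
bn_states = set(get_bn_running_state_names(model))  # {'bn._mean', 'bn._variance'}

# e.g. track EMA only over entries that are not BN running statistics
ema_state = {k: v.clone() for k, v in model.state_dict().items()
             if k not in bn_states}
print(sorted(bn_states))
```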
14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | from __future__ import unicode_literals 19 | 20 | import numpy as np 21 | 22 | 23 | def colormap(rgb=False): 24 | """ 25 | Get colormap 26 | 27 | The code of this function is copied from https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/colormap.py 28 | """ 29 | color_list = np.array([ 30 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494, 31 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078, 32 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000, 33 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000, 34 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667, 35 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000, 36 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000, 37 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000, 38 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500, 39 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667, 40 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333, 41 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000, 42 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333, 43 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000, 44 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000, 45 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167, 46 | 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 47 | 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 48 | 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 49 | 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000, 50 | 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833, 51 | 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286, 52 | 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714, 53 | 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000 54 | ]).astype(np.float32) 55 | color_list = color_list.reshape((-1, 3)) * 255 56 | if not rgb: 57 | color_list = color_list[:, ::-1] 58 | return color_list.astype('int32') 59 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/logger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
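The `colormap` helper above returns an (N, 3) int32 palette (BGR by default, RGB when `rgb=True`); visualizers typically index it by class id modulo the palette length. A minimal usage sketch, with an arbitrary class id:

```python
from ppdet.utils.colormap import colormap

colors = colormap(rgb=True)  # (N, 3) int32 palette
class_id = 17
color = tuple(int(c) for c in colors[class_id % len(colors)])
print(color)  # an (R, G, B) triple to pass to a box-drawing routine
```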
14 | 15 | import logging 16 | import os 17 | import sys 18 | 19 | import paddle.distributed as dist 20 | 21 | __all__ = ['setup_logger'] 22 | 23 | logger_initialized = [] 24 | 25 | 26 | def setup_logger(name="ppdet", output=None): 27 | """ 28 | Initialize logger and set its verbosity level to INFO. 29 | Args: 30 | output (str): a file name or a directory to save log. If None, will not save log file. 31 | If it ends with ".txt" or ".log", it is assumed to be a file name. 32 | Otherwise, logs will be saved to `output/log.txt`. 33 | name (str): the root module name of this logger 34 | 35 | Returns: 36 | logging.Logger: a logger 37 | """ 38 | logger = logging.getLogger(name) 39 | if name in logger_initialized: 40 | return logger 41 | 42 | logger.setLevel(logging.INFO) 43 | logger.propagate = False 44 | 45 | formatter = logging.Formatter( 46 | "[%(asctime)s] %(name)s %(levelname)s: %(message)s", 47 | datefmt="%m/%d %H:%M:%S") 48 | # stdout logging: master only 49 | local_rank = dist.get_rank() 50 | if local_rank == 0: 51 | ch = logging.StreamHandler(stream=sys.stdout) 52 | ch.setLevel(logging.DEBUG) 53 | ch.setFormatter(formatter) 54 | logger.addHandler(ch) 55 | 56 | # file logging: all workers 57 | if output is not None: 58 | if output.endswith(".txt") or output.endswith(".log"): 59 | filename = output 60 | else: 61 | filename = os.path.join(output, "log.txt") 62 | if local_rank > 0: 63 | filename = filename + ".rank{}".format(local_rank) 64 | os.makedirs(os.path.dirname(filename), exist_ok=True) 65 | fh = logging.FileHandler(filename, mode='a') 66 | fh.setLevel(logging.DEBUG) 67 | fh.setFormatter(logging.Formatter()) 68 | logger.addHandler(fh) 69 | logger_initialized.append(name) 70 | return logger 71 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/stats.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import collections 16 | import numpy as np 17 | 18 | __all__ = ['SmoothedValue', 'TrainingStats'] 19 | 20 | 21 | class SmoothedValue(object): 22 | """Track a series of values and provide access to smoothed values over a 23 | window or the global series average. 24 | """ 25 | 26 | def __init__(self, window_size=20, fmt=None): 27 | if fmt is None: 28 | fmt = "{median:.4f} ({avg:.4f})" 29 | self.deque = collections.deque(maxlen=window_size) 30 | self.fmt = fmt 31 | self.total = 0.
32 | self.count = 0 33 | 34 | def update(self, value, n=1): 35 | self.deque.append(value) 36 | self.count += n 37 | self.total += value * n 38 | 39 | @property 40 | def median(self): 41 | return np.median(self.deque) 42 | 43 | @property 44 | def avg(self): 45 | return np.mean(self.deque) 46 | 47 | @property 48 | def max(self): 49 | return np.max(self.deque) 50 | 51 | @property 52 | def value(self): 53 | return self.deque[-1] 54 | 55 | @property 56 | def global_avg(self): 57 | return self.total / self.count 58 | 59 | def __str__(self): 60 | return self.fmt.format( 61 | median=self.median, avg=self.avg, max=self.max, value=self.value) 62 | 63 | 64 | class TrainingStats(object): 65 | def __init__(self, window_size, delimiter=' '): 66 | self.meters = None 67 | self.window_size = window_size 68 | self.delimiter = delimiter 69 | 70 | def update(self, stats): 71 | if self.meters is None: 72 | self.meters = { 73 | k: SmoothedValue(self.window_size) 74 | for k in stats.keys() 75 | } 76 | for k, v in self.meters.items(): 77 | v.update(float(stats[k])) 78 | 79 | def get(self, extras=None): 80 | stats = collections.OrderedDict() 81 | if extras: 82 | for k, v in extras.items(): 83 | stats[k] = v 84 | for k, v in self.meters.items(): 85 | stats[k] = format(v.median, '.6f') 86 | 87 | return stats 88 | 89 | def log(self, extras=None): 90 | d = self.get(extras) 91 | strs = [] 92 | for k, v in d.items(): 93 | strs.append("{}: {}".format(k, str(v))) 94 | return self.delimiter.join(strs) 95 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/utils/voc_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from __future__ import absolute_import 16 | from __future__ import division 17 | from __future__ import print_function 18 | 19 | import os 20 | import os.path as osp 21 | import re 22 | import random 23 | 24 | __all__ = ['create_list'] 25 | 26 | 27 | def create_list(devkit_dir, years, output_dir): 28 | """ 29 | create following list: 30 | 1. trainval.txt 31 | 2. 
test.txt 32 | """ 33 | trainval_list = [] 34 | test_list = [] 35 | for year in years: 36 | trainval, test = _walk_voc_dir(devkit_dir, year, output_dir) 37 | trainval_list.extend(trainval) 38 | test_list.extend(test) 39 | 40 | random.shuffle(trainval_list) 41 | with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval: 42 | for item in trainval_list: 43 | ftrainval.write(item[0] + ' ' + item[1] + '\n') 44 | 45 | with open(osp.join(output_dir, 'test.txt'), 'w') as fval: 46 | ct = 0 47 | for item in test_list: 48 | ct += 1 49 | fval.write(item[0] + ' ' + item[1] + '\n') 50 | 51 | 52 | def _get_voc_dir(devkit_dir, year, type): 53 | return osp.join(devkit_dir, 'VOC' + year, type) 54 | 55 | 56 | def _walk_voc_dir(devkit_dir, year, output_dir): 57 | filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main') 58 | annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations') 59 | img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages') 60 | trainval_list = [] 61 | test_list = [] 62 | added = set() 63 | 64 | for _, _, files in os.walk(filelist_dir): 65 | for fname in files: 66 | img_ann_list = [] 67 | if re.match(r'[a-z]+_trainval\.txt', fname): 68 | img_ann_list = trainval_list 69 | elif re.match(r'[a-z]+_test\.txt', fname): 70 | img_ann_list = test_list 71 | else: 72 | continue 73 | fpath = osp.join(filelist_dir, fname) 74 | for line in open(fpath): 75 | name_prefix = line.strip().split()[0] 76 | if name_prefix in added: 77 | continue 78 | added.add(name_prefix) 79 | ann_path = osp.join( 80 | osp.relpath(annotation_dir, output_dir), 81 | name_prefix + '.xml') 82 | img_path = osp.join( 83 | osp.relpath(img_dir, output_dir), name_prefix + '.jpg') 84 | img_ann_list.append((img_path, ann_path)) 85 | 86 | return trainval_list, test_list 87 | -------------------------------------------------------------------------------- /rtdetr_paddle/ppdet/version.py: -------------------------------------------------------------------------------- 1 | # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY 2 | # 3 | full_version = '2.4.0' 4 | commit = '87ed5ba91eaeb332e8e5c3f4e7d5b1d765c75644' 5 | -------------------------------------------------------------------------------- /rtdetr_paddle/requirements.txt: -------------------------------------------------------------------------------- 1 | paddlepaddle-gpu==2.4.2 2 | tqdm 3 | typeguard 4 | visualdl>=2.2.0 5 | opencv-python <= 4.6.0 6 | PyYAML 7 | shapely 8 | scipy 9 | terminaltables 10 | Cython 11 | pycocotools 12 | setuptools 13 | -------------------------------------------------------------------------------- /rtdetr_paddle/tools/slice_image.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
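`create_list` above takes a VOCdevkit root, a list of years, and an output directory; it shuffles the combined trainval pairs and writes `trainval.txt` / `test.txt` whose image and annotation paths are relative to `output_dir`. A hypothetical invocation (the paths are placeholders and assume VOC2007/VOC2012 are already on disk):

```python
from ppdet.utils.voc_utils import create_list

# writes dataset/voc/trainval.txt and dataset/voc/test.txt
create_list('dataset/voc/VOCdevkit', ['2007', '2012'], 'dataset/voc')
```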
14 | 15 | import argparse 16 | from tqdm import tqdm 17 | 18 | 19 | def slice_data(image_dir, dataset_json_path, output_dir, slice_size, 20 | overlap_ratio): 21 | try: 22 | from sahi.scripts.slice_coco import slice 23 | except Exception as e: 24 | raise RuntimeError( 25 | 'Unable to use sahi to slice images, please install sahi, for example: `pip install sahi`, see https://github.com/obss/sahi' 26 | ) 27 | tqdm.write( 28 | f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}") 29 | slice( 30 | image_dir=image_dir, 31 | dataset_json_path=dataset_json_path, 32 | output_dir=output_dir, 33 | slice_size=slice_size, 34 | overlap_ratio=overlap_ratio, ) 35 | 36 | 37 | def main(): 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument( 40 | '--image_dir', type=str, default=None, help="The image folder path.") 41 | parser.add_argument( 42 | '--json_path', type=str, default=None, help="Dataset json path.") 43 | parser.add_argument( 44 | '--output_dir', type=str, default=None, help="Output dir.") 45 | parser.add_argument( 46 | '--slice_size', type=int, default=500, help="slice_size") 47 | parser.add_argument( 48 | '--overlap_ratio', type=float, default=0.25, help="overlap_ratio") 49 | args = parser.parse_args() 50 | 51 | slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size, 52 | args.overlap_ratio) 53 | 54 | 55 | if __name__ == "__main__": 56 | main() 57 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/dataset/coco_detection.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | num_classes: 80 4 | remap_mscoco_category: True 5 | 6 | train_dataloader: 7 | type: DataLoader 8 | dataset: 9 | type: CocoDetection 10 | img_folder: ./dataset/coco/train2017/ 11 | ann_file: ./dataset/coco/annotations/instances_train2017.json 12 | transforms: 13 | type: Compose 14 | ops: ~ 15 | shuffle: True 16 | batch_size: 8 17 | num_workers: 4 18 | drop_last: True 19 | 20 | 21 | val_dataloader: 22 | type: DataLoader 23 | dataset: 24 | type: CocoDetection 25 | img_folder: ./dataset/coco/val2017/ 26 | ann_file: ./dataset/coco/annotations/instances_val2017.json 27 | transforms: 28 | type: Compose 29 | ops: ~ 30 | 31 | shuffle: False 32 | batch_size: 8 33 | num_workers: 4 34 | drop_last: False -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/dataloader.yml: -------------------------------------------------------------------------------- 1 | # num_classes: 91 2 | # remap_mscoco_category: True 3 | 4 | train_dataloader: 5 | dataset: 6 | return_masks: False 7 | transforms: 8 | ops: 9 | - {type: RandomPhotometricDistort, p: 0.5} 10 | - {type: RandomZoomOut, fill: 0} 11 | - {type: RandomIoUCrop, p: 0.8} 12 | - {type: SanitizeBoundingBox, min_size: 1} 13 | - {type: RandomHorizontalFlip} 14 | - {type: Resize, size: [640, 640], } 15 | # - {type: Resize, size: 639, max_size: 640} 16 | # - {type: PadToSize, spatial_size: 640} 17 | - {type: ToImageTensor} 18 | - {type: ConvertDtype} 19 | - {type: SanitizeBoundingBox, min_size: 1} 20 | - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True} 21 | shuffle: True 22 | batch_size: 4 23 | num_workers: 4 24 | collate_fn: default_collate_fn 25 | 26 | 27 | val_dataloader: 28 | dataset: 29 | transforms: 30 | ops: 31 | # - {type: Resize, size: 639, max_size: 640} 32 | # - {type: PadToSize, spatial_size: 640} 33 | - {type: Resize, size: [640, 640]} 34 | - {type: 
ToImageTensor} 35 | - {type: ConvertDtype} 36 | shuffle: False 37 | batch_size: 8 38 | num_workers: 4 39 | collate_fn: default_collate_fn 40 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/dataloader_regnet.yml: -------------------------------------------------------------------------------- 1 | # num_classes: 91 2 | # remap_mscoco_category: True 3 | 4 | train_dataloader: 5 | dataset: 6 | return_masks: False 7 | transforms: 8 | ops: 9 | - {type: RandomPhotometricDistort, p: 0.5} 10 | - {type: RandomZoomOut, fill: 0} 11 | - {type: RandomIoUCrop, p: 0.8} 12 | - {type: SanitizeBoundingBox, min_size: 1} 13 | - {type: RandomHorizontalFlip} 14 | - {type: Resize, size: [640, 640], } 15 | # - {type: Resize, size: 639, max_size: 640} 16 | # - {type: PadToSize, spatial_size: 640} 17 | - {type: ToImageTensor} 18 | - {type: ConvertDtype} 19 | - {type: SanitizeBoundingBox, min_size: 1} 20 | - {type: ConvertBox, out_fmt: 'cxcywh', normalize: True} 21 | shuffle: True 22 | batch_size: 8 23 | num_workers: 2 24 | collate_fn: default_collate_fn 25 | 26 | 27 | val_dataloader: 28 | dataset: 29 | transforms: 30 | ops: 31 | # - {type: Resize, size: 639, max_size: 640} 32 | # - {type: PadToSize, spatial_size: 640} 33 | - {type: Resize, size: [640, 640]} 34 | - {type: ToImageTensor} 35 | - {type: ConvertDtype} 36 | shuffle: False 37 | batch_size: 8 38 | num_workers: 2 39 | collate_fn: default_collate_fn 40 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/optimizer.yml: -------------------------------------------------------------------------------- 1 | 2 | use_ema: True 3 | ema: 4 | type: ModelEMA 5 | decay: 0.9999 6 | warmups: 2000 7 | 8 | 9 | find_unused_parameters: True 10 | 11 | epoches: 72 12 | clip_max_norm: 0.1 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: 'backbone' 19 | lr: 0.00001 20 | - 21 | params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$' 22 | weight_decay: 0. 23 | - 24 | params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$' 25 | weight_decay: 0. 26 | 27 | lr: 0.0001 28 | betas: [0.9, 0.999] 29 | weight_decay: 0.0001 30 | 31 | 32 | lr_scheduler: 33 | type: MultiStepLR 34 | milestones: [1000] 35 | gamma: 0.1 36 | 37 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/optimizer_regnet.yml: -------------------------------------------------------------------------------- 1 | 2 | use_ema: True 3 | ema: 4 | type: ModelEMA 5 | decay: 0.9999 6 | warmups: 2000 7 | 8 | 9 | find_unused_parameters: True 10 | 11 | epoches: 72 12 | clip_max_norm: 0.1 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$' 19 | weight_decay: 0. 20 | - 21 | params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$' 22 | weight_decay: 0. 
23 | 24 | lr: 0.0001 25 | betas: [0.9, 0.999] 26 | weight_decay: 0.0001 27 | 28 | 29 | lr_scheduler: 30 | type: MultiStepLR 31 | milestones: [1000] 32 | gamma: 0.1 33 | 34 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/rtdetr_dla34.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: SetCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | RTDETR: 9 | backbone: DLANet 10 | encoder: HybridEncoder 11 | decoder: RTDETRTransformer 12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 13 | 14 | DLANet: 15 | dla: dla34 16 | pretrained: True 17 | return_idx: [1, 2, 3] 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | feat_strides: [8, 16, 32] 23 | 24 | # intra 25 | hidden_dim: 256 26 | use_encoder_idx: [2] 27 | num_encoder_layers: 1 28 | nhead: 8 29 | dim_feedforward: 1024 30 | dropout: 0. 31 | enc_act: 'gelu' 32 | pe_temperature: 10000 33 | 34 | # cross 35 | expansion: 1.0 36 | depth_mult: 1 37 | act: 'silu' 38 | 39 | # eval 40 | eval_spatial_size: [640, 640] 41 | 42 | 43 | RTDETRTransformer: 44 | feat_channels: [256, 256, 256] 45 | feat_strides: [8, 16, 32] 46 | hidden_dim: 256 47 | num_levels: 3 48 | 49 | num_queries: 300 50 | 51 | num_decoder_layers: 6 52 | num_denoising: 100 53 | 54 | eval_idx: -1 55 | eval_spatial_size: [640, 640] 56 | 57 | 58 | use_focal_loss: True 59 | 60 | RTDETRPostProcessor: 61 | num_top_queries: 300 62 | 63 | 64 | SetCriterion: 65 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 66 | losses: ['vfl', 'boxes', ] 67 | alpha: 0.75 68 | gamma: 2.0 69 | 70 | matcher: 71 | type: HungarianMatcher 72 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 73 | # use_focal_loss: True 74 | alpha: 0.25 75 | gamma: 2.0 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: SetCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | RTDETR: 9 | backbone: PResNet 10 | encoder: HybridEncoder 11 | decoder: RTDETRTransformer 12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 13 | 14 | PResNet: 15 | depth: 50 16 | variant: d 17 | freeze_at: 0 18 | return_idx: [1, 2, 3] 19 | num_stages: 4 20 | freeze_norm: True 21 | pretrained: True 22 | 23 | HybridEncoder: 24 | in_channels: [512, 1024, 2048] 25 | feat_strides: [8, 16, 32] 26 | 27 | # intra 28 | hidden_dim: 256 29 | use_encoder_idx: [2] 30 | num_encoder_layers: 1 31 | nhead: 8 32 | dim_feedforward: 1024 33 | dropout: 0. 
34 | enc_act: 'gelu' 35 | pe_temperature: 10000 36 | 37 | # cross 38 | expansion: 1.0 39 | depth_mult: 1 40 | act: 'silu' 41 | 42 | # eval 43 | eval_spatial_size: [640, 640] 44 | 45 | 46 | RTDETRTransformer: 47 | feat_channels: [256, 256, 256] 48 | feat_strides: [8, 16, 32] 49 | hidden_dim: 256 50 | num_levels: 3 51 | 52 | num_queries: 300 53 | 54 | num_decoder_layers: 6 55 | num_denoising: 100 56 | 57 | eval_idx: -1 58 | eval_spatial_size: [640, 640] 59 | 60 | 61 | use_focal_loss: True 62 | 63 | RTDETRPostProcessor: 64 | num_top_queries: 300 65 | 66 | 67 | SetCriterion: 68 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 69 | losses: ['vfl', 'boxes', ] 70 | alpha: 0.75 71 | gamma: 2.0 72 | 73 | matcher: 74 | type: HungarianMatcher 75 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 76 | # use_focal_loss: True 77 | alpha: 0.25 78 | gamma: 2.0 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/include/rtdetr_regnet.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: SetCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | RTDETR: 9 | backbone: RegNet 10 | encoder: HybridEncoder 11 | decoder: RTDETRTransformer 12 | multi_scale: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 13 | 14 | 15 | RegNet: 16 | return_idx: [1, 2, 3] 17 | configuration: RegNetConfig() 18 | 19 | HybridEncoder: 20 | in_channels: [192, 512, 1088] 21 | feat_strides: [8, 16, 32] 22 | 23 | # intra 24 | hidden_dim: 256 25 | use_encoder_idx: [2] 26 | num_encoder_layers: 1 27 | nhead: 8 28 | dim_feedforward: 1024 29 | dropout: 0. 30 | enc_act: 'gelu' 31 | pe_temperature: 10000 32 | 33 | # cross 34 | expansion: 1.0 35 | depth_mult: 1 36 | act: 'silu' 37 | 38 | # eval 39 | eval_spatial_size: [640, 640] 40 | 41 | 42 | RTDETRTransformer: 43 | feat_channels: [256, 256, 256] 44 | feat_strides: [8, 16, 32] 45 | hidden_dim: 256 46 | num_levels: 3 47 | 48 | num_queries: 300 49 | 50 | num_decoder_layers: 6 51 | num_denoising: 100 52 | 53 | eval_idx: -1 54 | eval_spatial_size: [640, 640] 55 | 56 | 57 | use_focal_loss: True 58 | 59 | RTDETRPostProcessor: 60 | num_top_queries: 300 61 | 62 | 63 | SetCriterion: 64 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 65 | losses: ['vfl', 'boxes', ] 66 | alpha: 0.75 67 | gamma: 2.0 68 | 69 | matcher: 70 | type: HungarianMatcher 71 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 72 | # use_focal_loss: True 73 | alpha: 0.25 74 | gamma: 2.0 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_dla34_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_dla34.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_dla34_6x_coco 10 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | PResNet: 10 | depth: 101 11 | 12 | 13 | HybridEncoder: 14 | # 
intra 15 | hidden_dim: 384 16 | dim_feedforward: 2048 17 | 18 | 19 | RTDETRTransformer: 20 | feat_channels: [384, 384, 384] 21 | 22 | 23 | optimizer: 24 | type: AdamW 25 | params: 26 | - 27 | params: 'backbone' 28 | lr: 0.000001 -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r18vd_6x_coco 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | HybridEncoder: 20 | in_channels: [128, 256, 512] 21 | hidden_dim: 256 22 | expansion: 0.5 23 | 24 | 25 | RTDETRTransformer: 26 | eval_idx: -1 27 | num_decoder_layers: 3 28 | num_denoising: 100 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm).*$' 37 | lr: 0.00001 38 | weight_decay: 0. 39 | - 40 | params: '^(?=.*backbone)(?!.*norm).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$' 44 | weight_decay: 0. 45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | 50 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r34vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 34 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformer: 28 | num_decoder_layers: 4 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 37 | weight_decay: 0. 38 | lr: 0.00001 39 | - 40 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 
45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_r50vd_6x_coco 10 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco 10 | 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | RTDETRTransformer: 16 | eval_idx: 2 # use the 3rd decoder layer for eval -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/rtdetr/rtdetr_regnet_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader_regnet.yml', 5 | './include/optimizer_regnet.yml', 6 | './include/rtdetr_regnet.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_regnet_6x_coco 10 | -------------------------------------------------------------------------------- /rtdetr_pytorch/configs/runtime.yml: -------------------------------------------------------------------------------- 1 | sync_bn: True 2 | find_unused_parameters: False 3 | 4 | 5 | use_amp: False 6 | 7 | scaler: 8 | type: GradScaler 9 | enabled: True 10 | 11 | 12 | use_ema: False 13 | ema: 14 | type: ModelEMA 15 | decay: 0.9999 16 | warmups: 2000 17 | 18 | -------------------------------------------------------------------------------- /rtdetr_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.0.1 2 | torchvision==0.15.2 3 | onnx==1.14.0 4 | onnxruntime==1.15.1 5 | pycocotools 6 | PyYAML 7 | scipy 8 | transformers 9 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from . import data 3 | from . import nn 4 | from . import optim 5 | from . 
import zoo 6 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/core/__init__.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | # from .yaml_utils import register, create, load_config, merge_config, merge_dict 5 | from .yaml_utils import * 6 | from .config import BaseConfig 7 | from .yaml_config import YAMLConfig 8 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .coco import * 3 | from .cifar10 import CIFAR10 4 | 5 | from .dataloader import * 6 | from .transforms import * 7 | 8 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/cifar10/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import torchvision 3 | from typing import Optional, Callable 4 | 5 | from src.core import register 6 | 7 | 8 | @register 9 | class CIFAR10(torchvision.datasets.CIFAR10): 10 | __inject__ = ['transform', 'target_transform'] 11 | 12 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None: 13 | super().__init__(root, train, transform, target_transform, download) 14 | 15 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/coco/__init__.py: -------------------------------------------------------------------------------- 1 | from .coco_dataset import ( 2 | CocoDetection, 3 | mscoco_category2label, 4 | mscoco_label2category, 5 | mscoco_category2name, 6 | ) 7 | from .coco_eval import * 8 | 9 | from .coco_utils import get_coco_api_from_dataset -------------------------------------------------------------------------------- /rtdetr_pytorch/src/data/dataloader.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data as data 3 | 4 | from src.core import register 5 | 6 | 7 | __all__ = ['DataLoader'] 8 | 9 | 10 | @register 11 | class DataLoader(data.DataLoader): 12 | __inject__ = ['dataset', 'collate_fn'] 13 | 14 | def __repr__(self) -> str: 15 | format_string = self.__class__.__name__ + "(" 16 | for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']: 17 | format_string += "\n" 18 | format_string += " {0}: {1}".format(n, getattr(self, n)) 19 | format_string += "\n)" 20 | return format_string 21 | 22 | 23 | 24 | @register 25 | def default_collate_fn(items): 26 | '''default collate_fn 27 | ''' 28 | return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items] 29 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/misc/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .logger import * 3 | from .visualizer import * 4 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/misc/visualizer.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | import torch 5 | import torch.utils.data 6 | 7 | import torchvision 8 | torchvision.disable_beta_transforms_warning() 9 | 10 | import PIL 11 | 12 | __all__ = ['show_sample'] 13 | 14 | def show_sample(sample): 15
| """for coco dataset/dataloader 16 | """ 17 | import matplotlib.pyplot as plt 18 | from torchvision.transforms.v2 import functional as F 19 | from torchvision.utils import draw_bounding_boxes 20 | 21 | image, target = sample 22 | if isinstance(image, PIL.Image.Image): 23 | image = F.to_image_tensor(image) 24 | 25 | image = F.convert_dtype(image, torch.uint8) 26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3) 27 | 28 | fig, ax = plt.subplots() 29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy()) 30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) 31 | fig.tight_layout() 32 | fig.show() 33 | plt.show() 34 | 35 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .arch import * 3 | from .criterion import * 4 | 5 | # 6 | from .backbone import * 7 | 8 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/arch/__init__.py: -------------------------------------------------------------------------------- 1 | from .classification import * 2 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/arch/classification.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from src.core import register 5 | 6 | 7 | __all__ = ['Classification', 'ClassHead'] 8 | 9 | 10 | @register 11 | class Classification(nn.Module): 12 | __inject__ = ['backbone', 'head'] 13 | 14 | def __init__(self, backbone: nn.Module, head: nn.Module=None): 15 | super().__init__() 16 | 17 | self.backbone = backbone 18 | self.head = head 19 | 20 | def forward(self, x): 21 | x = self.backbone(x) 22 | 23 | if self.head is not None: 24 | x = self.head(x) 25 | 26 | return x 27 | 28 | 29 | @register 30 | class ClassHead(nn.Module): 31 | def __init__(self, hidden_dim, num_classes): 32 | super().__init__() 33 | self.pool = nn.AdaptiveAvgPool2d(1) 34 | self.proj = nn.Linear(hidden_dim, num_classes) 35 | 36 | def forward(self, x): 37 | x = x[0] if isinstance(x, (list, tuple)) else x 38 | x = self.pool(x) 39 | x = x.reshape(x.shape[0], -1) 40 | x = self.proj(x) 41 | return x 42 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .presnet import * 3 | from .test_resnet import * 4 | from .regnet import * 5 | from .common import * 6 | from .dla import * -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/regnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from transformers import RegNetModel 4 | 5 | 6 | from src.core import register 7 | 8 | __all__ = ['RegNet'] 9 | 10 | @register 11 | class RegNet(nn.Module): 12 | def __init__(self, configuration, return_idx=[0, 1, 2, 3]): 13 | super(RegNet, self).__init__() 14 | self.model = RegNetModel.from_pretrained("facebook/regnet-y-040") 15 | self.return_idx = return_idx 16 | 17 | 18 | def forward(self, x): 19 | 20 | outputs = self.model(x, output_hidden_states = True) 21 | x = outputs.hidden_states[2:5] 22 | 23 | return x 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/test_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from collections import OrderedDict 6 | 7 | 8 | from src.core import register 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | expansion = 1 13 | 14 | def __init__(self, in_planes, planes, stride=1): 15 | super(BasicBlock, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False), 27 | nn.BatchNorm2d(self.expansion*planes) 28 | ) 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | out += self.shortcut(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | 38 | class _ResNet(nn.Module): 39 | def __init__(self, block, num_blocks, num_classes=10): 40 | super().__init__() 41 | self.in_planes = 64 42 | 43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(64) 45 | 46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 50 | 51 | self.linear = nn.Linear(512 * block.expansion, num_classes) 52 | 53 | def _make_layer(self, block, planes, num_blocks, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for stride in strides: 57 | layers.append(block(self.in_planes, planes, stride)) 58 | self.in_planes = planes * block.expansion 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | @register 74 | class MResNet(nn.Module): 75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None: 76 | super().__init__() 77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes) 78 | 79 | def forward(self, x): 80 | return self.model(x) 81 | 82 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/backbone/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py 3 | 4 | by lyuwenyu 5 | """ 6 | 7 | from collections import OrderedDict 8 | from typing import Dict, List 9 | 10 | 11 | import torch.nn as nn 12 | 13 | 14 | class IntermediateLayerGetter(nn.ModuleDict): 15 | """ 16 | Module wrapper that returns intermediate layers from a model 17 | 18 | It has a strong assumption that the modules have been registered 19 | into the model in the same order as they are used. 
20 | This means that one should **not** reuse the same nn.Module 21 | twice in the forward if you want this to work. 22 | 23 | Additionally, it is only able to query submodules that are directly 24 | assigned to the model. So if `model` is passed, `model.feature1` can 25 | be returned, but not `model.feature1.layer2`. 26 | """ 27 | 28 | _version = 3 29 | 30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None: 31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]): 32 | raise ValueError("return_layers are not present in model. {}"\ 33 | .format([name for name, _ in model.named_children()])) 34 | orig_return_layers = return_layers 35 | return_layers = {str(k): str(k) for k in return_layers} 36 | layers = OrderedDict() 37 | for name, module in model.named_children(): 38 | layers[name] = module 39 | if name in return_layers: 40 | del return_layers[name] 41 | if not return_layers: 42 | break 43 | 44 | super().__init__(layers) 45 | self.return_layers = orig_return_layers 46 | 47 | def forward(self, x): 48 | # out = OrderedDict() 49 | outputs = [] 50 | for name, module in self.items(): 51 | x = module(x) 52 | if name in self.return_layers: 53 | # out_name = self.return_layers[name] 54 | # out[out_name] = x 55 | outputs.append(x) 56 | 57 | return outputs 58 | 59 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/criterion/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | import torch.nn as nn 3 | from src.core import register 4 | 5 | CrossEntropyLoss = register(nn.CrossEntropyLoss) 6 | 7 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/nn/criterion/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | 4 | 5 | 6 | def format_target(targets): 7 | ''' 8 | Args: 9 | targets (List[Dict]), 10 | Return: 11 | tensor (Tensor), [im_id, label, bbox,] 12 | ''' 13 | outputs = [] 14 | for i, tgt in enumerate(targets): 15 | boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh') 16 | labels = tgt['labels'].reshape(-1, 1) 17 | im_ids = torch.ones_like(labels) * i 18 | outputs.append(torch.cat([im_ids, labels, boxes], dim=1)) 19 | 20 | return torch.cat(outputs, dim=0) 21 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/optim/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .ema import * 3 | from .optim import * 4 | from .amp import * -------------------------------------------------------------------------------- /rtdetr_pytorch/src/optim/amp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.cuda.amp as amp 4 | 5 | 6 | from src.core import register 7 | import src.misc.dist as dist 8 | 9 | 10 | __all__ = ['GradScaler'] 11 | 12 | GradScaler = register(amp.grad_scaler.GradScaler) 13 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/optim/optim.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import torch.optim.lr_scheduler as lr_scheduler 6 | 7 | from src.core import register 8 | 9 | 10 | __all__ = ['AdamW', 'SGD', 'Adam', 
'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR']
11 | 
12 | 
13 | 
14 | SGD = register(optim.SGD)
15 | Adam = register(optim.Adam)
16 | AdamW = register(optim.AdamW)
17 | 
18 | 
19 | MultiStepLR = register(lr_scheduler.MultiStepLR)
20 | CosineAnnealingLR = register(lr_scheduler.CosineAnnealingLR)
21 | OneCycleLR = register(lr_scheduler.OneCycleLR)
22 | LambdaLR = register(lr_scheduler.LambdaLR)
23 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/solver/__init__.py: --------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 | 
4 | from .solver import BaseSolver
5 | from .det_solver import DetSolver
6 | 
7 | 
8 | from typing import Dict, Type
9 | 
10 | TASKS: Dict[str, Type[BaseSolver]] = {
11 |     'detection': DetSolver,
12 | }
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/__init__.py: --------------------------------------------------------------------------------
1 | 
2 | from .rtdetr import *
3 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/__init__.py: --------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 | 
4 | 
5 | from .rtdetr import *
6 | 
7 | from .hybrid_encoder import *
8 | from .rtdetr_decoder import *
9 | from .rtdetr_postprocessor import *
10 | from .rtdetr_criterion import *
11 | 
12 | from .matcher import *
13 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/box_ops.py: --------------------------------------------------------------------------------
1 | '''
2 | # Copyright (c) Facebook, Inc. and its affiliates.
All Rights Reserved 3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py 4 | ''' 5 | 6 | import torch 7 | from torchvision.ops.boxes import box_area 8 | 9 | 10 | def box_cxcywh_to_xyxy(x): 11 | x_c, y_c, w, h = x.unbind(-1) 12 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 13 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 14 | return torch.stack(b, dim=-1) 15 | 16 | 17 | def box_xyxy_to_cxcywh(x): 18 | x0, y0, x1, y1 = x.unbind(-1) 19 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 20 | (x1 - x0), (y1 - y0)] 21 | return torch.stack(b, dim=-1) 22 | 23 | 24 | # modified from torchvision to also return the union 25 | def box_iou(boxes1, boxes2): 26 | area1 = box_area(boxes1) 27 | area2 = box_area(boxes2) 28 | 29 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 30 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 31 | 32 | wh = (rb - lt).clamp(min=0) # [N,M,2] 33 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 34 | 35 | union = area1[:, None] + area2 - inter 36 | 37 | iou = inter / union 38 | return iou, union 39 | 40 | 41 | def generalized_box_iou(boxes1, boxes2): 42 | """ 43 | Generalized IoU from https://giou.stanford.edu/ 44 | 45 | The boxes should be in [x0, y0, x1, y1] format 46 | 47 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 48 | and M = len(boxes2) 49 | """ 50 | # degenerate boxes gives inf / nan results 51 | # so do an early check 52 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 53 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 54 | iou, union = box_iou(boxes1, boxes2) 55 | 56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 58 | 59 | wh = (rb - lt).clamp(min=0) # [N,M,2] 60 | area = wh[:, :, 0] * wh[:, :, 1] 61 | 62 | return iou - (area - union) / area 63 | 64 | 65 | def masks_to_boxes(masks): 66 | """Compute the bounding boxes around the provided masks 67 | 68 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 
69 | 70 | Returns a [N, 4] tensors, with the boxes in xyxy format 71 | """ 72 | if masks.numel() == 0: 73 | return torch.zeros((0, 4), device=masks.device) 74 | 75 | h, w = masks.shape[-2:] 76 | 77 | y = torch.arange(0, h, dtype=torch.float) 78 | x = torch.arange(0, w, dtype=torch.float) 79 | y, x = torch.meshgrid(y, x) 80 | 81 | x_mask = (masks * x.unsqueeze(0)) 82 | x_max = x_mask.flatten(1).max(-1)[0] 83 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 84 | 85 | y_mask = (masks * y.unsqueeze(0)) 86 | y_max = y_mask.flatten(1).max(-1)[0] 87 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 88 | 89 | return torch.stack([x_min, y_min, x_max, y_max], 1) -------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/rtdetr.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import random 9 | import numpy as np 10 | 11 | from src.core import register 12 | 13 | 14 | __all__ = ['RTDETR', ] 15 | 16 | 17 | @register 18 | class RTDETR(nn.Module): 19 | __inject__ = ['backbone', 'encoder', 'decoder', ] 20 | 21 | def __init__(self, backbone: nn.Module, encoder, decoder, multi_scale=None): 22 | super().__init__() 23 | self.backbone = backbone 24 | self.decoder = decoder 25 | self.encoder = encoder 26 | self.multi_scale = multi_scale 27 | 28 | def forward(self, x, targets=None): 29 | if self.multi_scale and self.training: 30 | sz = np.random.choice(self.multi_scale) 31 | x = F.interpolate(x, size=[sz, sz]) 32 | 33 | x = self.backbone(x) 34 | x = self.encoder(x) 35 | x = self.decoder(x, targets) 36 | 37 | return x 38 | 39 | def deploy(self, ): 40 | self.eval() 41 | for m in self.modules(): 42 | if hasattr(m, 'convert_to_deploy'): 43 | m.convert_to_deploy() 44 | return self 45 | -------------------------------------------------------------------------------- /rtdetr_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py: -------------------------------------------------------------------------------- 1 | """by lyuwenyu 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import torchvision 9 | 10 | from src.core import register 11 | 12 | 13 | __all__ = ['RTDETRPostProcessor'] 14 | 15 | 16 | @register 17 | class RTDETRPostProcessor(nn.Module): 18 | __share__ = ['num_classes', 'use_focal_loss', 'num_top_queries', 'remap_mscoco_category'] 19 | 20 | def __init__(self, num_classes=80, use_focal_loss=True, num_top_queries=300, remap_mscoco_category=False) -> None: 21 | super().__init__() 22 | self.use_focal_loss = use_focal_loss 23 | self.num_top_queries = num_top_queries 24 | self.num_classes = num_classes 25 | self.remap_mscoco_category = remap_mscoco_category 26 | self.deploy_mode = False 27 | 28 | def extra_repr(self) -> str: 29 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}' 30 | 31 | # def forward(self, outputs, orig_target_sizes): 32 | def forward(self, outputs, orig_target_sizes): 33 | 34 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] 35 | # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0) 36 | 37 | bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy') 38 | bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1) 39 | 40 | if self.use_focal_loss: 41 | scores = F.sigmoid(logits) 42 | 
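            # NOTE: the top-k below runs over scores flattened to (batch, num_queries * num_classes),
            # ranking all query-class pairs jointly, so a single query may yield several labels; the
            # flat index encodes query_idx * num_classes + class_idx, recovered by the `%` and `//` lines.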
scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)
43 |             labels = index % self.num_classes
44 |             index = index // self.num_classes
45 |             boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]))
46 | 
47 |         else:
48 |             scores = F.softmax(logits, dim=-1)[:, :, :-1]
49 |             scores, labels = scores.max(dim=-1)
50 |             boxes = bbox_pred
51 |             if scores.shape[1] > self.num_top_queries:
52 |                 scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
53 |                 labels = torch.gather(labels, dim=1, index=index)
54 |                 boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
55 | 
56 |         # TODO for onnx export
57 |         if self.deploy_mode:
58 |             return labels, boxes, scores
59 | 
60 |         # TODO
61 |         if self.remap_mscoco_category:
62 |             from ...data.coco import mscoco_label2category
63 |             labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
64 |                 .to(boxes.device).reshape(labels.shape)
65 | 
66 |         results = []
67 |         for lab, box, sco in zip(labels, boxes, scores):
68 |             result = dict(labels=lab, boxes=box, scores=sco)
69 |             results.append(result)
70 | 
71 |         return results
72 | 
73 | 
74 |     def deploy(self, ):
75 |         self.eval()
76 |         self.deploy_mode = True
77 |         return self
78 | 
79 |     @property
80 |     def iou_types(self, ):
81 |         return ('bbox', )
82 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/tools/README.md: --------------------------------------------------------------------------------
1 | 
2 | 
3 | Train/test script examples
4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master-port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &`
5 | - `-r path/to/checkpoint`
6 | - `--amp`
7 | - `--test-only`
8 | 
9 | 
10 | Tuning script examples
11 | - `torchrun --master_port=8844 --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -t https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth`
12 | 
13 | 
14 | Export script examples
15 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check`
16 | 
17 | 
18 | GPU does not release memory
19 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9`
20 | 
21 | 
22 | Save all logs
23 | - Append `&> train.log 2>&1 &` (background) or `&> train.log 2>&1` to the command
24 | 
25 | 
-------------------------------------------------------------------------------- /rtdetr_pytorch/tools/train.py: --------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 | 
4 | import os
5 | import sys
6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
7 | import argparse
8 | 
9 | import src.misc.dist as dist
10 | from src.core import YAMLConfig
11 | from src.solver import TASKS
12 | 
13 | 
14 | def main(args, ) -> None:
15 |     '''main
16 |     '''
17 |     dist.init_distributed()
18 |     if args.seed is not None:
19 |         dist.set_seed(args.seed)
20 | 
21 |     assert not all([args.tuning, args.resume]), \
22 |         'Only support training from scratch, resuming, or tuning, one at a time'
23 | 
24 |     cfg = YAMLConfig(
25 |         args.config,
26 |         resume=args.resume,
27 |         use_amp=args.amp,
28 |         tuning=args.tuning
29 |     )
30 | 
31 |     solver = TASKS[cfg.yaml_cfg['task']](cfg)
32 | 
33 |     if args.test_only:
34 |         solver.val()
35 |     else:
36 |         solver.fit()
37 | 
38 | 
39 | if __name__ == '__main__':
40 | 
41 |     parser = argparse.ArgumentParser()
42 |     parser.add_argument('--config', '-c', type=str, )
43 |     parser.add_argument('--resume', '-r',
type=str, ) 44 | parser.add_argument('--tuning', '-t', type=str, ) 45 | parser.add_argument('--test-only', action='store_true', default=False,) 46 | parser.add_argument('--amp', action='store_true', default=False,) 47 | parser.add_argument('--seed', type=int, help='seed',) 48 | args = parser.parse_args() 49 | 50 | main(args) 51 | -------------------------------------------------------------------------------- /rtdetrv2_paddle/readme.md: -------------------------------------------------------------------------------- 1 | see https://github.com/PaddlePaddle/PaddleDetection -------------------------------------------------------------------------------- /rtdetrv2_pytorch/Dockerfile: -------------------------------------------------------------------------------- 1 | # tensorrt:23.01-py3 (8.5.2.2) 2 | FROM nvcr.io/nvidia/tensorrt:23.01-py3 3 | 4 | WORKDIR /workspace 5 | 6 | COPY requirements.txt . 7 | 8 | RUN pip install --upgrade pip && \ 9 | pip install -r requirements.txt 10 | 11 | CMD ["/bin/bash"] 12 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/dataset/coco_detection.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | evaluator: 4 | type: CocoEvaluator 5 | iou_types: ['bbox', ] 6 | 7 | # num_classes: 365 8 | # remap_mscoco_category: False 9 | 10 | # num_classes: 91 11 | # remap_mscoco_category: False 12 | 13 | num_classes: 80 14 | remap_mscoco_category: True 15 | 16 | 17 | train_dataloader: 18 | type: DataLoader 19 | dataset: 20 | type: CocoDetection 21 | img_folder: ./dataset/coco/train2017/ 22 | ann_file: ./dataset/coco/annotations/instances_train2017.json 23 | return_masks: False 24 | transforms: 25 | type: Compose 26 | ops: ~ 27 | shuffle: True 28 | num_workers: 4 29 | drop_last: True 30 | collate_fn: 31 | type: BatchImageCollateFuncion 32 | 33 | 34 | val_dataloader: 35 | type: DataLoader 36 | dataset: 37 | type: CocoDetection 38 | img_folder: ./dataset/coco/val2017/ 39 | ann_file: ./dataset/coco/annotations/instances_val2017.json 40 | return_masks: False 41 | transforms: 42 | type: Compose 43 | ops: ~ 44 | shuffle: False 45 | num_workers: 4 46 | drop_last: False 47 | collate_fn: 48 | type: BatchImageCollateFuncion 49 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/dataset/voc_detection.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | evaluator: 4 | type: CocoEvaluator 5 | iou_types: ['bbox', ] 6 | 7 | num_classes: 20 8 | 9 | train_dataloader: 10 | type: DataLoader 11 | dataset: 12 | type: VOCDetection 13 | root: ./dataset/voc/ 14 | ann_file: trainval.txt 15 | label_file: label_list.txt 16 | transforms: 17 | type: Compose 18 | ops: ~ 19 | shuffle: True 20 | num_workers: 4 21 | drop_last: True 22 | collate_fn: 23 | type: BatchImageCollateFuncion 24 | 25 | 26 | val_dataloader: 27 | type: DataLoader 28 | dataset: 29 | type: VOCDetection 30 | root: ./dataset/voc/ 31 | ann_file: test.txt 32 | label_file: label_list.txt 33 | transforms: 34 | type: Compose 35 | ops: ~ 36 | shuffle: False 37 | num_workers: 4 38 | drop_last: False 39 | collate_fn: 40 | type: BatchImageCollateFuncion 41 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/include/dataloader.yml: -------------------------------------------------------------------------------- 1 | 2 | train_dataloader: 3 | dataset: 4 | 
return_masks: False 5 | transforms: 6 | ops: 7 | - {type: RandomPhotometricDistort, p: 0.5} 8 | - {type: RandomZoomOut, fill: 0} 9 | - {type: RandomIoUCrop, p: 0.8} 10 | - {type: SanitizeBoundingBoxes, min_size: 1} 11 | - {type: RandomHorizontalFlip} 12 | - {type: Resize, size: [640, 640], } 13 | - {type: SanitizeBoundingBoxes, min_size: 1} 14 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 15 | - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True} 16 | collate_fn: 17 | type: BatchImageCollateFuncion 18 | scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 19 | shuffle: True 20 | num_workers: 4 21 | total_batch_size: 16 22 | 23 | val_dataloader: 24 | dataset: 25 | transforms: 26 | ops: 27 | - {type: Resize, size: [640, 640]} 28 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 29 | shuffle: False 30 | total_batch_size: 16 31 | num_workers: 8 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/include/optimizer.yml: -------------------------------------------------------------------------------- 1 | 2 | use_ema: True 3 | ema: 4 | type: ModelEMA 5 | decay: 0.9999 6 | warmups: 2000 7 | 8 | 9 | epoches: 72 10 | clip_max_norm: 0.1 11 | 12 | 13 | optimizer: 14 | type: AdamW 15 | params: 16 | - 17 | params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$' 18 | lr: 0.00001 19 | - 20 | params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$' 21 | weight_decay: 0. 22 | lr: 0.00001 23 | - 24 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 25 | weight_decay: 0. 26 | 27 | lr: 0.0001 28 | betas: [0.9, 0.999] 29 | weight_decay: 0.0001 30 | 31 | 32 | lr_scheduler: 33 | type: MultiStepLR 34 | milestones: [1000] 35 | gamma: 0.1 36 | 37 | 38 | lr_warmup_scheduler: 39 | type: LinearWarmup 40 | warmup_duration: 2000 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: RTDETRCriterion 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | use_focal_loss: True 9 | eval_spatial_size: [640, 640] # h w 10 | 11 | 12 | RTDETR: 13 | backbone: PResNet 14 | encoder: HybridEncoder 15 | decoder: RTDETRTransformer 16 | 17 | 18 | PResNet: 19 | depth: 50 20 | variant: d 21 | freeze_at: 0 22 | return_idx: [1, 2, 3] 23 | num_stages: 4 24 | freeze_norm: True 25 | pretrained: True 26 | 27 | 28 | HybridEncoder: 29 | in_channels: [512, 1024, 2048] 30 | feat_strides: [8, 16, 32] 31 | 32 | # intra 33 | hidden_dim: 256 34 | use_encoder_idx: [2] 35 | num_encoder_layers: 1 36 | nhead: 8 37 | dim_feedforward: 1024 38 | dropout: 0. 
39 | enc_act: 'gelu' 40 | 41 | # cross 42 | expansion: 1.0 43 | depth_mult: 1 44 | act: 'silu' 45 | 46 | version: v1 47 | 48 | RTDETRTransformer: 49 | feat_channels: [256, 256, 256] 50 | feat_strides: [8, 16, 32] 51 | hidden_dim: 256 52 | num_levels: 3 53 | 54 | num_layers: 6 55 | num_queries: 300 56 | 57 | num_denoising: 100 58 | label_noise_ratio: 0.5 59 | box_noise_scale: 1.0 # 1.0 0.4 60 | 61 | eval_idx: -1 62 | 63 | 64 | RTDETRPostProcessor: 65 | num_top_queries: 300 66 | 67 | 68 | RTDETRCriterion: 69 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 70 | losses: ['vfl', 'boxes', ] 71 | alpha: 0.75 72 | gamma: 2.0 73 | 74 | matcher: 75 | type: HungarianMatcher 76 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 77 | alpha: 0.25 78 | gamma: 2.0 79 | 80 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r101vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 101 16 | 17 | 18 | HybridEncoder: 19 | # intra 20 | hidden_dim: 384 21 | dim_feedforward: 2048 22 | 23 | 24 | RTDETRTransformer: 25 | feat_channels: [384, 384, 384] 26 | 27 | 28 | optimizer: 29 | type: AdamW 30 | params: 31 | - 32 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 33 | lr: 0.000001 34 | - 35 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 36 | weight_decay: 0. 37 | 38 | lr: 0.0001 39 | betas: [0.9, 0.999] 40 | weight_decay: 0.0001 41 | 42 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r18vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 18 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformer: 28 | num_layers: 3 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 37 | weight_decay: 0. 38 | lr: 0.00001 39 | - 40 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 
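# NOTE: the `params` strings in the optimizer groups above are regular expressions
# matched against model parameter names, forming per-group overrides (lower lr for the
# backbone, zero weight decay for norm/bn/bias); parameters that match no group fall
# back to the global lr / weight_decay given below.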
45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r34vd_6x_coco 12 | 13 | 14 | PResNet: 15 | depth: 34 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformer: 28 | num_layers: 4 29 | 30 | 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 37 | weight_decay: 0. 38 | lr: 0.00001 39 | - 40 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 41 | lr: 0.00001 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | 2 | __include__: [ 3 | '../dataset/coco_detection.yml', 4 | '../runtime.yml', 5 | './include/dataloader.yml', 6 | './include/optimizer.yml', 7 | './include/rtdetr_r50vd.yml', 8 | ] 9 | 10 | 11 | output_dir: ./output/rtdetr_r50vd_6x_coco 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetr_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco 10 | 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | 16 | RTDETRTransformer: 17 | eval_idx: 2 # use 3th decoder layer to eval 18 | 19 | 20 | 21 | optimizer: 22 | type: AdamW 23 | params: 24 | - 25 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 26 | lr: 0.00001 27 | - 28 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 29 | weight_decay: 0. 
30 | 31 | lr: 0.0001 32 | betas: [0.9, 0.999] 33 | weight_decay: 0.0001 34 | 35 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/include/dataloader.yml: -------------------------------------------------------------------------------- 1 | 2 | train_dataloader: 3 | dataset: 4 | transforms: 5 | ops: 6 | - {type: RandomPhotometricDistort, p: 0.5} 7 | - {type: RandomZoomOut, fill: 0} 8 | - {type: RandomIoUCrop, p: 0.8} 9 | - {type: SanitizeBoundingBoxes, min_size: 1} 10 | - {type: RandomHorizontalFlip} 11 | - {type: Resize, size: [640, 640], } 12 | - {type: SanitizeBoundingBoxes, min_size: 1} 13 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 14 | - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True} 15 | policy: 16 | name: stop_epoch 17 | epoch: 71 # epoch in [71, ~) stop `ops` 18 | ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop'] 19 | 20 | collate_fn: 21 | type: BatchImageCollateFuncion 22 | scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800] 23 | stop_epoch: 71 # epoch in [71, ~) stop `multiscales` 24 | 25 | shuffle: True 26 | total_batch_size: 16 # total batch size equals to 16 (4 * 4) 27 | num_workers: 4 28 | 29 | 30 | val_dataloader: 31 | dataset: 32 | transforms: 33 | ops: 34 | - {type: Resize, size: [640, 640]} 35 | - {type: ConvertPILImage, dtype: 'float32', scale: True} 36 | shuffle: False 37 | total_batch_size: 32 38 | num_workers: 4 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/include/optimizer.yml: -------------------------------------------------------------------------------- 1 | 2 | use_amp: True 3 | use_ema: True 4 | ema: 5 | type: ModelEMA 6 | decay: 0.9999 7 | warmups: 2000 8 | 9 | 10 | epoches: 72 11 | clip_max_norm: 0.1 12 | 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: '^(?=.*backbone)(?!.*norm).*$' 19 | lr: 0.00001 20 | - 21 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 22 | weight_decay: 0. 23 | 24 | lr: 0.0001 25 | betas: [0.9, 0.999] 26 | weight_decay: 0.0001 27 | 28 | 29 | lr_scheduler: 30 | type: MultiStepLR 31 | milestones: [1000] 32 | gamma: 0.1 33 | 34 | 35 | lr_warmup_scheduler: 36 | type: LinearWarmup 37 | warmup_duration: 2000 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/include/rtdetrv2_r50vd.yml: -------------------------------------------------------------------------------- 1 | task: detection 2 | 3 | model: RTDETR 4 | criterion: RTDETRCriterionv2 5 | postprocessor: RTDETRPostProcessor 6 | 7 | 8 | use_focal_loss: True 9 | eval_spatial_size: [640, 640] # h w 10 | 11 | 12 | RTDETR: 13 | backbone: PResNet 14 | encoder: HybridEncoder 15 | decoder: RTDETRTransformerv2 16 | 17 | 18 | PResNet: 19 | depth: 50 20 | variant: d 21 | freeze_at: 0 22 | return_idx: [1, 2, 3] 23 | num_stages: 4 24 | freeze_norm: True 25 | pretrained: True 26 | 27 | 28 | HybridEncoder: 29 | in_channels: [512, 1024, 2048] 30 | feat_strides: [8, 16, 32] 31 | 32 | # intra 33 | hidden_dim: 256 34 | use_encoder_idx: [2] 35 | num_encoder_layers: 1 36 | nhead: 8 37 | dim_feedforward: 1024 38 | dropout: 0. 
39 | enc_act: 'gelu' 40 | 41 | # cross 42 | expansion: 1.0 43 | depth_mult: 1 44 | act: 'silu' 45 | 46 | 47 | RTDETRTransformerv2: 48 | feat_channels: [256, 256, 256] 49 | feat_strides: [8, 16, 32] 50 | hidden_dim: 256 51 | num_levels: 3 52 | 53 | num_layers: 6 54 | num_queries: 300 55 | 56 | num_denoising: 100 57 | label_noise_ratio: 0.5 58 | box_noise_scale: 1.0 # 1.0 0.4 59 | 60 | eval_idx: -1 61 | 62 | # NEW 63 | num_points: [4, 4, 4] # [3,3,3] [2,2,2] 64 | cross_attn_method: default # default, discrete 65 | query_select_method: default # default, agnostic 66 | 67 | 68 | RTDETRPostProcessor: 69 | num_top_queries: 300 70 | 71 | 72 | RTDETRCriterionv2: 73 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,} 74 | losses: ['vfl', 'boxes', ] 75 | alpha: 0.75 76 | gamma: 2.0 77 | 78 | matcher: 79 | type: HungarianMatcher 80 | weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2} 81 | alpha: 0.25 82 | gamma: 2.0 83 | 84 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_h_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_hgnetv2_h_6x_coco 11 | 12 | 13 | RTDETR: 14 | backbone: HGNetv2 15 | 16 | 17 | HGNetv2: 18 | name: 'H' 19 | return_idx: [1, 2, 3] 20 | freeze_at: 0 21 | freeze_norm: True 22 | pretrained: True 23 | 24 | 25 | HybridEncoder: 26 | # intra 27 | hidden_dim: 512 28 | dim_feedforward: 2048 29 | num_encoder_layers: 2 30 | 31 | 32 | RTDETRTransformerv2: 33 | feat_channels: [512, 512, 512] 34 | 35 | 36 | 37 | optimizer: 38 | type: AdamW 39 | params: 40 | - 41 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 42 | lr: 0.000005 43 | - 44 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 45 | weight_decay: 0. 46 | 47 | lr: 0.0001 48 | betas: [0.9, 0.999] 49 | weight_decay: 0.0001 50 | 51 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_l_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_hgnetv2_l_6x_coco 11 | 12 | 13 | RTDETR: 14 | backbone: HGNetv2 15 | 16 | 17 | HGNetv2: 18 | name: 'L' 19 | return_idx: [1, 2, 3] 20 | freeze_at: 0 21 | freeze_norm: True 22 | pretrained: True 23 | 24 | 25 | optimizer: 26 | type: AdamW 27 | params: 28 | - 29 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 30 | lr: 0.000005 31 | - 32 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 33 | weight_decay: 0. 
34 | 35 | lr: 0.0001 36 | betas: [0.9, 0.999] 37 | weight_decay: 0.0001 38 | 39 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_hgnetv2_x_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_hgnetv2_x_6x_coco 11 | 12 | 13 | RTDETR: 14 | backbone: HGNetv2 15 | 16 | 17 | HGNetv2: 18 | name: 'X' 19 | return_idx: [1, 2, 3] 20 | freeze_at: 0 21 | freeze_norm: True 22 | pretrained: True 23 | 24 | 25 | 26 | HybridEncoder: 27 | # intra 28 | hidden_dim: 384 29 | dim_feedforward: 2048 30 | 31 | 32 | RTDETRTransformerv2: 33 | feat_channels: [384, 384, 384] 34 | 35 | 36 | 37 | optimizer: 38 | type: AdamW 39 | params: 40 | - 41 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 42 | lr: 0.000001 43 | - 44 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 45 | weight_decay: 0. 46 | 47 | lr: 0.0001 48 | betas: [0.9, 0.999] 49 | weight_decay: 0.0001 50 | 51 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r101vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r101vd_6x_coco 11 | 12 | 13 | PResNet: 14 | depth: 101 15 | 16 | 17 | HybridEncoder: 18 | # intra 19 | hidden_dim: 384 20 | dim_feedforward: 2048 21 | 22 | 23 | RTDETRTransformerv2: 24 | feat_channels: [384, 384, 384] 25 | 26 | 27 | optimizer: 28 | type: AdamW 29 | params: 30 | - 31 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 32 | lr: 0.000001 33 | - 34 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 35 | weight_decay: 0. 36 | 37 | lr: 0.0001 38 | betas: [0.9, 0.999] 39 | weight_decay: 0.0001 40 | 41 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | 29 | 30 | epoches: 120 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*(?:norm|bn)).*$' 37 | weight_decay: 0. 
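# NOTE: the train_dataloader override below follows the stop-epoch policy from
# include/dataloader.yml: from epoch 117 of the 120-epoch schedule the listed strong
# augmentations are skipped, and `scales: ~` appears to disable multi-scale collate
# entirely for this recipe.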
38 | 39 | 40 | train_dataloader: 41 | dataset: 42 | transforms: 43 | policy: 44 | epoch: 117 45 | collate_fn: 46 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_voc.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/voc_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_120e_voc 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | 29 | 30 | epoches: 120 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*(?:norm|bn)).*$' 37 | weight_decay: 0. 38 | 39 | train_dataloader: 40 | dataset: 41 | transforms: 42 | policy: 43 | epoch: 117 44 | collate_fn: 45 | scales: ~ 46 | total_batch_size: 32 47 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_dsp_3x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r18vd_120e_coco.pth 11 | 12 | output_dir: ./output/rtdetrv2_r18vd_dsp_3x_coco 13 | 14 | PResNet: 15 | depth: 18 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformerv2: 28 | num_layers: 3 29 | num_points: [4, 4, 4] 30 | cross_attn_method: discrete 31 | 32 | 33 | epoches: 36 34 | 35 | optimizer: 36 | type: AdamW 37 | params: 38 | - 39 | params: '^(?=.*(?:norm|bn)).*$' 40 | weight_decay: 0. 41 | 42 | 43 | train_dataloader: 44 | dataset: 45 | transforms: 46 | policy: 47 | epoch: 33 48 | collate_fn: 49 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp1_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_sp1_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | num_points: [1, 1, 1] 29 | 30 | 31 | epoches: 120 32 | 33 | optimizer: 34 | type: AdamW 35 | params: 36 | - 37 | params: '^(?=.*(?:norm|bn)).*$' 38 | weight_decay: 0. 
39 | 40 | 41 | train_dataloader: 42 | dataset: 43 | transforms: 44 | policy: 45 | epoch: 117 46 | collate_fn: 47 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp2_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_sp2_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | num_points: [2, 2, 2] 29 | 30 | 31 | epoches: 120 32 | 33 | optimizer: 34 | type: AdamW 35 | params: 36 | - 37 | params: '^(?=.*(?:norm|bn)).*$' 38 | weight_decay: 0. 39 | 40 | 41 | train_dataloader: 42 | dataset: 43 | transforms: 44 | policy: 45 | epoch: 117 46 | collate_fn: 47 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_sp3_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r18vd_sp3_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 18 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 3 28 | num_points: [3, 3, 3] 29 | 30 | 31 | epoches: 120 32 | 33 | optimizer: 34 | type: AdamW 35 | params: 36 | - 37 | params: '^(?=.*(?:norm|bn)).*$' 38 | weight_decay: 0. 39 | 40 | 41 | train_dataloader: 42 | dataset: 43 | transforms: 44 | policy: 45 | epoch: 117 46 | collate_fn: 47 | scales: ~ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r34vd_120e_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r34vd_120e_coco 11 | 12 | 13 | PResNet: 14 | depth: 34 15 | freeze_at: -1 16 | freeze_norm: False 17 | pretrained: True 18 | 19 | 20 | HybridEncoder: 21 | in_channels: [128, 256, 512] 22 | hidden_dim: 256 23 | expansion: 0.5 24 | 25 | 26 | RTDETRTransformerv2: 27 | num_layers: 4 28 | 29 | 30 | epoches: 120 31 | 32 | optimizer: 33 | type: AdamW 34 | params: 35 | - 36 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 37 | lr: 0.00005 38 | - 39 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 40 | lr: 0.00005 41 | weight_decay: 0. 42 | - 43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 44 | weight_decay: 0. 
45 | 46 | lr: 0.0001 47 | betas: [0.9, 0.999] 48 | weight_decay: 0.0001 49 | 50 | 51 | train_dataloader: 52 | dataset: 53 | transforms: 54 | policy: 55 | epoch: 117 56 | collate_fn: 57 | stop_epoch: 117 58 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r34vd_dsp_1x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r34vd_120e_coco_ema.pth 10 | 11 | output_dir: ./output/rtdetrv2_r34vd_dsp_1x_coco 12 | 13 | 14 | PResNet: 15 | depth: 34 16 | freeze_at: -1 17 | freeze_norm: False 18 | pretrained: True 19 | 20 | 21 | HybridEncoder: 22 | in_channels: [128, 256, 512] 23 | hidden_dim: 256 24 | expansion: 0.5 25 | 26 | 27 | RTDETRTransformerv2: 28 | num_layers: 4 29 | cross_attn_method: discrete 30 | 31 | 32 | epoches: 12 33 | 34 | optimizer: 35 | type: AdamW 36 | params: 37 | - 38 | params: '^(?=.*backbone)(?!.*norm|bn).*$' 39 | lr: 0.00005 40 | - 41 | params: '^(?=.*backbone)(?=.*norm|bn).*$' 42 | lr: 0.00005 43 | weight_decay: 0. 44 | - 45 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$' 46 | weight_decay: 0. 47 | 48 | lr: 0.0001 49 | betas: [0.9, 0.999] 50 | weight_decay: 0.0001 51 | 52 | 53 | train_dataloader: 54 | dataset: 55 | transforms: 56 | policy: 57 | epoch: 10 58 | collate_fn: 59 | stop_epoch: 10 60 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_6x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | output_dir: ./output/rtdetrv2_r50vd_6x_coco 11 | 12 | 13 | 14 | optimizer: 15 | type: AdamW 16 | params: 17 | - 18 | params: '^(?=.*backbone)(?!.*norm).*$' 19 | lr: 0.00001 20 | - 21 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 22 | weight_decay: 0. 
23 | 24 | lr: 0.0001 25 | betas: [0.9, 0.999] 26 | weight_decay: 0.0001 27 | 28 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_dsp_1x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | 10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_6x_coco_ema.pth 11 | 12 | output_dir: ./output/rtdetrv2_r50vd_dsp_1x_coco 13 | 14 | 15 | RTDETRTransformerv2: 16 | cross_attn_method: discrete 17 | 18 | 19 | epoches: 12 20 | 21 | train_dataloader: 22 | dataset: 23 | transforms: 24 | policy: 25 | epoch: 10 26 | collate_fn: 27 | stop_epoch: 10 28 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_m_7x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetrv2_r50vd_m_6x_coco 10 | 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | 16 | RTDETRTransformerv2: 17 | eval_idx: 2 # use 3th decoder layer to eval 18 | 19 | 20 | epoches: 84 21 | 22 | optimizer: 23 | type: AdamW 24 | params: 25 | - 26 | params: '^(?=.*backbone)(?!.*norm).*$' 27 | lr: 0.00001 28 | - 29 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 30 | weight_decay: 0. 31 | 32 | lr: 0.0001 33 | betas: [0.9, 0.999] 34 | weight_decay: 0.0001 35 | 36 | 37 | train_dataloader: 38 | dataset: 39 | transforms: 40 | policy: 41 | epoch: 81 42 | collate_fn: 43 | stop_epoch: 81 44 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r50vd_m_dsp_3x_coco.yml: -------------------------------------------------------------------------------- 1 | __include__: [ 2 | '../dataset/coco_detection.yml', 3 | '../runtime.yml', 4 | './include/dataloader.yml', 5 | './include/optimizer.yml', 6 | './include/rtdetrv2_r50vd.yml', 7 | ] 8 | 9 | output_dir: ./output/rtdetrv2_r50vd_m_dsp_3x_coco 10 | tuning: https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetrv2_r50vd_m_7x_coco_ema.pth 11 | 12 | HybridEncoder: 13 | expansion: 0.5 14 | 15 | 16 | RTDETRTransformerv2: 17 | eval_idx: 2 # use 3th decoder layer to eval 18 | cross_attn_method: discrete 19 | 20 | 21 | epoches: 36 22 | 23 | optimizer: 24 | type: AdamW 25 | params: 26 | - 27 | params: '^(?=.*backbone)(?!.*norm).*$' 28 | lr: 0.00001 29 | - 30 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$' 31 | weight_decay: 0. 
32 | 33 | lr: 0.0001 34 | betas: [0.9, 0.999] 35 | weight_decay: 0.0001 36 | 37 | 38 | train_dataloader: 39 | dataset: 40 | transforms: 41 | policy: 42 | epoch: 33 43 | collate_fn: 44 | stop_epoch: 33 45 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/configs/runtime.yml: -------------------------------------------------------------------------------- 1 | 2 | print_freq: 100 3 | output_dir: './logs' 4 | checkpoint_freq: 1 5 | 6 | 7 | sync_bn: True 8 | find_unused_parameters: False 9 | 10 | 11 | use_amp: False 12 | scaler: 13 | type: GradScaler 14 | enabled: True 15 | 16 | 17 | use_ema: False 18 | ema: 19 | type: ModelEMA 20 | decay: 0.9999 21 | warmups: 2000 22 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/dataset/readme.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ``` 4 | # configs/dataset/xxx.yml 5 | ln -s /path/to/dataset/ ./dataset/dataset_name 6 | ``` 7 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3.9" 2 | 3 | services: 4 | tensorrt-container: 5 | build: 6 | context: . 7 | dockerfile: Dockerfile 8 | image: rtdetr-v2:23.01 9 | volumes: 10 | - ./:/workspace 11 | runtime: nvidia 12 | environment: 13 | - NVIDIA_VISIBLE_DEVICES=all 14 | stdin_open: true 15 | tty: true 16 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/readme.md: -------------------------------------------------------------------------------- 1 | # Deployment 2 | 3 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/rtdetrv2_onnxruntime.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
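Example usage (file paths below are placeholders):
    python references/deploy/rtdetrv2_onnxruntime.py --onnx-file model.onnx --im-file image.jpg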
2 | """ 3 | 4 | import torch 5 | import torchvision.transforms as T 6 | 7 | import numpy as np 8 | import onnxruntime as ort 9 | from PIL import Image, ImageDraw 10 | 11 | 12 | def draw(images, labels, boxes, scores, thrh = 0.6): 13 | for i, im in enumerate(images): 14 | draw = ImageDraw.Draw(im) 15 | 16 | scr = scores[i] 17 | lab = labels[i][scr > thrh] 18 | box = boxes[i][scr > thrh] 19 | 20 | for b in box: 21 | draw.rectangle(list(b), outline='red',) 22 | draw.text((b[0], b[1]), text=str(lab[i].item()), fill='blue', ) 23 | 24 | im.save(f'results_{i}.jpg') 25 | 26 | 27 | def main(args, ): 28 | """main 29 | """ 30 | sess = ort.InferenceSession(args.onnx_file) 31 | print(ort.get_device()) 32 | 33 | im_pil = Image.open(args.im_file).convert('RGB') 34 | w, h = im_pil.size 35 | orig_size = torch.tensor([w, h])[None] 36 | 37 | transforms = T.Compose([ 38 | T.Resize((640, 640)), 39 | T.ToTensor(), 40 | ]) 41 | im_data = transforms(im_pil)[None] 42 | 43 | output = sess.run( 44 | # output_names=['labels', 'boxes', 'scores'], 45 | output_names=None, 46 | input_feed={'images': im_data.data.numpy(), "orig_target_sizes": orig_size.data.numpy()} 47 | ) 48 | 49 | labels, boxes, scores = output 50 | 51 | draw([im_pil], labels, boxes, scores) 52 | 53 | 54 | if __name__ == '__main__': 55 | import argparse 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--onnx-file', type=str, ) 58 | parser.add_argument('--im-file', type=str, ) 59 | # parser.add_argument('-d', '--device', type=str, default='cpu') 60 | args = parser.parse_args() 61 | main(args) 62 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/rtdetrv2_openvino.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | # please reference: https://github.com/guojin-yan/RT-DETR-OpenVINO -------------------------------------------------------------------------------- /rtdetrv2_pytorch/references/deploy/rtdetrv2_torch.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torchvision.transforms as T 7 | 8 | import numpy as np 9 | from PIL import Image, ImageDraw 10 | 11 | from src.core import YAMLConfig 12 | 13 | 14 | def draw(images, labels, boxes, scores, thrh = 0.6): 15 | for i, im in enumerate(images): 16 | draw = ImageDraw.Draw(im) 17 | 18 | scr = scores[i] 19 | lab = labels[i][scr > thrh] 20 | box = boxes[i][scr > thrh] 21 | scrs = scores[i][scr > thrh] 22 | 23 | for j,b in enumerate(box): 24 | draw.rectangle(list(b), outline='red',) 25 | draw.text((b[0], b[1]), text=f"{lab[j].item()} {round(scrs[j].item(),2)}", fill='blue', ) 26 | 27 | im.save(f'results_{i}.jpg') 28 | 29 | 30 | def main(args, ): 31 | """main 32 | """ 33 | cfg = YAMLConfig(args.config, resume=args.resume) 34 | 35 | if args.resume: 36 | checkpoint = torch.load(args.resume, map_location='cpu') 37 | if 'ema' in checkpoint: 38 | state = checkpoint['ema']['module'] 39 | else: 40 | state = checkpoint['model'] 41 | else: 42 | raise AttributeError('Only support resume to load model.state_dict by now.') 43 | 44 | # NOTE load train mode state -> convert to deploy mode 45 | cfg.model.load_state_dict(state) 46 | 47 | class Model(nn.Module): 48 | def __init__(self, ) -> None: 49 | super().__init__() 50 | self.model = cfg.model.deploy() 51 | self.postprocessor = cfg.postprocessor.deploy() 52 | 53 | def forward(self, images, orig_target_sizes): 54 | outputs = self.model(images) 55 | outputs = self.postprocessor(outputs, orig_target_sizes) 56 | return outputs 57 | 58 | model = Model().to(args.device) 59 | 60 | im_pil = Image.open(args.im_file).convert('RGB') 61 | w, h = im_pil.size 62 | orig_size = torch.tensor([w, h])[None].to(args.device) 63 | 64 | transforms = T.Compose([ 65 | T.Resize((640, 640)), 66 | T.ToTensor(), 67 | ]) 68 | im_data = transforms(im_pil)[None].to(args.device) 69 | 70 | output = model(im_data, orig_size) 71 | labels, boxes, scores = output 72 | 73 | draw([im_pil], labels, boxes, scores) 74 | 75 | 76 | if __name__ == '__main__': 77 | import argparse 78 | parser = argparse.ArgumentParser() 79 | parser.add_argument('-c', '--config', type=str, ) 80 | parser.add_argument('-r', '--resume', type=str, ) 81 | parser.add_argument('-f', '--im-file', type=str, ) 82 | parser.add_argument('-d', '--device', type=str, default='cpu') 83 | args = parser.parse_args() 84 | main(args) 85 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.1 2 | torchvision>=0.15.2 3 | faster-coco-eval>=1.6.5 4 | PyYAML 5 | tensorboard 6 | scipy 7 | pycocotools 8 | onnx 9 | onnxruntime-gpu 10 | tensorrt==8.5.2.2 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | # for register purpose 5 | from . import optim 6 | from . import data 7 | from . import nn 8 | from . import zoo -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/core/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | from .workspace import GLOBAL_CONFIG, register, create 5 | from .yaml_utils import * 6 | from ._config import BaseConfig 7 | from .yaml_config import YAMLConfig 8 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from .dataset import * 5 | from .transforms import * 6 | from .dataloader import * 7 | 8 | from ._misc import convert_to_tv_tensor 9 | 10 | 11 | 12 | 13 | # def set_epoch(self, epoch) -> None: 14 | # self.epoch = epoch 15 | # def _set_epoch_func(datasets): 16 | # """Add `set_epoch` for datasets 17 | # """ 18 | # from ..core import register 19 | # for ds in datasets: 20 | # register(ds)(set_epoch) 21 | # _set_epoch_func([CIFAR10, VOCDetection, CocoDetection]) -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/_misc.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import importlib.metadata 5 | from torch import Tensor 6 | 7 | if importlib.metadata.version('torchvision') == '0.15.2': 8 | import torchvision 9 | torchvision.disable_beta_transforms_warning() 10 | 11 | from torchvision.datapoints import BoundingBox as BoundingBoxes 12 | from torchvision.datapoints import BoundingBoxFormat, Mask, Image, Video 13 | from torchvision.transforms.v2 import SanitizeBoundingBox as SanitizeBoundingBoxes 14 | _boxes_keys = ['format', 'spatial_size'] 15 | 16 | elif '0.17' > importlib.metadata.version('torchvision') >= '0.16': 17 | import torchvision 18 | torchvision.disable_beta_transforms_warning() 19 | 20 | from torchvision.transforms.v2 import SanitizeBoundingBoxes 21 | from torchvision.tv_tensors import ( 22 | BoundingBoxes, BoundingBoxFormat, Mask, Image, Video) 23 | _boxes_keys = ['format', 'canvas_size'] 24 | 25 | elif importlib.metadata.version('torchvision') >= '0.17': 26 | import torchvision 27 | from torchvision.transforms.v2 import SanitizeBoundingBoxes 28 | from torchvision.tv_tensors import ( 29 | BoundingBoxes, BoundingBoxFormat, Mask, Image, Video) 30 | _boxes_keys = ['format', 'canvas_size'] 31 | 32 | else: 33 | raise RuntimeError('Please make sure torchvision version >= 0.15.2') 34 | 35 | 36 | 37 | def convert_to_tv_tensor(tensor: Tensor, key: str, box_format='xyxy', spatial_size=None) -> Tensor: 38 | """ 39 | Args: 40 | tensor (Tensor): input tensor 41 | key (str): transform to key 42 | 43 | Return: 44 | Dict[str, TV_Tensor] 45 | """ 46 | assert key in ('boxes', 'masks', ), "Only support 'boxes' and 'masks'" 47 | 48 | if key == 'boxes': 49 | box_format = getattr(BoundingBoxFormat, box_format.upper()) 50 | _kwargs = dict(zip(_boxes_keys, [box_format, spatial_size])) 51 | return BoundingBoxes(tensor, **_kwargs) 52 | 53 | if key == 'masks': 54 | return Mask(tensor) 55 | 56 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | # from ._dataset import DetDataset 5 | from .cifar_dataset import CIFAR10 6 | from .coco_dataset import CocoDetection 7 | from .coco_dataset import ( 8 | CocoDetection, 9 | mscoco_category2name, 10 | mscoco_category2label, 11 | mscoco_label2category, 12 | ) 13 | from .coco_eval import CocoEvaluator 14 | from .coco_utils import get_coco_api_from_dataset 15 | from .voc_detection import VOCDetection 16 | from .voc_eval import VOCEvaluator 17 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/_dataset.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torch.utils.data as data 6 | 7 | class DetDataset(data.Dataset): 8 | def __getitem__(self, index): 9 | img, target = self.load_item(index) 10 | if self.transforms is not None: 11 | img, target, _ = self.transforms(img, target, self) 12 | return img, target 13 | 14 | def load_item(self, index): 15 | raise NotImplementedError("Please implement this function to return item before `transforms`.") 16 | 17 | def set_epoch(self, epoch) -> None: 18 | self._epoch = epoch 19 | 20 | @property 21 | def epoch(self): 22 | return self._epoch if hasattr(self, '_epoch') else -1 23 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/cifar_dataset.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torchvision 6 | from typing import Optional, Callable 7 | 8 | from ...core import register 9 | 10 | @register() 11 | class CIFAR10(torchvision.datasets.CIFAR10): 12 | __inject__ = ['transform', 'target_transform'] 13 | 14 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None: 15 | super().__init__(root, train, transform, target_transform, download) 16 | 17 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/voc_detection.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | from sympy import im 5 | import torch 6 | import torchvision 7 | import torchvision.transforms.functional as TVF 8 | 9 | import os 10 | from PIL import Image 11 | from typing import Optional, Callable 12 | 13 | try: 14 | from defusedxml.ElementTree import parse as ET_parse 15 | except ImportError: 16 | from xml.etree.ElementTree import parse as ET_parse 17 | 18 | from ._dataset import DetDataset 19 | from .._misc import convert_to_tv_tensor 20 | from ...core import register 21 | 22 | @register() 23 | class VOCDetection(torchvision.datasets.VOCDetection, DetDataset): 24 | __inject__ = ['transforms', ] 25 | 26 | def __init__(self, root: str, ann_file: str = "trainval.txt", label_file: str = "label_list.txt", transforms: Optional[Callable] = None): 27 | 28 | with open(os.path.join(root, ann_file), 'r') as f: 29 | lines = [x.strip() for x in f.readlines()] 30 | lines = [x.split(' ') for x in lines] 31 | 32 | self.images = [os.path.join(root, lin[0]) for lin in lines] 33 | self.targets = [os.path.join(root, lin[1]) for lin in lines] 34 | assert len(self.images) == len(self.targets) 35 | 36 | with open(os.path.join(root + label_file), 'r') as f: 37 | labels = f.readlines() 38 | labels = [lab.strip() for lab in labels] 39 | 40 | self.transforms = transforms 41 | self.labels_map = {lab: i for i, lab in enumerate(labels)} 42 | 43 | def __getitem__(self, index: int): 44 | image, target = self.load_item(index) 45 | if self.transforms is not None: 46 | image, target, _ = self.transforms(image, target, self) 47 | # target["orig_size"] = torch.tensor(TVF.get_image_size(image)) 48 | return image, target 49 | 50 | def load_item(self, index: int): 51 | image = Image.open(self.images[index]).convert("RGB") 52 | target = self.parse_voc_xml(ET_parse(self.annotations[index]).getroot()) 53 | 54 | output = {} 55 | output["image_id"] = torch.tensor([index]) 56 | for k in ['area', 'boxes', 'labels', 'iscrowd']: 57 | output[k] = [] 58 | 59 | for blob in target['annotation']['object']: 60 | box = [float(v) for v in blob['bndbox'].values()] 61 | output["boxes"].append(box) 62 | output["labels"].append(blob['name']) 63 | output["area"].append((box[2] - box[0]) * (box[3] - box[1])) 64 | output["iscrowd"].append(0) 65 | 66 | w, h = image.size 67 | boxes = torch.tensor(output["boxes"]) if len(output["boxes"]) > 0 else torch.zeros(0, 4) 68 | output['boxes'] = convert_to_tv_tensor(boxes, 'boxes', box_format='xyxy', spatial_size=[h, w]) 69 | output['labels'] = torch.tensor([self.labels_map[lab] for lab in output["labels"]]) 70 | output['area'] = torch.tensor(output['area']) 71 | output["iscrowd"] = torch.tensor(output["iscrowd"]) 72 | output["orig_size"] = torch.tensor([w, h]) 73 | 74 | return image, output 75 | 76 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/dataset/voc_eval.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torchvision 6 | 7 | 8 | class VOCEvaluator(object): 9 | def __init__(self) -> None: 10 | pass -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | from ._transforms import ( 6 | EmptyTransform, 7 | RandomPhotometricDistort, 8 | RandomZoomOut, 9 | RandomIoUCrop, 10 | RandomHorizontalFlip, 11 | Resize, 12 | PadToSize, 13 | SanitizeBoundingBoxes, 14 | RandomCrop, 15 | Normalize, 16 | ConvertBoxes, 17 | ConvertPILImage, 18 | ) 19 | from .container import Compose 20 | from .mosaic import Mosaic 21 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/container.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | import torchvision 8 | torchvision.disable_beta_transforms_warning() 9 | import torchvision.transforms.v2 as T 10 | 11 | from typing import Any, Dict, List, Optional 12 | 13 | from ._transforms import EmptyTransform 14 | from ...core import register, GLOBAL_CONFIG 15 | 16 | 17 | @register() 18 | class Compose(T.Compose): 19 | def __init__(self, ops, policy=None) -> None: 20 | transforms = [] 21 | if ops is not None: 22 | for op in ops: 23 | if isinstance(op, dict): 24 | name = op.pop('type') 25 | transfom = getattr(GLOBAL_CONFIG[name]['_pymodule'], GLOBAL_CONFIG[name]['_name'])(**op) 26 | transforms.append(transfom) 27 | op['type'] = name 28 | 29 | elif isinstance(op, nn.Module): 30 | transforms.append(op) 31 | 32 | else: 33 | raise ValueError('') 34 | else: 35 | transforms =[EmptyTransform(), ] 36 | 37 | super().__init__(transforms=transforms) 38 | 39 | if policy is None: 40 | policy = {'name': 'default'} 41 | 42 | self.policy = policy 43 | self.global_samples = 0 44 | 45 | def forward(self, *inputs: Any) -> Any: 46 | return self.get_forward(self.policy['name'])(*inputs) 47 | 48 | def get_forward(self, name): 49 | forwards = { 50 | 'default': self.default_forward, 51 | 'stop_epoch': self.stop_epoch_forward, 52 | 'stop_sample': self.stop_sample_forward, 53 | } 54 | return forwards[name] 55 | 56 | def default_forward(self, *inputs: Any) -> Any: 57 | sample = inputs if len(inputs) > 1 else inputs[0] 58 | for transform in self.transforms: 59 | sample = transform(sample) 60 | return sample 61 | 62 | def stop_epoch_forward(self, *inputs: Any): 63 | sample = inputs if len(inputs) > 1 else inputs[0] 64 | dataset = sample[-1] 65 | 66 | cur_epoch = dataset.epoch 67 | policy_ops = self.policy['ops'] 68 | policy_epoch = self.policy['epoch'] 69 | 70 | for transform in self.transforms: 71 | if type(transform).__name__ in policy_ops and cur_epoch >= policy_epoch: 72 | pass 73 | else: 74 | sample = transform(sample) 75 | 76 | return sample 77 | 78 | 79 | def stop_sample_forward(self, *inputs: Any): 80 | sample = inputs if len(inputs) > 1 else inputs[0] 81 | dataset = sample[-1] 82 | 83 | cur_epoch = dataset.epoch 84 | policy_ops = self.policy['ops'] 85 | policy_sample = self.policy['sample'] 86 | 87 | for transform in self.transforms: 88 | if type(transform).__name__ in policy_ops and self.global_samples >= policy_sample: 89 | pass 90 | else: 91 | sample = transform(sample) 92 | 93 | self.global_samples += 1 94 | 95 | return sample 96 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/mosaic.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torchvision 6 | torchvision.disable_beta_transforms_warning() 7 | import torchvision.transforms.v2 as T 8 | import torchvision.transforms.v2.functional as F 9 | 10 | import random 11 | from PIL import Image 12 | 13 | from .._misc import convert_to_tv_tensor 14 | from ...core import register 15 | 16 | 17 | @register() 18 | class Mosaic(T.Transform): 19 | def __init__(self, size, max_size=None, ) -> None: 20 | super().__init__() 21 | self.resize = T.Resize(size=size, max_size=max_size) 22 | self.crop = T.RandomCrop(size=max_size if max_size else size) 23 | 24 | # TODO add arg `output_size` for affine` 25 | # self.random_perspective = T.RandomPerspective(distortion_scale=0.5, p=1., ) 26 | self.random_affine = T.RandomAffine(degrees=0, translate=(0.1, 0.1), scale=(0.5, 1.5), fill=114) 27 | 28 | def forward(self, *inputs): 29 | inputs = inputs if len(inputs) > 1 else inputs[0] 30 | image, target, dataset = inputs 31 | 32 | images = [] 33 | targets = [] 34 | indices = random.choices(range(len(dataset)), k=3) 35 | for i in indices: 36 | image, target = dataset.load_item(i) 37 | image, target = self.resize(image, target) 38 | images.append(image) 39 | targets.append(target) 40 | 41 | h, w = F.get_spatial_size(images[0]) 42 | offset = [[0, 0], [w, 0], [0, h], [w, h]] 43 | image = Image.new(mode=images[0].mode, size=(w * 2, h * 2), color=0) 44 | for i, im in enumerate(images): 45 | image.paste(im, offset[i]) 46 | 47 | offset = torch.tensor([[0, 0], [w, 0], [0, h], [w, h]]).repeat(1, 2) 48 | target = {} 49 | for k in targets[0]: 50 | if k == 'boxes': 51 | v = [t[k] + offset[i] for i, t in enumerate(targets)] 52 | else: 53 | v = [t[k] for t in targets] 54 | 55 | if isinstance(v[0], torch.Tensor): 56 | v = torch.cat(v, dim=0) 57 | 58 | target[k] = v 59 | 60 | if 'boxes' in target: 61 | # target['boxes'] = target['boxes'].clamp(0, 640 * 2 - 1) 62 | w, h = image.size 63 | target['boxes'] = convert_to_tv_tensor(target['boxes'], 'boxes', box_format='xyxy', spatial_size=[h, w]) 64 | 65 | if 'masks' in target: 66 | target['masks'] = convert_to_tv_tensor(target['masks'], 'masks') 67 | 68 | image, target = self.random_affine(image, target) 69 | # image, target = self.resize(image, target) 70 | image, target = self.crop(image, target) 71 | 72 | return image, target, dataset 73 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/data/transforms/presets.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from .logger import * 5 | from .visualizer import * 6 | from .dist_utils import setup_seed, setup_print 7 | from .profiler_utils import stats 8 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/lazy_loader.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/util/lazy_loader.py 3 | """ 4 | 5 | 6 | import types 7 | import importlib 8 | 9 | class LazyLoader(types.ModuleType): 10 | """Lazily import a module, mainly to avoid pulling in large dependencies. 
11 | 
12 |     `paddle`, and `ffmpeg` are examples of modules that are large and not always
13 |     needed, and this allows them to only be loaded when they are used.
14 |     """
15 | 
16 |     # The lint error here is incorrect.
17 |     def __init__(self, local_name, parent_module_globals, name, warning=None):
18 |         self._local_name = local_name
19 |         self._parent_module_globals = parent_module_globals
20 |         self._warning = warning
21 | 
22 |         # These members allow doctest to correctly process this module member without
23 |         # triggering self._load(). self._load() mutates parent_module_globals and
24 |         # triggers a "dict mutated during iteration" error from doctest.py.
25 |         # - for from_module()
26 |         self.__module__ = name.rsplit(".", 1)[0]
27 |         # - for is_routine()
28 |         self.__wrapped__ = None
29 | 
30 |         super(LazyLoader, self).__init__(name)
31 | 
32 |     def _load(self):
33 |         """Load the module and insert it into the parent's globals."""
34 |         # Import the target module and insert it into the parent's namespace
35 |         module = importlib.import_module(self.__name__)
36 |         self._parent_module_globals[self._local_name] = module
37 | 
38 |         # Emit a warning if one was specified
39 |         if self._warning:
40 |             # logging.warning(self._warning)
41 |             # Make sure to only warn once.
42 |             self._warning = None
43 | 
44 |         # Update this object's dict so that if someone keeps a reference to the
45 |         # LazyLoader, lookups are efficient (__getattr__ is only called on lookups
46 |         # that fail).
47 |         self.__dict__.update(module.__dict__)
48 | 
49 |         return module
50 | 
51 |     def __getattr__(self, item):
52 |         module = self._load()
53 |         return getattr(module, item)
54 | 
55 |     def __repr__(self):
56 |         # Careful not to trigger _load, since repr may be called in very
57 |         # sensitive places.
58 |         return f"<LazyLoader {self.__name__}>"
59 | 
60 |     def __dir__(self):
61 |         module = self._load()
62 |         return dir(module)
63 | 
64 | 
65 | # import paddle.nn as nn
66 | # nn = LazyLoader("nn", globals(), "paddle.nn")
67 | 
68 | # class M(nn.Layer):
69 | #     def __init__(self) -> None:
70 | #         super().__init__()
71 | 
-------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/profiler_utils.py: --------------------------------------------------------------------------------
1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved.
2 | """ 3 | 4 | import re 5 | import torch 6 | import torch.nn as nn 7 | from torch import Tensor 8 | 9 | from typing import List 10 | 11 | def stats( 12 | model: nn.Module, 13 | data: Tensor=None, 14 | input_shape: List=[1, 3, 640, 640], 15 | device: str='cpu', 16 | verbose=False) -> str: 17 | 18 | is_training = model.training 19 | 20 | model.train() 21 | num_params = sum([p.numel() for p in model.parameters() if p.requires_grad]) 22 | 23 | model.eval() 24 | model = model.to(device) 25 | 26 | if data is None: 27 | data = torch.rand(*input_shape, device=device) 28 | 29 | def trace_handler(prof): 30 | print(prof.key_averages().table( 31 | sort_by="self_cuda_time_total", row_limit=-1)) 32 | 33 | num_active = 2 34 | with torch.profiler.profile( 35 | activities=[ 36 | torch.profiler.ProfilerActivity.CPU, 37 | torch.profiler.ProfilerActivity.CUDA, 38 | ], 39 | schedule=torch.profiler.schedule( 40 | wait=1, 41 | warmup=1, 42 | active=num_active, 43 | repeat=1 44 | ), 45 | # on_trace_ready=trace_handler, 46 | # on_trace_ready=torch.profiler.tensorboard_trace_handler('./log') 47 | # with_modules=True, 48 | with_flops=True, 49 | ) as p: 50 | for _ in range(5): 51 | _ = model(data) 52 | p.step() 53 | 54 | if is_training: 55 | model.train() 56 | 57 | info = p.key_averages().table(sort_by="self_cuda_time_total", row_limit=-1) 58 | num_flops = sum([float(v.strip()) for v in re.findall('(\d+.?\d+ *\n)', info)]) / num_active 59 | 60 | if verbose: 61 | # print(info) 62 | print(f'Total number of trainable parameters: {num_params}') 63 | print(f'Total number of flops: {int(num_flops)}M with {input_shape}') 64 | 65 | return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info} 66 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/misc/visualizer.py: -------------------------------------------------------------------------------- 1 | """"Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torch.utils.data 6 | 7 | import torchvision 8 | torchvision.disable_beta_transforms_warning() 9 | 10 | import PIL 11 | 12 | __all__ = ['show_sample'] 13 | 14 | def show_sample(sample): 15 | """for coco dataset/dataloader 16 | """ 17 | import matplotlib.pyplot as plt 18 | from torchvision.transforms.v2 import functional as F 19 | from torchvision.utils import draw_bounding_boxes 20 | 21 | image, target = sample 22 | if isinstance(image, PIL.Image.Image): 23 | image = F.to_image_tensor(image) 24 | 25 | image = F.convert_dtype(image, torch.uint8) 26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3) 27 | 28 | fig, ax = plt.subplots() 29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy()) 30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[]) 31 | fig.tight_layout() 32 | fig.show() 33 | plt.show() 34 | 35 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | from .arch import * 6 | from .criterion import * 7 | from .postprocessor import * 8 | 9 | # 10 | from .backbone import * 11 | 12 | 13 | from .backbone import ( 14 | get_activation, 15 | FrozenBatchNorm2d, 16 | freeze_batch_norm2d, 17 | ) -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/arch/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | from .classification import Classification, ClassHead 6 | from .yolo import YOLO -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/arch/classification.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | from ...core import register 9 | 10 | 11 | __all__ = ['Classification', 'ClassHead'] 12 | 13 | 14 | @register() 15 | class Classification(torch.nn.Module): 16 | __inject__ = ['backbone', 'head'] 17 | 18 | def __init__(self, backbone: nn.Module, head: nn.Module=None): 19 | super().__init__() 20 | 21 | self.backbone = backbone 22 | self.head = head 23 | 24 | def forward(self, x): 25 | x = self.backbone(x) 26 | 27 | if self.head is not None: 28 | x = self.head(x) 29 | 30 | return x 31 | 32 | 33 | @register() 34 | class ClassHead(nn.Module): 35 | def __init__(self, hidden_dim, num_classes): 36 | super().__init__() 37 | self.pool = nn.AdaptiveAvgPool2d(1) 38 | self.proj = nn.Linear(hidden_dim, num_classes) 39 | 40 | def forward(self, x): 41 | x = x[0] if isinstance(x, (list, tuple)) else x 42 | x = self.pool(x) 43 | x = x.reshape(x.shape[0], -1) 44 | x = self.proj(x) 45 | return x 46 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/arch/yolo.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | 6 | from ...core import register 7 | 8 | 9 | __all__ = ['YOLO', ] 10 | 11 | 12 | @register() 13 | class YOLO(torch.nn.Module): 14 | __inject__ = ['backbone', 'neck', 'head', ] 15 | 16 | def __init__(self, backbone: torch.nn.Module, neck, head): 17 | super().__init__() 18 | self.backbone = backbone 19 | self.neck = neck 20 | self.head = head 21 | 22 | def forward(self, x, **kwargs): 23 | x = self.backbone(x) 24 | x = self.neck(x) 25 | x = self.head(x) 26 | return x 27 | 28 | def deploy(self, ): 29 | self.eval() 30 | for m in self.modules(): 31 | if m is not self and hasattr(m, 'deploy'): 32 | m.deploy() 33 | return self 34 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | from .common import ( 5 | get_activation, 6 | FrozenBatchNorm2d, 7 | freeze_batch_norm2d, 8 | ) 9 | from .presnet import PResNet 10 | from .test_resnet import MResNet 11 | 12 | from .timm_model import TimmModel 13 | from .torchvision_model import TorchVisionModel 14 | 15 | from .csp_resnet import CSPResNet 16 | from .csp_darknet import CSPDarkNet, CSPPAN 17 | 18 | from .hgnetv2 import HGNetv2 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/test_resnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from collections import OrderedDict 6 | 7 | 8 | from ...core import register 9 | 10 | 11 | class BasicBlock(nn.Module): 12 | expansion = 1 13 | 14 | def __init__(self, in_planes, planes, stride=1): 15 | super(BasicBlock, self).__init__() 16 | 17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 18 | self.bn1 = nn.BatchNorm2d(planes) 19 | 20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False) 21 | self.bn2 = nn.BatchNorm2d(planes) 22 | 23 | self.shortcut = nn.Sequential() 24 | if stride != 1 or in_planes != self.expansion*planes: 25 | self.shortcut = nn.Sequential( 26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False), 27 | nn.BatchNorm2d(self.expansion*planes) 28 | ) 29 | def forward(self, x): 30 | out = F.relu(self.bn1(self.conv1(x))) 31 | out = self.bn2(self.conv2(out)) 32 | out += self.shortcut(x) 33 | out = F.relu(out) 34 | return out 35 | 36 | 37 | 38 | class _ResNet(nn.Module): 39 | def __init__(self, block, num_blocks, num_classes=10): 40 | super().__init__() 41 | self.in_planes = 64 42 | 43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False) 44 | self.bn1 = nn.BatchNorm2d(64) 45 | 46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1) 47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2) 48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2) 49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2) 50 | 51 | self.linear = nn.Linear(512 * block.expansion, num_classes) 52 | 53 | def _make_layer(self, block, planes, num_blocks, stride): 54 | strides = [stride] + [1]*(num_blocks-1) 55 | layers = [] 56 | for stride in strides: 57 | layers.append(block(self.in_planes, planes, stride)) 58 | self.in_planes = planes * block.expansion 59 | return nn.Sequential(*layers) 60 | 61 | def forward(self, x): 62 | out = F.relu(self.bn1(self.conv1(x))) 63 | out = self.layer1(out) 64 | out = self.layer2(out) 65 | out = self.layer3(out) 66 | out = self.layer4(out) 67 | out = F.avg_pool2d(out, 4) 68 | out = out.view(out.size(0), -1) 69 | out = self.linear(out) 70 | return out 71 | 72 | 73 | @register() 74 | class MResNet(nn.Module): 75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None: 76 | super().__init__() 77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes) 78 | 79 | def forward(self, x): 80 | return self.model(x) 81 | 82 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/timm_model.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | 3 | https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055#0583 4 | """ 5 | 6 | import torch 7 | from torchvision.models.feature_extraction import get_graph_node_names, create_feature_extractor 8 | 9 | from .utils import IntermediateLayerGetter 10 | from ...core import register 11 | 12 | 13 | @register() 14 | class TimmModel(torch.nn.Module): 15 | def __init__(self, \ 16 | name, 17 | return_layers, 18 | pretrained=False, 19 | exportable=True, 20 | features_only=True, 21 | **kwargs) -> None: 22 | 23 | super().__init__() 24 | 25 | import timm 26 | model = timm.create_model( 27 | name, 28 | pretrained=pretrained, 29 | exportable=exportable, 30 | features_only=features_only, 31 | **kwargs 32 | ) 33 | # nodes, _ = get_graph_node_names(model) 34 | # print(nodes) 35 | # features = {'': ''} 36 | # model = create_feature_extractor(model, return_nodes=features) 37 | 38 | assert set(return_layers).issubset(model.feature_info.module_name()), \ 39 | f'return_layers should be a subset of {model.feature_info.module_name()}' 40 | 41 | # self.model = model 42 | self.model = IntermediateLayerGetter(model, return_layers) 43 | 44 | return_idx = [model.feature_info.module_name().index(name) for name in return_layers] 45 | self.strides = [model.feature_info.reduction()[i] for i in return_idx] 46 | self.channels = [model.feature_info.channels()[i] for i in return_idx] 47 | self.return_idx = return_idx 48 | self.return_layers = return_layers 49 | 50 | def forward(self, x: torch.Tensor): 51 | outputs = self.model(x) 52 | # outputs = [outputs[i] for i in self.return_idx] 53 | return outputs 54 | 55 | 56 | if __name__ == '__main__': 57 | 58 | model = TimmModel(name='resnet34', return_layers=['layer2', 'layer3']) 59 | data = torch.rand(1, 3, 640, 640) 60 | outputs = model(data) 61 | 62 | for output in outputs: 63 | print(output.shape) 64 | 65 | """ 66 | model: 67 | type: TimmModel 68 | name: resnet34 69 | return_layers: ['layer2', 'layer4'] 70 | """ 71 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/torchvision_model.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torchvision 6 | 7 | from ...core import register 8 | from .utils import IntermediateLayerGetter 9 | 10 | __all__ = ['TorchVisionModel'] 11 | 12 | @register() 13 | class TorchVisionModel(torch.nn.Module): 14 | def __init__(self, name, return_layers, weights=None, **kwargs) -> None: 15 | super().__init__() 16 | 17 | if weights is not None: 18 | weights = getattr(torchvision.models.get_model_weights(name), weights) 19 | 20 | model = torchvision.models.get_model(name, weights=weights, **kwargs) 21 | 22 | # TODO hard code. 
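        # Note (assumption about torchvision layouts): models such as MobileNet or
        # EfficientNet wrap their stages in a `features` Sequential, while e.g.
        # ResNet registers layer1..layer4 at the top level, hence the branch below.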
23 | if hasattr(model, 'features'): 24 | model = IntermediateLayerGetter(model.features, return_layers) 25 | else: 26 | model = IntermediateLayerGetter(model, return_layers) 27 | 28 | self.model = model 29 | 30 | def forward(self, x): 31 | return self.model(x) 32 | 33 | 34 | # TorchVisionModel('swin_t', return_layers=['5', '7']) 35 | # TorchVisionModel('resnet34', return_layers=['layer2','layer3', 'layer4']) 36 | 37 | """ 38 | TorchVisionModel: 39 | name: swin_t 40 | return_layers: ['5', '7'] 41 | weights: DEFAULT 42 | 43 | 44 | model: 45 | type: TorchVisionModel 46 | name: resnet34 47 | return_layers: ['layer2','layer3', 'layer4'] 48 | weights: DEFAULT 49 | """ -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/backbone/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py 3 | 4 | Copyright(c) 2023 lyuwenyu. All Rights Reserved. 5 | """ 6 | 7 | from collections import OrderedDict 8 | from typing import Dict, List 9 | 10 | 11 | import torch.nn as nn 12 | 13 | 14 | class IntermediateLayerGetter(nn.ModuleDict): 15 | """ 16 | Module wrapper that returns intermediate layers from a model 17 | 18 | It has a strong assumption that the modules have been registered 19 | into the model in the same order as they are used. 20 | This means that one should **not** reuse the same nn.Module 21 | twice in the forward if you want this to work. 22 | 23 | Additionally, it is only able to query submodules that are directly 24 | assigned to the model. So if `model` is passed, `model.feature1` can 25 | be returned, but not `model.feature1.layer2`. 26 | """ 27 | 28 | _version = 3 29 | 30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None: 31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]): 32 | raise ValueError("return_layers are not present in model. {}"\ 33 | .format([name for name, _ in model.named_children()])) 34 | orig_return_layers = return_layers 35 | return_layers = {str(k): str(k) for k in return_layers} 36 | layers = OrderedDict() 37 | for name, module in model.named_children(): 38 | layers[name] = module 39 | if name in return_layers: 40 | del return_layers[name] 41 | if not return_layers: 42 | break 43 | 44 | super().__init__(layers) 45 | self.return_layers = orig_return_layers 46 | 47 | def forward(self, x): 48 | outputs = [] 49 | for name, module in self.items(): 50 | x = module(x) 51 | if name in self.return_layers: 52 | outputs.append(x) 53 | 54 | return outputs 55 | 56 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/criterion/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torch.nn as nn 6 | from ...core import register 7 | 8 | from .det_criterion import DetCriterion 9 | 10 | CrossEntropyLoss = register()(nn.CrossEntropyLoss) 11 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | from .nms_postprocessor import DetNMSPostProcessor -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/box_revert.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | import torch 5 | import torchvision 6 | from torch import Tensor 7 | from enum import Enum 8 | 9 | 10 | class BoxProcessFormat(Enum): 11 | """Box process format 12 | 13 | Available formats are 14 | * ``RESIZE`` 15 | * ``RESIZE_KEEP_RATIO`` 16 | * ``RESIZE_KEEP_RATIO_PADDING`` 17 | """ 18 | RESIZE = 1 19 | RESIZE_KEEP_RATIO = 2 20 | RESIZE_KEEP_RATIO_PADDING = 3 21 | 22 | 23 | def box_revert( 24 | boxes: Tensor, 25 | orig_sizes: Tensor=None, 26 | eval_sizes: Tensor=None, 27 | inpt_sizes: Tensor=None, 28 | inpt_padding: Tensor=None, 29 | normalized: bool=True, 30 | in_fmt: str='cxcywh', 31 | out_fmt: str='xyxy', 32 | process_fmt=BoxProcessFormat.RESIZE, 33 | ) -> Tensor: 34 | """ 35 | Args: 36 | boxes(Tensor), [N, :, 4], (x1, y1, x2, y2), pred boxes. 37 | inpt_sizes(Tensor), [N, 2], (w, h). input sizes. 38 | orig_sizes(Tensor), [N, 2], (w, h). origin sizes. 39 | inpt_padding (Tensor), [N, 2], (w_pad, h_pad, ...). 40 | (inpt_sizes + inpt_padding) == eval_sizes 41 | """ 42 | assert in_fmt in ('cxcywh', 'xyxy'), '' 43 | 44 | if normalized and eval_sizes is not None: 45 | boxes = boxes * eval_sizes.repeat(1, 2).unsqueeze(1) 46 | 47 | if inpt_padding is not None: 48 | if in_fmt == 'xyxy': 49 | boxes -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1) 50 | elif in_fmt == 'cxcywh': 51 | boxes[..., :2] -= inpt_padding[:, :2].repeat(1, 2).unsqueeze(1) 52 | 53 | if orig_sizes is not None: 54 | orig_sizes = orig_sizes.repeat(1, 2).unsqueeze(1) 55 | if inpt_sizes is not None: 56 | inpt_sizes = inpt_sizes.repeat(1, 2).unsqueeze(1) 57 | boxes = boxes * (orig_sizes / inpt_sizes) 58 | else: 59 | boxes = boxes * orig_sizes 60 | 61 | boxes = torchvision.ops.box_convert(boxes, in_fmt=in_fmt, out_fmt=out_fmt) 62 | return boxes 63 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/detr_postprocessor.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import torchvision 9 | 10 | 11 | __all__ = ['DetDETRPostProcessor'] 12 | 13 | from .box_revert import box_revert 14 | from .box_revert import BoxProcessFormat 15 | 16 | def mod(a, b): 17 | out = a - a // b * b 18 | return out 19 | 20 | class DetDETRPostProcessor(nn.Module): 21 | def __init__( 22 | self, 23 | num_classes=80, 24 | use_focal_loss=True, 25 | num_top_queries=300, 26 | box_process_format=BoxProcessFormat.RESIZE, 27 | ) -> None: 28 | super().__init__() 29 | self.use_focal_loss = use_focal_loss 30 | self.num_top_queries = num_top_queries 31 | self.num_classes = int(num_classes) 32 | self.box_process_format = box_process_format 33 | self.deploy_mode = False 34 | 35 | def extra_repr(self) -> str: 36 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}' 37 | 38 | def forward(self, outputs, **kwargs): 39 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] 40 | 41 | if self.use_focal_loss: 42 | scores = F.sigmoid(logits) 43 | scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1) 44 | labels = index % self.num_classes 45 | # labels = mod(index, self.num_classes) # for tensorrt 46 | index = index // self.num_classes 47 | boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1])) 48 | 49 | else: 50 | scores = F.softmax(logits)[:, :, :-1] 51 | scores, labels = scores.max(dim=-1) 52 | if scores.shape[1] > self.num_top_queries: 53 | scores, index = torch.topk(scores, self.num_top_queries, dim=-1) 54 | labels = torch.gather(labels, dim=1, index=index) 55 | boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1])) 56 | 57 | if kwargs is not None: 58 | boxes = box_revert( 59 | boxes, 60 | in_fmt='cxcywh', 61 | out_fmt='xyxy', 62 | process_fmt=self.box_process_format, 63 | normalized=True, 64 | **kwargs, 65 | ) 66 | 67 | # TODO for onnx export 68 | if self.deploy_mode: 69 | return labels, boxes, scores 70 | 71 | results = [] 72 | for lab, box, sco in zip(labels, boxes, scores): 73 | result = dict(labels=lab, boxes=box, scores=sco) 74 | results.append(result) 75 | 76 | return results 77 | 78 | def deploy(self, ): 79 | self.eval() 80 | self.deploy_mode = True 81 | return self 82 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/nn/postprocessor/nms_postprocessor.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn.functional as F 6 | import torch.distributed 7 | import torchvision 8 | from torch import Tensor 9 | 10 | from ...core import register 11 | 12 | from typing import Dict 13 | 14 | 15 | __all__ = ['DetNMSPostProcessor', ] 16 | 17 | 18 | @register() 19 | class DetNMSPostProcessor(torch.nn.Module): 20 | def __init__(self, \ 21 | iou_threshold=0.7, 22 | score_threshold=0.01, 23 | keep_topk=300, 24 | box_fmt='cxcywh', 25 | logit_fmt='sigmoid') -> None: 26 | super().__init__() 27 | self.iou_threshold = iou_threshold 28 | self.score_threshold = score_threshold 29 | self.keep_topk = keep_topk 30 | self.box_fmt = box_fmt.lower() 31 | self.logit_fmt = logit_fmt.lower() 32 | self.logit_func = getattr(F, self.logit_fmt, None) 33 | self.deploy_mode = False 34 | 35 | def forward(self, outputs: Dict[str, Tensor], orig_target_sizes: Tensor): 36 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes'] 37 | pred_boxes = torchvision.ops.box_convert(boxes, in_fmt=self.box_fmt, out_fmt='xyxy') 38 | pred_boxes *= orig_target_sizes.repeat(1, 2).unsqueeze(1) 39 | 40 | values, pred_labels = torch.max(logits, dim=-1) 41 | 42 | if self.logit_func: 43 | pred_scores = self.logit_func(values) 44 | else: 45 | pred_scores = values 46 | 47 | # TODO for onnx export 48 | if self.deploy_mode: 49 | blobs = { 50 | 'pred_labels': pred_labels, 51 | 'pred_boxes': pred_boxes, 52 | 'pred_scores': pred_scores 53 | } 54 | return blobs 55 | 56 | results = [] 57 | for i in range(logits.shape[0]): 58 | score_keep = pred_scores[i] > self.score_threshold 59 | pred_box = pred_boxes[i][score_keep] 60 | pred_label = pred_labels[i][score_keep] 61 | pred_score = pred_scores[i][score_keep] 62 | 63 | keep = torchvision.ops.batched_nms(pred_box, pred_score, pred_label, self.iou_threshold) 64 | keep = keep[:self.keep_topk] 65 | 66 | blob = { 67 | 'labels': pred_label[keep], 68 | 'boxes': pred_box[keep], 69 | 'scores': pred_score[keep], 70 | } 71 | 72 | results.append(blob) 73 | 74 | return results 75 | 76 | def deploy(self, ): 77 | self.eval() 78 | self.deploy_mode = True 79 | return self 80 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from .ema import * 5 | from .optim import * 6 | from .amp import * 7 | from .warmup import * -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/amp.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | import torch.cuda.amp as amp 6 | 7 | from ..core import register 8 | 9 | 10 | __all__ = ['GradScaler'] 11 | 12 | GradScaler = register()(amp.grad_scaler.GradScaler) 13 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/optim.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | 5 | import torch.optim as optim 6 | import torch.optim.lr_scheduler as lr_scheduler 7 | 8 | from ..core import register 9 | 10 | 11 | __all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR'] 12 | 13 | 14 | 15 | SGD = register()(optim.SGD) 16 | Adam = register()(optim.Adam) 17 | AdamW = register()(optim.AdamW) 18 | 19 | 20 | MultiStepLR = register()(lr_scheduler.MultiStepLR) 21 | CosineAnnealingLR = register()(lr_scheduler.CosineAnnealingLR) 22 | OneCycleLR = register()(lr_scheduler.OneCycleLR) 23 | LambdaLR = register()(lr_scheduler.LambdaLR) 24 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/optim/warmup.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from torch.optim.lr_scheduler import LRScheduler 5 | 6 | from ..core import register 7 | 8 | 9 | class Warmup(object): 10 | def __init__(self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int=-1) -> None: 11 | self.lr_scheduler = lr_scheduler 12 | self.warmup_end_values = [pg['lr'] for pg in lr_scheduler.optimizer.param_groups] 13 | self.last_step = last_step 14 | self.warmup_duration = warmup_duration 15 | self.step() 16 | 17 | def state_dict(self): 18 | return {k: v for k, v in self.__dict__.items() if k != 'lr_scheduler'} 19 | 20 | def load_state_dict(self, state_dict): 21 | self.__dict__.update(state_dict) 22 | 23 | def get_warmup_factor(self, step, **kwargs): 24 | raise NotImplementedError 25 | 26 | def step(self, ): 27 | self.last_step += 1 28 | if self.last_step >= self.warmup_duration: 29 | return 30 | factor = self.get_warmup_factor(self.last_step) 31 | for i, pg in enumerate(self.lr_scheduler.optimizer.param_groups): 32 | pg['lr'] = factor * self.warmup_end_values[i] 33 | 34 | def finished(self, ): 35 | if self.last_step >= self.warmup_duration: 36 | return True 37 | return False 38 | 39 | 40 | @register() 41 | class LinearWarmup(Warmup): 42 | def __init__(self, lr_scheduler: LRScheduler, warmup_duration: int, last_step: int = -1) -> None: 43 | super().__init__(lr_scheduler, warmup_duration, last_step) 44 | 45 | def get_warmup_factor(self, step): 46 | return min(1.0, (step + 1) / self.warmup_duration) 47 | 48 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/solver/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | from ._solver import BaseSolver 5 | from .clas_solver import ClasSolver 6 | from .det_solver import DetSolver 7 | 8 | 9 | 10 | from typing import Dict 11 | 12 | TASKS :Dict[str, BaseSolver] = { 13 | 'classification': ClasSolver, 14 | 'detection': DetSolver, 15 | } -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/solver/clas_engine.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | 7 | from ..misc import (MetricLogger, SmoothedValue, reduce_dict) 8 | 9 | 10 | def train_one_epoch(model: nn.Module, criterion: nn.Module, dataloader, optimizer, ema, epoch, device): 11 | """ 12 | """ 13 | model.train() 14 | 15 | metric_logger = MetricLogger(delimiter=" ") 16 | metric_logger.add_meter('lr', SmoothedValue(window_size=1, fmt='{value:.6f}')) 17 | print_freq = 100 18 | header = 'Epoch: [{}]'.format(epoch) 19 | 20 | for imgs, labels in metric_logger.log_every(dataloader, print_freq, header): 21 | imgs = imgs.to(device) 22 | labels = labels.to(device) 23 | 24 | preds = model(imgs) 25 | loss: torch.Tensor = criterion(preds, labels) 26 | 27 | optimizer.zero_grad() 28 | loss.backward() 29 | optimizer.step() 30 | 31 | if ema is not None: 32 | ema.update(model) 33 | 34 | loss_reduced_values = {k: v.item() for k, v in reduce_dict({'loss': loss}).items()} 35 | metric_logger.update(**loss_reduced_values) 36 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 37 | 38 | metric_logger.synchronize_between_processes() 39 | print("Averaged stats:", metric_logger) 40 | 41 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 42 | return stats 43 | 44 | 45 | 46 | @torch.no_grad() 47 | def evaluate(model, criterion, dataloader, device): 48 | model.eval() 49 | 50 | metric_logger = MetricLogger(delimiter=" ") 51 | # metric_logger.add_meter('acc', SmoothedValue(window_size=1, fmt='{global_avg:.4f}')) 52 | # metric_logger.add_meter('loss', SmoothedValue(window_size=1, fmt='{value:.2f}')) 53 | metric_logger.add_meter('acc', SmoothedValue(window_size=1)) 54 | metric_logger.add_meter('loss', SmoothedValue(window_size=1)) 55 | 56 | header = 'Test:' 57 | for imgs, labels in metric_logger.log_every(dataloader, 10, header): 58 | imgs, labels = imgs.to(device), labels.to(device) 59 | preds = model(imgs) 60 | 61 | acc = (preds.argmax(dim=-1) == labels).sum() / preds.shape[0] 62 | loss = criterion(preds, labels) 63 | 64 | dict_reduced = reduce_dict({'acc': acc, 'loss': loss}) 65 | reduced_values = {k: v.item() for k, v in dict_reduced.items()} 66 | metric_logger.update(**reduced_values) 67 | 68 | metric_logger.synchronize_between_processes() 69 | print("Averaged stats:", metric_logger) 70 | 71 | stats = {k: meter.global_avg for k, meter in metric_logger.meters.items()} 72 | return stats 73 | 74 | 75 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/solver/clas_solver.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import time 5 | import json 6 | import datetime 7 | from pathlib import Path 8 | 9 | import torch 10 | import torch.nn as nn 11 | 12 | from ..misc import dist_utils 13 | from ._solver import BaseSolver 14 | from .clas_engine import train_one_epoch, evaluate 15 | 16 | 17 | class ClasSolver(BaseSolver): 18 | 19 | def fit(self, ): 20 | print("Start training") 21 | self.train() 22 | args = self.cfg 23 | 24 | n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad) 25 | print('Number of params:', n_parameters) 26 | 27 | output_dir = Path(args.output_dir) 28 | output_dir.mkdir(exist_ok=True) 29 | 30 | start_time = time.time() 31 | start_epoch = self.last_epoch + 1 32 | for epoch in range(start_epoch, args.epoches): 33 | 34 | if dist_utils.is_dist_available_and_initialized(): 35 | self.train_dataloader.sampler.set_epoch(epoch) 36 | 37 | train_stats = train_one_epoch(self.model, 38 | self.criterion, 39 | self.train_dataloader, 40 | self.optimizer, 41 | self.ema, 42 | epoch=epoch, 43 | device=self.device) 44 | self.lr_scheduler.step() 45 | self.last_epoch += 1 46 | 47 | if output_dir: 48 | checkpoint_paths = [output_dir / 'checkpoint.pth'] 49 | # extra checkpoint before LR drop and every 100 epochs 50 | if (epoch + 1) % args.checkpoint_freq == 0: 51 | checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth') 52 | for checkpoint_path in checkpoint_paths: 53 | dist_utils.save_on_master(self.state_dict(epoch), checkpoint_path) 54 | 55 | module = self.ema.module if self.ema else self.model 56 | test_stats = evaluate(module, self.criterion, self.val_dataloader, self.device) 57 | 58 | log_stats = {**{f'train_{k}': v for k, v in train_stats.items()}, 59 | **{f'test_{k}': v for k, v in test_stats.items()}, 60 | 'epoch': epoch, 61 | 'n_parameters': n_parameters} 62 | 63 | if output_dir and dist_utils.is_main_process(): 64 | with (output_dir / "log.txt").open("a") as f: 65 | f.write(json.dumps(log_stats) + "\n") 66 | 67 | total_time = time.time() - start_time 68 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 69 | print('Training time {}'.format(total_time_str)) 70 | 71 | 72 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | from . import rtdetr 6 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/__init__.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 2 | """ 3 | 4 | 5 | from .rtdetr import RTDETR 6 | from .matcher import HungarianMatcher 7 | from .hybrid_encoder import HybridEncoder 8 | from .rtdetr_decoder import RTDETRTransformer 9 | from .rtdetr_criterion import RTDETRCriterion 10 | from .rtdetr_postprocessor import RTDETRPostProcessor 11 | 12 | # v2 13 | from .rtdetrv2_decoder import RTDETRTransformerv2 14 | from .rtdetrv2_criterion import RTDETRCriterionv2 -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/box_ops.py: -------------------------------------------------------------------------------- 1 | """ 2 | # Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved 3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py 4 | """ 5 | 6 | import torch 7 | from torch import Tensor 8 | from torchvision.ops.boxes import box_area 9 | 10 | 11 | def box_cxcywh_to_xyxy(x: Tensor) -> Tensor: 12 | x_c, y_c, w, h = x.unbind(-1) 13 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h), 14 | (x_c + 0.5 * w), (y_c + 0.5 * h)] 15 | return torch.stack(b, dim=-1) 16 | 17 | 18 | def box_xyxy_to_cxcywh(x: Tensor) -> Tensor: 19 | x0, y0, x1, y1 = x.unbind(-1) 20 | b = [(x0 + x1) / 2, (y0 + y1) / 2, 21 | (x1 - x0), (y1 - y0)] 22 | return torch.stack(b, dim=-1) 23 | 24 | 25 | # modified from torchvision to also return the union 26 | def box_iou(boxes1: Tensor, boxes2: Tensor): 27 | area1 = box_area(boxes1) 28 | area2 = box_area(boxes2) 29 | 30 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2] 31 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2] 32 | 33 | wh = (rb - lt).clamp(min=0) # [N,M,2] 34 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M] 35 | 36 | union = area1[:, None] + area2 - inter 37 | 38 | iou = inter / union 39 | return iou, union 40 | 41 | 42 | def generalized_box_iou(boxes1, boxes2): 43 | """ 44 | Generalized IoU from https://giou.stanford.edu/ 45 | 46 | The boxes should be in [x0, y0, x1, y1] format 47 | 48 | Returns a [N, M] pairwise matrix, where N = len(boxes1) 49 | and M = len(boxes2) 50 | """ 51 | # degenerate boxes gives inf / nan results 52 | # so do an early check 53 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all() 54 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all() 55 | iou, union = box_iou(boxes1, boxes2) 56 | 57 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2]) 58 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:]) 59 | 60 | wh = (rb - lt).clamp(min=0) # [N,M,2] 61 | area = wh[:, :, 0] * wh[:, :, 1] 62 | 63 | return iou - (area - union) / area 64 | 65 | 66 | def masks_to_boxes(masks): 67 | """Compute the bounding boxes around the provided masks 68 | 69 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions. 70 | 71 | Returns a [N, 4] tensors, with the boxes in xyxy format 72 | """ 73 | if masks.numel() == 0: 74 | return torch.zeros((0, 4), device=masks.device) 75 | 76 | h, w = masks.shape[-2:] 77 | 78 | y = torch.arange(0, h, dtype=torch.float) 79 | x = torch.arange(0, w, dtype=torch.float) 80 | y, x = torch.meshgrid(y, x) 81 | 82 | x_mask = (masks * x.unsqueeze(0)) 83 | x_max = x_mask.flatten(1).max(-1)[0] 84 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 85 | 86 | y_mask = (masks * y.unsqueeze(0)) 87 | y_max = y_mask.flatten(1).max(-1)[0] 88 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0] 89 | 90 | return torch.stack([x_min, y_min, x_max, y_max], 1) -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/conver_params.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | 6 | def main(args) -> None: 7 | import cvperception 8 | from cvperception.core import load_config, merge_config, create 9 | cfg = load_config(args.config) 10 | model: torch.nn.Module = create(cfg['model'], merge_config(cfg)) 11 | 12 | if args.version == 1: 13 | state = model.state_dict() 14 | keys = [k for k in state.keys() if 'num_batches_tracked' not in k] 15 | 16 | elif args.version == 2: 17 | state = model.state_dict() 18 | ignore_keys = ['anchors', 'valid_mask', 'num_points_scale'] 19 | keys = [k for k in state.keys() if 'num_batches_tracked' not in k] 20 | keys = [k for k in keys if not any([x in k for x in ignore_keys])] 21 | 22 | import paddle 23 | p_state = paddle.load(args.pdparams) 24 | pkeys = list(p_state.keys()) 25 | 26 | assert len(keys) == len(pkeys), f'{len(keys)}, {len(pkeys)}' 27 | 28 | new_state = {} 29 | for i, k in enumerate(keys): 30 | pp = p_state[pkeys[i]] 31 | pp = torch.tensor(pp.numpy()) 32 | 33 | if 'denoising_class_embed' in k: 34 | new_state[k] = torch.concat([pp, torch.zeros(1, pp.shape[-1])], dim=0) 35 | continue 36 | 37 | tp = state[k] 38 | if len(tp.shape) == 2: 39 | new_state[k] = pp.T 40 | elif len(tp.shape) == 1: 41 | new_state[k] = pp 42 | else: 43 | assert tp.shape == pp.shape, f'{k}, {pp.shape}, {tp.shape}' 44 | new_state[k] = pp 45 | 46 | assert len(new_state) == len(p_state), '' 47 | 48 | # checkpoint = {'ema': {'module': new_state, }} 49 | # torch.save(checkpoint, args.output_file) 50 | 51 | model.load_state_dict(new_state, strict=False) 52 | 53 | checkpoint = {'ema': {'module': model.state_dict(), }} 54 | torch.save(checkpoint, args.output_file) 55 | 56 | 57 | 58 | if __name__ == '__main__': 59 | import argparse 60 | parser = argparse.ArgumentParser() 61 | parser.add_argument('-c', '--config', type=str, ) 62 | parser.add_argument('-p', '--pdparams', type=str, ) 63 | parser.add_argument('-o', '--output_file', type=str, ) 64 | parser.add_argument('-v', '--version', type=int, default=1) 65 | 66 | args = parser.parse_args() 67 | main(args) 68 | 69 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -p rtdetr_r18vd_dec3_6x_coco.pdparams -o rtdetr_r18vd_dec3_6x_coco_new.pth 70 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -p rtdetr_r18vd_5x_coco_objects365.pdparams -o rtdetr_r18vd_5x_coco_objects365_new.pth 71 | # python ./src/cvperception/zoo/rtdetr/conver_params.py -c configs/rtdetrv2/rtdetrv2_r50vd_120e_coco.yml -p rtdetr_r50vd_1x_objects365.pdparams -o rtdetrv2_r50vd_1x_objects365_new.pth -v 2 72 | 73 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/src/zoo/rtdetr/rtdetr.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | import random 9 | import numpy as np 10 | from typing import List 11 | 12 | from ...core import register 13 | 14 | 15 | __all__ = ['RTDETR', ] 16 | 17 | 18 | @register() 19 | class RTDETR(nn.Module): 20 | __inject__ = ['backbone', 'encoder', 'decoder', ] 21 | 22 | def __init__(self, \ 23 | backbone: nn.Module, 24 | encoder: nn.Module, 25 | decoder: nn.Module, 26 | ): 27 | super().__init__() 28 | self.backbone = backbone 29 | self.decoder = decoder 30 | self.encoder = encoder 31 | 32 | def forward(self, x, targets=None): 33 | x = self.backbone(x) 34 | x = self.encoder(x) 35 | x = self.decoder(x, targets) 36 | 37 | return x 38 | 39 | def deploy(self, ): 40 | self.eval() 41 | for m in self.modules(): 42 | if hasattr(m, 'convert_to_deploy'): 43 | m.convert_to_deploy() 44 | return self 45 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/tools/README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | Train/test script examples 4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master-port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &` 5 | - `-r path/to/checkpoint` 6 | - `--amp` 7 | - `--test-only` 8 | 9 | 10 | Export script examples 11 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check` 12 | 13 | 14 | Gpu do not release memory 15 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9` 16 | 17 | 18 | Save all logs 19 | - Appending `&> train.log 2>&1 &` or `&> train.log 2>&1` 20 | 21 | 22 | Tensorboard 23 | - `--summary-dir=/path/to/summary/dir` or `-u summary_dir=/path/to/summary/dir` 24 | - `tensorboard --host=ip --port=8989 --logdir=/path/to/summary/` -------------------------------------------------------------------------------- /rtdetrv2_pytorch/tools/run_profile.py: -------------------------------------------------------------------------------- 1 | 2 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
3 | """ 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch import Tensor 8 | 9 | import re 10 | import os 11 | import sys 12 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) 13 | from src.core import YAMLConfig, yaml_utils 14 | from src.solver import TASKS 15 | 16 | from typing import Dict, List, Optional, Any 17 | 18 | 19 | __all__ = ["profile_stats"] 20 | 21 | def profile_stats( 22 | model: nn.Module, 23 | data: Optional[Tensor]=None, 24 | shape: List[int]=[1, 3, 640, 640], 25 | verbose: bool=False 26 | ) -> Dict[str, Any]: 27 | 28 | is_training = model.training 29 | 30 | model.train() 31 | num_params = sum([p.numel() for p in model.parameters() if p.requires_grad]) 32 | 33 | model.eval() 34 | 35 | if data is None: 36 | dtype = next(model.parameters()).dtype 37 | device = next(model.parameters()).device 38 | data = torch.rand(*shape, dtype=dtype, device=device) 39 | print(device) 40 | 41 | def trace_handler(prof): 42 | print(prof.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1)) 43 | 44 | wait = 0 45 | warmup = 1 46 | active = 1 47 | repeat = 1 48 | skip_first = 0 49 | with torch.profiler.profile( 50 | activities=[ 51 | torch.profiler.ProfilerActivity.CPU, 52 | torch.profiler.ProfilerActivity.CUDA, 53 | ], 54 | schedule=torch.profiler.schedule( 55 | wait=wait, 56 | warmup=warmup, 57 | active=active, 58 | repeat=repeat, 59 | skip_first=skip_first, 60 | ), 61 | with_flops=True, 62 | ) as p: 63 | n_step = skip_first + (wait + warmup + active) * repeat 64 | for _ in range(n_step): 65 | _ = model(data) 66 | p.step() 67 | 68 | if is_training: 69 | model.train() 70 | 71 | info = p.key_averages().table(sort_by='self_cuda_time_total', row_limit=-1) 72 | num_flops = sum([float(v.strip()) for v in re.findall('(\d+.?\d+ *\n)', info)]) / active 73 | 74 | if verbose: 75 | print(info) 76 | print(f'Total number of trainable parameters: {num_params}') 77 | print(f'Total number of flops: {int(num_flops)}M with {shape}') 78 | 79 | return {'n_parameters': num_params, 'n_flops': num_flops, 'info': info} 80 | 81 | 82 | 83 | if __name__ == "__main__": 84 | import argparse 85 | parser = argparse.ArgumentParser() 86 | parser.add_argument('-c', '--config', type=str, required=True) 87 | parser.add_argument('-d', '--device', type=str, default='cuda:0', help='device',) 88 | args = parser.parse_args() 89 | 90 | cfg = YAMLConfig(args.config, device=args.device) 91 | model = cfg.model.to(args.device) 92 | 93 | profile_stats(model, verbose=True) 94 | -------------------------------------------------------------------------------- /rtdetrv2_pytorch/tools/train.py: -------------------------------------------------------------------------------- 1 | """Copyright(c) 2023 lyuwenyu. All Rights Reserved. 
2 | """ 3 | 4 | import os 5 | import sys 6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..')) 7 | 8 | import argparse 9 | 10 | from src.misc import dist_utils 11 | from src.core import YAMLConfig, yaml_utils 12 | from src.solver import TASKS 13 | 14 | 15 | def main(args, ) -> None: 16 | """main 17 | """ 18 | dist_utils.setup_distributed(args.print_rank, args.print_method, seed=args.seed) 19 | 20 | assert not all([args.tuning, args.resume]), \ 21 | 'Only support from_scrach or resume or tuning at one time' 22 | 23 | update_dict = yaml_utils.parse_cli(args.update) 24 | update_dict.update({k: v for k, v in args.__dict__.items() \ 25 | if k not in ['update', ] and v is not None}) 26 | 27 | cfg = YAMLConfig(args.config, **update_dict) 28 | print('cfg: ', cfg.__dict__) 29 | 30 | solver = TASKS[cfg.yaml_cfg['task']](cfg) 31 | 32 | if args.test_only: 33 | solver.val() 34 | else: 35 | solver.fit() 36 | 37 | dist_utils.cleanup() 38 | 39 | 40 | if __name__ == '__main__': 41 | 42 | parser = argparse.ArgumentParser() 43 | 44 | # priority 0 45 | parser.add_argument('-c', '--config', type=str, required=True) 46 | parser.add_argument('-r', '--resume', type=str, help='resume from checkpoint') 47 | parser.add_argument('-t', '--tuning', type=str, help='tuning from checkpoint') 48 | parser.add_argument('-d', '--device', type=str, help='device',) 49 | parser.add_argument('--seed', type=int, help='exp reproducibility') 50 | parser.add_argument('--use-amp', action='store_true', help='auto mixed precision training') 51 | parser.add_argument('--output-dir', type=str, help='output directoy') 52 | parser.add_argument('--summary-dir', type=str, help='tensorboard summry') 53 | parser.add_argument('--test-only', action='store_true', default=False,) 54 | 55 | # priority 1 56 | parser.add_argument('-u', '--update', nargs='+', help='update yaml config') 57 | 58 | # env 59 | parser.add_argument('--print-method', type=str, default='builtin', help='print method') 60 | parser.add_argument('--print-rank', type=int, default=0, help='print rank id') 61 | 62 | parser.add_argument('--local-rank', type=int, help='local rank id') 63 | args = parser.parse_args() 64 | 65 | main(args) 66 | --------------------------------------------------------------------------------