├── rtdetr_pytorch
│   ├── src
│   │   ├── zoo
│   │   │   ├── __init__.py
│   │   │   └── rtdetr
│   │   │       ├── __init__.py
│   │   │       ├── rtdetr.py
│   │   │       ├── box_ops.py
│   │   │       ├── rtdetr_postprocessor.py
│   │   │       ├── utils.py
│   │   │       └── matcher.py
│   │   ├── nn
│   │   │   ├── arch
│   │   │   │   ├── __init__.py
│   │   │   │   └── classification.py
│   │   │   ├── backbone
│   │   │   │   ├── __init__.py
│   │   │   │   ├── utils.py
│   │   │   │   ├── test_resnet.py
│   │   │   │   └── common.py
│   │   │   ├── __init__.py
│   │   │   └── criterion
│   │   │       ├── __init__.py
│   │   │       └── utils.py
│   │   ├── misc
│   │   │   ├── __init__.py
│   │   │   ├── visualizer.py
│   │   │   └── dist.py
│   │   ├── optim
│   │   │   ├── __init__.py
│   │   │   ├── amp.py
│   │   │   ├── optim.py
│   │   │   └── ema.py
│   │   ├── __init__.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── coco
│   │   │   │   └── __init__.py
│   │   │   ├── cifar10
│   │   │   │   └── __init__.py
│   │   │   ├── dataloader.py
│   │   │   └── transforms.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   └── yaml_config.py
│   │   └── solver
│   │       ├── __init__.py
│   │       └── det_solver.py
│   ├── requirements.txt
│   ├── configs
│   │   ├── runtime.yml
│   │   ├── rtdetr
│   │   │   ├── rtdetr_r50vd_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_m_6x_coco.yml
│   │   │   ├── rtdetr_r101vd_6x_coco.yml
│   │   │   ├── include
│   │   │   │   ├── optimizer.yml
│   │   │   │   ├── dataloader.yml
│   │   │   │   └── rtdetr_r50vd.yml
│   │   │   ├── rtdetr_r34vd_6x_coco.yml
│   │   │   └── rtdetr_r18vd_6x_coco.yml
│   │   └── dataset
│   │       └── coco_detection.yml
│   ├── tools
│   │   ├── README.md
│   │   ├── train.py
│   │   └── export_onnx.py
│   ├── temp.py
│   └── README.md
├── rtdetr_paddle
│   ├── ppdet
│   │   ├── version.py
│   │   ├── modeling
│   │   │   ├── transformers
│   │   │   │   ├── ext_op
│   │   │   │   │   ├── setup_ms_deformable_attn_op.py
│   │   │   │   │   ├── ms_deformable_attn_op.cc
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── test_ms_deformable_attn_op.py
│   │   │   │   ├── __init__.py
│   │   │   │   └── position_encoding.py
│   │   │   ├── heads
│   │   │   │   └── __init__.py
│   │   │   ├── architectures
│   │   │   │   ├── __init__.py
│   │   │   │   ├── detr.py
│   │   │   │   └── meta_arch.py
│   │   │   ├── losses
│   │   │   │   ├── __init__.py
│   │   │   │   └── smooth_l1_loss.py
│   │   │   ├── __init__.py
│   │   │   ├── backbones
│   │   │   │   ├── __init__.py
│   │   │   │   ├── name_adapter.py
│   │   │   │   └── transformer_utils.py
│   │   │   ├── shape_spec.py
│   │   │   └── cls_utils.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── logger.py
│   │   │   ├── stats.py
│   │   │   ├── colormap.py
│   │   │   ├── voc_utils.py
│   │   │   ├── profiler.py
│   │   │   └── check.py
│   │   ├── core
│   │   │   ├── config
│   │   │   │   ├── __init__.py
│   │   │   │   └── yaml_helpers.py
│   │   │   └── __init__.py
│   │   ├── optimizer
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   ├── data
│   │   │   ├── source
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   ├── transform
│   │   │   │   └── __init__.py
│   │   │   ├── shm_utils.py
│   │   │   └── utils.py
│   │   ├── engine
│   │   │   ├── __init__.py
│   │   │   └── env.py
│   │   ├── metrics
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── requirements.txt
│   ├── dataset
│   │   ├── voc
│   │   │   ├── label_list.txt
│   │   │   ├── create_list.py
│   │   │   └── download_voc.py
│   │   └── coco
│   │       └── download_coco.py
│   ├── configs
│   │   ├── rtdetr
│   │   │   ├── rtdetr_r50vd_6x_coco.yml
│   │   │   ├── _base_
│   │   │   │   ├── optimizer_6x.yml
│   │   │   │   ├── rtdetr_reader.yml
│   │   │   │   └── rtdetr_r50vd.yml
│   │   │   ├── rtdetr_hgnetv2_l_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_m_6x_coco.yml
│   │   │   ├── rtdetr_r18vd_6x_coco.yml
│   │   │   ├── rtdetr_r34vd_6x_coco.yml
│   │   │   ├── rtdetr_r101vd_6x_coco.yml
│   │   │   └── rtdetr_hgnetv2_x_6x_coco.yml
│   │   ├── runtime.yml
│   │   └── datasets
│   │       ├── voc.yml
│   │       └── coco_detection.yml
│   └── tools
│       ├── slice_image.py
│       └── export_model.py
├── .github
│   ├── ISSUE_TEMPLATE
│   │   └── bug_report.md
│   └── FUNDING.yml
├── benchmark
│   ├── trtexec.md
│   ├── README.md
│   ├── yolov8_onnx.py
│   ├── utils.py
│   ├── dataset.py
│   └── trtinfer.py
├── README_cn.md
└── .gitignore
/rtdetr_pytorch/src/zoo/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .rtdetr import *
3 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/arch/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification import *
2 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/misc/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .logger import *
3 | from .visualizer import *
4 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .ema import *
3 | from .optim import *
4 | from .amp import *
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from . import data
3 | from . import nn
4 | from . import optim
5 | from . import zoo
6 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .presnet import *
3 | from .test_resnet import *
4 |
5 | from .common import *
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .arch import *
3 | from .criterion import *
4 |
5 | #
6 | from .backbone import *
7 |
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==2.0.1
2 | torchvision==0.15.2
3 | onnx==1.14.0
4 | onnxruntime==1.15.1
5 | pycocotools
6 | PyYAML
7 | scipy
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .coco import *
3 | from .cifar10 import CIFAR10
4 |
5 | from .dataloader import *
6 | from .transforms import *
7 |
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/criterion/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import torch.nn as nn
3 | from src.core import register
4 |
5 | CrossEntropyLoss = register(nn.CrossEntropyLoss)
6 |
7 |
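8 | # Registered classes can then be built by type name from a YAML config
9 | # (e.g. "type: CrossEntropyLoss") via src.core's register/create machinery.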
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/version.py:
--------------------------------------------------------------------------------
1 | # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
2 | #
3 | full_version = '2.4.0'
4 | commit = '87ed5ba91eaeb332e8e5c3f4e7d5b1d765c75644'
5 |
--------------------------------------------------------------------------------
/rtdetr_paddle/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle-gpu==2.4.2
2 | tqdm
3 | typeguard
4 | visualdl>=2.2.0
5 | opencv-python <= 4.6.0
6 | PyYAML
7 | shapely
8 | scipy
9 | terminaltables
10 | Cython
11 | pycocotools
12 | setuptools
13 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/core/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | # from .yaml_utils import register, create, load_config, merge_config, merge_dict
5 | from .yaml_utils import *
6 | from .config import BaseConfig
7 | from .yaml_config import YAMLConfig
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/solver/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | from .solver import BaseSolver
5 | from .det_solver import DetSolver
6 |
7 | from typing import Dict, Type
8 |
9 | TASKS: Dict[str, Type[BaseSolver]] = {
10 | 'detection': DetSolver,
11 | }
12 |
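13 | # tools/train.py selects the solver with TASKS[cfg.yaml_cfg['task']](cfg);
14 | # 'task: detection' comes from configs/dataset/coco_detection.yml.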
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/label_list.txt:
--------------------------------------------------------------------------------
1 | aeroplane
2 | bicycle
3 | bird
4 | boat
5 | bottle
6 | bus
7 | car
8 | cat
9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/coco/__init__.py:
--------------------------------------------------------------------------------
1 | from .coco_dataset import (
2 | CocoDetection,
3 | mscoco_category2label,
4 | mscoco_label2category,
5 | mscoco_category2name,
6 | )
7 | from .coco_eval import *
8 |
9 | from .coco_utils import get_coco_api_from_dataset
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | sync_bn: True
2 | find_unused_parameters: False
3 |
4 |
5 | use_amp: False
6 |
7 | scaler:
8 | type: GradScaler
9 | enabled: True
10 |
11 |
12 | use_ema: False
13 | ema:
14 | type: ModelEMA
15 | decay: 0.9999
16 | warmups: 2000
17 |
18 |
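19 | # These are repo-wide defaults; configs/rtdetr/include/optimizer.yml
20 | # overrides use_ema to True for the RT-DETR configs.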
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/amp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.cuda.amp as amp
4 |
5 |
6 | from src.core import register
7 | import src.misc.dist as dist
8 |
9 |
10 | __all__ = ['GradScaler']
11 |
12 | GradScaler = register(amp.grad_scaler.GradScaler)
13 |
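14 | # Registering torch's GradScaler lets runtime.yml build it by name
15 | # (scaler: {type: GradScaler, enabled: True}) when use_amp is enabled.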
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 |
5 | from .rtdetr import *
6 |
7 | from .hybrid_encoder import *
8 | from .rtdetr_decoder import *
9 | from .rtdetr_postprocessor import *
10 | from .rtdetr_criterion import *
11 |
12 | from .matcher import *
13 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_6x_coco
10 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r50vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/setup_ms_deformable_attn_op.py:
--------------------------------------------------------------------------------
1 | from paddle.utils.cpp_extension import CUDAExtension, setup
2 |
3 | if __name__ == "__main__":
4 | setup(
5 | name='deformable_detr_ops',
6 | ext_modules=CUDAExtension(
7 | sources=['ms_deformable_attn_op.cc', 'ms_deformable_attn_op.cu']))
8 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/optimizer_6x.yml:
--------------------------------------------------------------------------------
1 | epoch: 72
2 |
3 | LearningRate:
4 | base_lr: 0.0001
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 1.0
8 | milestones: [100]
9 | use_warmup: true
10 | - !LinearWarmup
11 | start_factor: 0.001
12 | steps: 2000
13 |
14 | OptimizerBuilder:
15 | clip_grad_by_norm: 0.1
16 | regularizer: false
17 | optimizer:
18 | type: AdamW
19 | weight_decay: 0.0001
20 |
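21 | # Note: with gamma 1.0 and the only milestone (100) beyond the 72 training
22 | # epochs, the LR stays at base_lr after the 2000-step linear warmup.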
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco
10 |
11 |
12 | # Parameters added on top of rtdetr_r50vd_6x_coco.yml
13 | HybridEncoder:
14 | expansion: 0.5
15 |
16 | RTDETRTransformer:
17 |   eval_idx: 2 # use the 3rd decoder layer for eval
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: lyuwenyu
7 |
8 | ---
9 |
10 | **Star RTDETR**
11 | Please click **star** on the RTDETR homepage first to support this project.
12 | Star RTDETR to help more people discover this project.
13 |
14 | ---
15 |
16 | **Describe the bug**
17 | A clear and concise description of what the bug is.
18 | If applicable, add screenshots to help explain your problem.
19 |
20 | **To Reproduce**
21 | Steps to reproduce the behavior.
22 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/cifar10/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import torchvision
3 | from typing import Optional, Callable
4 |
5 | from src.core import register
6 |
7 |
8 | @register
9 | class CIFAR10(torchvision.datasets.CIFAR10):
10 | __inject__ = ['transform', 'target_transform']
11 |
12 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None:
13 | super().__init__(root, train, transform, target_transform, download)
14 |
15 |
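16 | # __inject__ tells the config system to resolve these constructor arguments
17 | # from other configured components (here, transform pipelines) before building.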
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | PResNet:
10 | depth: 101
11 |
12 |
13 | HybridEncoder:
14 | # intra
15 | hidden_dim: 384
16 | dim_feedforward: 2048
17 |
18 |
19 | RTDETRTransformer:
20 | feat_channels: [384, 384, 384]
21 |
22 |
23 | optimizer:
24 | type: AdamW
25 | params:
26 | -
27 | params: 'backbone'
28 | lr: 0.000001
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | use_gpu: true
2 | use_xpu: false
3 | use_mlu: false
4 | use_npu: false
5 | log_iter: 20
6 | save_dir: output
7 | snapshot_epoch: 1
8 | print_flops: false
9 | print_params: false
10 |
11 | # Exporting the model
12 | export:
13 |   post_process: True # Whether post-processing is included in the network when exporting the model.
14 |   nms: True # Whether NMS is included in the network when exporting the model.
15 |   benchmark: False # Used for benchmarking model performance; if set to `True`, post-processing and NMS are not exported.
16 | fuse_conv_bn: False
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/datasets/voc.yml:
--------------------------------------------------------------------------------
1 | metric: VOC
2 | map_type: 11point
3 | num_classes: 20
4 |
5 | TrainDataset:
6 | name: VOCDataSet
7 | dataset_dir: dataset/voc
8 | anno_path: trainval.txt
9 | label_list: label_list.txt
10 | data_fields: [ 'image', 'gt_bbox', 'gt_class', 'difficult' ]
11 |
12 | EvalDataset:
13 | name: VOCDataSet
14 | dataset_dir: dataset/voc
15 | anno_path: test.txt
16 | label_list: label_list.txt
17 | data_fields: [ 'image', 'gt_bbox', 'gt_class', 'difficult' ]
18 |
19 | TestDataset:
20 | name: ImageFolder
21 | anno_path: dataset/voc/label_list.txt
22 |
--------------------------------------------------------------------------------
/benchmark/trtexec.md:
--------------------------------------------------------------------------------
1 |
2 | ```bash
3 | # build tensorrt engine
4 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16
5 |
6 | # using dynamic shapes
7 | # --explicitBatch --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640 --shapes=image:8x3x640x640
8 |
9 | # timeline
10 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
11 |
12 | # https://forums.developer.nvidia.com/t/about-loadinputs-in-trtexec/218880
13 | ```
14 |
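15 | A minimal end-to-end sketch combining the steps above (model/engine names and the input bin are placeholders):
16 |
17 | ```bash
18 | trtexec --onnx=./model.onnx --saveEngine=model.engine --buildOnly --fp16 \
19 |   --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640
20 | trtexec --loadEngine=./model.engine --fp16 --avgRuns=10 --shapes=image:8x3x640x640 --loadInputs='image:input_tensor.bin'
21 | ```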
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/criterion/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 |
4 |
5 |
6 | def format_target(targets):
7 | '''
8 | Args:
9 | targets (List[Dict]),
10 | Return:
11 | tensor (Tensor), [im_id, label, bbox,]
12 | '''
13 | outputs = []
14 | for i, tgt in enumerate(targets):
15 | boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh')
16 | labels = tgt['labels'].reshape(-1, 1)
17 | im_ids = torch.ones_like(labels) * i
18 | outputs.append(torch.cat([im_ids, labels, boxes], dim=1))
19 |
20 | return torch.cat(outputs, dim=0)
21 |
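22 | # Example: targets for two images with 2 and 1 boxes yield a [3, 6] tensor,
23 | # each row being [im_id, label, cx, cy, w, h]; box_convert changes only the
24 | # box format, not the coordinate scale.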
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/optim.py:
--------------------------------------------------------------------------------
1 |
2 | import torch
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | import torch.optim.lr_scheduler as lr_scheduler
6 |
7 | from src.core import register
8 |
9 |
10 | __all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR']
11 |
12 |
13 |
14 | SGD = register(optim.SGD)
15 | Adam = register(optim.Adam)
16 | AdamW = register(optim.AdamW)
17 |
18 |
19 | MultiStepLR = register(lr_scheduler.MultiStepLR)
20 | CosineAnnealingLR = register(lr_scheduler.CosineAnnealingLR)
21 | OneCycleLR = register(lr_scheduler.OneCycleLR)
22 | LambdaLR = register(lr_scheduler.LambdaLR)
23 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
11 | find_unused_parameters: True
12 | log_iter: 200
13 |
14 |
15 | DETR:
16 | backbone: PPHGNetV2
17 |
18 | PPHGNetV2:
19 | arch: 'L'
20 | return_idx: [1, 2, 3]
21 | freeze_stem_only: True
22 | freeze_at: 0
23 | freeze_norm: True
24 | lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
25 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/optimizer.yml:
--------------------------------------------------------------------------------
1 | use_ema: True
2 | ema:
3 | type: ModelEMA
4 | decay: 0.9999
5 | warmups: 2000
6 |
7 |
8 | find_unused_parameters: True
9 |
10 | epoches: 72
11 | clip_max_norm: 0.1
12 |
13 | optimizer:
14 | type: AdamW
15 | params:
16 | - params: 'backbone'
17 | lr: 0.00001
18 | - params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$'
19 | weight_decay: 0.
20 | - params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$'
21 | weight_decay: 0.
22 |
23 | lr: 0.0001
24 | betas: [ 0.9, 0.999 ]
25 | weight_decay: 0.0001
26 |
27 |
28 | lr_scheduler:
29 | type: MultiStepLR
30 | milestones: [ 1000 ]
31 | gamma: 0.1
32 |
33 |
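34 | # The strings under optimizer.params are regular expressions matched against
35 | # parameter names: backbone params get a lower lr, encoder/decoder bias and
36 | # norm weights get weight_decay 0, and everything else falls back to the
37 | # top-level lr/weight_decay.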
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import config
16 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r50vd_m_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | HybridEncoder:
14 | hidden_dim: 256
15 | use_encoder_idx: [2]
16 | num_encoder_layers: 1
17 | encoder_layer:
18 | name: TransformerLayer
19 | d_model: 256
20 | nhead: 8
21 | dim_feedforward: 1024
22 | dropout: 0.
23 | activation: 'gelu'
24 | expansion: 0.5
25 | depth_mult: 1.0
26 |
27 | RTDETRTransformer:
28 |   eval_idx: 2 # use the 3rd decoder layer for eval
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .detr_head import *
16 |
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .meta_arch import *
16 | from .detr import *
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import optimizer
16 | from . import ema
17 |
18 | from .optimizer import *
19 | from .ema import *
20 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/datasets/coco_detection.yml:
--------------------------------------------------------------------------------
1 | metric: COCO
2 | num_classes: 80
3 |
4 | TrainDataset:
5 | name: COCODataSet
6 | image_dir: /mnt/h/ml_dataset_home/coco/train2017
7 | anno_path: /mnt/h/ml_dataset_home/coco/annotations/instances_train2017.json
8 | dataset_dir: dataset/coco
9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
10 |
11 | EvalDataset:
12 | name: COCODataSet
13 | image_dir: /mnt/h/ml_dataset_home/coco/val2017
14 | anno_path: /mnt/h/ml_dataset_home/coco/instances_val2017.json
15 | dataset_dir: dataset/coco
16 | allow_empty: true
17 |
18 | TestDataset:
19 | name: ImageFolder
20 | anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
21 | dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/source/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .coco import *
16 | from .voc import *
17 | from .category import *
18 | from .dataset import ImageFolder
19 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data as data
3 |
4 | from src.core import register
5 |
6 |
7 | __all__ = ['DataLoader']
8 |
9 |
10 | @register
11 | class DataLoader(data.DataLoader):
12 | __inject__ = ['dataset', 'collate_fn']
13 |
14 | def __repr__(self) -> str:
15 | format_string = self.__class__.__name__ + "("
16 | for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']:
17 | format_string += "\n"
18 | format_string += " {0}: {1}".format(n, getattr(self, n))
19 | format_string += "\n)"
20 | return format_string
21 |
22 |
23 |
24 | @register
25 | def default_collate_fn(items):
26 | '''default collate_fn
27 | '''
28 | return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items]
29 |
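30 | # Assumes the transforms have already resized every image to the same HxW
31 | # (e.g. Resize to 640x640), so images stack into a [B, 3, H, W] batch while
32 | # targets remain a list of per-image dicts.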
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .iou_loss import *
16 | from .gfocal_loss import *
17 | from .detr_loss import *
18 | from .focal_loss import *
19 | from .smooth_l1_loss import *
20 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/tools/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Train/test script examples
4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &`
5 | - `-r path/to/checkpoint`
6 | - `--amp`
7 | - `--test-only`
8 |
9 |
10 | Tuning script examples
11 | - `torchrun --master_port=8844 --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -t https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth`
12 |
13 |
14 | Export script examples
15 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check`
16 |
17 |
18 | GPU does not release memory
19 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9`
20 |
21 |
22 | Save all logs
23 | - Append `&> train.log 2>&1 &` (backgrounded) or `&> train.log 2>&1` to the command
24 |
25 |
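26 | Combined example (resume + AMP, logs saved; paths are placeholders)
27 | - `CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node=2 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml -r path/to/checkpoint --amp &> train.log 2>&1 &`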
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import source
16 | from . import transform
17 | from . import reader
18 |
19 | from .source import *
20 | from .transform import *
21 | from .reader import *
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .utils import *
16 | from .matchers import *
17 | from .position_encoding import *
18 | from .rtdetr_transformer import *
19 | from .dino_transformer import *
20 | from .hybrid_encoder import *
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/transform/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import operators
16 | from . import batch_operators
17 |
18 |
19 | from .operators import *
20 | from .batch_operators import *
21 |
22 |
23 | __all__ = []
24 | __all__ += registered_ops
25 |
26 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/dataset/coco_detection.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | num_classes: 80
4 | remap_mscoco_category: True
5 |
6 | train_dataloader:
7 | type: DataLoader
8 | dataset:
9 | type: CocoDetection
10 | img_folder: /mnt/h/ml_dataset_home/coco/train2017/
11 | ann_file: /mnt/h/ml_dataset_home/coco/annotations/small_instances_train2017.json
12 | transforms:
13 | type: Compose
14 | ops: ~
15 | shuffle: True
16 | batch_size: 1
17 | num_workers: 4
18 | drop_last: True
19 |
20 |
21 | val_dataloader:
22 | type: DataLoader
23 | dataset:
24 | type: CocoDetection
25 | img_folder: /mnt/h/ml_dataset_home/coco/train2017/
26 | ann_file: /mnt/h/ml_dataset_home/coco/annotations/small_instances_train2017.json
27 | transforms:
28 | type: Compose
29 | ops: ~
30 |
31 | shuffle: False
32 | batch_size: 1
33 | num_workers: 4
34 | drop_last: False
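35 |
36 | # NOTE: both dataloaders currently point at the train2017 subset
37 | # (small_instances_train2017.json); for a real evaluation, switch
38 | # val_dataloader to val2017 and instances_val2017.json.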
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: ['https://github.com/lyuwenyu/cvperception/assets/17582080/2b4bfcd5-5c0f-45fd-badf-3f6e5b0249ac'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r18vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
14 | ResNet:
15 | depth: 18
16 | variant: d
17 | return_idx: [1, 2, 3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
22 | HybridEncoder:
23 | hidden_dim: 256
24 | use_encoder_idx: [2]
25 | num_encoder_layers: 1
26 | encoder_layer:
27 | name: TransformerLayer
28 | d_model: 256
29 | nhead: 8
30 | dim_feedforward: 1024
31 | dropout: 0.
32 | activation: 'gelu'
33 | expansion: 0.5
34 | depth_mult: 1.0
35 |
36 | RTDETRTransformer:
37 | eval_idx: -1
38 | num_decoder_layers: 3
39 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r34vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams
14 | ResNet:
15 | depth: 34
16 | variant: d
17 | return_idx: [1, 2, 3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
22 | HybridEncoder:
23 | hidden_dim: 256
24 | use_encoder_idx: [2]
25 | num_encoder_layers: 1
26 | encoder_layer:
27 | name: TransformerLayer
28 | d_model: 256
29 | nhead: 8
30 | dim_feedforward: 1024
31 | dropout: 0.
32 | activation: 'gelu'
33 | expansion: 0.5
34 | depth_mult: 1.0
35 |
36 | RTDETRTransformer:
37 | eval_idx: -1
38 | num_decoder_layers: 4
39 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import trainer
16 | from .trainer import *
17 |
18 | from . import callbacks
19 | from .callbacks import *
20 |
21 | from . import env
22 | from .env import *
23 |
24 | __all__ = trainer.__all__ \
25 | + callbacks.__all__ \
26 | + env.__all__
27 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r101vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
14 |
15 | ResNet:
16 | # index 0 stands for res2
17 | depth: 101
18 | variant: d
19 | norm_type: bn
20 | freeze_at: 0
21 | return_idx: [1, 2, 3]
22 | lr_mult_list: [0.01, 0.01, 0.01, 0.01]
23 | num_stages: 4
24 | freeze_stem_only: True
25 |
26 | HybridEncoder:
27 | hidden_dim: 384
28 | use_encoder_idx: [2]
29 | num_encoder_layers: 1
30 | encoder_layer:
31 | name: TransformerLayer
32 | d_model: 384
33 | nhead: 8
34 | dim_feedforward: 2048
35 | dropout: 0.
36 | activation: 'gelu'
37 | expansion: 1.0
38 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_hgnetv2_x_6x_coco/model_final
10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams
11 | find_unused_parameters: True
12 | log_iter: 200
13 |
14 |
15 |
16 | DETR:
17 | backbone: PPHGNetV2
18 |
19 |
20 | PPHGNetV2:
21 | arch: 'X'
22 | return_idx: [1, 2, 3]
23 | freeze_stem_only: True
24 | freeze_at: 0
25 | freeze_norm: True
26 | lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]
27 |
28 |
29 | HybridEncoder:
30 | hidden_dim: 384
31 | use_encoder_idx: [2]
32 | num_encoder_layers: 1
33 | encoder_layer:
34 | name: TransformerLayer
35 | d_model: 384
36 | nhead: 8
37 | dim_feedforward: 2048
38 | dropout: 0.
39 | activation: 'gelu'
40 | expansion: 1.0
41 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/misc/visualizer.py:
--------------------------------------------------------------------------------
1 | """"by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.utils.data
6 |
7 | import torchvision
8 | torchvision.disable_beta_transforms_warning()
9 |
10 | import PIL
11 |
12 | __all__ = ['show_sample']
13 |
14 | def show_sample(sample):
15 | """for coco dataset/dataloader
16 | """
17 | import matplotlib.pyplot as plt
18 | from torchvision.transforms.v2 import functional as F
19 | from torchvision.utils import draw_bounding_boxes
20 |
21 | image, target = sample
22 | if isinstance(image, PIL.Image.Image):
23 | image = F.to_image_tensor(image)
24 |
25 | image = F.convert_dtype(image, torch.uint8)
26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)
27 |
28 | fig, ax = plt.subplots()
29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy())
30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
31 | fig.tight_layout()
32 | fig.show()
33 | plt.show()
34 |
35 |
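36 | # Usage sketch: show_sample(dataset[0]) -- expects a (PIL image or tensor,
37 | # target dict) pair whose 'boxes' are absolute xyxy, i.e. taken before the
38 | # ConvertBox/normalization transforms.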
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import metrics
16 |
17 | from .metrics import *
18 | from .pose3d_metrics import *
19 |
20 | from . import mot_metrics
21 | from .mot_metrics import *
22 | __all__ = metrics.__all__ + mot_metrics.__all__
23 |
24 | from . import mcmot_metrics
25 | from .mcmot_metrics import *
26 | __all__ += mcmot_metrics.__all__  # extend, don't overwrite (keeps mot_metrics exports)
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r34vd_6x_coco
12 |
13 |
14 | PResNet:
15 | depth: 34
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformer:
28 | num_decoder_layers: 4
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
37 | weight_decay: 0.
38 | lr: 0.00001
39 | -
40 | params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r18vd_6x_coco
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 | HybridEncoder:
20 | in_channels: [128, 256, 512]
21 | hidden_dim: 256
22 | expansion: 0.5
23 |
24 |
25 | RTDETRTransformer:
26 | eval_idx: -1
27 | num_decoder_layers: 3
28 | num_denoising: 100
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*norm).*$'
37 | lr: 0.00001
38 | weight_decay: 0.
39 | -
40 | params: '^(?=.*backbone)(?!.*norm).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
50 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import warnings
16 | warnings.filterwarnings(
17 | action='ignore', category=DeprecationWarning, module='ops')
18 |
19 |
20 | from .ops import *
21 | from .backbones import *
22 | from .heads import *
23 | from .losses import *
24 | from .architectures import *
25 | from .post_process import *
26 | from .layers import *
27 | from .transformers import *
28 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/arch/classification.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from src.core import register
5 |
6 |
7 | __all__ = ['Classification', 'ClassHead']
8 |
9 |
10 | @register
11 | class Classification(nn.Module):
12 | __inject__ = ['backbone', 'head']
13 |
14 | def __init__(self, backbone: nn.Module, head: nn.Module=None):
15 | super().__init__()
16 |
17 | self.backbone = backbone
18 | self.head = head
19 |
20 | def forward(self, x):
21 | x = self.backbone(x)
22 |
23 | if self.head is not None:
24 | x = self.head(x)
25 |
26 | return x
27 |
28 |
29 | @register
30 | class ClassHead(nn.Module):
31 | def __init__(self, hidden_dim, num_classes):
32 | super().__init__()
33 | self.pool = nn.AdaptiveAvgPool2d(1)
34 | self.proj = nn.Linear(hidden_dim, num_classes)
35 |
36 | def forward(self, x):
37 | x = x[0] if isinstance(x, (list, tuple)) else x
38 | x = self.pool(x)
39 | x = x.reshape(x.shape[0], -1)
40 | x = self.proj(x)
41 | return x
42 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import (core, data, engine, modeling, optimizer, metrics, utils)
16 |
17 |
18 | try:
19 | from .version import full_version as __version__
20 | from .version import commit as __git_commit__
21 | except ImportError:
22 | import sys
23 | sys.stderr.write("Warning: import ppdet from source directory " \
24 | "without installing, run 'python setup.py install' to " \
25 | "install ppdet firstly\n")
26 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/create_list.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import create_voc_list
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | create_voc_list(voc_path)
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/coco/download_coco.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'coco')
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/download_voc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'voc')
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .resnet import *
16 | from .darknet import *
17 | from .mobilenet_v1 import *
18 | from .mobilenet_v3 import *
19 | from .shufflenet_v2 import *
20 | from .swin_transformer import *
21 | from .lcnet import *
22 | from .cspresnet import *
23 | from .csp_darknet import *
24 | from .convnext import *
25 | from .vision_transformer import *
26 | from .mobileone import *
27 | from .trans_encoder import *
28 | from .focalnet import *
29 | from .vit_mae import *
30 | from .hgnet_v2 import *
31 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/shape_spec.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # The code is based on:
16 | # https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/shape_spec.py
17 |
18 | from collections import namedtuple
19 |
20 |
21 | class ShapeSpec(
22 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
23 | def __new__(cls, channels=None, height=None, width=None, stride=None):
24 | return super(ShapeSpec, cls).__new__(cls, channels, height, width,
25 | stride)
26 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/dataloader.yml:
--------------------------------------------------------------------------------
1 | # num_classes: 91
2 | # remap_mscoco_category: True
3 |
4 | train_dataloader:
5 | dataset:
6 | return_masks: False
7 | transforms:
8 | ops:
9 | - {type: RandomPhotometricDistort, p: 0.5}
10 | - {type: RandomZoomOut, fill: 0}
11 | - {type: RandomIoUCrop, p: 0.8}
12 | - {type: SanitizeBoundingBox, min_size: 1}
13 | - {type: RandomHorizontalFlip}
14 | - {type: Resize, size: [640, 640], }
15 | # - {type: Resize, size: 639, max_size: 640}
16 | # - {type: PadToSize, spatial_size: 640}
17 | - { type: ToImageTensor }
18 | - { type: ConvertDtype }
19 | - { type: SanitizeBoundingBox, min_size: 1 }
20 | - { type: ConvertBox, out_fmt: 'cxcywh', normalize: True }
21 | shuffle: True
22 | batch_size: 1
23 | num_workers: 1
24 | collate_fn: default_collate_fn
25 |
26 |
27 | val_dataloader:
28 | dataset:
29 | transforms:
30 | ops:
31 | # - {type: Resize, size: 639, max_size: 640}
32 | # - {type: PadToSize, spatial_size: 640}
33 | - { type: Resize, size: [ 640, 640 ] }
34 | - { type: ToImageTensor }
35 | - { type: ConvertDtype }
36 | shuffle: False
37 | batch_size: 1
38 | num_workers: 1
39 | collate_fn: default_collate_fn
40 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/tools/train.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import os
5 | import sys
6 |
7 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
8 | import argparse
9 |
10 | import src.misc.dist as dist
11 | from src.core import YAMLConfig
12 | from src.solver import TASKS
13 |
14 |
15 | def main(args, ) -> None:
16 | '''main
17 | '''
18 | dist.init_distributed()
19 |
20 | assert not all([args.tuning, args.resume]), \
21 |         'Only support from_scratch, resume, or tuning at one time'
22 |
23 | cfg = YAMLConfig(
24 | args.config,
25 | resume=args.resume,
26 | use_amp=args.amp,
27 | tuning=args.tuning
28 | )
29 |
30 | solver = TASKS[cfg.yaml_cfg['task']](cfg)
31 |
32 | if args.test_only:
33 | solver.val()
34 | else:
35 | solver.fit()
36 |
37 |
38 | if __name__ == '__main__':
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('--config', '-c', type=str, )
41 | parser.add_argument('--resume', '-r', type=str, )
42 | parser.add_argument('--tuning', '-t', type=str, )
43 | parser.add_argument('--test-only', action='store_true', default=False,)
44 | parser.add_argument('--amp', action='store_true', default=False,)
45 |
46 | args = parser.parse_args()
47 |
48 | main(args)
49 |
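50 |
51 | # Example invocations (a sketch; config and checkpoint paths are illustrative
52 | # and depend on which files exist under configs/rtdetr/):
53 | #   train on a single GPU with AMP:
54 | #     python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --amp
55 | #   evaluate an existing checkpoint only:
56 | #     python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml -r path/to/checkpoint.pth --test-only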
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/rtdetr_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 4
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomDistort: {prob: 0.8}
6 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
7 | - RandomCrop: {prob: 0.8}
8 | - RandomFlip: {}
9 | batch_transforms:
10 | - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
11 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
12 | - NormalizeBox: {}
13 | - BboxXYXY2XYWH: {}
14 | - Permute: {}
15 | batch_size: 4
16 | shuffle: true
17 | drop_last: true
18 | collate_batch: false
19 | use_shared_memory: false
20 |
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} # target_size: (h, w)
26 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
27 | - Permute: {}
28 | batch_size: 4
29 | shuffle: false
30 | drop_last: false
31 |
32 |
33 | TestReader:
34 | inputs_def:
35 | image_shape: [3, 640, 640]
36 | sample_transforms:
37 | - Decode: {}
38 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
39 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
40 | - Permute: {}
41 | batch_size: 1
42 | shuffle: false
43 | drop_last: false
44 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/rtdetr.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import random
9 | import numpy as np
10 |
11 | from src.core import register
12 |
13 | __all__ = ['RTDETR', ]
14 |
15 |
16 | # the top-level model; only a small amount of code lives here
17 | @register
18 | class RTDETR(nn.Module):
19 | __inject__ = ['backbone', 'encoder', 'decoder', ]
20 |
21 | def __init__(self, backbone: nn.Module, encoder, decoder, multi_scale=None):
22 | super().__init__()
23 | self.backbone = backbone
24 | self.decoder = decoder
25 | self.encoder = encoder
26 |         # candidate input sizes for multi-scale training, e.g. [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
27 | self.multi_scale = multi_scale
28 |
29 | def forward(self, x, targets=None):
30 |         # randomly pick one of the sizes and rescale the input images x by interpolation
31 | if self.multi_scale and self.training:
32 | sz = np.random.choice(self.multi_scale)
33 | x = F.interpolate(x, size=[sz, sz])
34 |         # backbone forward
35 | x = self.backbone(x)
36 |         # encoder forward (HybridEncoder)
37 | x = self.encoder(x)
38 |         # decoder forward (RTDETRTransformer; in fact only the decoder in the conventional sense)
39 | x = self.decoder(x, targets)
40 |
41 | return x
42 |
43 | def deploy(self, ):
44 | self.eval()
45 | for m in self.modules():
46 | if hasattr(m, 'convert_to_deploy'):
47 | m.convert_to_deploy()
48 | return self
49 |
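50 |
51 | if __name__ == '__main__':
52 |     # minimal smoke test (a sketch): stand-in modules replace the real backbone,
53 |     # encoder and decoder, which are normally injected from the yaml config.
54 |     # Run from the repo root so that `src` is importable.
55 |     class _DummyDecoder(nn.Module):
56 |         def forward(self, x, targets=None):
57 |             return x
58 |
59 |     model = RTDETR(nn.Identity(), nn.Identity(), _DummyDecoder(),
60 |                    multi_scale=[480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800])
61 |     model.train()
62 |     out = model(torch.rand(2, 3, 640, 640))
63 |     print(out.shape)  # spatial size equals one of the multi_scale entries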
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/optimizer/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.nn as nn
20 |
21 | from typing import List
22 |
23 |
24 | def get_bn_running_state_names(model: nn.Layer) -> List[str]:
25 | """Get all bn state full names including running mean and variance
26 | """
27 | names = []
28 | for n, m in model.named_sublayers():
29 | if isinstance(m, (nn.BatchNorm2D, nn.SyncBatchNorm)):
30 | assert hasattr(m, '_mean'), f'assert {m} has _mean'
31 | assert hasattr(m, '_variance'), f'assert {m} has _variance'
32 | running_mean = f'{n}._mean'
33 | running_var = f'{n}._variance'
34 | names.extend([running_mean, running_var])
35 |
36 | return names
37 |
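38 |
39 | if __name__ == '__main__':
40 |     # quick sanity check (a sketch): sublayer '1' in the Sequential is the
41 |     # BatchNorm2D layer, so its running statistics are reported as
42 |     # '1._mean' and '1._variance'
43 |     model = nn.Sequential(nn.Conv2D(3, 8, 3), nn.BatchNorm2D(8))
44 |     print(get_bn_running_state_names(model))  # ['1._mean', '1._variance']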
--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # Some of the code and tools used for the speed tests in the paper
2 |
3 |
4 | ## Measuring the speed of the YOLO series [in progress]
5 | Take [yolov8](https://github.com/ultralytics/ultralytics) as an example.
6 |
7 |
8 | 1. Convert to ONNX
9 |
10 | Run the `export_onnx` function in `yolov8_onnx.py`; the newly added code mainly handles converting the output format.
11 |
12 |
13 | 2. Insert NMS
14 |
15 | Use the `yolo_insert_nms` function in `utils.py`; after exporting the ONNX model, inspect its structure with [Netron](https://netron.app/).
16 |
17 |
18 | 3. Convert to TensorRT
19 |
20 | Convert with the script in `trtexec.md`, or with the Python code in `utils.py` (a Python sketch is given at the end of this file).
21 | ```bash
22 | # trtexec -h
23 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16
24 | ```
25 |
26 |
27 | 4. Speed test with trtexec
28 |
29 | Use the script in `trtexec.md` again, with the `--buildOnly` flag removed.
30 |
31 |
32 | 5. Profiling (optional)
33 |
34 | Prefix the command from step 4 with:
35 | ```bash
36 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms
37 | ```
38 | The result can be visualized and analyzed with nsys.
39 |
40 |
41 | 6. Speed test or deployment in Python
42 |
43 | To measure the average speed of a model on the COCO val set, run inference with the code in `trtinfer.py`.
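44 |
45 |
46 | ### Appendix: building the engine from Python (sketch)
47 |
48 | A minimal sketch of the Python route mentioned in step 3, assuming TensorRT 8.x is installed; names and paths are illustrative, the exact helper in `utils.py` may differ, and the `EfficientNMS_TRT` plugin must be loadable at both build and run time.
49 | ```python
50 | import tensorrt as trt
51 |
52 | def build_engine(onnx_path='yolov8l_w_nms.onnx', engine_path='yolov8l_w_nms.engine', fp16=True):
53 |     logger = trt.Logger(trt.Logger.WARNING)
54 |     trt.init_libnvinfer_plugins(logger, '')  # register TRT plugins, incl. EfficientNMS_TRT
55 |     builder = trt.Builder(logger)
56 |     network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
57 |     parser = trt.OnnxParser(network, logger)
58 |     with open(onnx_path, 'rb') as f:
59 |         assert parser.parse(f.read()), parser.get_error(0)
60 |     config = builder.create_builder_config()
61 |     if fp16:
62 |         config.set_flag(trt.BuilderFlag.FP16)
63 |     engine = builder.build_serialized_network(network, config)
64 |     with open(engine_path, 'wb') as f:
65 |         f.write(engine)
66 |
67 | if __name__ == '__main__':
68 |     build_engine()
69 | ```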
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/cls_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | def _get_class_default_kwargs(cls, *args, **kwargs):
17 | """
18 | Get default arguments of a class in dict format, if args and
19 | kwargs is specified, it will replace default arguments
20 | """
21 | varnames = cls.__init__.__code__.co_varnames
22 | argcount = cls.__init__.__code__.co_argcount
23 | keys = varnames[:argcount]
24 | assert keys[0] == 'self'
25 | keys = keys[1:]
26 |
27 | values = list(cls.__init__.__defaults__)
28 | assert len(values) == len(keys)
29 |
30 | if len(args) > 0:
31 | for i, arg in enumerate(args):
32 | values[i] = arg
33 |
34 | default_kwargs = dict(zip(keys, values))
35 |
36 | if len(kwargs) > 0:
37 | for k, v in kwargs.items():
38 | default_kwargs[k] = v
39 |
40 | return default_kwargs
41 |
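42 |
43 | if __name__ == '__main__':
44 |     # usage sketch with a hypothetical class; note that every __init__
45 |     # parameter must have a default, since this helper zips co_varnames
46 |     # against __defaults__
47 |     class _Conv:
48 |         def __init__(self, in_ch=3, out_ch=64, bias=True):
49 |             pass
50 |
51 |     # positional args fill the defaults left to right, kwargs override by name
52 |     print(_get_class_default_kwargs(_Conv, 8, bias=False))
53 |     # -> {'in_ch': 8, 'out_ch': 64, 'bias': False}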
--------------------------------------------------------------------------------
/rtdetr_pytorch/temp.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import defaultdict
3 |
4 | new_file = '/mnt/h/ml_dataset_home/coco/annotations/instances_train2017_remove_1021.json'
5 |
6 | with open('/mnt/h/ml_dataset_home/coco/annotations/instances_train2017.json') as file:
7 | data = json.load(file)
8 | origin = data.copy()
9 | images = data['images']
10 | print("原始images 数量 {}".format(len(images)))
11 | annos = data['annotations']
12 | no_anno = []
13 | id_anno = defaultdict(list)
14 | for anno in annos:
15 | id = anno['image_id']
16 | id_anno[id].append(anno)
17 | bbox = anno.get('bbox', None)
18 |         if bbox is None:
19 |             print("anno {} has no bbox".format(anno['id']))
20 |         elif len(bbox) == 0:
21 |             print("anno {} has an empty bbox".format(anno['id']))
22 |
23 |
24 |
25 | valid_image = []
26 | invalid_image_id = []
27 | for image in images:
28 | id = image['id']
29 | if id in id_anno:
30 | valid_image.append(image)
31 | else:
32 | invalid_image_id.append(id)
33 | print("invalid images: {}".format(len(invalid_image_id)))
34 |
35 | data['images'] = valid_image
36 |
37 | print(len(data['images']))
38 |
39 | # with open(new_file, 'w') as newf:
40 | # json.dump(data, newf, indent=4)
41 |
42 | print("新文件制作完成")
43 |
44 | # Loaded 118287 images in COCO format from /mnt/h/ml_dataset_home/coco/annotations/instances_train2017.json
45 | # Removed 1021 images with no usable annotations. 117266 images left.
46 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | architecture: DETR
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: True
5 | ema_decay: 0.9999
6 | ema_decay_type: "exponential"
7 | ema_filter_no_grad: True
8 | hidden_dim: 256
9 | use_focal_loss: True
10 | eval_size: [640, 640] # h, w
11 |
12 |
13 | DETR:
14 | backbone: ResNet
15 | neck: HybridEncoder
16 | transformer: RTDETRTransformer
17 | detr_head: DINOHead
18 | post_process: DETRPostProcess
19 |
20 | ResNet:
21 | # index 0 stands for res2
22 | depth: 50
23 | variant: d
24 | norm_type: bn
25 | freeze_at: 0
26 | return_idx: [1, 2, 3]
27 | lr_mult_list: [0.1, 0.1, 0.1, 0.1]
28 | num_stages: 4
29 | freeze_stem_only: True
30 |
31 | HybridEncoder:
32 | hidden_dim: 256
33 | use_encoder_idx: [2]
34 | num_encoder_layers: 1
35 | encoder_layer:
36 | name: TransformerLayer
37 | d_model: 256
38 | nhead: 8
39 | dim_feedforward: 1024
40 | dropout: 0.
41 | activation: 'gelu'
42 | expansion: 1.0
43 |
44 |
45 | RTDETRTransformer:
46 | num_queries: 300
47 | position_embed_type: sine
48 | feat_strides: [8, 16, 32]
49 | num_levels: 3
50 | nhead: 8
51 | num_decoder_layers: 6
52 | dim_feedforward: 1024
53 | dropout: 0.0
54 | activation: relu
55 | num_denoising: 100
56 | label_noise_ratio: 0.5
57 | box_noise_scale: 1.0
58 | learnt_init_query: False
59 |
60 | DINOHead:
61 | loss:
62 | name: DINOLoss
63 | loss_coeff: {class: 1, bbox: 5, giou: 2}
64 | aux_loss: True
65 | use_vfl: True
66 | matcher:
67 | name: HungarianMatcher
68 | matcher_coeff: {class: 2, bbox: 5, giou: 2}
69 |
70 | DETRPostProcess:
71 | num_top_queries: 300
72 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/engine/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import random
21 | import numpy as np
22 |
23 | import paddle
24 | from paddle.distributed import fleet
25 |
26 | __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
27 |
28 |
29 | def init_fleet_env(find_unused_parameters=False):
30 | strategy = fleet.DistributedStrategy()
31 | strategy.find_unused_parameters = find_unused_parameters
32 | fleet.init(is_collective=True, strategy=strategy)
33 |
34 |
35 | def init_parallel_env():
36 | env = os.environ
37 | dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
38 | if dist:
39 | trainer_id = int(env['PADDLE_TRAINER_ID'])
40 | local_seed = (99 + trainer_id)
41 | random.seed(local_seed)
42 | np.random.seed(local_seed)
43 |
44 | paddle.distributed.init_parallel_env()
45 |
46 |
47 | def set_random_seed(seed):
48 | paddle.seed(seed)
49 | random.seed(seed)
50 | np.random.seed(seed)
51 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: SetCriterion
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | RTDETR:
9 | backbone: PResNet
10 | encoder: HybridEncoder
11 | decoder: RTDETRTransformer
12 | multi_scale: [ 480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800 ]
13 |
14 | # backbone
15 | PResNet:
16 | depth: 50
17 | variant: d
18 | freeze_at: 0
19 | return_idx: [ 1, 2, 3 ]
20 | num_stages: 4
21 | freeze_norm: True
22 | pretrained: True
23 |
24 | # encoder
25 | HybridEncoder:
26 | in_channels: [ 512, 1024, 2048 ]
27 | feat_strides: [ 8, 16, 32 ]
28 |
29 | # intra
30 | hidden_dim: 256
31 | use_encoder_idx: [ 2 ]
32 | num_encoder_layers: 1
33 | nhead: 8
34 | dim_feedforward: 1024
35 | dropout: 0.
36 | enc_act: 'gelu'
37 | pe_temperature: 10000
38 |
39 | # cross
40 | expansion: 1.0
41 | depth_mult: 1
42 | act: 'silu'
43 |
44 | # eval
45 | eval_spatial_size: [ 640, 640 ]
46 |
47 | # decoder
48 | RTDETRTransformer:
49 | feat_channels: [ 256, 256, 256 ]
50 | feat_strides: [ 8, 16, 32 ]
51 | hidden_dim: 256
52 | num_levels: 3
53 |
54 | num_queries: 300
55 |
56 | num_decoder_layers: 6
57 | num_denoising: 100
58 |
59 | eval_idx: -1
60 | eval_spatial_size: [ 640, 640 ]
61 |
62 |
63 | use_focal_loss: True
64 |
65 | RTDETRPostProcessor:
66 | num_top_queries: 300
67 |
68 |
69 | SetCriterion:
70 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
71 | losses: ['vfl', 'boxes', ]
72 | alpha: 0.75
73 | gamma: 2.0
74 |
75 | matcher:
76 | type: HungarianMatcher
77 | weight_dict: { cost_class: 2, cost_bbox: 5, cost_giou: 2 }
78 | # use_focal_loss: True
79 | alpha: 0.25
80 | gamma: 2.0
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py
3 |
4 | by lyuwenyu
5 | """
6 |
7 | from collections import OrderedDict
8 | from typing import Dict, List
9 |
10 |
11 | import torch.nn as nn
12 |
13 |
14 | class IntermediateLayerGetter(nn.ModuleDict):
15 | """
16 | Module wrapper that returns intermediate layers from a model
17 |
18 | It has a strong assumption that the modules have been registered
19 | into the model in the same order as they are used.
20 | This means that one should **not** reuse the same nn.Module
21 | twice in the forward if you want this to work.
22 |
23 | Additionally, it is only able to query submodules that are directly
24 | assigned to the model. So if `model` is passed, `model.feature1` can
25 | be returned, but not `model.feature1.layer2`.
26 | """
27 |
28 | _version = 3
29 |
30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]):
32 |             raise ValueError("some return_layers are not present in the model; "\
33 |                 "available children: {}".format([name for name, _ in model.named_children()]))
34 | orig_return_layers = return_layers
35 | return_layers = {str(k): str(k) for k in return_layers}
36 | layers = OrderedDict()
37 | for name, module in model.named_children():
38 | layers[name] = module
39 | if name in return_layers:
40 | del return_layers[name]
41 | if not return_layers:
42 | break
43 |
44 | super().__init__(layers)
45 | self.return_layers = orig_return_layers
46 |
47 | def forward(self, x):
48 | # out = OrderedDict()
49 | outputs = []
50 | for name, module in self.items():
51 | x = module(x)
52 | if name in self.return_layers:
53 | # out_name = self.return_layers[name]
54 | # out[out_name] = x
55 | outputs.append(x)
56 |
57 | return outputs
58 |
59 |
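60 |
61 | if __name__ == '__main__':
62 |     # usage sketch, assuming torchvision is installed: grab the stride-8/16/32
63 |     # feature maps from a torchvision ResNet by the names of its child modules
64 |     import torch
65 |     from torchvision.models import resnet50
66 |
67 |     body = IntermediateLayerGetter(resnet50(), return_layers=['layer2', 'layer3', 'layer4'])
68 |     feats = body(torch.rand(1, 3, 224, 224))
69 |     print([f.shape for f in feats])
70 |     # -> [torch.Size([1, 512, 28, 28]), torch.Size([1, 1024, 14, 14]), torch.Size([1, 2048, 7, 7])]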
--------------------------------------------------------------------------------
/rtdetr_paddle/tools/slice_image.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import argparse
16 | from tqdm import tqdm
17 |
18 |
19 | def slice_data(image_dir, dataset_json_path, output_dir, slice_size,
20 | overlap_ratio):
21 | try:
22 | from sahi.scripts.slice_coco import slice
23 |     except Exception as e:
24 |         raise RuntimeError(
25 |             'Unable to use sahi to slice images; please install sahi (e.g. `pip install sahi`), see https://github.com/obss/sahi'
26 |         ) from e
27 | tqdm.write(
28 | f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}")
29 | slice(
30 | image_dir=image_dir,
31 | dataset_json_path=dataset_json_path,
32 | output_dir=output_dir,
33 | slice_size=slice_size,
34 | overlap_ratio=overlap_ratio, )
35 |
36 |
37 | def main():
38 | parser = argparse.ArgumentParser()
39 | parser.add_argument(
40 | '--image_dir', type=str, default=None, help="The image folder path.")
41 | parser.add_argument(
42 | '--json_path', type=str, default=None, help="Dataset json path.")
43 | parser.add_argument(
44 | '--output_dir', type=str, default=None, help="Output dir.")
45 | parser.add_argument(
46 | '--slice_size', type=int, default=500, help="slice_size")
47 | parser.add_argument(
48 | '--overlap_ratio', type=float, default=0.25, help="overlap_ratio")
49 | args = parser.parse_args()
50 |
51 | slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size,
52 | args.overlap_ratio)
53 |
54 |
55 | if __name__ == "__main__":
56 | main()
57 |
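58 |
59 | # Example invocation (a sketch; paths are illustrative):
60 | #   python tools/slice_image.py --image_dir dataset/coco/train2017 \
61 | #     --json_path dataset/coco/annotations/instances_train2017.json \
62 | #     --output_dir dataset/coco_sliced --slice_size 640 --overlap_ratio 0.25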
--------------------------------------------------------------------------------
/benchmark/yolov8_onnx.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import torch
5 | import torchvision
6 |
7 | import numpy as np
8 | import onnxruntime as ort
9 |
10 | from utils import yolo_insert_nms
11 |
12 | class YOLOv8(torch.nn.Module):
13 | def __init__(self, name) -> None:
14 | super().__init__()
15 | from ultralytics import YOLO
16 | # Load a model
17 | # build a new model from scratch
18 | # model = YOLO(f'{name}.yaml')
19 |
20 | # load a pretrained model (recommended for training)
21 | model = YOLO(f'{name}.pt')
22 | self.model = model.model
23 |
24 | def forward(self, x):
25 | '''https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py#L216
26 | '''
27 | pred: torch.Tensor = self.model(x)[0] # n 84 8400,
28 | pred = pred.permute(0, 2, 1)
29 | boxes, scores = pred.split([4, 80], dim=-1)
30 | boxes = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
31 |
32 | return boxes, scores
33 |
34 |
35 |
36 | def export_onnx(name='yolov8n'):
37 | '''export onnx
38 | '''
39 | m = YOLOv8(name)
40 |
41 | x = torch.rand(1, 3, 640, 640)
42 | dynamic_axes = {
43 |         'image': {0: 'batch'}  # mark the batch dimension as dynamic
44 | }
45 | torch.onnx.export(m, x, f'{name}.onnx',
46 | input_names=['image'],
47 | output_names=['boxes', 'scores'],
48 | opset_version=13,
49 | dynamic_axes=dynamic_axes)
50 |
51 | data = np.random.rand(1, 3, 640, 640).astype(np.float32)
52 | sess = ort.InferenceSession(f'{name}.onnx')
53 | _ = sess.run(output_names=None, input_feed={'image': data})
54 |
55 |
56 | if __name__ == '__main__':
57 |
58 | import argparse
59 | parser = argparse.ArgumentParser()
60 | parser.add_argument('--name', type=str, default='yolov8l')
61 | parser.add_argument('--score_threshold', type=float, default=0.001)
62 | parser.add_argument('--iou_threshold', type=float, default=0.7)
63 | parser.add_argument('--max_output_boxes', type=int, default=300)
64 | args = parser.parse_args()
65 |
66 | export_onnx(name=args.name)
67 |
68 | yolo_insert_nms(path=f'{args.name}.onnx',
69 | score_threshold=args.score_threshold,
70 | iou_threshold=args.iou_threshold,
71 | max_output_boxes=args.max_output_boxes, )
72 |
73 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.nn as nn
21 | import paddle.nn.functional as F
22 | from ppdet.core.workspace import register
23 |
24 | __all__ = ['SmoothL1Loss']
25 |
26 | @register
27 | class SmoothL1Loss(nn.Layer):
28 | """Smooth L1 Loss.
29 | Args:
30 |         beta (float): controls the smooth region; the loss becomes L1 loss when beta=0.0
31 |         loss_weight (float): the final loss will be multiplied by this factor
32 | """
33 | def __init__(self,
34 | beta=1.0,
35 | loss_weight=1.0):
36 | super(SmoothL1Loss, self).__init__()
37 | assert beta >= 0
38 | self.beta = beta
39 | self.loss_weight = loss_weight
40 |
41 | def forward(self, pred, target, reduction='none'):
42 | """forward function, based on fvcore.
43 | Args:
44 | pred (Tensor): prediction tensor
45 | target (Tensor): target tensor, pred.shape must be the same as target.shape
46 | reduction (str): the way to reduce loss, one of (none, sum, mean)
47 | """
48 | assert reduction in ('none', 'sum', 'mean')
49 | target = target.detach()
50 | if self.beta < 1e-5:
51 | loss = paddle.abs(pred - target)
52 | else:
53 | n = paddle.abs(pred - target)
54 | cond = n < self.beta
55 | loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta)
56 | if reduction == 'mean':
57 | loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
58 | elif reduction == 'sum':
59 | loss = loss.sum()
60 | return loss * self.loss_weight
61 |
--------------------------------------------------------------------------------
/README_cn.md:
--------------------------------------------------------------------------------
1 | Simplified Chinese | [English](README.md)
2 |
3 | # RT-DETR
4 |
5 | This is the official implementation of the paper "[DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069)".
6 |
7 |
8 | 
9 |
10 |
11 | ## Latest News
12 |
13 | - Released the RT-DETR-R50 and RT-DETR-R101 models
14 | - Released the RT-DETR-R50-m model (an example of model scaling)
15 | - Released the RT-DETR-R34 and RT-DETR-R18 models
16 | - Released the RT-DETR-L and RT-DETR-X models
17 |
18 |
19 | ## Repositories
20 | - [RT-DETR-paddle](./rtdetr_paddle)
21 | - [RT-DETR-pytorch](./rtdetr_pytorch)
22 |
23 |
24 | ## Introduction
25 |
26 | RT-DETR is the first real-time end-to-end object detector. Specifically, we design an efficient hybrid encoder that processes multi-scale features by decoupling intra-scale interaction and cross-scale fusion, and we propose an IoU-aware query selection mechanism to optimize the initialization of decoder queries. In addition, RT-DETR supports flexible adjustment of inference speed by using different numbers of decoder layers, without retraining, which facilitates the practical application of real-time object detectors. RT-DETR-R50 achieves 53.1% AP on COCO val2017 and 108 FPS on a T4 GPU, and RT-DETR-R101 achieves 54.3% AP and 74 FPS, outperforming all YOLO detectors of the same scale in both speed and accuracy. After pre-training on Objects365, RT-DETR-R50 and RT-DETR-R101 achieve 55.3% and 56.2% AP, respectively.
27 | For more details, please refer to our [paper](https://arxiv.org/abs/2304.08069).
28 |
29 |
30 | 
31 |
32 |
33 | ## Citing RT-DETR
34 | If you use RT-DETR in your research, please cite our paper:
35 | ```
36 | @misc{lv2023detrs,
37 |       title={DETRs Beat YOLOs on Real-time Object Detection},
38 |       author={Yian Zhao and Wenyu Lv and Shangliang Xu and Jinman Wei and Guanzhong Wang and Qingqing Dang and Yi Liu and Jie Chen},
39 |       year={2023},
40 |       eprint={2304.08069},
41 |       archivePrefix={arXiv},
42 |       primaryClass={cs.CV}
43 | }
44 | ```
45 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | import os
17 | import sys
18 |
19 | import paddle.distributed as dist
20 |
21 | __all__ = ['setup_logger']
22 |
23 | logger_initialized = []
24 |
25 |
26 | def setup_logger(name="ppdet", output=None):
27 | """
28 | Initialize logger and set its verbosity level to INFO.
29 | Args:
30 | output (str): a file name or a directory to save log. If None, will not save log file.
31 | If ends with ".txt" or ".log", assumed to be a file name.
32 | Otherwise, logs will be saved to `output/log.txt`.
33 | name (str): the root module name of this logger
34 |
35 | Returns:
36 | logging.Logger: a logger
37 | """
38 | logger = logging.getLogger(name)
39 | if name in logger_initialized:
40 | return logger
41 |
42 | logger.setLevel(logging.INFO)
43 | logger.propagate = False
44 |
45 | formatter = logging.Formatter(
46 | "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
47 | datefmt="%m/%d %H:%M:%S")
48 | # stdout logging: master only
49 | local_rank = dist.get_rank()
50 | if local_rank == 0:
51 | ch = logging.StreamHandler(stream=sys.stdout)
52 | ch.setLevel(logging.DEBUG)
53 | ch.setFormatter(formatter)
54 | logger.addHandler(ch)
55 |
56 | # file logging: all workers
57 | if output is not None:
58 | if output.endswith(".txt") or output.endswith(".log"):
59 | filename = output
60 | else:
61 | filename = os.path.join(output, "log.txt")
62 | if local_rank > 0:
63 | filename = filename + ".rank{}".format(local_rank)
64 |         os.makedirs(os.path.dirname(filename), exist_ok=True)
65 | fh = logging.FileHandler(filename, mode='a')
66 | fh.setLevel(logging.DEBUG)
67 |         fh.setFormatter(formatter)
68 | logger.addHandler(fh)
69 | logger_initialized.append(name)
70 | return logger
71 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/name_adapter.py:
--------------------------------------------------------------------------------
1 | class NameAdapter(object):
2 | """Fix the backbones variable names for pretrained weight"""
3 |
4 | def __init__(self, model):
5 | super(NameAdapter, self).__init__()
6 | self.model = model
7 |
8 | @property
9 | def model_type(self):
10 | return getattr(self.model, '_model_type', '')
11 |
12 | @property
13 | def variant(self):
14 | return getattr(self.model, 'variant', '')
15 |
16 | def fix_conv_norm_name(self, name):
17 | if name == "conv1":
18 | bn_name = "bn_" + name
19 | else:
20 | bn_name = "bn" + name[3:]
21 | # the naming rule is same as pretrained weight
22 | if self.model_type == 'SEResNeXt':
23 | bn_name = name + "_bn"
24 | return bn_name
25 |
26 | def fix_shortcut_name(self, name):
27 | if self.model_type == 'SEResNeXt':
28 | name = 'conv' + name + '_prj'
29 | return name
30 |
31 | def fix_bottleneck_name(self, name):
32 | if self.model_type == 'SEResNeXt':
33 | conv_name1 = 'conv' + name + '_x1'
34 | conv_name2 = 'conv' + name + '_x2'
35 | conv_name3 = 'conv' + name + '_x3'
36 | shortcut_name = name
37 | else:
38 | conv_name1 = name + "_branch2a"
39 | conv_name2 = name + "_branch2b"
40 | conv_name3 = name + "_branch2c"
41 | shortcut_name = name + "_branch1"
42 | return conv_name1, conv_name2, conv_name3, shortcut_name
43 |
44 | def fix_basicblock_name(self, name):
45 | if self.model_type == 'SEResNeXt':
46 | conv_name1 = 'conv' + name + '_x1'
47 | conv_name2 = 'conv' + name + '_x2'
48 | shortcut_name = name
49 | else:
50 | conv_name1 = name + "_branch2a"
51 | conv_name2 = name + "_branch2b"
52 | shortcut_name = name + "_branch1"
53 | return conv_name1, conv_name2, shortcut_name
54 |
55 | def fix_layer_warp_name(self, stage_num, count, i):
56 | name = 'res' + str(stage_num)
57 | if count > 10 and stage_num == 4:
58 | if i == 0:
59 | conv_name = name + "a"
60 | else:
61 | conv_name = name + "b" + str(i)
62 | else:
63 | conv_name = name + chr(ord("a") + i)
64 | if self.model_type == 'SEResNeXt':
65 | conv_name = str(stage_num + 2) + '_' + str(i + 1)
66 | return conv_name
67 |
68 | def fix_c1_stage_name(self):
69 | return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
70 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/shm_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 |
17 | SIZE_UNIT = ['K', 'M', 'G', 'T']
18 | SHM_QUERY_CMD = 'df -h'
19 | SHM_KEY = 'shm'
20 | SHM_DEFAULT_MOUNT = '/dev/shm'
21 |
22 | # [ shared memory size check ]
23 | # In detection models, image/target data occupies a lot of memory and
24 | # will take up lots of shared memory in a multi-process DataLoader. We
25 | # use the following code to get the shared memory size and perform a
26 | # size check to disable shared memory use if it is not large enough.
27 | # The shared memory size is obtained as follows:
28 | # 1. use `df -h` to get all mount info
29 | # 2. pick the spaces whose mount info contains 'shm'
30 | # 3. if there is only one 'shm' space, return its size
31 | # 4. if there are multiple 'shm' spaces, try to find the default mount
32 | #    directory '/dev/shm' on Linux-like systems, otherwise return the
33 | #    biggest space size.
34 |
35 |
36 | def _parse_size_in_M(size_str):
37 | if size_str[-1] == 'B':
38 | num, unit = size_str[:-2], size_str[-2]
39 | else:
40 | num, unit = size_str[:-1], size_str[-1]
41 | assert unit in SIZE_UNIT, \
42 | "unknown shm size unit {}".format(unit)
43 | return float(num) * \
44 | (1024 ** (SIZE_UNIT.index(unit) - 1))
45 |
46 |
47 | def _get_shared_memory_size_in_M():
48 | try:
49 | df_infos = os.popen(SHM_QUERY_CMD).readlines()
50 |     except Exception:
51 | return None
52 | else:
53 | shm_infos = []
54 | for df_info in df_infos:
55 | info = df_info.strip()
56 | if info.find(SHM_KEY) >= 0:
57 | shm_infos.append(info.split())
58 |
59 | if len(shm_infos) == 0:
60 | return None
61 | elif len(shm_infos) == 1:
62 | return _parse_size_in_M(shm_infos[0][3])
63 | else:
64 | default_mount_infos = [
65 | si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
66 | ]
67 | if default_mount_infos:
68 | return _parse_size_in_M(default_mount_infos[0][3])
69 | else:
70 | return max([_parse_size_in_M(si[3]) for si in shm_infos])
71 |
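72 |
73 | if __name__ == '__main__':
74 |     # sanity check (a sketch): sizes are converted to megabytes,
75 |     # so '16G' -> 16 * 1024 M and '512M' -> 512 M
76 |     print(_parse_size_in_M('16G'))   # 16384.0
77 |     print(_parse_size_in_M('512M'))  # 512.0
78 |     print(_get_shared_memory_size_in_M())  # None if `df -h` is unavailable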
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/box_ops.py:
--------------------------------------------------------------------------------
1 | '''
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py
4 | '''
5 |
6 | import torch
7 | from torchvision.ops.boxes import box_area
8 |
9 |
10 | def box_cxcywh_to_xyxy(x):
11 | x_c, y_c, w, h = x.unbind(-1)
12 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
13 | (x_c + 0.5 * w), (y_c + 0.5 * h)]
14 | return torch.stack(b, dim=-1)
15 |
16 |
17 | def box_xyxy_to_cxcywh(x):
18 | x0, y0, x1, y1 = x.unbind(-1)
19 | b = [(x0 + x1) / 2, (y0 + y1) / 2,
20 | (x1 - x0), (y1 - y0)]
21 | return torch.stack(b, dim=-1)
22 |
23 |
24 | # modified from torchvision to also return the union
25 | def box_iou(boxes1, boxes2):
26 | area1 = box_area(boxes1)
27 | area2 = box_area(boxes2)
28 |
29 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
30 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
31 |
32 | wh = (rb - lt).clamp(min=0) # [N,M,2]
33 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
34 |
35 | union = area1[:, None] + area2 - inter
36 |
37 | iou = inter / union
38 | return iou, union
39 |
40 |
41 | def generalized_box_iou(boxes1, boxes2):
42 | """
43 | Generalized IoU from https://giou.stanford.edu/
44 |
45 | The boxes should be in [x0, y0, x1, y1] format
46 |
47 | Returns a [N, M] pairwise matrix, where N = len(boxes1)
48 | and M = len(boxes2)
49 | """
50 | # degenerate boxes gives inf / nan results
51 | # so do an early check
52 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
53 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
54 | iou, union = box_iou(boxes1, boxes2)
55 |
56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
58 |
59 | wh = (rb - lt).clamp(min=0) # [N,M,2]
60 | area = wh[:, :, 0] * wh[:, :, 1]
61 |
62 | return iou - (area - union) / area
63 |
64 |
65 | def masks_to_boxes(masks):
66 | """Compute the bounding boxes around the provided masks
67 |
68 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
69 |
70 | Returns a [N, 4] tensors, with the boxes in xyxy format
71 | """
72 | if masks.numel() == 0:
73 | return torch.zeros((0, 4), device=masks.device)
74 |
75 | h, w = masks.shape[-2:]
76 |
77 | y = torch.arange(0, h, dtype=torch.float)
78 | x = torch.arange(0, w, dtype=torch.float)
79 | y, x = torch.meshgrid(y, x)
80 |
81 | x_mask = (masks * x.unsqueeze(0))
82 | x_max = x_mask.flatten(1).max(-1)[0]
83 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
84 |
85 | y_mask = (masks * y.unsqueeze(0))
86 | y_max = y_mask.flatten(1).max(-1)[0]
87 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
88 |
89 | return torch.stack([x_min, y_min, x_max, y_max], 1)
90 |
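91 |
92 | if __name__ == '__main__':
93 |     # quick numeric check (a sketch): for disjoint boxes the IoU is 0 and the
94 |     # enclosing-box penalty term makes generalized_box_iou negative
95 |     b1 = torch.tensor([[0., 0., 1., 1.]])
96 |     b2 = torch.tensor([[2., 2., 3., 3.]])
97 |     print(box_iou(b1, b2)[0])            # tensor([[0.]])
98 |     print(generalized_box_iou(b1, b2))   # tensor([[-0.7778]]), i.e. 0 - (9 - 2) / 9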
--------------------------------------------------------------------------------
/benchmark/utils.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import time
5 | import contextlib
6 | import numpy as np
7 | from PIL import Image
8 | from collections import OrderedDict
9 |
10 | import onnx
11 | import torch
12 | import onnx_graphsurgeon
13 |
14 |
15 | def to_binary_data(path, size=(640, 640), output_name='input_tensor.bin'):
16 | '''--loadInputs='image:input_tensor.bin'
17 | '''
18 | im = Image.open(path).resize(size)
19 | data = np.asarray(im, dtype=np.float32).transpose(2, 0, 1)[None] / 255.
20 | data.tofile(output_name)
21 |
22 |
23 | def yolo_insert_nms(path, score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300, simplify=False):
24 | '''
25 | http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/api/onnxops/onnx__EfficientNMS_TRT.html
26 | https://huggingface.co/spaces/muttalib1326/Punjabi_Character_Detection/blob/3dd1e17054c64e5f6b2254278f96cfa2bf418cd4/utils/add_nms.py
27 | '''
28 | onnx_model = onnx.load(path)
29 |
30 | if simplify:
31 | from onnxsim import simplify
32 | onnx_model, _ = simplify(onnx_model, overwrite_input_shapes={'image': [1, 3, 640, 640]})
33 |
34 | graph = onnx_graphsurgeon.import_onnx(onnx_model)
35 | graph.toposort()
36 | graph.fold_constants()
37 | graph.cleanup()
38 |
39 | topk = max_output_boxes
40 | attrs = OrderedDict(plugin_version='1',
41 | background_class=-1,
42 | max_output_boxes=topk,
43 | score_threshold=score_threshold,
44 | iou_threshold=iou_threshold,
45 | score_activation=False,
46 | box_coding=0, )
47 |
48 | outputs = [onnx_graphsurgeon.Variable('num_dets', np.int32, [-1, 1]),
49 | onnx_graphsurgeon.Variable('det_boxes', np.float32, [-1, topk, 4]),
50 | onnx_graphsurgeon.Variable('det_scores', np.float32, [-1, topk]),
51 | onnx_graphsurgeon.Variable('det_classes', np.int32, [-1, topk])]
52 |
53 | graph.layer(op='EfficientNMS_TRT',
54 | name="batched_nms",
55 | inputs=[graph.outputs[0],
56 | graph.outputs[1]],
57 | outputs=outputs,
58 | attrs=attrs, )
59 |
60 | graph.outputs = outputs
61 | graph.cleanup().toposort()
62 |
63 |     onnx.save(onnx_graphsurgeon.export_onnx(graph), 'yolo_w_nms.onnx')
64 |
65 |
66 | class TimeProfiler(contextlib.ContextDecorator):
67 | def __init__(self, ):
68 | self.total = 0
69 |
70 | def __enter__(self, ):
71 | self.start = self.time()
72 | return self
73 |
74 | def __exit__(self, type, value, traceback):
75 | self.total += self.time() - self.start
76 |
77 | def reset(self, ):
78 | self.total = 0
79 |
80 | def time(self, ):
81 | if torch.cuda.is_available():
82 | torch.cuda.synchronize()
83 | return time.time()
84 |
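85 |
86 | if __name__ == '__main__':
87 |     # usage sketch for TimeProfiler: accumulate GPU-synchronized wall time
88 |     # over repeated runs, then report the average latency
89 |     profiler = TimeProfiler()
90 |     x = torch.rand(1, 3, 640, 640)
91 |     for _ in range(10):
92 |         with profiler:
93 |             _ = torch.nn.functional.conv2d(x, torch.rand(8, 3, 3, 3))
94 |     print(f'avg latency: {profiler.total / 10 * 1000:.3f} ms')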
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/test_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from collections import OrderedDict
6 |
7 |
8 | from src.core import register
9 |
10 |
11 | class BasicBlock(nn.Module):
12 | expansion = 1
13 |
14 | def __init__(self, in_planes, planes, stride=1):
15 | super(BasicBlock, self).__init__()
16 |
17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 |
20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
21 | self.bn2 = nn.BatchNorm2d(planes)
22 |
23 | self.shortcut = nn.Sequential()
24 | if stride != 1 or in_planes != self.expansion*planes:
25 | self.shortcut = nn.Sequential(
26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False),
27 | nn.BatchNorm2d(self.expansion*planes)
28 | )
29 | def forward(self, x):
30 | out = F.relu(self.bn1(self.conv1(x)))
31 | out = self.bn2(self.conv2(out))
32 | out += self.shortcut(x)
33 | out = F.relu(out)
34 | return out
35 |
36 |
37 |
38 | class _ResNet(nn.Module):
39 | def __init__(self, block, num_blocks, num_classes=10):
40 | super().__init__()
41 | self.in_planes = 64
42 |
43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
44 | self.bn1 = nn.BatchNorm2d(64)
45 |
46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
50 |
51 | self.linear = nn.Linear(512 * block.expansion, num_classes)
52 |
53 | def _make_layer(self, block, planes, num_blocks, stride):
54 | strides = [stride] + [1]*(num_blocks-1)
55 | layers = []
56 | for stride in strides:
57 | layers.append(block(self.in_planes, planes, stride))
58 | self.in_planes = planes * block.expansion
59 | return nn.Sequential(*layers)
60 |
61 | def forward(self, x):
62 | out = F.relu(self.bn1(self.conv1(x)))
63 | out = self.layer1(out)
64 | out = self.layer2(out)
65 | out = self.layer3(out)
66 | out = self.layer4(out)
67 | out = F.avg_pool2d(out, 4)
68 | out = out.view(out.size(0), -1)
69 | out = self.linear(out)
70 | return out
71 |
72 |
73 | @register
74 | class MResNet(nn.Module):
75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
76 | super().__init__()
77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes)
78 |
79 | def forward(self, x):
80 | return self.model(x)
81 |
82 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/stats.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import collections
16 | import numpy as np
17 |
18 | __all__ = ['SmoothedValue', 'TrainingStats']
19 |
20 |
21 | class SmoothedValue(object):
22 | """Track a series of values and provide access to smoothed values over a
23 | window or the global series average.
24 | """
25 |
26 | def __init__(self, window_size=20, fmt=None):
27 | if fmt is None:
28 | fmt = "{median:.4f} ({avg:.4f})"
29 | self.deque = collections.deque(maxlen=window_size)
30 | self.fmt = fmt
31 | self.total = 0.
32 | self.count = 0
33 |
34 | def update(self, value, n=1):
35 | self.deque.append(value)
36 | self.count += n
37 | self.total += value * n
38 |
39 | @property
40 | def median(self):
41 | return np.median(self.deque)
42 |
43 | @property
44 | def avg(self):
45 | return np.mean(self.deque)
46 |
47 | @property
48 | def max(self):
49 | return np.max(self.deque)
50 |
51 | @property
52 | def value(self):
53 | return self.deque[-1]
54 |
55 | @property
56 | def global_avg(self):
57 | return self.total / self.count
58 |
59 | def __str__(self):
60 | return self.fmt.format(
61 | median=self.median, avg=self.avg, max=self.max, value=self.value)
62 |
63 |
64 | class TrainingStats(object):
65 | def __init__(self, window_size, delimiter=' '):
66 | self.meters = None
67 | self.window_size = window_size
68 | self.delimiter = delimiter
69 |
70 | def update(self, stats):
71 | if self.meters is None:
72 | self.meters = {
73 | k: SmoothedValue(self.window_size)
74 | for k in stats.keys()
75 | }
76 | for k, v in self.meters.items():
77 | v.update(float(stats[k]))
78 |
79 | def get(self, extras=None):
80 | stats = collections.OrderedDict()
81 | if extras:
82 | for k, v in extras.items():
83 | stats[k] = v
84 | for k, v in self.meters.items():
85 | stats[k] = format(v.median, '.6f')
86 |
87 | return stats
88 |
89 | def log(self, extras=None):
90 | d = self.get(extras)
91 | strs = []
92 | for k, v in d.items():
93 | strs.append("{}: {}".format(k, str(v)))
94 | return self.delimiter.join(strs)
95 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import numbers
16 | import numpy as np
17 |
18 | try:
19 | from collections.abc import Sequence, Mapping
20 | except ImportError:
21 | from collections import Sequence, Mapping
22 |
23 |
24 | def default_collate_fn(batch):
25 | """
26 |     Default batch collating function for :code:`paddle.io.DataLoader`.
27 |     The input is a list of samples, where each element holds the data of
28 |     one sample, and a sample may be composed of lists, dictionaries,
29 |     strings, numbers and numpy arrays. This function parses the input
30 |     recursively and stacks numbers, numpy arrays and paddle.Tensor data
31 |     into batched data, e.g. for the
32 |     following input data:
33 | [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
34 | {'image': np.array(shape=[3, 224, 224]), 'label': 3},
35 | {'image': np.array(shape=[3, 224, 224]), 'label': 4},
36 | {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
37 |
38 |
39 |     This default collate function zips each number and numpy array
40 |     field together and stacks each field into a batch field as follows:
41 | {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
42 | Args:
43 | batch(list of sample data): batch should be a list of sample data.
44 |
45 | Returns:
46 | Batched data: batched each number, numpy array and paddle.Tensor
47 | in input data.
48 | """
49 | sample = batch[0]
50 | if isinstance(sample, np.ndarray):
51 | batch = np.stack(batch, axis=0)
52 | return batch
53 | elif isinstance(sample, numbers.Number):
54 | batch = np.array(batch)
55 | return batch
56 | elif isinstance(sample, (str, bytes)):
57 | return batch
58 | elif isinstance(sample, Mapping):
59 | return {
60 | key: default_collate_fn([d[key] for d in batch])
61 | for key in sample
62 | }
63 | elif isinstance(sample, Sequence):
64 | sample_fields_num = len(sample)
65 | if not all(len(sample) == sample_fields_num for sample in iter(batch)):
66 |             raise RuntimeError(
67 |                 "field numbers are not the same among samples in a batch")
68 | return [default_collate_fn(fields) for fields in zip(*batch)]
69 |
70 | raise TypeError("batch data con only contains: tensor, numpy.ndarray, "
71 | "dict, list, number, but got {}".format(type(sample)))
72 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import torchvision
9 |
10 | from src.core import register
11 |
12 | __all__ = ['RTDETRPostProcessor']
13 |
14 |
15 | @register
16 | class RTDETRPostProcessor(nn.Module):
17 | __share__ = ['num_classes', 'use_focal_loss', 'num_top_queries', 'remap_mscoco_category']
18 |
19 | def __init__(self, num_classes=80, use_focal_loss=True, num_top_queries=300, remap_mscoco_category=False) -> None:
20 | super().__init__()
21 | self.use_focal_loss = use_focal_loss
22 | self.num_top_queries = num_top_queries
23 | self.num_classes = num_classes
24 | self.remap_mscoco_category = remap_mscoco_category
25 | self.deploy_mode = False
26 |
27 | def extra_repr(self) -> str:
28 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}'
29 |
30 |     # map raw model outputs to per-image detection results
31 | def forward(self, outputs, orig_target_sizes):
32 |
33 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes']
34 | # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
35 |
36 | bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
37 | bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
38 |
39 | if self.use_focal_loss:
40 | scores = F.sigmoid(logits)
41 |             scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)  # top-k over flattened (query, class) scores
42 |             labels = index % self.num_classes   # each flat index encodes query_idx * num_classes + class_idx
43 |             index = index // self.num_classes   # recover the query index
44 | boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]))
45 |
46 | else:
47 |             scores = F.softmax(logits, dim=-1)[:, :, :-1]
48 | scores, labels = scores.max(dim=-1)
49 | if scores.shape[1] > self.num_top_queries:
50 | scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
51 | labels = torch.gather(labels, dim=1, index=index)
52 | boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
53 |
54 | # TODO for onnx export
55 | if self.deploy_mode:
56 | return labels, boxes, scores
57 |
58 | # TODO
59 | if self.remap_mscoco_category:
60 | from ...data.coco import mscoco_label2category
61 | labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
62 | .to(boxes.device).reshape(labels.shape)
63 |
64 | results = []
65 | for lab, box, sco in zip(labels, boxes, scores):
66 | result = dict(labels=lab, boxes=box, scores=sco)
67 | results.append(result)
68 |
69 | return results
70 |
71 | def deploy(self, ):
72 | self.eval()
73 | self.deploy_mode = True
74 | return self
75 |
76 | @property
77 | def iou_types(self, ):
78 | return ('bbox',)
79 |
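80 |
81 | if __name__ == '__main__':
82 |     # sketch of the focal-loss top-k trick used in forward(): take the top-k
83 |     # over the flattened (num_queries * num_classes) scores, then decompose
84 |     # each flat index back into a (query, class) pair.
85 |     # Run from the repo root so that `src` is importable.
86 |     num_queries, num_classes = 4, 3
87 |     scores = torch.rand(1, num_queries, num_classes)
88 |     topk_scores, index = torch.topk(scores.flatten(1), 2, dim=-1)
89 |     labels = index % num_classes    # class index
90 |     queries = index // num_classes  # query index
91 |     print(topk_scores, labels, queries)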
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/colormap.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | from __future__ import unicode_literals
19 |
20 | import numpy as np
21 |
22 |
23 | def colormap(rgb=False):
24 | """
25 | Get colormap
26 |
27 | The code of this function is copied from https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/colormap.py
28 | """
29 | color_list = np.array([
30 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494,
31 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078,
32 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000,
33 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000,
34 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667,
35 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000,
36 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000,
37 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000,
38 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500,
39 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667,
40 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333,
41 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000,
42 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333,
43 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000,
44 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000,
45 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167,
46 | 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000,
47 | 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000,
48 | 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000,
49 | 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000,
50 | 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833,
51 | 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286,
52 | 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714,
53 | 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000
54 | ]).astype(np.float32)
55 | color_list = color_list.reshape((-1, 3)) * 255
56 | if not rgb:
57 | color_list = color_list[:, ::-1]
58 | return color_list.astype('int32')
59 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/voc_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import os.path as osp
21 | import re
22 | import random
23 |
24 | __all__ = ['create_list']
25 |
26 |
27 | def create_list(devkit_dir, years, output_dir):
28 | """
29 |     Create the following lists:
30 | 1. trainval.txt
31 | 2. test.txt
32 | """
33 | trainval_list = []
34 | test_list = []
35 | for year in years:
36 | trainval, test = _walk_voc_dir(devkit_dir, year, output_dir)
37 | trainval_list.extend(trainval)
38 | test_list.extend(test)
39 |
40 | random.shuffle(trainval_list)
41 | with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
42 | for item in trainval_list:
43 | ftrainval.write(item[0] + ' ' + item[1] + '\n')
44 |
45 |     with open(osp.join(output_dir, 'test.txt'), 'w') as ftest:
46 |         for item in test_list:
47 |             ftest.write(item[0] + ' ' + item[1] + '\n')
50 |
51 |
52 | def _get_voc_dir(devkit_dir, year, subdir):
53 |     return osp.join(devkit_dir, 'VOC' + year, subdir)
54 |
55 |
56 | def _walk_voc_dir(devkit_dir, year, output_dir):
57 | filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main')
58 | annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations')
59 | img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages')
60 | trainval_list = []
61 | test_list = []
62 | added = set()
63 |
64 | for _, _, files in os.walk(filelist_dir):
65 | for fname in files:
66 | img_ann_list = []
67 | if re.match(r'[a-z]+_trainval\.txt', fname):
68 | img_ann_list = trainval_list
69 | elif re.match(r'[a-z]+_test\.txt', fname):
70 | img_ann_list = test_list
71 | else:
72 | continue
73 | fpath = osp.join(filelist_dir, fname)
74 |             with open(fpath) as flist:  # close the handle deterministically
75 |                 for line in flist:
76 |                     name_prefix = line.strip().split()[0]
77 |                     if name_prefix in added:
78 |                         continue
79 |                     added.add(name_prefix)
80 |                     ann_path = osp.join(
81 |                         osp.relpath(annotation_dir, output_dir), name_prefix + '.xml')
82 |                     img_path = osp.join(
83 |                         osp.relpath(img_dir, output_dir), name_prefix + '.jpg')
84 |                     img_ann_list.append((img_path, ann_path))
85 |
86 | return trainval_list, test_list
87 |
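88 | 
89 | # --- editorial usage sketch (not part of the original file); the devkit and
90 | # --- output paths below are placeholders for a local VOC download.
91 | if __name__ == '__main__':
92 |     # writes trainval.txt / test.txt with (image path, annotation path) pairs
93 |     create_list('dataset/voc/VOCdevkit', ['2007', '2012'], 'dataset/voc')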
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/ms_deformable_attn_op.cc:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | Licensed under the Apache License, Version 2.0 (the "License");
3 | you may not use this file except in compliance with the License.
4 | You may obtain a copy of the License at
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software
7 | distributed under the License is distributed on an "AS IS" BASIS,
8 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | See the License for the specific language governing permissions and
10 | limitations under the License. */
11 |
12 | #include "paddle/extension.h"
13 |
14 | #include <vector>
15 |
16 | // declare GPU implementation
17 | std::vector<paddle::Tensor>
18 | MSDeformableAttnCUDAForward(const paddle::Tensor &value,
19 | const paddle::Tensor &value_spatial_shapes,
20 | const paddle::Tensor &value_level_start_index,
21 | const paddle::Tensor &sampling_locations,
22 | const paddle::Tensor &attention_weights);
23 |
24 | std::vector<paddle::Tensor> MSDeformableAttnCUDABackward(
25 | const paddle::Tensor &value, const paddle::Tensor &value_spatial_shapes,
26 | const paddle::Tensor &value_level_start_index,
27 | const paddle::Tensor &sampling_locations,
28 | const paddle::Tensor &attention_weights, const paddle::Tensor &grad_out);
29 |
30 | //// CPU not implemented
31 |
32 | std::vector<std::vector<int64_t>>
33 | MSDeformableAttnInferShape(std::vector<int64_t> value_shape,
34 |                            std::vector<int64_t> value_spatial_shapes_shape,
35 |                            std::vector<int64_t> value_level_start_index_shape,
36 |                            std::vector<int64_t> sampling_locations_shape,
37 |                            std::vector<int64_t> attention_weights_shape) {
38 | return {{value_shape[0], sampling_locations_shape[1],
39 | value_shape[2] * value_shape[3]}};
40 | }
41 |
42 | std::vector<paddle::DataType>
43 | MSDeformableAttnInferDtype(paddle::DataType value_dtype,
44 | paddle::DataType value_spatial_shapes_dtype,
45 | paddle::DataType value_level_start_index_dtype,
46 | paddle::DataType sampling_locations_dtype,
47 | paddle::DataType attention_weights_dtype) {
48 | return {value_dtype};
49 | }
50 |
51 | PD_BUILD_OP(ms_deformable_attn)
52 | .Inputs({"Value", "SpatialShapes", "LevelIndex", "SamplingLocations",
53 | "AttentionWeights"})
54 | .Outputs({"Out"})
55 | .SetKernelFn(PD_KERNEL(MSDeformableAttnCUDAForward))
56 | .SetInferShapeFn(PD_INFER_SHAPE(MSDeformableAttnInferShape))
57 | .SetInferDtypeFn(PD_INFER_DTYPE(MSDeformableAttnInferDtype));
58 |
59 | PD_BUILD_GRAD_OP(ms_deformable_attn)
60 | .Inputs({"Value", "SpatialShapes", "LevelIndex", "SamplingLocations",
61 | "AttentionWeights", paddle::Grad("Out")})
62 | .Outputs({paddle::Grad("Value"), paddle::Grad("SpatialShapes"),
63 | paddle::Grad("LevelIndex"), paddle::Grad("SamplingLocations"),
64 | paddle::Grad("AttentionWeights")})
65 | .SetKernelFn(PD_KERNEL(MSDeformableAttnCUDABackward));
66 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/README.md:
--------------------------------------------------------------------------------
1 | # Compiling the Multi-Scale Deformable Attention Custom Op
2 | This custom op follows the PaddlePaddle guide on [custom external C++ operators](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html).
3 |
4 | ## 1. Requirements
5 | - Paddle >= 2.3.2
6 | - gcc 8.2
7 |
8 | ## 2. Installation
9 | Build and install the op from this directory:
10 | ```
11 | cd rtdetr_paddle/ppdet/modeling/transformers/ext_op/
12 | python setup_ms_deformable_attn_op.py install
13 | ```
14 |
15 | After compilation the op is ready to use. A usage example of `ms_deformable_attn`:
16 | ```
17 | # import the custom op (paddle itself must be imported first)
18 | import paddle
19 | from deformable_detr_ops import ms_deformable_attn
20 | # build fake input tensors
21 | bs, n_heads, c = 2, 8, 8
22 | query_length, n_levels, n_points = 2, 2, 2
23 | spatial_shapes = paddle.to_tensor([(6, 4), (3, 2)], dtype=paddle.int64)
24 | level_start_index = paddle.concat((paddle.to_tensor(
25 | [0], dtype=paddle.int64), spatial_shapes.prod(1).cumsum(0)[:-1]))
26 | value_length = sum([(H * W).item() for H, W in spatial_shapes])
27 |
28 | def get_test_tensors(channels):
29 | value = paddle.rand(
30 | [bs, value_length, n_heads, channels], dtype=paddle.float32) * 0.01
31 | sampling_locations = paddle.rand(
32 | [bs, query_length, n_heads, n_levels, n_points, 2],
33 | dtype=paddle.float32)
34 | attention_weights = paddle.rand(
35 | [bs, query_length, n_heads, n_levels, n_points],
36 | dtype=paddle.float32) + 1e-5
37 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
38 | -2, keepdim=True)
39 | return [value, sampling_locations, attention_weights]
40 |
41 | value, sampling_locations, attention_weights = get_test_tensors(c)
42 |
43 | output = ms_deformable_attn(value,
44 | spatial_shapes,
45 | level_start_index,
46 | sampling_locations,
47 | attention_weights)
48 | ```
49 |
50 | ## 3. Unit test
51 | Correctness of the custom op can be verified by running the unit test:
52 | ```
53 | python test_ms_deformable_attn_op.py
54 | ```
55 | On success, the test prints:
56 | ```
57 | *True check_forward_equal_with_paddle_float: max_abs_err 6.98e-10 max_rel_err 2.03e-07
58 | *tensor1 True check_gradient_numerical(D=30)
59 | *tensor2 True check_gradient_numerical(D=30)
60 | *tensor3 True check_gradient_numerical(D=30)
61 | *tensor1 True check_gradient_numerical(D=32)
62 | *tensor2 True check_gradient_numerical(D=32)
63 | *tensor3 True check_gradient_numerical(D=32)
64 | *tensor1 True check_gradient_numerical(D=64)
65 | *tensor2 True check_gradient_numerical(D=64)
66 | *tensor3 True check_gradient_numerical(D=64)
67 | *tensor1 True check_gradient_numerical(D=71)
68 | *tensor2 True check_gradient_numerical(D=71)
69 | *tensor3 True check_gradient_numerical(D=71)
70 | *tensor1 True check_gradient_numerical(D=128)
71 | *tensor2 True check_gradient_numerical(D=128)
72 | *tensor3 True check_gradient_numerical(D=128)
73 | *tensor1 True check_gradient_numerical(D=1024)
74 | *tensor2 True check_gradient_numerical(D=1024)
75 | *tensor3 True check_gradient_numerical(D=1024)
76 | *tensor1 True check_gradient_numerical(D=1025)
77 | *tensor2 True check_gradient_numerical(D=1025)
78 | *tensor3 True check_gradient_numerical(D=1025)
79 | *tensor1 True check_gradient_numerical(D=2048)
80 | *tensor2 True check_gradient_numerical(D=2048)
81 | *tensor3 True check_gradient_numerical(D=2048)
82 | *tensor1 True check_gradient_numerical(D=3096)
83 | *tensor2 True check_gradient_numerical(D=3096)
84 | *tensor3 True check_gradient_numerical(D=3096)
85 | ```
86 |
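87 | As a quick editorial sanity check (an addition to the original doc), the forward
88 | output shape can be compared against the `InferShape` rule in
89 | `ms_deformable_attn_op.cc`, which yields `[bs, query_length, n_heads * c]`:
90 | ```
91 | # reuses the fake inputs constructed in the usage example above
92 | output = ms_deformable_attn(value, spatial_shapes, level_start_index,
93 |                             sampling_locations, attention_weights)
94 | assert output.shape == [bs, query_length, n_heads * c]
95 | ```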
--------------------------------------------------------------------------------
/benchmark/dataset.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import os
5 | import glob
6 | from PIL import Image
7 |
8 | import torch
9 | import torch.utils.data as data
10 | import torchvision
11 | import torchvision.transforms as T
12 | import torchvision.transforms.functional as F
13 |
14 |
15 | class ToTensor(T.ToTensor):
16 | def __init__(self) -> None:
17 | super().__init__()
18 |
19 | def __call__(self, pic):
20 | if isinstance(pic, torch.Tensor):
21 | return pic
22 | return super().__call__(pic)
23 |
24 | class PadToSize(T.Pad):
25 | def __init__(self, size, fill=0, padding_mode='constant'):
26 | super().__init__(0, fill, padding_mode)
27 | self.size = size
28 | self.fill = fill
29 |
30 | def __call__(self, img):
31 | """
32 | Args:
33 | img (PIL Image or Tensor): Image to be padded.
34 |
35 | Returns:
36 | PIL Image or Tensor: Padded image.
37 | """
38 | w, h = F.get_image_size(img)
39 | padding = (0, 0, self.size[0] - w, self.size[1] - h)
40 | return F.pad(img, padding, self.fill, self.padding_mode)
41 |
42 |
43 | class Dataset(data.Dataset):
44 | def __init__(self, img_dir: str='', preprocess: T.Compose=None, device='cuda:0') -> None:
45 | super().__init__()
46 |
47 | self.device = device
48 | self.size = 640
49 |
50 | self.im_path_list = list(glob.glob(os.path.join(img_dir, '*.jpg')))
51 |
52 | if preprocess is None:
53 | self.preprocess = T.Compose([
54 | T.Resize(size=639, max_size=640),
55 | PadToSize(size=(640, 640), fill=114),
56 | ToTensor(),
57 | T.ConvertImageDtype(torch.float),
58 | ])
59 | else:
60 | self.preprocess = preprocess
61 |
62 | def __len__(self, ):
63 | return len(self.im_path_list)
64 |
65 | def __getitem__(self, index):
66 |         # im = Image.open(self.im_path_list[index]).convert('RGB')
67 | im = torchvision.io.read_file(self.im_path_list[index])
68 | im = torchvision.io.decode_jpeg(im, mode=torchvision.io.ImageReadMode.RGB, device=self.device)
69 | _, h, w = im.shape # c,h,w
70 |
71 | im = self.preprocess(im)
72 |
73 | blob = {
74 | 'image': im,
75 | 'im_shape': torch.tensor([self.size, self.size]).to(im.device),
76 | 'scale_factor': torch.tensor([self.size / h, self.size / w]).to(im.device),
77 | 'orig_size': torch.tensor([w, h]).to(im.device),
78 | }
79 |
80 | return blob
81 |
82 | @staticmethod
83 | def post_process():
84 | pass
85 |
86 | @staticmethod
87 | def collate_fn():
88 | pass
89 |
90 |
91 | def draw_nms_result(blob, outputs, draw_score_threshold=0.25, name=''):
92 | '''show result
93 | Keys:
94 | 'num_dets', 'det_boxes', 'det_scores', 'det_classes'
95 | '''
96 | for i in range(blob['image'].shape[0]):
97 | det_scores = outputs['det_scores'][i]
98 | det_boxes = outputs['det_boxes'][i][det_scores > draw_score_threshold]
99 |
100 | im = (blob['image'][i] * 255).to(torch.uint8)
101 | im = torchvision.utils.draw_bounding_boxes(im, boxes=det_boxes, width=2)
102 | Image.fromarray(im.permute(1, 2, 0).cpu().numpy()).save(f'test_{name}_{i}.jpg')
103 |
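104 | 
105 | # --- editorial usage sketch (not part of the original file); `img_dir` is a
106 | # --- placeholder. Checks the 640x640 letterbox produced by the default
107 | # --- preprocess (resize, pad with value 114, convert to float).
108 | if __name__ == '__main__':
109 |     ds = Dataset(img_dir='path/to/coco/val2017', device='cpu')
110 |     blob = ds[0]
111 |     print(blob['image'].shape)  # torch.Size([3, 640, 640])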
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/utils.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import math
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 |
10 | def inverse_sigmoid(x: torch.Tensor, eps: float=1e-5) -> torch.Tensor:
11 |     x = x.clip(min=0., max=1.)  # numerically stable logit; eps guards log(0)
12 |     return torch.log(x.clip(min=eps) / (1 - x).clip(min=eps))
13 |
14 |
15 | def deformable_attention_core_func(value, value_spatial_shapes, sampling_locations, attention_weights):
16 | """
17 | Args:
18 | value (Tensor): [bs, value_length, n_head, c]
19 | value_spatial_shapes (Tensor|List): [n_levels, 2]
20 |         (value_level_start_index is not needed here; it is implied by value_spatial_shapes)
21 | sampling_locations (Tensor): [bs, query_length, n_head, n_levels, n_points, 2]
22 | attention_weights (Tensor): [bs, query_length, n_head, n_levels, n_points]
23 |
24 | Returns:
25 | output (Tensor): [bs, Length_{query}, C]
26 | """
27 | bs, _, n_head, c = value.shape
28 | _, Len_q, _, n_levels, n_points, _ = sampling_locations.shape
29 |
30 | split_shape = [h * w for h, w in value_spatial_shapes]
31 | value_list = value.split(split_shape, dim=1)
32 | sampling_grids = 2 * sampling_locations - 1
33 | sampling_value_list = []
34 | for level, (h, w) in enumerate(value_spatial_shapes):
35 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
36 | value_l_ = value_list[level].flatten(2).permute(
37 | 0, 2, 1).reshape(bs * n_head, c, h, w)
38 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
39 | sampling_grid_l_ = sampling_grids[:, :, :, level].permute(
40 | 0, 2, 1, 3, 4).flatten(0, 1)
41 | # N_*M_, D_, Lq_, P_
42 | sampling_value_l_ = F.grid_sample(
43 | value_l_,
44 | sampling_grid_l_,
45 | mode='bilinear',
46 | padding_mode='zeros',
47 | align_corners=False)
48 | sampling_value_list.append(sampling_value_l_)
49 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
50 | attention_weights = attention_weights.permute(0, 2, 1, 3, 4).reshape(
51 | bs * n_head, 1, Len_q, n_levels * n_points)
52 | output = (torch.stack(
53 | sampling_value_list, dim=-2).flatten(-2) *
54 | attention_weights).sum(-1).reshape(bs, n_head * c, Len_q)
55 |
56 | return output.permute(0, 2, 1)
57 |
58 |
59 | 
60 | def bias_init_with_prob(prior_prob=0.01):
61 |     """Initialize conv/fc bias so that sigmoid(bias) == prior_prob (focal-loss prior)."""
62 |     bias_init = float(-math.log((1 - prior_prob) / prior_prob))
63 |     return bias_init
64 |
65 |
66 |
67 | def get_activation(act: str, inpace: bool=True):
68 |     '''get activation
69 |     '''
70 |     # handle None / nn.Module inputs before any str method is called;
71 |     # previously these branches were unreachable after act.lower()
72 |     if act is None:
73 |         m = nn.Identity()
74 | 
75 |     elif isinstance(act, nn.Module):
76 |         m = act
77 | 
78 |     else:
79 |         act = act.lower()
80 |         if act == 'silu':
81 |             m = nn.SiLU()
82 |         elif act == 'relu':
83 |             m = nn.ReLU()
84 |         elif act == 'leaky_relu':
85 |             m = nn.LeakyReLU()
86 |         elif act == 'gelu':
87 |             m = nn.GELU()
88 |         else:
89 |             raise RuntimeError(f'Unsupported activation: {act}')
90 | 
91 |     if hasattr(m, 'inplace'):
92 |         m.inplace = inpace
93 | 
94 |     return m
95 | 
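96 | 
97 | # --- editorial self-check sketch (not part of the original file). Verifies the
98 | # --- output shape documented in deformable_attention_core_func's docstring,
99 | # --- [bs, query_length, n_head * c]; the sizes below are arbitrary.
100 | if __name__ == '__main__':
101 |     bs, n_head, c, len_q = 2, 8, 16, 10
102 |     spatial_shapes = [(6, 4), (3, 2)]
103 |     value = torch.rand(bs, sum(h * w for h, w in spatial_shapes), n_head, c)
104 |     locations = torch.rand(bs, len_q, n_head, len(spatial_shapes), 4, 2)
105 |     weights = torch.rand(bs, len_q, n_head, len(spatial_shapes), 4)
106 |     out = deformable_attention_core_func(value, spatial_shapes, locations, weights)
107 |     assert out.shape == (bs, len_q, n_head * c)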
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/common.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
8 |
9 | class ConvNormLayer(nn.Module):
10 | def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None):
11 | super().__init__()
12 | self.conv = nn.Conv2d(
13 | ch_in,
14 | ch_out,
15 | kernel_size,
16 | stride,
17 | padding=(kernel_size-1)//2 if padding is None else padding,
18 | bias=bias)
19 | self.norm = nn.BatchNorm2d(ch_out)
20 | self.act = nn.Identity() if act is None else get_activation(act)
21 |
22 | def forward(self, x):
23 | return self.act(self.norm(self.conv(x)))
24 |
25 |
26 | class FrozenBatchNorm2d(nn.Module):
27 | """copy and modified from https://github.com/facebookresearch/detr/blob/master/models/backbone.py
28 | BatchNorm2d where the batch statistics and the affine parameters are fixed.
29 |     Copy-paste from torchvision.misc.ops with added eps before rsqrt,
30 | without which any other models than torchvision.models.resnet[18,34,50,101]
31 | produce nans.
32 | """
33 | def __init__(self, num_features, eps=1e-5):
34 | super(FrozenBatchNorm2d, self).__init__()
35 | n = num_features
36 | self.register_buffer("weight", torch.ones(n))
37 | self.register_buffer("bias", torch.zeros(n))
38 | self.register_buffer("running_mean", torch.zeros(n))
39 | self.register_buffer("running_var", torch.ones(n))
40 | self.eps = eps
41 | self.num_features = n
42 |
43 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
44 | missing_keys, unexpected_keys, error_msgs):
45 | num_batches_tracked_key = prefix + 'num_batches_tracked'
46 | if num_batches_tracked_key in state_dict:
47 | del state_dict[num_batches_tracked_key]
48 |
49 | super(FrozenBatchNorm2d, self)._load_from_state_dict(
50 | state_dict, prefix, local_metadata, strict,
51 | missing_keys, unexpected_keys, error_msgs)
52 |
53 | def forward(self, x):
54 | # move reshapes to the beginning
55 | # to make it fuser-friendly
56 | w = self.weight.reshape(1, -1, 1, 1)
57 | b = self.bias.reshape(1, -1, 1, 1)
58 | rv = self.running_var.reshape(1, -1, 1, 1)
59 | rm = self.running_mean.reshape(1, -1, 1, 1)
60 | scale = w * (rv + self.eps).rsqrt()
61 | bias = b - rm * scale
62 | return x * scale + bias
63 |
64 | def extra_repr(self):
65 | return (
66 | "{num_features}, eps={eps}".format(**self.__dict__)
67 | )
68 |
69 |
70 | def get_activation(act: str, inpace: bool=True):
71 |     '''get activation
72 |     '''
73 |     # handle None / nn.Module inputs before any str method is called;
74 |     # previously these branches were unreachable after act.lower()
75 |     if act is None:
76 |         m = nn.Identity()
77 | 
78 |     elif isinstance(act, nn.Module):
79 |         m = act
80 | 
81 |     else:
82 |         act = act.lower()
83 |         if act == 'silu':
84 |             m = nn.SiLU()
85 |         elif act == 'relu':
86 |             m = nn.ReLU()
87 |         elif act == 'leaky_relu':
88 |             m = nn.LeakyReLU()
89 |         elif act == 'gelu':
90 |             m = nn.GELU()
91 |         else:
92 |             raise RuntimeError(f'Unsupported activation: {act}')
93 | 
94 |     if hasattr(m, 'inplace'):
95 |         m.inplace = inpace
96 | 
97 |     return m
98 | 
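99 | 
100 | # --- editorial usage sketch (not part of the original file). With its default
101 | # --- buffers (mean 0, var 1, weight 1, bias 0), FrozenBatchNorm2d matches an
102 | # --- eval-mode BatchNorm2d: both compute x * (var + eps).rsqrt().
103 | if __name__ == '__main__':
104 |     bn = nn.BatchNorm2d(8).eval()
105 |     fbn = FrozenBatchNorm2d(8)
106 |     x = torch.randn(2, 8, 4, 4)
107 |     assert torch.allclose(bn(x), fbn(x), atol=1e-6)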
--------------------------------------------------------------------------------
/rtdetr_paddle/tools/export_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import sys
21 |
22 | # add python path of PaddleDetection to sys.path
23 | parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
24 | sys.path.insert(0, parent_path)
25 |
26 | # ignore warning log
27 | import warnings
28 | warnings.filterwarnings('ignore')
29 |
30 | import paddle
31 | from ppdet.core.workspace import load_config, merge_config
32 | from ppdet.utils.check import check_gpu, check_version, check_config
33 | from ppdet.utils.cli import ArgsParser
34 | from ppdet.engine import Trainer
35 |
36 | from ppdet.utils.logger import setup_logger
37 | logger = setup_logger('export_model')
38 |
39 |
40 | def parse_args():
41 | parser = ArgsParser()
42 | parser.add_argument(
43 | "--output_dir",
44 | type=str,
45 | default="output_inference",
46 | help="Directory for storing the output model files.")
47 | parser.add_argument(
48 | "--export_serving_model",
49 |         action='store_true',
50 | default=False,
51 | help="Whether to export serving model or not.")
52 | parser.add_argument(
53 | "--slim_config",
54 | default=None,
55 | type=str,
56 | help="Configuration file of slim method.")
57 | args = parser.parse_args()
58 | return args
59 |
60 |
61 | def run(FLAGS, cfg):
62 | trainer = Trainer(cfg, mode='test')
63 | # load weights
64 | trainer.load_weights(cfg.weights)
65 |
66 | # export model
67 | trainer.export(FLAGS.output_dir)
68 |
69 | if FLAGS.export_serving_model:
70 | from paddle_serving_client.io import inference_model_to_serving
71 | model_name = os.path.splitext(os.path.split(cfg.filename)[-1])[0]
72 |
73 | inference_model_to_serving(
74 | dirname="{}/{}".format(FLAGS.output_dir, model_name),
75 | serving_server="{}/{}/serving_server".format(FLAGS.output_dir,
76 | model_name),
77 | serving_client="{}/{}/serving_client".format(FLAGS.output_dir,
78 | model_name),
79 | model_filename="model.pdmodel",
80 | params_filename="model.pdiparams")
81 |
82 |
83 | def main():
84 | paddle.set_device("cpu")
85 | FLAGS = parse_args()
86 | cfg = load_config(FLAGS.config)
87 | merge_config(FLAGS.opt)
88 |
89 | # FIXME: Temporarily solve the priority problem of FLAGS.opt
90 | merge_config(FLAGS.opt)
91 | check_config(cfg)
92 | if 'use_gpu' not in cfg:
93 | cfg.use_gpu = False
94 | check_gpu(cfg.use_gpu)
95 | check_version()
96 |
97 | run(FLAGS, cfg)
98 |
99 |
100 | if __name__ == '__main__':
101 | main()
102 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 |
162 |
163 | .DS_Store
164 | *.ipynb
165 | *.pth
166 | *.pdparams
167 | *.onnx
168 | rtdetr_pytorch/output/*
169 | rtdetr_pytorch/dataset/*
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/position_encoding.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Modified from DETR (https://github.com/facebookresearch/detr)
16 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import math
23 | import paddle
24 | import paddle.nn as nn
25 |
26 | from ppdet.core.workspace import register, serializable
27 |
28 |
29 | @register
30 | @serializable
31 | class PositionEmbedding(nn.Layer):
32 | def __init__(self,
33 | num_pos_feats=128,
34 | temperature=10000,
35 | normalize=True,
36 | scale=2 * math.pi,
37 | embed_type='sine',
38 | num_embeddings=50,
39 | offset=0.,
40 | eps=1e-6):
41 | super(PositionEmbedding, self).__init__()
42 | assert embed_type in ['sine', 'learned']
43 |
44 | self.embed_type = embed_type
45 | self.offset = offset
46 | self.eps = eps
47 | if self.embed_type == 'sine':
48 | self.num_pos_feats = num_pos_feats
49 | self.temperature = temperature
50 | self.normalize = normalize
51 | self.scale = scale
52 | elif self.embed_type == 'learned':
53 | self.row_embed = nn.Embedding(num_embeddings, num_pos_feats)
54 | self.col_embed = nn.Embedding(num_embeddings, num_pos_feats)
55 | else:
56 | raise ValueError(f"{self.embed_type} is not supported.")
57 |
58 | def forward(self, mask):
59 | """
60 | Args:
61 | mask (Tensor): [B, H, W]
62 | Returns:
63 | pos (Tensor): [B, H, W, C]
64 | """
65 | if self.embed_type == 'sine':
66 | y_embed = mask.cumsum(1)
67 | x_embed = mask.cumsum(2)
68 | if self.normalize:
69 | y_embed = (y_embed + self.offset) / (
70 | y_embed[:, -1:, :] + self.eps) * self.scale
71 | x_embed = (x_embed + self.offset) / (
72 | x_embed[:, :, -1:] + self.eps) * self.scale
73 |
74 | dim_t = 2 * (paddle.arange(self.num_pos_feats) //
75 | 2).astype('float32')
76 | dim_t = self.temperature**(dim_t / self.num_pos_feats)
77 |
78 | pos_x = x_embed.unsqueeze(-1) / dim_t
79 | pos_y = y_embed.unsqueeze(-1) / dim_t
80 | pos_x = paddle.stack(
81 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()),
82 | axis=4).flatten(3)
83 | pos_y = paddle.stack(
84 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()),
85 | axis=4).flatten(3)
86 | return paddle.concat((pos_y, pos_x), axis=3)
87 | elif self.embed_type == 'learned':
88 | h, w = mask.shape[-2:]
89 | i = paddle.arange(w)
90 | j = paddle.arange(h)
91 | x_emb = self.col_embed(i)
92 | y_emb = self.row_embed(j)
93 | return paddle.concat(
94 | [
95 | x_emb.unsqueeze(0).tile([h, 1, 1]),
96 | y_emb.unsqueeze(1).tile([1, w, 1]),
97 | ],
98 | axis=-1).unsqueeze(0)
99 | else:
100 | raise ValueError(f"not supported {self.embed_type}")
101 |
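102 | 
103 | # --- editorial usage sketch (not part of the original file). A sine embedding
104 | # --- over an all-ones mask (i.e. no padding); the channel dim of the output is
105 | # --- 2 * num_pos_feats (y and x halves concatenated).
106 | if __name__ == '__main__':
107 |     pe = PositionEmbedding(num_pos_feats=128, embed_type='sine')
108 |     pos = pe(paddle.ones([2, 32, 32]))
109 |     assert pos.shape == [2, 32, 32, 256]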
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/config/yaml_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import importlib
16 | import inspect
17 |
18 | import yaml
19 | from .schema import SharedConfig
20 |
21 | __all__ = ['serializable', 'Callable']
22 |
23 |
24 | def represent_dictionary_order(self, dict_data):
25 | return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())
26 |
27 |
28 | def setup_orderdict():
29 | from collections import OrderedDict
30 | yaml.add_representer(OrderedDict, represent_dictionary_order)
31 |
32 |
33 | def _make_python_constructor(cls):
34 | def python_constructor(loader, node):
35 | if isinstance(node, yaml.SequenceNode):
36 | args = loader.construct_sequence(node, deep=True)
37 | return cls(*args)
38 | else:
39 | kwargs = loader.construct_mapping(node, deep=True)
40 | try:
41 | return cls(**kwargs)
42 | except Exception as ex:
43 | print("Error when construct {} instance from yaml config".
44 | format(cls.__name__))
45 | raise ex
46 |
47 | return python_constructor
48 |
49 |
50 | def _make_python_representer(cls):
51 | # python 2 compatibility
52 |     if hasattr(inspect, 'getfullargspec'):
53 |         argspec = inspect.getfullargspec(cls)
54 |     else:
55 |         argspec = inspect.getargspec(cls.__init__)
56 | argnames = [arg for arg in argspec.args if arg != 'self']
57 |
58 | def python_representer(dumper, obj):
59 | if argnames:
60 | data = {name: getattr(obj, name) for name in argnames}
61 | else:
62 | data = obj.__dict__
63 | if '_id' in data:
64 | del data['_id']
65 | return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
66 |
67 | return python_representer
68 |
69 |
70 | def serializable(cls):
71 | """
72 | Add loader and dumper for given class, which must be
73 | "trivially serializable"
74 |
75 | Args:
76 | cls: class to be serialized
77 |
78 | Returns: cls
79 | """
80 | yaml.add_constructor(u'!{}'.format(cls.__name__),
81 | _make_python_constructor(cls))
82 | yaml.add_representer(cls, _make_python_representer(cls))
83 | return cls
84 |
85 |
86 | yaml.add_representer(SharedConfig,
87 | lambda d, o: d.represent_data(o.default_value))
88 |
89 |
90 | @serializable
91 | class Callable(object):
92 | """
93 | Helper to be used in Yaml for creating arbitrary class objects
94 |
95 | Args:
96 | full_type (str): the full module path to target function
97 | """
98 |
99 |     def __init__(self, full_type, args=None, kwargs=None):
100 |         super(Callable, self).__init__()
101 |         self.full_type = full_type
102 |         self.args = args if args is not None else []
103 |         self.kwargs = kwargs if kwargs is not None else {}
104 |
105 | def __call__(self):
106 | if '.' in self.full_type:
107 | idx = self.full_type.rfind('.')
108 | module = importlib.import_module(self.full_type[:idx])
109 | func_name = self.full_type[idx + 1:]
110 | else:
111 | try:
112 | module = importlib.import_module('builtins')
113 | except Exception:
114 | module = importlib.import_module('__builtin__')
115 | func_name = self.full_type
116 |
117 | func = getattr(module, func_name)
118 | return func(*self.args, **self.kwargs)
119 |
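120 | 
121 | # --- editorial usage sketch (not part of the original file). Callable defers
122 | # --- construction: the target is only imported and invoked when called.
123 | if __name__ == '__main__':
124 |     fn = Callable('math.sqrt', args=[9.0])
125 |     print(fn())  # 3.0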
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/architectures/detr.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | from .meta_arch import BaseArch
21 | from ppdet.core.workspace import register, create
22 |
23 | __all__ = ['DETR']
24 |
25 |
26 | # Deformable DETR, DINO use the same architecture as DETR
27 |
28 |
29 | @register
30 | class DETR(BaseArch):
31 | __category__ = 'architecture'
32 | __inject__ = ['post_process']
33 | __shared__ = ['with_mask', 'exclude_post_process']
34 |
35 | def __init__(self,
36 | backbone,
37 | transformer='DETRTransformer',
38 | detr_head='DETRHead',
39 | neck=None,
40 | post_process='DETRPostProcess',
41 | with_mask=False,
42 | exclude_post_process=False):
43 | super(DETR, self).__init__()
44 | self.backbone = backbone
45 | self.transformer = transformer
46 | self.detr_head = detr_head
47 | self.neck = neck
48 | self.post_process = post_process
49 | self.with_mask = with_mask
50 | self.exclude_post_process = exclude_post_process
51 |
52 | @classmethod
53 | def from_config(cls, cfg, *args, **kwargs):
54 | # backbone
55 | backbone = create(cfg['backbone'])
56 | # neck
57 | kwargs = {'input_shape': backbone.out_shape}
58 | neck = create(cfg['neck'], **kwargs) if cfg['neck'] else None
59 |
60 | # transformer
61 | if neck is not None:
62 | kwargs = {'input_shape': neck.out_shape}
63 | transformer = create(cfg['transformer'], **kwargs)
64 | # head
65 | kwargs = {
66 | 'hidden_dim': transformer.hidden_dim,
67 | 'nhead': transformer.nhead,
68 | 'input_shape': backbone.out_shape
69 | }
70 | detr_head = create(cfg['detr_head'], **kwargs)
71 |
72 | return {
73 | 'backbone': backbone,
74 | 'transformer': transformer,
75 | "detr_head": detr_head,
76 | "neck": neck
77 | }
78 |
79 | def _forward(self):
80 | # Backbone
81 | body_feats = self.backbone(self.inputs)
82 |
83 | # Neck
84 | if self.neck is not None:
85 | body_feats = self.neck(body_feats)
86 |
87 | # Transformer
88 | pad_mask = self.inputs.get('pad_mask', None)
89 | out_transformer = self.transformer(body_feats, pad_mask, self.inputs)
90 |
91 | # DETR Head
92 | if self.training:
93 | detr_losses = self.detr_head(out_transformer, body_feats,
94 | self.inputs)
95 | detr_losses.update({
96 | 'loss': paddle.add_n(
97 | [v for k, v in detr_losses.items() if 'log' not in k])
98 | })
99 | return detr_losses
100 | else:
101 | preds = self.detr_head(out_transformer, body_feats)
102 | if self.exclude_post_process:
103 | bbox, bbox_num, mask = preds
104 | else:
105 | bbox, bbox_num, mask = self.post_process(
106 | preds, self.inputs['im_shape'], self.inputs['scale_factor'],
107 | paddle.shape(self.inputs['image'])[2:])
108 |
109 | output = {'bbox': bbox, 'bbox_num': bbox_num}
110 | if self.with_mask:
111 | output['mask'] = mask
112 | return output
113 |
114 | def get_loss(self):
115 | return self._forward()
116 |
117 | def get_pred(self):
118 | return self._forward()
119 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/ema.py:
--------------------------------------------------------------------------------
1 | """
2 | reference:
3 | https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py#L404
4 |
5 | by lyuwenyu
6 | """
7 |
8 | import torch
9 | import torch.nn as nn
10 |
11 | import math
12 | from copy import deepcopy
13 |
14 |
15 |
16 | from src.core import register
17 | import src.misc.dist as dist
18 |
19 |
20 | __all__ = ['ModelEMA']
21 |
22 |
23 |
24 | @register
25 | class ModelEMA(object):
26 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
27 | Keep a moving average of everything in the model state_dict (parameters and buffers).
28 | This is intended to allow functionality like
29 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
30 | A smoothed version of the weights is necessary for some training schemes to perform well.
31 | This class is sensitive where it is initialized in the sequence of model init,
32 | GPU assignment and distributed training wrappers.
33 | """
34 | def __init__(self, model: nn.Module, decay: float=0.9999, warmups: int=2000):
35 | super().__init__()
36 |
37 | # Create EMA
38 | self.module = deepcopy(dist.de_parallel(model)).eval() # FP32 EMA
39 |
40 | # if next(model.parameters()).device.type != 'cpu':
41 | # self.module.half() # FP16 EMA
42 |
43 | self.decay = decay
44 | self.warmups = warmups
45 | self.updates = 0 # number of EMA updates
46 | # self.filter_no_grad = filter_no_grad
47 | self.decay_fn = lambda x: decay * (1 - math.exp(-x / warmups)) # decay exponential ramp (to help early epochs)
48 |
49 | for p in self.module.parameters():
50 | p.requires_grad_(False)
51 |
52 | def update(self, model: nn.Module):
53 | # Update EMA parameters
54 | with torch.no_grad():
55 | self.updates += 1
56 | d = self.decay_fn(self.updates)
57 |
58 | msd = dist.de_parallel(model).state_dict()
59 | for k, v in self.module.state_dict().items():
60 | if v.dtype.is_floating_point:
61 | v *= d
62 | v += (1 - d) * msd[k].detach()
63 |
64 | def to(self, *args, **kwargs):
65 | self.module = self.module.to(*args, **kwargs)
66 | return self
67 |
68 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
69 | # Update EMA attributes
70 | self.copy_attr(self.module, model, include, exclude)
71 |
72 | @staticmethod
73 | def copy_attr(a, b, include=(), exclude=()):
74 | # Copy attributes from b to a, options to only include [...] and to exclude [...]
75 | for k, v in b.__dict__.items():
76 | if (len(include) and k not in include) or k.startswith('_') or k in exclude:
77 | continue
78 | else:
79 | setattr(a, k, v)
80 |
81 | def state_dict(self, ):
82 | return dict(module=self.module.state_dict(), updates=self.updates, warmups=self.warmups)
83 |
84 | def load_state_dict(self, state):
85 | self.module.load_state_dict(state['module'])
86 | if 'updates' in state:
87 | self.updates = state['updates']
88 |
89 |     def forward(self, ):
90 |         raise RuntimeError('ModelEMA is not callable; use `ema.module` for inference.')
91 |
92 | def extra_repr(self) -> str:
93 | return f'decay={self.decay}, warmups={self.warmups}'
94 |
95 |
96 |
97 |
98 | class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel):
99 | """Maintains moving averages of model parameters using an exponential decay.
100 | ``ema_avg = decay * avg_model_param + (1 - decay) * model_param``
101 |     `torch.optim.swa_utils.AveragedModel`
102 |     is used to compute the EMA.
103 | """
104 | def __init__(self, model, decay, device="cpu", use_buffers=True):
105 |
106 | self.decay_fn = lambda x: decay * (1 - math.exp(-x / 2000))
107 |
108 | def ema_avg(avg_model_param, model_param, num_averaged):
109 | decay = self.decay_fn(num_averaged)
110 | return decay * avg_model_param + (1 - decay) * model_param
111 |
112 | super().__init__(model, device, ema_avg, use_buffers=use_buffers)
113 |
114 |
115 |
116 |
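117 | 
118 | # --- editorial usage sketch (not part of the original file). Typical training-
119 | # --- loop integration with a toy model; evaluation and export should use the
120 | # --- smoothed weights in `ema.module`.
121 | if __name__ == '__main__':
122 |     net = nn.Linear(4, 2)
123 |     ema = ModelEMA(net, decay=0.9999, warmups=2000)
124 |     opt = torch.optim.SGD(net.parameters(), lr=0.1)
125 |     for _ in range(3):
126 |         loss = net(torch.randn(8, 4)).pow(2).mean()
127 |         opt.zero_grad()
128 |         loss.backward()
129 |         opt.step()
130 |         ema.update(net)  # EMA weights lag the online weights
131 |     print(ema.extra_repr())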
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/solver/det_solver.py:
--------------------------------------------------------------------------------
1 | '''
2 | by lyuwenyu
3 | '''
4 | import time
5 | import json
6 | import datetime
7 |
8 | import torch
9 |
10 | from src.misc import dist
11 | from src.data import get_coco_api_from_dataset
12 |
13 | from .solver import BaseSolver
14 | from .det_engine import train_one_epoch, evaluate
15 |
16 |
17 | class DetSolver(BaseSolver):
18 |
19 | def fit(self, ):
20 | print("Start training")
21 | self.train()
22 |
23 | args = self.cfg
24 |
25 | n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
26 | print('number of params:', n_parameters)
27 |
28 | base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset)
29 | # best_stat = {'coco_eval_bbox': 0, 'coco_eval_masks': 0, 'epoch': -1, }
30 | best_stat = {'epoch': -1, }
31 |
32 | start_time = time.time()
33 | for epoch in range(self.last_epoch + 1, args.epoches):
34 | if dist.is_dist_available_and_initialized():
35 | self.train_dataloader.sampler.set_epoch(epoch)
36 |
37 | train_stats = train_one_epoch(
38 | self.model, self.criterion, self.train_dataloader, self.optimizer, self.device, epoch,
39 | args.clip_max_norm, print_freq=args.log_step, ema=self.ema, scaler=self.scaler)
40 |
41 | self.lr_scheduler.step()
42 |
43 | if self.output_dir:
44 | checkpoint_paths = [self.output_dir / 'checkpoint.pth']
45 |                 # extra checkpoint every args.checkpoint_step epochs
46 | if (epoch + 1) % args.checkpoint_step == 0:
47 | checkpoint_paths.append(self.output_dir / f'checkpoint{epoch:04}.pth')
48 | for checkpoint_path in checkpoint_paths:
49 | dist.save_on_master(self.state_dict(epoch), checkpoint_path)
50 |
51 | module = self.ema.module if self.ema else self.model
52 | test_stats, coco_evaluator = evaluate(
53 | module, self.criterion, self.postprocessor, self.val_dataloader, base_ds, self.device, self.output_dir
54 | )
55 |
56 | # TODO
57 | for k in test_stats.keys():
58 | if k in best_stat:
59 | best_stat['epoch'] = epoch if test_stats[k][0] > best_stat[k] else best_stat['epoch']
60 | best_stat[k] = max(best_stat[k], test_stats[k][0])
61 | else:
62 | best_stat['epoch'] = epoch
63 | best_stat[k] = test_stats[k][0]
64 | print('best_stat: ', best_stat)
65 |
66 | log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
67 | **{f'test_{k}': v for k, v in test_stats.items()},
68 | 'epoch': epoch,
69 | 'n_parameters': n_parameters}
70 |
71 | if self.output_dir and dist.is_main_process():
72 | with (self.output_dir / "log.txt").open("a") as f:
73 | f.write(json.dumps(log_stats) + "\n")
74 |
75 | # for evaluation logs
76 | if coco_evaluator is not None:
77 | (self.output_dir / 'eval').mkdir(exist_ok=True)
78 | if "bbox" in coco_evaluator.coco_eval:
79 | filenames = ['latest.pth']
80 | if epoch % 50 == 0:
81 | filenames.append(f'{epoch:03}.pth')
82 | for name in filenames:
83 | torch.save(coco_evaluator.coco_eval["bbox"].eval,
84 | self.output_dir / "eval" / name)
85 |
86 | total_time = time.time() - start_time
87 | total_time_str = str(datetime.timedelta(seconds=int(total_time)))
88 | print('Training time {}'.format(total_time_str))
89 |
90 | def val(self, ):
91 | self.eval()
92 |
93 | base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset)
94 |
95 | module = self.ema.module if self.ema else self.model
96 | test_stats, coco_evaluator = evaluate(module, self.criterion, self.postprocessor,
97 | self.val_dataloader, base_ds, self.device, self.output_dir)
98 |
99 | if self.output_dir:
100 | dist.save_on_master(coco_evaluator.coco_eval["bbox"].eval, self.output_dir / "eval.pth")
101 |
102 | return
103 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/README.md:
--------------------------------------------------------------------------------
1 | ## TODO
2 |
3 |
4 |
5 | - [x] Training
6 | - [x] Evaluation
7 | - [x] Export onnx
8 | - [x] Upload source code
9 | - [x] Upload weights converted from paddle, see [*links*](https://github.com/lyuwenyu/RT-DETR/issues/42)
10 | - [x] Align training details with the [*paddle version*](../rtdetr_paddle/)
11 | - [x] Tuning rtdetr based on [*pretrained weights*](https://github.com/lyuwenyu/RT-DETR/issues/42)
12 |
13 |
14 |
15 |
16 | ## Model Zoo
17 |
18 | | Model | Dataset | Input Size | APval | AP50val | #Params(M) | FPS | checkpoint |
19 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
20 | rtdetr_r18vd | COCO | 640 | 46.4 | 63.7 | 20 | 217 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth)
21 | rtdetr_r34vd | COCO | 640 | 48.9 | 66.8 | 31 | 161 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth)
22 | rtdetr_r50vd_m | COCO | 640 | 51.3 | 69.5 | 36 | 145 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth)
23 | rtdetr_r50vd | COCO | 640 | 53.1 | 71.2| 42 | 108 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth)
24 | rtdetr_r101vd | COCO | 640 | 54.3 | 72.8 | 76 | 74 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth)
25 | rtdetr_r18vd | COCO+Objects365 | 640 | 49.0 | 66.5 | 20 | 217 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
26 | rtdetr_r50vd | COCO+Objects365 | 640 | 55.2 | 73.4 | 42 | 108 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
27 | rtdetr_r101vd | COCO+Objects365 | 640 | 56.2 | 74.5 | 76 | 74 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth)
28 |
29 | Notes
30 | - `COCO + Objects365` in the table means the model was finetuned on `COCO` using weights pretrained on `Objects365`.
31 | - `url*` links to pretrained weights converted from the corresponding Paddle models, to save training energy. *Numbers may differ slightly between this table and the paper.*
32 |
33 |
34 | ## Quick start
35 |
36 |
37 | ### Install
38 |
39 | ```bash
40 | pip install -r requirements.txt
41 | ```
42 |
43 |
44 |
45 |
46 |
47 | ### Data
48 |
49 | - Download and extract COCO 2017 train and val images.
50 | ```
51 | path/to/coco/
52 | annotations/ # annotation json files
53 | train2017/ # train images
54 | val2017/ # val images
55 | ```
56 | - Modify config [`img_folder`, `ann_file`](configs/dataset/coco_detection.yml)
57 |
58 |
59 |
60 |
61 |
62 | ### Training & Evaluation
63 |
64 | - Training on a Single GPU:
65 |
66 | ```shell
67 | # training on single-gpu
68 | export CUDA_VISIBLE_DEVICES=0
69 | python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml
70 | ```
71 |
72 | - Training on Multiple GPUs:
73 |
74 | ```shell
75 | # train on multi-gpu
76 | export CUDA_VISIBLE_DEVICES=0,1,2,3
77 | torchrun --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml
78 | ```
79 |
80 | - Evaluation on Multiple GPUs:
81 |
82 | ```shell
83 | # val on multi-gpu
84 | export CUDA_VISIBLE_DEVICES=0,1,2,3
85 | torchrun --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml -r path/to/checkpoint --test-only
86 | ```
87 |
88 |
89 |
90 |
91 |
92 |
93 | ### Export
94 |
95 | ```shell
96 | python tools/export_onnx.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -r path/to/checkpoint --check
97 | ```
98 |
99 |
100 |
101 |
102 |
103 |
104 | ### Train custom data
105 |
106 | 1. Set `remap_mscoco_category: False`. This variable only works for the MS-COCO dataset. If you want to use the `remap_mscoco_category` logic on your own dataset, modify the variable [`mscoco_category2name`](https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetr_pytorch/src/data/coco/coco_dataset.py#L154) accordingly.
107 |
108 | 2. Add `-t path/to/checkpoint` (optional) to tune rtdetr from a pretrained checkpoint. See [training script details](./tools/README.md).
109 |
110 |
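111 | ### Inference with the exported model (sketch)
112 | 
113 | A minimal editorial sketch of running the exported model with `onnxruntime`. It assumes the graph exposes `images` and `orig_target_sizes` inputs and returns `(labels, boxes, scores)`, which may differ depending on your export settings; file paths are placeholders.
114 | 
115 | ```python
116 | import numpy as np
117 | import onnxruntime as ort
118 | from PIL import Image
119 | 
120 | sess = ort.InferenceSession('model.onnx')
121 | im = Image.open('demo.jpg').convert('RGB').resize((640, 640))
122 | images = (np.asarray(im, dtype=np.float32) / 255.).transpose(2, 0, 1)[None]
123 | sizes = np.array([[640, 640]], dtype=np.int64)
124 | labels, boxes, scores = sess.run(None, {'images': images, 'orig_target_sizes': sizes})
125 | ```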
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/transformer_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import paddle
16 | import paddle.nn as nn
17 | import paddle.nn.functional as F
18 |
19 | from paddle.nn.initializer import TruncatedNormal, Constant, Assign
20 |
21 | # Common initializations
22 | ones_ = Constant(value=1.)
23 | zeros_ = Constant(value=0.)
24 | trunc_normal_ = TruncatedNormal(std=.02)
25 |
26 |
27 | # Common Layers
28 | def drop_path(x, drop_prob=0., training=False):
29 | """
30 | Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
31 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
32 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
33 | """
34 | if drop_prob == 0. or not training:
35 | return x
36 | keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype)
37 | shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
38 | random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
39 | random_tensor = paddle.floor(random_tensor) # binarize
40 | output = x.divide(keep_prob) * random_tensor
41 | return output
42 |
43 |
44 | class DropPath(nn.Layer):
45 | def __init__(self, drop_prob=None):
46 | super(DropPath, self).__init__()
47 | self.drop_prob = drop_prob
48 |
49 | def forward(self, x):
50 | return drop_path(x, self.drop_prob, self.training)
51 |
52 |
53 | class Identity(nn.Layer):
54 | def __init__(self):
55 | super(Identity, self).__init__()
56 |
57 | def forward(self, input):
58 | return input
59 |
60 |
61 | # common funcs
62 |
63 |
64 | def to_2tuple(x):
65 | if isinstance(x, (list, tuple)):
66 | return x
67 | return tuple([x] * 2)
68 |
69 |
70 | def add_parameter(layer, datas, name=None):
71 | parameter = layer.create_parameter(
72 | shape=(datas.shape), default_initializer=Assign(datas))
73 | if name:
74 | layer.add_parameter(name, parameter)
75 | return parameter
76 |
77 |
78 | def window_partition(x, window_size):
79 | """
80 | Partition into non-overlapping windows with padding if needed.
81 | Args:
82 | x (tensor): input tokens with [B, H, W, C].
83 | window_size (int): window size.
84 | Returns:
85 | windows: windows after partition with [B * num_windows, window_size, window_size, C].
86 |         (Hp, Wp): padded height and width; (num_h, num_w): windows per axis.
87 | """
88 | B, H, W, C = paddle.shape(x)
89 |
90 | pad_h = (window_size - H % window_size) % window_size
91 | pad_w = (window_size - W % window_size) % window_size
92 | x = F.pad(x.transpose([0, 3, 1, 2]),
93 | paddle.to_tensor(
94 | [0, int(pad_w), 0, int(pad_h)],
95 | dtype='int32')).transpose([0, 2, 3, 1])
96 | Hp, Wp = H + pad_h, W + pad_w
97 |
98 | num_h, num_w = Hp // window_size, Wp // window_size
99 |
100 | x = x.reshape([B, num_h, window_size, num_w, window_size, C])
101 | windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
102 | [-1, window_size, window_size, C])
103 | return windows, (Hp, Wp), (num_h, num_w)
104 |
105 |
106 | def window_unpartition(x, pad_hw, num_hw, hw):
107 | """
108 | Window unpartition into original sequences and removing padding.
109 | Args:
110 | x (tensor): input tokens with [B * num_windows, window_size, window_size, C].
111 | pad_hw (Tuple): padded height and width (Hp, Wp).
112 |         num_hw (Tuple): windows per axis (num_h, num_w); hw (Tuple): original height and width (H, W) before padding.
113 | Returns:
114 | x: unpartitioned sequences with [B, H, W, C].
115 | """
116 | Hp, Wp = pad_hw
117 | num_h, num_w = num_hw
118 | H, W = hw
119 | B, window_size, _, C = paddle.shape(x)
120 | B = B // (num_h * num_w)
121 | x = x.reshape([B, num_h, num_w, window_size, window_size, C])
122 | x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, Hp, Wp, C])
123 |
124 | return x[:, :H, :W, :]
125 |
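126 | 
127 | # --- editorial usage sketch (not part of the original file). Stochastic depth:
128 | # --- with drop_prob=0.5 in training mode, each sample is either zeroed or
129 | # --- rescaled by 1/keep_prob so the expectation is preserved.
130 | if __name__ == '__main__':
131 |     x = paddle.ones([8, 4])
132 |     y = drop_path(x, drop_prob=0.5, training=True)
133 |     print(y[:, 0])  # each entry is either 0.0 or 2.0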
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/profiler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import paddle
17 |
18 | # A global variable to record the number of calling times for profiler
19 | # functions. It is used to specify the tracing range of training steps.
20 | _profiler_step_id = 0
21 |
22 | # A global variable to avoid parsing from string every time.
23 | _profiler_options = None
24 |
25 |
26 | class ProfilerOptions(object):
27 | '''
28 | Use a string to initialize a ProfilerOptions.
29 |     The string should be in the format: "key1=value1;key2=value2;key3=value3".
30 | For example:
31 | "profile_path=model.profile"
32 | "batch_range=[50, 60]; profile_path=model.profile"
33 | "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
34 |
35 | ProfilerOptions supports following key-value pair:
36 |       batch_range - an integer list, e.g. [100, 110].
37 |       state - a string, the optional values are 'CPU', 'GPU' or 'All'.
38 |       sorted_key - a string, the optional values are 'calls', 'total',
39 |                    'max', 'min' or 'ave'.
40 | tracer_option - a string, the optional values are 'Default', 'OpDetail',
41 | 'AllOpDetail'.
42 | profile_path - a string, the path to save the serialized profile data,
43 | which can be used to generate a timeline.
44 | exit_on_finished - a boolean.
45 | '''
46 |
47 | def __init__(self, options_str):
48 | assert isinstance(options_str, str)
49 |
50 | self._options = {
51 | 'batch_range': [10, 20],
52 | 'state': 'All',
53 | 'sorted_key': 'total',
54 | 'tracer_option': 'Default',
55 | 'profile_path': '/tmp/profile',
56 | 'exit_on_finished': True
57 | }
58 | self._parse_from_string(options_str)
59 |
60 | def _parse_from_string(self, options_str):
61 | for kv in options_str.replace(' ', '').split(';'):
62 | key, value = kv.split('=')
63 | if key == 'batch_range':
64 | value_list = value.replace('[', '').replace(']', '').split(',')
65 | value_list = list(map(int, value_list))
66 | if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
67 | 1] > value_list[0]:
68 | self._options[key] = value_list
69 | elif key == 'exit_on_finished':
70 | self._options[key] = value.lower() in ("yes", "true", "t", "1")
71 | elif key in [
72 | 'state', 'sorted_key', 'tracer_option', 'profile_path'
73 | ]:
74 | self._options[key] = value
75 |
76 | def __getitem__(self, name):
77 | if self._options.get(name, None) is None:
78 | raise ValueError(
79 | "ProfilerOptions does not have an option named %s." % name)
80 | return self._options[name]
81 |
82 |
83 | def add_profiler_step(options_str=None):
84 | '''
85 | Enable the operator-level timing using PaddlePaddle's profiler.
86 |     The profiler uses an independent variable to count the profiler steps.
87 | One call of this function is treated as a profiler step.
88 |
89 | Args:
90 |         options_str - a string to initialize the ProfilerOptions.
91 | Default is None, and the profiler is disabled.
92 | '''
93 | if options_str is None:
94 | return
95 |
96 | global _profiler_step_id
97 | global _profiler_options
98 |
99 | if _profiler_options is None:
100 | _profiler_options = ProfilerOptions(options_str)
101 |
102 | if _profiler_step_id == _profiler_options['batch_range'][0]:
103 | paddle.utils.profiler.start_profiler(_profiler_options['state'],
104 | _profiler_options['tracer_option'])
105 | elif _profiler_step_id == _profiler_options['batch_range'][1]:
106 | paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
107 | _profiler_options['profile_path'])
108 | if _profiler_options['exit_on_finished']:
109 | sys.exit(0)
110 |
111 | _profiler_step_id += 1
112 |
--------------------------------------------------------------------------------
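
A minimal usage sketch for the hook above; the training loop and option values are illustrative:

from ppdet.utils.profiler import add_profiler_step

options = 'batch_range=[10, 20]; state=GPU; profile_path=/tmp/rtdetr.profile'
for step, batch in enumerate(loader):  # loader assumed to exist
    add_profiler_step(options)         # starts profiling at step 10, stops and dumps at step 20
    ...                                # forward / backward / optimizer step
# note: with the default exit_on_finished=True, the process exits right after dumping
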
/rtdetr_pytorch/src/data/transforms.py:
--------------------------------------------------------------------------------
1 | """"by lyuwenyu
2 | """
3 |
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | import torchvision
9 | torchvision.disable_beta_transforms_warning()
10 | from torchvision import datapoints
11 |
12 | import torchvision.transforms.v2 as T
13 | import torchvision.transforms.v2.functional as F
14 |
15 | from PIL import Image
16 | from typing import Any, Dict, List, Optional
17 |
18 | from src.core import register, GLOBAL_CONFIG
19 |
20 |
21 | __all__ = ['Compose', ]
22 |
23 |
24 | RandomPhotometricDistort = register(T.RandomPhotometricDistort)
25 | RandomZoomOut = register(T.RandomZoomOut)
26 | # RandomIoUCrop = register(T.RandomIoUCrop)
27 | RandomHorizontalFlip = register(T.RandomHorizontalFlip)
28 | Resize = register(T.Resize)
29 | ToImageTensor = register(T.ToImageTensor)
30 | ConvertDtype = register(T.ConvertDtype)
31 | SanitizeBoundingBox = register(T.SanitizeBoundingBox)
32 | RandomCrop = register(T.RandomCrop)
33 | Normalize = register(T.Normalize)
34 |
35 |
36 |
37 | @register
38 | class Compose(T.Compose):
39 | def __init__(self, ops) -> None:
40 | transforms = []
41 | if ops is not None:
42 | for op in ops:
43 | if isinstance(op, dict):
44 | name = op.pop('type')
45 |                     transform = getattr(GLOBAL_CONFIG[name]['_pymodule'], name)(**op)
46 |                     transforms.append(transform)
47 | # op['type'] = name
48 | elif isinstance(op, nn.Module):
49 | transforms.append(op)
50 |
51 |                 else:
52 |                     raise ValueError(f'unsupported transform op: {op}')
53 | else:
54 |             transforms = [EmptyTransform(), ]
55 |
56 | super().__init__(transforms=transforms)
57 |
58 |
59 | @register
60 | class EmptyTransform(T.Transform):
61 | def __init__(self, ) -> None:
62 | super().__init__()
63 |
64 | def forward(self, *inputs):
65 | inputs = inputs if len(inputs) > 1 else inputs[0]
66 | return inputs
67 |
68 |
69 | @register
70 | class PadToSize(T.Pad):
71 | _transformed_types = (
72 | Image.Image,
73 | datapoints.Image,
74 | datapoints.Video,
75 | datapoints.Mask,
76 | datapoints.BoundingBox,
77 | )
78 | def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
79 | sz = F.get_spatial_size(flat_inputs[0])
80 | h, w = self.spatial_size[0] - sz[0], self.spatial_size[1] - sz[1]
81 | self.padding = [0, 0, w, h]
82 | return dict(padding=self.padding)
83 |
84 | def __init__(self, spatial_size, fill=0, padding_mode='constant') -> None:
85 | if isinstance(spatial_size, int):
86 | spatial_size = (spatial_size, spatial_size)
87 |
88 | self.spatial_size = spatial_size
89 | super().__init__(0, fill, padding_mode)
90 |
91 | def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
92 | fill = self._fill[type(inpt)]
93 | padding = params['padding']
94 | return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) # type: ignore[arg-type]
95 |
96 | def __call__(self, *inputs: Any) -> Any:
97 | outputs = super().forward(*inputs)
98 | if len(outputs) > 1 and isinstance(outputs[1], dict):
99 | outputs[1]['padding'] = torch.tensor(self.padding)
100 | return outputs
101 |
102 |
103 | @register
104 | class RandomIoUCrop(T.RandomIoUCrop):
105 | def __init__(self, min_scale: float = 0.3, max_scale: float = 1, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2, sampler_options: Optional[List[float]] = None, trials: int = 40, p: float = 1.0):
106 | super().__init__(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio, sampler_options, trials)
107 | self.p = p
108 |
109 | def __call__(self, *inputs: Any) -> Any:
110 | if torch.rand(1) >= self.p:
111 | return inputs if len(inputs) > 1 else inputs[0]
112 |
113 | return super().forward(*inputs)
114 |
115 |
116 | @register
117 | class ConvertBox(T.Transform):
118 | _transformed_types = (
119 | datapoints.BoundingBox,
120 | )
121 | def __init__(self, out_fmt='', normalize=False) -> None:
122 | super().__init__()
123 | self.out_fmt = out_fmt
124 | self.normalize = normalize
125 |
126 | self.data_fmt = {
127 | 'xyxy': datapoints.BoundingBoxFormat.XYXY,
128 | 'cxcywh': datapoints.BoundingBoxFormat.CXCYWH
129 | }
130 |
131 | def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
132 | if self.out_fmt:
133 | spatial_size = inpt.spatial_size
134 | in_fmt = inpt.format.value.lower()
135 | inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.out_fmt)
136 | inpt = datapoints.BoundingBox(inpt, format=self.data_fmt[self.out_fmt], spatial_size=spatial_size)
137 |
138 | if self.normalize:
139 | inpt = inpt / torch.tensor(inpt.spatial_size[::-1]).tile(2)[None]
140 |
141 | return inpt
142 |
143 |
--------------------------------------------------------------------------------
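
A sketch of driving `Compose` with config-style op dicts; the op names must resolve through GLOBAL_CONFIG as registered above, and the kwargs follow torchvision's v2 transforms (values illustrative):

from src.data.transforms import Compose

ops = [
    {'type': 'RandomPhotometricDistort', 'p': 0.8},
    {'type': 'RandomHorizontalFlip', 'p': 0.5},
    {'type': 'Resize', 'size': [640, 640]},
    {'type': 'ToImageTensor'},
    {'type': 'ConvertDtype'},
]
transforms = Compose(ops)
# image, target = transforms(image, target)  # datapoints-aware, like any T.Compose
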
/rtdetr_pytorch/tools/export_onnx.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import os
5 | import sys
6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
7 |
8 | import argparse
9 | import numpy as np
10 |
11 | from src.core import YAMLConfig
12 |
13 | import torch
14 | import torch.nn as nn
15 |
16 |
17 | def main(args, ):
18 | """main
19 | """
20 | cfg = YAMLConfig(args.config, resume=args.resume)
21 |
22 | if args.resume:
23 | checkpoint = torch.load(args.resume, map_location='cpu')
24 | if 'ema' in checkpoint:
25 | state = checkpoint['ema']['module']
26 | else:
27 | state = checkpoint['model']
28 | else:
29 |         raise AttributeError('Only loading model.state_dict via --resume is supported for now.')
30 |
31 | # NOTE load train mode state -> convert to deploy mode
32 | cfg.model.load_state_dict(state)
33 |
34 | class Model(nn.Module):
35 | def __init__(self, ) -> None:
36 | super().__init__()
37 | self.model = cfg.model.deploy()
38 | self.postprocessor = cfg.postprocessor.deploy()
39 | print(self.postprocessor.deploy_mode)
40 |
41 | def forward(self, images, orig_target_sizes):
42 | outputs = self.model(images)
43 | return self.postprocessor(outputs, orig_target_sizes)
44 |
45 |
46 | model = Model()
47 |
48 | dynamic_axes = {
49 | 'images': {0: 'N', },
50 | 'orig_target_sizes': {0: 'N'}
51 | }
52 |
53 | data = torch.rand(1, 3, 640, 640)
54 | size = torch.tensor([[640, 640]])
55 |
56 | torch.onnx.export(
57 | model,
58 | (data, size),
59 | args.file_name,
60 | input_names=['images', 'orig_target_sizes'],
61 | output_names=['labels', 'boxes', 'scores'],
62 | dynamic_axes=dynamic_axes,
63 | opset_version=16,
64 | verbose=False
65 | )
66 |
67 |
68 | if args.check:
69 | import onnx
70 | onnx_model = onnx.load(args.file_name)
71 | onnx.checker.check_model(onnx_model)
72 | print('Check export onnx model done...')
73 |
74 |
75 | if args.simplify:
76 |         import onnx, onnxsim  # import onnx here too, in case --check was not passed
77 | dynamic = True
78 | input_shapes = {'images': data.shape, 'orig_target_sizes': size.shape} if dynamic else None
79 | onnx_model_simplify, check = onnxsim.simplify(args.file_name, input_shapes=input_shapes, dynamic_input_shape=dynamic)
80 | onnx.save(onnx_model_simplify, args.file_name)
81 | print(f'Simplify onnx model {check}...')
82 |
83 |
84 | # import onnxruntime as ort
85 | # from PIL import Image, ImageDraw, ImageFont
86 | # from torchvision.transforms import ToTensor
87 | # from src.data.coco.coco_dataset import mscoco_category2name, mscoco_category2label, mscoco_label2category
88 |
89 | # # print(onnx.helper.printable_graph(mm.graph))
90 |
91 | # # Load the original image without resizing
92 | # original_im = Image.open('./hongkong.jpg').convert('RGB')
93 | # original_size = original_im.size
94 |
95 | # # Resize the image for model input
96 | # im = original_im.resize((640, 640))
97 | # im_data = ToTensor()(im)[None]
98 | # print(im_data.shape)
99 |
100 | # sess = ort.InferenceSession(args.file_name)
101 | # output = sess.run(
102 | # # output_names=['labels', 'boxes', 'scores'],
103 | # output_names=None,
104 | # input_feed={'images': im_data.data.numpy(), "orig_target_sizes": size.data.numpy()}
105 | # )
106 |
107 | # # print(type(output))
108 | # # print([out.shape for out in output])
109 |
110 | # labels, boxes, scores = output
111 |
112 | # draw = ImageDraw.Draw(original_im) # Draw on the original image
113 | # thrh = 0.6
114 |
115 | # for i in range(im_data.shape[0]):
116 |
117 | # scr = scores[i]
118 | # lab = labels[i][scr > thrh]
119 | # box = boxes[i][scr > thrh]
120 |
121 | # print(i, sum(scr > thrh))
122 |
123 | # for b, l in zip(box, lab):
124 | # # Scale the bounding boxes back to the original image size
125 | # b = [coord * original_size[j % 2] / 640 for j, coord in enumerate(b)]
126 | # # Get the category name from the label
127 | # category_name = mscoco_category2name[mscoco_label2category[l]]
128 | # draw.rectangle(list(b), outline='red', width=2)
129 | # font = ImageFont.truetype("Arial.ttf", 15)
130 | # draw.text((b[0], b[1]), text=category_name, fill='yellow', font=font)
131 |
132 | # # Save the original image with bounding boxes
133 | # original_im.save('test.jpg')
134 |
135 |
136 | if __name__ == '__main__':
137 |
138 | parser = argparse.ArgumentParser()
139 | parser.add_argument('--config', '-c', type=str, )
140 | parser.add_argument('--resume', '-r', type=str, )
141 | parser.add_argument('--file-name', '-f', type=str, default='model.onnx')
142 | parser.add_argument('--check', action='store_true', default=False,)
143 | parser.add_argument('--simplify', action='store_true', default=False,)
144 |
145 | args = parser.parse_args()
146 |
147 | main(args)
148 |
--------------------------------------------------------------------------------
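
The commented-out block above already sketches end-to-end image inference; below is a shorter sketch that exercises the dynamic batch axis of the exported model (file name and dtypes are assumptions):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx')
images = np.random.rand(2, 3, 640, 640).astype(np.float32)  # N=2 hits the dynamic 'N' axis
sizes = np.array([[640, 640], [640, 640]], dtype=np.int64)
labels, boxes, scores = sess.run(None, {'images': images, 'orig_target_sizes': sizes})
print(labels.shape, boxes.shape, scores.shape)
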
/rtdetr_paddle/ppdet/modeling/architectures/meta_arch.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import paddle
7 | import paddle.nn as nn
8 | import typing
9 |
10 | from ppdet.core.workspace import register
11 | from ppdet.modeling.post_process import nms
12 |
13 | __all__ = ['BaseArch']
14 |
15 |
16 | @register
17 | class BaseArch(nn.Layer):
18 | def __init__(self, data_format='NCHW', use_extra_data=False):
19 | super(BaseArch, self).__init__()
20 | self.data_format = data_format
21 | self.inputs = {}
22 | self.fuse_norm = False
23 | self.use_extra_data = use_extra_data
24 |
25 | def load_meanstd(self, cfg_transform):
26 | scale = 1.
27 | mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
28 | std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
29 | for item in cfg_transform:
30 | if 'NormalizeImage' in item:
31 | mean = np.array(
32 | item['NormalizeImage']['mean'], dtype=np.float32)
33 | std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
34 | if item['NormalizeImage'].get('is_scale', True):
35 | scale = 1. / 255.
36 | break
37 | if self.data_format == 'NHWC':
38 | self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
39 | self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
40 | else:
41 | self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
42 | self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))
43 |
44 | def forward(self, inputs):
45 | if self.data_format == 'NHWC':
46 | image = inputs['image']
47 | inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
48 |
49 | if self.fuse_norm:
50 | image = inputs['image']
51 | self.inputs['image'] = image * self.scale + self.bias
52 | self.inputs['im_shape'] = inputs['im_shape']
53 | self.inputs['scale_factor'] = inputs['scale_factor']
54 | else:
55 | self.inputs = inputs
56 |
57 | self.model_arch()
58 |
59 | if self.training:
60 | out = self.get_loss()
61 | else:
62 | inputs_list = []
63 | # multi-scale input
64 | if not isinstance(inputs, typing.Sequence):
65 | inputs_list.append(inputs)
66 | else:
67 | inputs_list.extend(inputs)
68 | outs = []
69 | for inp in inputs_list:
70 | if self.fuse_norm:
71 | self.inputs['image'] = inp['image'] * self.scale + self.bias
72 | self.inputs['im_shape'] = inp['im_shape']
73 | self.inputs['scale_factor'] = inp['scale_factor']
74 | else:
75 | self.inputs = inp
76 | outs.append(self.get_pred())
77 |
78 | # multi-scale test
79 | if len(outs) > 1:
80 | out = self.merge_multi_scale_predictions(outs)
81 | else:
82 | out = outs[0]
83 | return out
84 |
85 | def merge_multi_scale_predictions(self, outs):
86 |         # default values for architectures not included in the following list
87 | num_classes = 80
88 | nms_threshold = 0.5
89 | keep_top_k = 100
90 |
91 | if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
92 | num_classes = self.bbox_head.num_classes
93 | keep_top_k = self.bbox_post_process.nms.keep_top_k
94 | nms_threshold = self.bbox_post_process.nms.nms_threshold
95 | else:
96 | raise Exception(
97 | "Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
98 | )
99 |
100 | final_boxes = []
101 | all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
102 | for c in range(num_classes):
103 | idxs = all_scale_outs[:, 0] == c
104 | if np.count_nonzero(idxs) == 0:
105 | continue
106 | r = nms(all_scale_outs[idxs, 1:], nms_threshold)
107 | final_boxes.append(
108 | np.concatenate([np.full((r.shape[0], 1), c), r], 1))
109 | out = np.concatenate(final_boxes)
110 | out = np.concatenate(sorted(
111 | out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
112 | out = {
113 | 'bbox': paddle.to_tensor(out),
114 | 'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
115 | }
116 |
117 | return out
118 |
119 | def build_inputs(self, data, input_def):
120 | inputs = {}
121 | for i, k in enumerate(input_def):
122 | inputs[k] = data[i]
123 | return inputs
124 |
125 | def model_arch(self, ):
126 | pass
127 |
128 | def get_loss(self, ):
129 | raise NotImplementedError("Should implement get_loss method!")
130 |
131 | def get_pred(self, ):
132 | raise NotImplementedError("Should implement get_pred method!")
133 |
--------------------------------------------------------------------------------
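
A minimal sketch of a concrete architecture built on `BaseArch` (the class here is hypothetical; real architectures such as DETR in this repo follow the same get_loss/get_pred contract):

import paddle
from ppdet.core.workspace import register
from ppdet.modeling.architectures.meta_arch import BaseArch

@register
class ToyArch(BaseArch):
    def get_loss(self):
        # training branch: return a dict of named losses
        return {'loss': self.inputs['image'].mean()}

    def get_pred(self):
        # eval branch: return detections as {'bbox': ..., 'bbox_num': ...}
        bbox = paddle.zeros([1, 6])  # rows of [class, score, x1, y1, x2, y2]
        return {'bbox': bbox, 'bbox_num': paddle.to_tensor([1])}
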
/rtdetr_pytorch/src/zoo/rtdetr/matcher.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | Modules to compute the matching cost and solve the corresponding LSAP.
4 |
5 | by lyuwenyu
6 | """
7 |
8 | import torch
9 | import torch.nn.functional as F
10 |
11 | from scipy.optimize import linear_sum_assignment
12 | from torch import nn
13 |
14 | from .box_ops import box_cxcywh_to_xyxy, generalized_box_iou
15 |
16 | from src.core import register
17 |
18 |
19 | @register
20 | class HungarianMatcher(nn.Module):
21 | """This class computes an assignment between the targets and the predictions of the network
22 |
23 | For efficiency reasons, the targets don't include the no_object. Because of this, in general,
24 | there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
25 | while the others are un-matched (and thus treated as non-objects).
26 | """
27 |
28 | __share__ = ['use_focal_loss', ]
29 |
30 | def __init__(self, weight_dict, use_focal_loss=False, alpha=0.25, gamma=2.0):
31 |         """Creates the matcher
32 | 
33 |         Params:
34 |             weight_dict: dict with the relative weights of the matching costs:
35 |                 'cost_class' for the classification error, 'cost_bbox' for the L1 error of the
36 |                 bounding box coordinates, and 'cost_giou' for the giou loss of the bounding box
37 |         """
38 | super().__init__()
39 | self.cost_class = weight_dict['cost_class']
40 | self.cost_bbox = weight_dict['cost_bbox']
41 | self.cost_giou = weight_dict['cost_giou']
42 |
43 | self.use_focal_loss = use_focal_loss
44 | self.alpha = alpha
45 | self.gamma = gamma
46 |
47 |         assert self.cost_class != 0 or self.cost_bbox != 0 or self.cost_giou != 0, "all costs can't be 0"
48 |
49 | @torch.no_grad()
50 | def forward(self, outputs, targets):
51 | """ Performs the matching
52 |
53 | Params:
54 | outputs: This is a dict that contains at least these entries:
55 | "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
56 | "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
57 |
58 | targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
59 | "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
60 | objects in the target) containing the class labels
61 | "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
62 |
63 | Returns:
64 | A list of size batch_size, containing tuples of (index_i, index_j) where:
65 | - index_i is the indices of the selected predictions (in order)
66 | - index_j is the indices of the corresponding selected targets (in order)
67 | For each batch element, it holds:
68 | len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
69 | """
70 | bs, num_queries = outputs["pred_logits"].shape[:2]
71 |
72 | # We flatten to compute the cost matrices in a batch
73 | if self.use_focal_loss:
74 | out_prob = F.sigmoid(outputs["pred_logits"].flatten(0, 1))
75 | else:
76 | out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes]
77 |
78 | out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
79 |
80 | # Also concat the target labels and boxes
81 | tgt_ids = torch.cat([v["labels"] for v in targets])
82 | tgt_bbox = torch.cat([v["boxes"] for v in targets])
83 |
84 | # Compute the classification cost. Contrary to the loss, we don't use the NLL,
85 | # but approximate it in 1 - proba[target class].
86 |         # The 1 is a constant that doesn't change the matching, it can be omitted.
87 | if self.use_focal_loss:
88 | out_prob = out_prob[:, tgt_ids]
89 | neg_cost_class = (1 - self.alpha) * (out_prob**self.gamma) * (-(1 - out_prob + 1e-8).log())
90 | pos_cost_class = self.alpha * ((1 - out_prob)**self.gamma) * (-(out_prob + 1e-8).log())
91 | cost_class = pos_cost_class - neg_cost_class
92 | else:
93 | cost_class = -out_prob[:, tgt_ids]
94 |
95 | # Compute the L1 cost between boxes
96 | cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
97 |
98 |         # Compute the giou cost between boxes
99 | cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
100 |
101 | # Final cost matrix
102 | C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
103 | C = C.view(bs, num_queries, -1).cpu()
104 |
105 | sizes = [len(v["boxes"]) for v in targets]
106 | indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
107 |
108 | return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
109 |
--------------------------------------------------------------------------------
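
A usage sketch with dummy tensors (the cost weights 2/5/2 mirror common DETR-style settings but are illustrative; the matcher is constructed directly here):

import torch
from src.zoo.rtdetr.matcher import HungarianMatcher

matcher = HungarianMatcher(
    weight_dict={'cost_class': 2, 'cost_bbox': 5, 'cost_giou': 2},
    use_focal_loss=True)
outputs = {
    'pred_logits': torch.randn(1, 300, 80),  # [bs, num_queries, num_classes]
    'pred_boxes': torch.rand(1, 300, 4),     # cxcywh, normalized to [0, 1]
}
targets = [{'labels': torch.tensor([3, 17]), 'boxes': torch.rand(2, 4)}]
indices = matcher(outputs, targets)
# indices[0] is a (pred_idx, tgt_idx) pair of int64 tensors, each of length 2
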
/rtdetr_paddle/ppdet/utils/check.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import sys
20 |
21 | import paddle
22 | import six
23 | import paddle.version as paddle_version
24 |
25 | from .logger import setup_logger
26 | logger = setup_logger(__name__)
27 |
28 | __all__ = [
29 | 'check_gpu', 'check_npu', 'check_xpu', 'check_mlu', 'check_version',
30 | 'check_config'
31 | ]
32 |
33 |
34 | def check_mlu(use_mlu):
35 | """
36 |     Log an error and exit when use_mlu is set to true in the
37 |     paddlepaddle cpu/gpu/xpu/npu version.
38 | """
39 | err = "Config use_mlu cannot be set as true while you are " \
40 | "using paddlepaddle cpu/gpu/xpu/npu version ! \nPlease try: \n" \
41 | "\t1. Install paddlepaddle-mlu to run model on MLU \n" \
42 | "\t2. Set use_mlu as false in config file to run " \
43 | "model on CPU/GPU/XPU/NPU"
44 |
45 | try:
46 | if use_mlu and not paddle.is_compiled_with_mlu():
47 | logger.error(err)
48 | sys.exit(1)
49 | except Exception as e:
50 | pass
51 |
52 |
53 | def check_npu(use_npu):
54 | """
55 |     Log an error and exit when use_npu is set to true in a
56 |     paddlepaddle version without paddle-custom-npu installed.
57 | """
58 | err = "Config use_npu cannot be set as true while you are " \
59 | "using paddlepaddle version without paddle-custom-npu " \
60 | "installed! \nPlease try: \n" \
61 | "\t1. Install paddle-custom-npu to run model on NPU \n" \
62 | "\t2. Set use_npu as false in config file to run " \
63 | "model on other devices supported."
64 |
65 | try:
66 |         if use_npu and 'npu' not in paddle.device.get_all_custom_device_type():
67 | logger.error(err)
68 | sys.exit(1)
69 | except Exception as e:
70 | pass
71 |
72 |
73 | def check_xpu(use_xpu):
74 | """
75 |     Log an error and exit when use_xpu is set to true in the
76 |     paddlepaddle cpu/gpu/npu version.
77 | """
78 | err = "Config use_xpu cannot be set as true while you are " \
79 | "using paddlepaddle cpu/gpu/npu version ! \nPlease try: \n" \
80 | "\t1. Install paddlepaddle-xpu to run model on XPU \n" \
81 | "\t2. Set use_xpu as false in config file to run " \
82 | "model on CPU/GPU/NPU"
83 |
84 | try:
85 | if use_xpu and not paddle.is_compiled_with_xpu():
86 | logger.error(err)
87 | sys.exit(1)
88 | except Exception as e:
89 | pass
90 |
91 |
92 | def check_gpu(use_gpu):
93 | """
94 |     Log an error and exit when use_gpu is set to true in the
95 |     paddlepaddle cpu version.
96 | """
97 | err = "Config use_gpu cannot be set as true while you are " \
98 | "using paddlepaddle cpu version ! \nPlease try: \n" \
99 | "\t1. Install paddlepaddle-gpu to run model on GPU \n" \
100 | "\t2. Set use_gpu as false in config file to run " \
101 | "model on CPU"
102 |
103 | try:
104 | if use_gpu and not paddle.is_compiled_with_cuda():
105 | logger.error(err)
106 | sys.exit(1)
107 | except Exception as e:
108 | pass
109 |
110 |
111 | def check_version(version='2.2'):
112 | """
113 |     Log an error and exit when the installed version of paddlepaddle
114 |     does not satisfy the requirement.
115 | """
116 | err = "PaddlePaddle version {} or higher is required, " \
117 | "or a suitable develop version is satisfied as well. \n" \
118 | "Please make sure the version is good with your code.".format(version)
119 |
120 | version_installed = [
121 | paddle_version.major, paddle_version.minor, paddle_version.patch,
122 | paddle_version.rc
123 | ]
124 |
125 | if version_installed == ['0', '0', '0', '0']:
126 | return
127 |
128 | version_split = version.split('.')
129 |
130 | length = min(len(version_installed), len(version_split))
131 | for i in six.moves.range(length):
132 |         if int(version_installed[i]) > int(version_split[i]):
133 |             return
134 |         if int(version_installed[i]) < int(version_split[i]):
135 |             raise Exception(err)
136 |
137 |
138 | def check_config(cfg):
139 | """
140 |     Check the correctness of the configuration file. Log an error and
141 |     exit when the config is not compliant.
142 | """
143 | err = "'{}' not specified in config file. Please set it in config file."
144 | check_list = ['architecture', 'num_classes']
145 | try:
146 | for var in check_list:
147 |             if var not in cfg:
148 | logger.error(err.format(var))
149 | sys.exit(1)
150 | except Exception as e:
151 | pass
152 |
153 | if 'log_iter' not in cfg:
154 | cfg.log_iter = 20
155 |
156 | return cfg
157 |
--------------------------------------------------------------------------------
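
A sketch of how these guards are typically invoked at startup (config values illustrative):

from ppdet.utils.check import check_config, check_gpu, check_version

check_version('2.4')   # minimum paddle version; '2.4' is an example
check_gpu(True)        # exits with an error on a CPU-only paddle build
# check_config(cfg)    # cfg is the loaded ppdet config, assumed built elsewhere
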
/rtdetr_pytorch/src/misc/dist.py:
--------------------------------------------------------------------------------
1 | """
2 | reference
3 | - https://github.com/pytorch/vision/blob/main/references/detection/utils.py
4 | - https://github.com/facebookresearch/detr/blob/master/util/misc.py#L406
5 |
6 | by lyuwenyu
7 | """
8 |
9 | import random
10 | import numpy as np
11 |
12 | import torch
13 | import torch.nn as nn
14 | import torch.distributed
15 | import torch.distributed as tdist
16 |
17 | from torch.nn.parallel import DistributedDataParallel as DDP
18 |
19 | from torch.utils.data import DistributedSampler
20 | from torch.utils.data.dataloader import DataLoader
21 |
22 |
23 | def init_distributed():
24 |     '''
25 |     distributed setup (env:// init method)
26 |     Returns:
27 |         bool: True if distributed mode was initialized
28 |     '''
29 | try:
30 | # # https://pytorch.org/docs/stable/elastic/run.html
31 | # LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
32 | # RANK = int(os.getenv('RANK', -1))
33 | # WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
34 |
35 | tdist.init_process_group(init_method='env://', )
36 | torch.distributed.barrier()
37 |
38 | rank = get_rank()
39 | device = torch.device(f'cuda:{rank}')
40 | torch.cuda.set_device(device)
41 |
42 | setup_print(rank == 0)
43 | print('Initialized distributed mode...')
44 |
45 | return True
46 |
47 |     except Exception:
48 | print('Not init distributed mode.')
49 | return False
50 |
51 |
52 | def setup_print(is_main):
53 | '''This function disables printing when not in master process
54 | '''
55 | import builtins as __builtin__
56 | builtin_print = __builtin__.print
57 |
58 | def print(*args, **kwargs):
59 | force = kwargs.pop('force', False)
60 | if is_main or force:
61 | builtin_print(*args, **kwargs)
62 |
63 | __builtin__.print = print
64 |
65 |
66 | def is_dist_available_and_initialized():
67 | if not tdist.is_available():
68 | return False
69 | if not tdist.is_initialized():
70 | return False
71 | return True
72 |
73 |
74 | def get_rank():
75 | if not is_dist_available_and_initialized():
76 | return 0
77 | return tdist.get_rank()
78 |
79 |
80 | def get_world_size():
81 | if not is_dist_available_and_initialized():
82 | return 1
83 | return tdist.get_world_size()
84 |
85 |
86 | def is_main_process():
87 | return get_rank() == 0
88 |
89 |
90 | def save_on_master(*args, **kwargs):
91 | if is_main_process():
92 | torch.save(*args, **kwargs)
93 |
94 |
95 |
96 | def warp_model(model, find_unused_parameters=False, sync_bn=False,):
97 | if is_dist_available_and_initialized():
98 | rank = get_rank()
99 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if sync_bn else model
100 | model = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=find_unused_parameters)
101 | return model
102 |
103 |
104 | def warp_loader(loader, shuffle=False):
105 | if is_dist_available_and_initialized():
106 | sampler = DistributedSampler(loader.dataset, shuffle=shuffle)
107 | loader = DataLoader(loader.dataset,
108 | loader.batch_size,
109 | sampler=sampler,
110 | drop_last=loader.drop_last,
111 | collate_fn=loader.collate_fn,
112 | pin_memory=loader.pin_memory,
113 | num_workers=loader.num_workers, )
114 | return loader
115 |
116 |
117 |
118 | def is_parallel(model) -> bool:
119 | # Returns True if model is of type DP or DDP
120 | return type(model) in (torch.nn.parallel.DataParallel, torch.nn.parallel.DistributedDataParallel)
121 |
122 |
123 | def de_parallel(model) -> nn.Module:
124 | # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
125 | return model.module if is_parallel(model) else model
126 |
127 |
128 | def reduce_dict(data, avg=True):
129 | '''
130 | Args
131 | data dict: input, {k: v, ...}
132 | avg bool: true
133 | '''
134 | world_size = get_world_size()
135 | if world_size < 2:
136 | return data
137 |
138 | with torch.no_grad():
139 | keys, values = [], []
140 | for k in sorted(data.keys()):
141 | keys.append(k)
142 | values.append(data[k])
143 |
144 | values = torch.stack(values, dim=0)
145 | tdist.all_reduce(values)
146 |
147 | if avg is True:
148 | values /= world_size
149 |
150 | _data = {k: v for k, v in zip(keys, values)}
151 |
152 | return _data
153 |
154 |
155 |
156 | def all_gather(data):
157 | """
158 | Run all_gather on arbitrary picklable data (not necessarily tensors)
159 | Args:
160 | data: any picklable object
161 | Returns:
162 | list[data]: list of data gathered from each rank
163 | """
164 | world_size = get_world_size()
165 | if world_size == 1:
166 | return [data]
167 | data_list = [None] * world_size
168 | tdist.all_gather_object(data_list, data)
169 | return data_list
170 |
171 |
172 | import time
173 | def sync_time():
174 | '''sync_time
175 | '''
176 | if torch.cuda.is_available():
177 | torch.cuda.synchronize()
178 |
179 | return time.time()
180 |
181 |
182 |
183 | def set_seed(seed):
184 | # fix the seed for reproducibility
185 | seed = seed + get_rank()
186 | torch.manual_seed(seed)
187 | np.random.seed(seed)
188 | random.seed(seed)
189 |
190 |
191 |
--------------------------------------------------------------------------------
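
A usage sketch, assuming the process is launched with torchrun so env:// rendezvous variables are set; the import path, `model`, and `loader` are assumed:

from src.misc import dist

dist.init_distributed()   # falls back to single-process mode if rendezvous fails
dist.set_seed(42)         # per-rank seed offset for reproducibility
model = dist.warp_model(model, sync_bn=True)
loader = dist.warp_loader(loader, shuffle=True)
# ... training loop ...
dist.save_on_master(dist.de_parallel(model).state_dict(), 'checkpoint.pth')
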
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/test_ms_deformable_attn_op.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import print_function
17 | from __future__ import division
18 |
19 | import os
20 | import sys
21 | import random
22 | import numpy as np
23 | import paddle
24 | # add python path of PaddleDetection to sys.path
25 | parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 5)))
26 | if parent_path not in sys.path:
27 | sys.path.append(parent_path)
28 |
29 | from ppdet.modeling.transformers.utils import deformable_attention_core_func
30 | ms_deform_attn_core_paddle = deformable_attention_core_func
31 |
32 | try:
33 | gpu_index = int(sys.argv[1])
34 | except (IndexError, ValueError):
35 | gpu_index = 0
36 | print(f'Use gpu {gpu_index} to test...')
37 | paddle.set_device(f'gpu:{gpu_index}')
38 |
39 | try:
40 | from deformable_detr_ops import ms_deformable_attn
41 | except Exception as e:
42 | print('import deformable_detr_ops error', e)
43 | sys.exit(-1)
44 |
45 | paddle.seed(1)
46 | random.seed(1)
47 | np.random.seed(1)
48 |
49 | bs, n_heads, c = 2, 8, 8
50 | query_length, n_levels, n_points = 2, 2, 2
51 | spatial_shapes = paddle.to_tensor([(6, 4), (3, 2)], dtype=paddle.int64)
52 | level_start_index = paddle.concat((paddle.to_tensor(
53 | [0], dtype=paddle.int64), spatial_shapes.prod(1).cumsum(0)[:-1]))
54 | value_length = sum([(H * W).item() for H, W in spatial_shapes])
55 |
56 |
57 | def get_test_tensors(channels):
58 | value = paddle.rand(
59 | [bs, value_length, n_heads, channels], dtype=paddle.float32) * 0.01
60 | sampling_locations = paddle.rand(
61 | [bs, query_length, n_heads, n_levels, n_points, 2],
62 | dtype=paddle.float32)
63 | attention_weights = paddle.rand(
64 | [bs, query_length, n_heads, n_levels, n_points],
65 | dtype=paddle.float32) + 1e-5
66 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
67 | -2, keepdim=True)
68 |
69 | return [value, sampling_locations, attention_weights]
70 |
71 |
72 | @paddle.no_grad()
73 | def check_forward_equal_with_paddle_float():
74 | value, sampling_locations, attention_weights = get_test_tensors(c)
75 |
76 | output_paddle = ms_deform_attn_core_paddle(
77 | value, spatial_shapes, level_start_index, sampling_locations,
78 | attention_weights).detach().cpu()
79 | output_cuda = ms_deformable_attn(value, spatial_shapes, level_start_index,
80 | sampling_locations,
81 | attention_weights).detach().cpu()
82 | fwdok = paddle.allclose(
83 | output_cuda, output_paddle, rtol=1e-2, atol=1e-3).item()
84 | max_abs_err = (output_cuda - output_paddle).abs().max().item()
85 | max_rel_err = (
86 | (output_cuda - output_paddle).abs() / output_paddle.abs()).max().item()
87 |
88 | print(
89 | f'*{fwdok} check_forward_equal_with_paddle_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}'
90 | )
91 |
92 |
93 | def check_gradient_numerical(channels=4):
94 | value_paddle, sampling_locations_paddle, attention_weights_paddle = get_test_tensors(
95 | channels)
96 | value_paddle.stop_gradient = False
97 | sampling_locations_paddle.stop_gradient = False
98 | attention_weights_paddle.stop_gradient = False
99 |
100 | value_cuda = value_paddle.detach().clone()
101 | sampling_locations_cuda = sampling_locations_paddle.detach().clone()
102 | attention_weights_cuda = attention_weights_paddle.detach().clone()
103 | value_cuda.stop_gradient = False
104 | sampling_locations_cuda.stop_gradient = False
105 | attention_weights_cuda.stop_gradient = False
106 |
107 | output_paddle = ms_deform_attn_core_paddle(
108 | value_paddle, spatial_shapes, level_start_index,
109 | sampling_locations_paddle, attention_weights_paddle)
110 | output_paddle.sum().backward()
111 |
112 | output_cuda = ms_deformable_attn(value_cuda, spatial_shapes,
113 | level_start_index, sampling_locations_cuda,
114 | attention_weights_cuda)
115 | output_cuda.sum().backward()
116 |
117 | res = paddle.allclose(
118 | value_paddle.grad, value_cuda.grad, rtol=1e-2, atol=1e-3).item()
119 | print(f'*tensor1 {res} check_gradient_numerical(D={channels})')
120 |
121 | res = paddle.allclose(
122 | sampling_locations_paddle.grad,
123 | sampling_locations_cuda.grad,
124 | rtol=1e-2,
125 | atol=1e-3).item()
126 | print(f'*tensor2 {res} check_gradient_numerical(D={channels})')
127 |
128 | res = paddle.allclose(
129 | attention_weights_paddle.grad,
130 | attention_weights_cuda.grad,
131 | rtol=1e-2,
132 | atol=1e-3).item()
133 | print(f'*tensor3 {res} check_gradient_numerical(D={channels})')
134 |
135 |
136 | if __name__ == '__main__':
137 | check_forward_equal_with_paddle_float()
138 |
139 | for channels in [30, 32, 64, 71, 128, 1024, 1025, 2048, 3096]:
140 | check_gradient_numerical(channels)
141 |
--------------------------------------------------------------------------------
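
Note that `deformable_detr_ops` only imports after the custom op has been compiled; a hedged sketch of the expected workflow, assuming the setup script in this directory follows the standard Paddle custom-op flow:

# from rtdetr_paddle/ppdet/modeling/transformers/ext_op/
#   python setup_ms_deformable_attn_op.py install   # build and install the CUDA op
#   python test_ms_deformable_attn_op.py 0          # argv[1] selects the GPU index
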
/rtdetr_pytorch/src/core/yaml_config.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | import re
8 | import copy
9 |
10 | from .config import BaseConfig
11 | from .yaml_utils import load_config, merge_config, create, merge_dict
12 |
13 |
14 | class YAMLConfig(BaseConfig):
15 | def __init__(self, cfg_path: str, **kwargs) -> None:
16 | super().__init__()
17 |
18 | cfg = load_config(cfg_path)
19 | merge_dict(cfg, kwargs)
20 |
21 | # pprint(cfg)
22 |
23 | self.yaml_cfg = cfg
24 |
25 | self.log_step = cfg.get('log_step', 100)
26 | self.checkpoint_step = cfg.get('checkpoint_step', 1)
27 | self.epoches = cfg.get('epoches', -1)
28 | self.resume = cfg.get('resume', '')
29 | self.tuning = cfg.get('tuning', '')
30 | self.sync_bn = cfg.get('sync_bn', False)
31 | self.output_dir = cfg.get('output_dir', None)
32 |
33 | self.use_ema = cfg.get('use_ema', False)
34 | self.use_amp = cfg.get('use_amp', False)
35 | self.autocast = cfg.get('autocast', dict())
36 | self.find_unused_parameters = cfg.get('find_unused_parameters', None)
37 | self.clip_max_norm = cfg.get('clip_max_norm', 0.)
38 |
39 |
40 | @property
41 | def model(self, ) -> torch.nn.Module:
42 | if self._model is None and 'model' in self.yaml_cfg:
43 | merge_config(self.yaml_cfg)
44 | self._model = create(self.yaml_cfg['model'])
45 | return self._model
46 |
47 | @property
48 | def postprocessor(self, ) -> torch.nn.Module:
49 | if self._postprocessor is None and 'postprocessor' in self.yaml_cfg:
50 | merge_config(self.yaml_cfg)
51 | self._postprocessor = create(self.yaml_cfg['postprocessor'])
52 | return self._postprocessor
53 |
54 | @property
55 | def criterion(self, ):
56 | if self._criterion is None and 'criterion' in self.yaml_cfg:
57 | merge_config(self.yaml_cfg)
58 | self._criterion = create(self.yaml_cfg['criterion'])
59 | return self._criterion
60 |
61 |
62 | @property
63 | def optimizer(self, ):
64 | if self._optimizer is None and 'optimizer' in self.yaml_cfg:
65 | merge_config(self.yaml_cfg)
66 | params = self.get_optim_params(self.yaml_cfg['optimizer'], self.model)
67 | self._optimizer = create('optimizer', params=params)
68 |
69 | return self._optimizer
70 |
71 | @property
72 | def lr_scheduler(self, ):
73 | if self._lr_scheduler is None and 'lr_scheduler' in self.yaml_cfg:
74 | merge_config(self.yaml_cfg)
75 | self._lr_scheduler = create('lr_scheduler', optimizer=self.optimizer)
76 | print('Initial lr: ', self._lr_scheduler.get_last_lr())
77 |
78 | return self._lr_scheduler
79 |
80 | @property
81 | def train_dataloader(self, ):
82 | if self._train_dataloader is None and 'train_dataloader' in self.yaml_cfg:
83 | merge_config(self.yaml_cfg)
84 | self._train_dataloader = create('train_dataloader')
85 | self._train_dataloader.shuffle = self.yaml_cfg['train_dataloader'].get('shuffle', False)
86 |
87 | return self._train_dataloader
88 |
89 | @property
90 | def val_dataloader(self, ):
91 | if self._val_dataloader is None and 'val_dataloader' in self.yaml_cfg:
92 | merge_config(self.yaml_cfg)
93 | self._val_dataloader = create('val_dataloader')
94 | self._val_dataloader.shuffle = self.yaml_cfg['val_dataloader'].get('shuffle', False)
95 |
96 | return self._val_dataloader
97 |
98 |
99 | @property
100 | def ema(self, ):
101 | if self._ema is None and self.yaml_cfg.get('use_ema', False):
102 | merge_config(self.yaml_cfg)
103 | self._ema = create('ema', model=self.model)
104 |
105 | return self._ema
106 |
107 |
108 | @property
109 | def scaler(self, ):
110 | if self._scaler is None and self.yaml_cfg.get('use_amp', False):
111 | merge_config(self.yaml_cfg)
112 | self._scaler = create('scaler')
113 |
114 | return self._scaler
115 |
116 |
117 | @staticmethod
118 | def get_optim_params(cfg: dict, model: nn.Module):
119 | '''
120 | E.g.:
121 | ^(?=.*a)(?=.*b).*$ means including a and b
122 |         ^((?!b).)*a((?!b).)*$ means including a but not b
123 | ^((?!b|c).)*a((?!b|c).)*$ means including a but not (b | c)
124 | '''
125 | assert 'type' in cfg, ''
126 | cfg = copy.deepcopy(cfg)
127 |
128 | if 'params' not in cfg:
129 | return model.parameters()
130 |
131 | assert isinstance(cfg['params'], list), ''
132 |
133 | param_groups = []
134 | visited = []
135 | for pg in cfg['params']:
136 | pattern = pg['params']
137 | params = {k: v for k, v in model.named_parameters() if v.requires_grad and len(re.findall(pattern, k)) > 0}
138 | pg['params'] = params.values()
139 | param_groups.append(pg)
140 | visited.extend(list(params.keys()))
141 |
142 | names = [k for k, v in model.named_parameters() if v.requires_grad]
143 |
144 | if len(visited) < len(names):
145 | unseen = set(names) - set(visited)
146 | params = {k: v for k, v in model.named_parameters() if v.requires_grad and k in unseen}
147 | param_groups.append({'params': params.values()})
148 | visited.extend(list(params.keys()))
149 |
150 | assert len(visited) == len(names), ''
151 |
152 | return param_groups
153 |
--------------------------------------------------------------------------------
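
A sketch of how `get_optim_params` groups parameters by regex (cfg keys illustrative, mirroring the optimizer section of the YAML configs):

import torch.nn as nn
from src.core.yaml_config import YAMLConfig

model = nn.ModuleDict({'backbone': nn.Linear(4, 4), 'decoder': nn.Linear(4, 4)})
cfg = {
    'type': 'AdamW',
    'params': [
        {'params': '^(?=.*backbone).*$', 'lr': 1e-5},  # lower lr for backbone weights
    ],
}
groups = YAMLConfig.get_optim_params(cfg, model)
# -> one group holding the backbone parameters at lr=1e-5, plus a catch-all
#    group for the remaining (decoder) parameters
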
/benchmark/trtinfer.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import time
5 | import contextlib
6 | from collections import namedtuple, OrderedDict
7 |
8 | import torch
9 | import numpy as np
10 | import tensorrt as trt
11 | import pycuda.driver as cuda; import pycuda.autoinit  # noqa -- needed when backend == 'cuda'
12 | from utils import TimeProfiler
13 |
14 | class TRTInference(object):
15 | def __init__(self, engine_path, device='cuda:0', backend='torch', max_batch_size=32, verbose=False):
16 | self.engine_path = engine_path
17 | self.device = device
18 | self.backend = backend
19 | self.max_batch_size = max_batch_size
20 |
21 | self.logger = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.INFO)
22 |
23 | self.engine = self.load_engine(engine_path)
24 |
25 | self.context = self.engine.create_execution_context()
26 |
27 | self.bindings = self.get_bindings(self.engine, self.context, self.max_batch_size, self.device)
28 | self.bindings_addr = OrderedDict((n, v.ptr) for n, v in self.bindings.items())
29 |
30 | self.input_names = self.get_input_names()
31 | self.output_names = self.get_output_names()
32 |
33 | if self.backend == 'cuda':
34 | self.stream = cuda.Stream()
35 |
36 | self.time_profile = TimeProfiler()
37 |
38 | def init(self, ):
39 | self.dynamic = False
40 |
41 | def load_engine(self, path):
42 | '''load engine
43 | '''
44 | trt.init_libnvinfer_plugins(self.logger, '')
45 | with open(path, 'rb') as f, trt.Runtime(self.logger) as runtime:
46 | return runtime.deserialize_cuda_engine(f.read())
47 |
48 | def get_input_names(self, ):
49 | names = []
50 | for _, name in enumerate(self.engine):
51 | if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
52 | names.append(name)
53 | return names
54 |
55 | def get_output_names(self, ):
56 | names = []
57 | for _, name in enumerate(self.engine):
58 | if self.engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT:
59 | names.append(name)
60 | return names
61 |
62 | def get_bindings(self, engine, context, max_batch_size=32, device=None):
63 |         '''build bindings
64 | '''
65 | Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
66 | bindings = OrderedDict()
67 | # max_batch_size = 1
68 |
69 | for i, name in enumerate(engine):
70 | shape = engine.get_tensor_shape(name)
71 | dtype = trt.nptype(engine.get_tensor_dtype(name))
72 |
73 | if shape[0] == -1:
74 |                 self.dynamic = True  # record that the engine has a dynamic batch dim
75 | shape[0] = max_batch_size
76 | if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT: # dynamic
77 | context.set_input_shape(name, shape)
78 |
79 | if self.backend == 'cuda':
80 | if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
81 | data = np.random.randn(*shape).astype(dtype)
82 | ptr = cuda.mem_alloc(data.nbytes)
83 | bindings[name] = Binding(name, dtype, shape, data, ptr)
84 | else:
85 | data = cuda.pagelocked_empty(trt.volume(shape), dtype)
86 | ptr = cuda.mem_alloc(data.nbytes)
87 | bindings[name] = Binding(name, dtype, shape, data, ptr)
88 |
89 | else:
90 | data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
91 | bindings[name] = Binding(name, dtype, shape, data, data.data_ptr())
92 |
93 | return bindings
94 |
95 | def run_torch(self, blob):
96 | '''torch input
97 | '''
98 | for n in self.input_names:
99 | if self.bindings[n].shape != blob[n].shape:
100 | self.context.set_input_shape(n, blob[n].shape)
101 | self.bindings[n] = self.bindings[n]._replace(shape=blob[n].shape)
102 |
103 | self.bindings_addr.update({n: blob[n].data_ptr() for n in self.input_names})
104 | self.context.execute_v2(list(self.bindings_addr.values()))
105 | outputs = {n: self.bindings[n].data for n in self.output_names}
106 |
107 | return outputs
108 |
109 |
110 | def async_run_cuda(self, blob):
111 | '''numpy input
112 | '''
113 | for n in self.input_names:
114 | cuda.memcpy_htod_async(self.bindings_addr[n], blob[n], self.stream)
115 |
116 | bindings_addr = [int(v) for _, v in self.bindings_addr.items()]
117 | self.context.execute_async_v2(bindings=bindings_addr, stream_handle=self.stream.handle)
118 |
119 | outputs = {}
120 | for n in self.output_names:
121 | cuda.memcpy_dtoh_async(self.bindings[n].data, self.bindings[n].ptr, self.stream)
122 | outputs[n] = self.bindings[n].data
123 |
124 | self.stream.synchronize()
125 |
126 | return outputs
127 |
128 | def __call__(self, blob):
129 | if self.backend == 'torch':
130 | return self.run_torch(blob)
131 |
132 | elif self.backend == 'cuda':
133 | return self.async_run_cuda(blob)
134 |
135 | def synchronize(self, ):
136 | if self.backend == 'torch' and torch.cuda.is_available():
137 | torch.cuda.synchronize()
138 |
139 | elif self.backend == 'cuda':
140 | self.stream.synchronize()
141 |
142 | def warmup(self, blob, n):
143 | for _ in range(n):
144 | _ = self(blob)
145 |
146 | def speed(self, blob, n):
147 | self.time_profile.reset()
148 | for _ in range(n):
149 | with self.time_profile:
150 | _ = self(blob)
151 |
152 | return self.time_profile.total / n
153 |
154 |
--------------------------------------------------------------------------------
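
A usage sketch (engine path, input names, and shapes are assumptions matching the ONNX export above; with backend='torch' the inputs live on the GPU as torch tensors):

import torch
from trtinfer import TRTInference

m = TRTInference('rtdetr_r50vd.engine', device='cuda:0', backend='torch')
blob = {
    'images': torch.rand(1, 3, 640, 640, device='cuda:0'),
    'orig_target_sizes': torch.tensor([[640, 640]], device='cuda:0'),  # dtype must match the engine
}
outputs = m(blob)            # dict keyed by the engine's output tensor names
print(m.speed(blob, n=100))  # average seconds per call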