├── rtdetr_pytorch
│   ├── src
│   │   ├── zoo
│   │   │   ├── __init__.py
│   │   │   └── rtdetr
│   │   │       ├── __init__.py
│   │   │       ├── rtdetr.py
│   │   │       ├── box_ops.py
│   │   │       ├── rtdetr_postprocessor.py
│   │   │       ├── utils.py
│   │   │       └── matcher.py
│   │   ├── nn
│   │   │   ├── arch
│   │   │   │   ├── __init__.py
│   │   │   │   └── classification.py
│   │   │   ├── backbone
│   │   │   │   ├── __init__.py
│   │   │   │   ├── utils.py
│   │   │   │   ├── test_resnet.py
│   │   │   │   └── common.py
│   │   │   ├── __init__.py
│   │   │   └── criterion
│   │   │       ├── __init__.py
│   │   │       └── utils.py
│   │   ├── misc
│   │   │   ├── __init__.py
│   │   │   ├── visualizer.py
│   │   │   └── dist.py
│   │   ├── optim
│   │   │   ├── __init__.py
│   │   │   ├── amp.py
│   │   │   ├── optim.py
│   │   │   └── ema.py
│   │   ├── __init__.py
│   │   ├── data
│   │   │   ├── __init__.py
│   │   │   ├── coco
│   │   │   │   └── __init__.py
│   │   │   ├── cifar10
│   │   │   │   └── __init__.py
│   │   │   ├── dataloader.py
│   │   │   └── transforms.py
│   │   ├── core
│   │   │   ├── __init__.py
│   │   │   └── yaml_config.py
│   │   └── solver
│   │       ├── __init__.py
│   │       └── det_solver.py
│   ├── requirements.txt
│   ├── configs
│   │   ├── runtime.yml
│   │   ├── rtdetr
│   │   │   ├── rtdetr_r50vd_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_m_6x_coco.yml
│   │   │   ├── rtdetr_r101vd_6x_coco.yml
│   │   │   ├── include
│   │   │   │   ├── optimizer.yml
│   │   │   │   ├── dataloader.yml
│   │   │   │   └── rtdetr_r50vd.yml
│   │   │   ├── rtdetr_r34vd_6x_coco.yml
│   │   │   └── rtdetr_r18vd_6x_coco.yml
│   │   └── dataset
│   │       └── coco_detection.yml
│   ├── tools
│   │   ├── README.md
│   │   ├── train.py
│   │   └── export_onnx.py
│   ├── temp.py
│   └── README.md
├── rtdetr_paddle
│   ├── ppdet
│   │   ├── version.py
│   │   ├── modeling
│   │   │   ├── transformers
│   │   │   │   ├── ext_op
│   │   │   │   │   ├── setup_ms_deformable_attn_op.py
│   │   │   │   │   ├── ms_deformable_attn_op.cc
│   │   │   │   │   ├── README.md
│   │   │   │   │   └── test_ms_deformable_attn_op.py
│   │   │   │   ├── __init__.py
│   │   │   │   └── position_encoding.py
│   │   │   ├── heads
│   │   │   │   └── __init__.py
│   │   │   ├── architectures
│   │   │   │   ├── __init__.py
│   │   │   │   ├── detr.py
│   │   │   │   └── meta_arch.py
│   │   │   ├── losses
│   │   │   │   ├── __init__.py
│   │   │   │   └── smooth_l1_loss.py
│   │   │   ├── __init__.py
│   │   │   ├── backbones
│   │   │   │   ├── __init__.py
│   │   │   │   ├── name_adapter.py
│   │   │   │   └── transformer_utils.py
│   │   │   ├── shape_spec.py
│   │   │   └── cls_utils.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── logger.py
│   │   │   ├── stats.py
│   │   │   ├── colormap.py
│   │   │   ├── voc_utils.py
│   │   │   ├── profiler.py
│   │   │   └── check.py
│   │   ├── core
│   │   │   ├── config
│   │   │   │   ├── __init__.py
│   │   │   │   └── yaml_helpers.py
│   │   │   └── __init__.py
│   │   ├── optimizer
│   │   │   ├── __init__.py
│   │   │   └── utils.py
│   │   ├── data
│   │   │   ├── source
│   │   │   │   └── __init__.py
│   │   │   ├── __init__.py
│   │   │   ├── transform
│   │   │   │   └── __init__.py
│   │   │   ├── shm_utils.py
│   │   │   └── utils.py
│   │   ├── engine
│   │   │   ├── __init__.py
│   │   │   └── env.py
│   │   ├── metrics
│   │   │   └── __init__.py
│   │   └── __init__.py
│   ├── requirements.txt
│   ├── dataset
│   │   ├── voc
│   │   │   ├── label_list.txt
│   │   │   ├── create_list.py
│   │   │   └── download_voc.py
│   │   └── coco
│   │       └── download_coco.py
│   ├── configs
│   │   ├── rtdetr
│   │   │   ├── rtdetr_r50vd_6x_coco.yml
│   │   │   ├── _base_
│   │   │   │   ├── optimizer_6x.yml
│   │   │   │   ├── rtdetr_reader.yml
│   │   │   │   └── rtdetr_r50vd.yml
│   │   │   ├── rtdetr_hgnetv2_l_6x_coco.yml
│   │   │   ├── rtdetr_r50vd_m_6x_coco.yml
│   │   │   ├── rtdetr_r18vd_6x_coco.yml
│   │   │   ├── rtdetr_r34vd_6x_coco.yml
│   │   │   ├── rtdetr_r101vd_6x_coco.yml
│   │   │   └── rtdetr_hgnetv2_x_6x_coco.yml
│   │   ├── runtime.yml
│   │   └── datasets
│   │       ├── voc.yml
│   │       └── coco_detection.yml
│   └── tools
│       ├── slice_image.py
│       └── export_model.py
├── .github
│   ├── ISSUE_TEMPLATE
│   │   └── bug_report.md
│   └── FUNDING.yml
├── benchmark
│   ├── trtexec.md
│   ├── README.md
│   ├── yolov8_onnx.py
│   ├── utils.py
│   ├── dataset.py
│   └── trtinfer.py
├── README_cn.md
└── .gitignore
/rtdetr_pytorch/src/zoo/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .rtdetr import *
3 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/arch/__init__.py:
--------------------------------------------------------------------------------
1 | from .classification import *
2 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/misc/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .logger import *
3 | from .visualizer import *
4 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .ema import *
3 | from .optim import *
4 | from .amp import *
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from . import data
3 | from . import nn
4 | from . import optim
5 | from . import zoo
6 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .presnet import *
3 | from .test_resnet import *
4 |
5 | from .common import *
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .arch import *
3 | from .criterion import *
4 |
5 | #
6 | from .backbone import *
7 |
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==2.0.1
2 | torchvision==0.15.2
3 | onnx==1.14.0
4 | onnxruntime==1.15.1
5 | pycocotools
6 | PyYAML
7 | scipy
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from .coco import *
3 | from .cifar10 import CIFAR10
4 |
5 | from .dataloader import *
6 | from .transforms import *
7 |
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/criterion/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import torch.nn as nn
3 | from src.core import register
4 |
5 | CrossEntropyLoss = register(nn.CrossEntropyLoss)
6 |
7 |
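8 | # Registered classes can then be built by type name from a YAML config
9 | # (e.g. "type: CrossEntropyLoss") via src.core's register/create machinery.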
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/version.py:
--------------------------------------------------------------------------------
1 | # THIS FILE IS GENERATED FROM PADDLEPADDLE SETUP.PY
2 | #
3 | full_version = '2.4.0'
4 | commit = '87ed5ba91eaeb332e8e5c3f4e7d5b1d765c75644'
5 |
--------------------------------------------------------------------------------
/rtdetr_paddle/requirements.txt:
--------------------------------------------------------------------------------
1 | paddlepaddle-gpu==2.4.2
2 | tqdm
3 | typeguard
4 | visualdl>=2.2.0
5 | opencv-python <= 4.6.0
6 | PyYAML
7 | shapely
8 | scipy
9 | terminaltables
10 | Cython
11 | pycocotools
12 | setuptools
13 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/core/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | # from .yaml_utils import register, create, load_config, merge_config, merge_dict
5 | from .yaml_utils import *
6 | from .config import BaseConfig
7 | from .yaml_config import YAMLConfig
8 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/solver/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | from .solver import BaseSolver
5 | from .det_solver import DetSolver
6 |
7 | from typing import Dict, Type
8 |
9 | TASKS: Dict[str, Type[BaseSolver]] = {
10 | 'detection': DetSolver,
11 | }
12 |
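13 | # tools/train.py selects the solver with TASKS[cfg.yaml_cfg['task']](cfg);
14 | # 'task: detection' comes from configs/dataset/coco_detection.yml.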
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/label_list.txt:
--------------------------------------------------------------------------------
1 | aeroplane
2 | bicycle
3 | bird
4 | boat
5 | bottle
6 | bus
7 | car
8 | cat
9 | chair
10 | cow
11 | diningtable
12 | dog
13 | horse
14 | motorbike
15 | person
16 | pottedplant
17 | sheep
18 | sofa
19 | train
20 | tvmonitor
21 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/coco/__init__.py:
--------------------------------------------------------------------------------
1 | from .coco_dataset import (
2 | CocoDetection,
3 | mscoco_category2label,
4 | mscoco_label2category,
5 | mscoco_category2name,
6 | )
7 | from .coco_eval import *
8 |
9 | from .coco_utils import get_coco_api_from_dataset
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | sync_bn: True
2 | find_unused_parameters: False
3 |
4 |
5 | use_amp: False
6 |
7 | scaler:
8 | type: GradScaler
9 | enabled: True
10 |
11 |
12 | use_ema: False
13 | ema:
14 | type: ModelEMA
15 | decay: 0.9999
16 | warmups: 2000
17 |
18 |
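19 | # These are repo-wide defaults; configs/rtdetr/include/optimizer.yml
20 | # overrides use_ema to True for the RT-DETR configs.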
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/amp.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.cuda.amp as amp
4 |
5 |
6 | from src.core import register
7 | import src.misc.dist as dist
8 |
9 |
10 | __all__ = ['GradScaler']
11 |
12 | GradScaler = register(amp.grad_scaler.GradScaler)
13 |
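14 | # Registering torch's GradScaler lets runtime.yml build it by name
15 | # (scaler: {type: GradScaler, enabled: True}) when use_amp is enabled.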
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/__init__.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 |
5 | from .rtdetr import *
6 |
7 | from .hybrid_encoder import *
8 | from .rtdetr_decoder import *
9 | from .rtdetr_postprocessor import *
10 | from .rtdetr_criterion import *
11 |
12 | from .matcher import *
13 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_6x_coco
10 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r50vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/setup_ms_deformable_attn_op.py:
--------------------------------------------------------------------------------
1 | from paddle.utils.cpp_extension import CUDAExtension, setup
2 |
3 | if __name__ == "__main__":
4 | setup(
5 | name='deformable_detr_ops',
6 | ext_modules=CUDAExtension(
7 | sources=['ms_deformable_attn_op.cc', 'ms_deformable_attn_op.cu']))
8 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/optimizer_6x.yml:
--------------------------------------------------------------------------------
1 | epoch: 72
2 |
3 | LearningRate:
4 | base_lr: 0.0001
5 | schedulers:
6 | - !PiecewiseDecay
7 | gamma: 1.0
8 | milestones: [100]
9 | use_warmup: true
10 | - !LinearWarmup
11 | start_factor: 0.001
12 | steps: 2000
13 |
14 | OptimizerBuilder:
15 | clip_grad_by_norm: 0.1
16 | regularizer: false
17 | optimizer:
18 | type: AdamW
19 | weight_decay: 0.0001
20 |
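21 | # Note: with gamma 1.0 and the only milestone (100) beyond the 72 training
22 | # epochs, the LR stays at base_lr after the 2000-step linear warmup.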
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | output_dir: ./output/rtdetr_r50vd_m_6x_coco
10 |
11 |
12 | # Parameters added on top of rtdetr_r50vd_6x_coco.yml
13 | HybridEncoder:
14 | expansion: 0.5
15 |
16 | RTDETRTransformer:
17 |   eval_idx: 2 # use the 3rd decoder layer for eval
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: lyuwenyu
7 |
8 | ---
9 |
10 | **Star RTDETR**
11 | Please click **star** on the RTDETR homepage first to support this project.
12 | Star RTDETR to help more people discover this project.
13 |
14 | ---
15 |
16 | **Describe the bug**
17 | A clear and concise description of what the bug is.
18 | If applicable, add screenshots to help explain your problem.
19 |
20 | **To Reproduce**
21 | Steps to reproduce the behavior.
22 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/cifar10/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | import torchvision
3 | from typing import Optional, Callable
4 |
5 | from src.core import register
6 |
7 |
8 | @register
9 | class CIFAR10(torchvision.datasets.CIFAR10):
10 | __inject__ = ['transform', 'target_transform']
11 |
12 | def __init__(self, root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False) -> None:
13 | super().__init__(root, train, transform, target_transform, download)
14 |
15 |
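16 | # __inject__ tells the config system to resolve these constructor arguments
17 | # from other configured components (here, transform pipelines) before building.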
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | __include__: [
2 | '../dataset/coco_detection.yml',
3 | '../runtime.yml',
4 | './include/dataloader.yml',
5 | './include/optimizer.yml',
6 | './include/rtdetr_r50vd.yml',
7 | ]
8 |
9 | PResNet:
10 | depth: 101
11 |
12 |
13 | HybridEncoder:
14 | # intra
15 | hidden_dim: 384
16 | dim_feedforward: 2048
17 |
18 |
19 | RTDETRTransformer:
20 | feat_channels: [384, 384, 384]
21 |
22 |
23 | optimizer:
24 | type: AdamW
25 | params:
26 | -
27 | params: 'backbone'
28 | lr: 0.000001
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/runtime.yml:
--------------------------------------------------------------------------------
1 | use_gpu: true
2 | use_xpu: false
3 | use_mlu: false
4 | use_npu: false
5 | log_iter: 20
6 | save_dir: output
7 | snapshot_epoch: 1
8 | print_flops: false
9 | print_params: false
10 |
11 | # Exporting the model
12 | export:
13 |   post_process: True # Whether post-processing is included in the network when exporting the model.
14 |   nms: True # Whether NMS is included in the network when exporting the model.
15 |   benchmark: False # Used for benchmarking model performance; if set to `True`, post-processing and NMS are not exported.
16 | fuse_conv_bn: False
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/datasets/voc.yml:
--------------------------------------------------------------------------------
1 | metric: VOC
2 | map_type: 11point
3 | num_classes: 20
4 |
5 | TrainDataset:
6 | name: VOCDataSet
7 | dataset_dir: dataset/voc
8 | anno_path: trainval.txt
9 | label_list: label_list.txt
10 | data_fields: [ 'image', 'gt_bbox', 'gt_class', 'difficult' ]
11 |
12 | EvalDataset:
13 | name: VOCDataSet
14 | dataset_dir: dataset/voc
15 | anno_path: test.txt
16 | label_list: label_list.txt
17 | data_fields: [ 'image', 'gt_bbox', 'gt_class', 'difficult' ]
18 |
19 | TestDataset:
20 | name: ImageFolder
21 | anno_path: dataset/voc/label_list.txt
22 |
--------------------------------------------------------------------------------
/benchmark/trtexec.md:
--------------------------------------------------------------------------------
1 |
2 | ```bash
3 | # build tensorrt engine
4 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16
5 |
6 | # using dynamic shapes
7 | # --explicitBatch --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640 --shapes=image:8x3x640x640
8 |
9 | # timeline
10 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms trtexec --loadEngine=./yolov8l_w_nms.engine --fp16 --avgRuns=10 --loadInputs='image:input_tensor.bin'
11 |
12 | # https://forums.developer.nvidia.com/t/about-loadinputs-in-trtexec/218880
13 | ```
14 |
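15 | A minimal end-to-end sketch combining the steps above (model/engine names and the input bin are placeholders):
16 |
17 | ```bash
18 | trtexec --onnx=./model.onnx --saveEngine=model.engine --buildOnly --fp16 \
19 |   --minShapes=image:1x3x640x640 --optShapes=image:8x3x640x640 --maxShapes=image:16x3x640x640
20 | trtexec --loadEngine=./model.engine --fp16 --avgRuns=10 --shapes=image:8x3x640x640 --loadInputs='image:input_tensor.bin'
21 | ```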
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/criterion/utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torchvision
3 |
4 |
5 |
6 | def format_target(targets):
7 | '''
8 | Args:
9 | targets (List[Dict]),
10 | Return:
11 | tensor (Tensor), [im_id, label, bbox,]
12 | '''
13 | outputs = []
14 | for i, tgt in enumerate(targets):
15 | boxes = torchvision.ops.box_convert(tgt['boxes'], in_fmt='xyxy', out_fmt='cxcywh')
16 | labels = tgt['labels'].reshape(-1, 1)
17 | im_ids = torch.ones_like(labels) * i
18 | outputs.append(torch.cat([im_ids, labels, boxes], dim=1))
19 |
20 | return torch.cat(outputs, dim=0)
21 |
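22 | # Example: targets for two images with 2 and 1 boxes yield a [3, 6] tensor,
23 | # each row being [im_id, label, cx, cy, w, h]; box_convert changes only the
24 | # box format, not the coordinate scale.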
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/optim.py:
--------------------------------------------------------------------------------
1 |
2 | import torch
3 | import torch.nn as nn
4 | import torch.optim as optim
5 | import torch.optim.lr_scheduler as lr_scheduler
6 |
7 | from src.core import register
8 |
9 |
10 | __all__ = ['AdamW', 'SGD', 'Adam', 'MultiStepLR', 'CosineAnnealingLR', 'OneCycleLR', 'LambdaLR']
11 |
12 |
13 |
14 | SGD = register(optim.SGD)
15 | Adam = register(optim.Adam)
16 | AdamW = register(optim.AdamW)
17 |
18 |
19 | MultiStepLR = register(lr_scheduler.MultiStepLR)
20 | CosineAnnealingLR = register(lr_scheduler.CosineAnnealingLR)
21 | OneCycleLR = register(lr_scheduler.OneCycleLR)
22 | LambdaLR = register(lr_scheduler.LambdaLR)
23 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_l_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_hgnetv2_l_6x_coco/model_final
10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_L_ssld_pretrained.pdparams
11 | find_unused_parameters: True
12 | log_iter: 200
13 |
14 |
15 | DETR:
16 | backbone: PPHGNetV2
17 |
18 | PPHGNetV2:
19 | arch: 'L'
20 | return_idx: [1, 2, 3]
21 | freeze_stem_only: True
22 | freeze_at: 0
23 | freeze_norm: True
24 | lr_mult_list: [0., 0.05, 0.05, 0.05, 0.05]
25 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/config/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/optimizer.yml:
--------------------------------------------------------------------------------
1 | use_ema: True
2 | ema:
3 | type: ModelEMA
4 | decay: 0.9999
5 | warmups: 2000
6 |
7 |
8 | find_unused_parameters: True
9 |
10 | epoches: 72
11 | clip_max_norm: 0.1
12 |
13 | optimizer:
14 | type: AdamW
15 | params:
16 | - params: 'backbone'
17 | lr: 0.00001
18 | - params: '^(?=.*encoder(?=.*bias|.*norm.*weight)).*$'
19 | weight_decay: 0.
20 | - params: '^(?=.*decoder(?=.*bias|.*norm.*weight)).*$'
21 | weight_decay: 0.
22 |
23 | lr: 0.0001
24 | betas: [ 0.9, 0.999 ]
25 | weight_decay: 0.0001
26 |
27 |
28 | lr_scheduler:
29 | type: MultiStepLR
30 | milestones: [ 1000 ]
31 | gamma: 0.1
32 |
33 |
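34 | # The strings under optimizer.params are regular expressions matched against
35 | # parameter names: backbone params get a lower lr, encoder/decoder bias and
36 | # norm weights get weight_decay 0, and everything else falls back to the
37 | # top-level lr/weight_decay.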
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import config
16 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r50vd_m_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r50vd_m_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | HybridEncoder:
14 | hidden_dim: 256
15 | use_encoder_idx: [2]
16 | num_encoder_layers: 1
17 | encoder_layer:
18 | name: TransformerLayer
19 | d_model: 256
20 | nhead: 8
21 | dim_feedforward: 1024
22 | dropout: 0.
23 | activation: 'gelu'
24 | expansion: 0.5
25 | depth_mult: 1.0
26 |
27 | RTDETRTransformer:
28 |   eval_idx: 2 # use the 3rd decoder layer for eval
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/heads/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .detr_head import *
16 |
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/architectures/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .meta_arch import *
16 | from .detr import *
17 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import optimizer
16 | from . import ema
17 |
18 | from .optimizer import *
19 | from .ema import *
20 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/datasets/coco_detection.yml:
--------------------------------------------------------------------------------
1 | metric: COCO
2 | num_classes: 80
3 |
4 | TrainDataset:
5 | name: COCODataSet
6 | image_dir: /mnt/h/ml_dataset_home/coco/train2017
7 | anno_path: /mnt/h/ml_dataset_home/coco/annotations/instances_train2017.json
8 | dataset_dir: dataset/coco
9 | data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
10 |
11 | EvalDataset:
12 | name: COCODataSet
13 | image_dir: /mnt/h/ml_dataset_home/coco/val2017
14 | anno_path: /mnt/h/ml_dataset_home/coco/instances_val2017.json
15 | dataset_dir: dataset/coco
16 | allow_empty: true
17 |
18 | TestDataset:
19 | name: ImageFolder
20 | anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
21 | dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/source/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .coco import *
16 | from .voc import *
17 | from .category import *
18 | from .dataset import ImageFolder
19 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/data/dataloader.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data as data
3 |
4 | from src.core import register
5 |
6 |
7 | __all__ = ['DataLoader']
8 |
9 |
10 | @register
11 | class DataLoader(data.DataLoader):
12 | __inject__ = ['dataset', 'collate_fn']
13 |
14 | def __repr__(self) -> str:
15 | format_string = self.__class__.__name__ + "("
16 | for n in ['dataset', 'batch_size', 'num_workers', 'drop_last', 'collate_fn']:
17 | format_string += "\n"
18 | format_string += " {0}: {1}".format(n, getattr(self, n))
19 | format_string += "\n)"
20 | return format_string
21 |
22 |
23 |
24 | @register
25 | def default_collate_fn(items):
26 | '''default collate_fn
27 | '''
28 | return torch.cat([x[0][None] for x in items], dim=0), [x[1] for x in items]
29 |
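30 | # Assumes the transforms have already resized every image to the same HxW
31 | # (e.g. Resize to 640x640), so images stack into a [B, 3, H, W] batch while
32 | # targets remain a list of per-image dicts.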
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/losses/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .iou_loss import *
16 | from .gfocal_loss import *
17 | from .detr_loss import *
18 | from .focal_loss import *
19 | from .smooth_l1_loss import *
20 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/tools/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | Train/test script examples
4 | - `CUDA_VISIBLE_DEVICES=0,1,2,3 torchrun --nproc_per_node=4 --master_port=8989 tools/train.py -c path/to/config &> train.log 2>&1 &`
5 | - `-r path/to/checkpoint`
6 | - `--amp`
7 | - `--test-only`
8 |
9 |
10 | Tuning script examples
11 | - `torchrun --master_port=8844 --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -t https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth`
12 |
13 |
14 | Export script examples
15 | - `python tools/export_onnx.py -c path/to/config -r path/to/checkpoint --check`
16 |
17 |
18 | GPU does not release memory
19 | - `ps aux | grep "tools/train.py" | awk '{print $2}' | xargs kill -9`
20 |
21 |
22 | Save all logs
23 | - Append `&> train.log 2>&1 &` (backgrounded) or `&> train.log 2>&1` to the command
24 |
25 |
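26 | Combined example (resume + AMP, logs saved; paths are placeholders)
27 | - `CUDA_VISIBLE_DEVICES=0,1 torchrun --nproc_per_node=2 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml -r path/to/checkpoint --amp &> train.log 2>&1 &`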
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import source
16 | from . import transform
17 | from . import reader
18 |
19 | from .source import *
20 | from .transform import *
21 | from .reader import *
22 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .utils import *
16 | from .matchers import *
17 | from .position_encoding import *
18 | from .rtdetr_transformer import *
19 | from .dino_transformer import *
20 | from .hybrid_encoder import *
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/transform/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import operators
16 | from . import batch_operators
17 |
18 |
19 | from .operators import *
20 | from .batch_operators import *
21 |
22 |
23 | __all__ = []
24 | __all__ += registered_ops
25 |
26 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/dataset/coco_detection.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | num_classes: 80
4 | remap_mscoco_category: True
5 |
6 | train_dataloader:
7 | type: DataLoader
8 | dataset:
9 | type: CocoDetection
10 | img_folder: /mnt/h/ml_dataset_home/coco/train2017/
11 | ann_file: /mnt/h/ml_dataset_home/coco/annotations/small_instances_train2017.json
12 | transforms:
13 | type: Compose
14 | ops: ~
15 | shuffle: True
16 | batch_size: 1
17 | num_workers: 4
18 | drop_last: True
19 |
20 |
21 | val_dataloader:
22 | type: DataLoader
23 | dataset:
24 | type: CocoDetection
25 | img_folder: /mnt/h/ml_dataset_home/coco/train2017/
26 | ann_file: /mnt/h/ml_dataset_home/coco/annotations/small_instances_train2017.json
27 | transforms:
28 | type: Compose
29 | ops: ~
30 |
31 | shuffle: False
32 | batch_size: 1
33 | num_workers: 4
34 | drop_last: False
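35 |
36 | # NOTE: both dataloaders currently point at the train2017 subset
37 | # (small_instances_train2017.json); for a real evaluation, switch
38 | # val_dataloader to val2017 and instances_val2017.json.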
--------------------------------------------------------------------------------
/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | # These are supported funding model platforms
2 |
3 | github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
4 | patreon: # Replace with a single Patreon username
5 | open_collective: # Replace with a single Open Collective username
6 | ko_fi: # Replace with a single Ko-fi username
7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
9 | liberapay: # Replace with a single Liberapay username
10 | issuehunt: # Replace with a single IssueHunt username
11 | otechie: # Replace with a single Otechie username
12 | lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
13 | custom: ['https://github.com/lyuwenyu/cvperception/assets/17582080/2b4bfcd5-5c0f-45fd-badf-3f6e5b0249ac'] # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2']
14 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r18vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet18_vd_pretrained.pdparams
14 | ResNet:
15 | depth: 18
16 | variant: d
17 | return_idx: [1, 2, 3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
22 | HybridEncoder:
23 | hidden_dim: 256
24 | use_encoder_idx: [2]
25 | num_encoder_layers: 1
26 | encoder_layer:
27 | name: TransformerLayer
28 | d_model: 256
29 | nhead: 8
30 | dim_feedforward: 1024
31 | dropout: 0.
32 | activation: 'gelu'
33 | expansion: 0.5
34 | depth_mult: 1.0
35 |
36 | RTDETRTransformer:
37 | eval_idx: -1
38 | num_decoder_layers: 3
39 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r34vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/ResNet34_vd_pretrained.pdparams
14 | ResNet:
15 | depth: 34
16 | variant: d
17 | return_idx: [1, 2, 3]
18 | freeze_at: -1
19 | freeze_norm: false
20 | norm_decay: 0.
21 |
22 | HybridEncoder:
23 | hidden_dim: 256
24 | use_encoder_idx: [2]
25 | num_encoder_layers: 1
26 | encoder_layer:
27 | name: TransformerLayer
28 | d_model: 256
29 | nhead: 8
30 | dim_feedforward: 1024
31 | dropout: 0.
32 | activation: 'gelu'
33 | expansion: 0.5
34 | depth_mult: 1.0
35 |
36 | RTDETRTransformer:
37 | eval_idx: -1
38 | num_decoder_layers: 4
39 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/engine/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import trainer
16 | from .trainer import *
17 |
18 | from . import callbacks
19 | from .callbacks import *
20 |
21 | from . import env
22 | from .env import *
23 |
24 | __all__ = trainer.__all__ \
25 | + callbacks.__all__ \
26 | + env.__all__
27 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_r101vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_r101vd_6x_coco/model_final
10 | find_unused_parameters: True
11 | log_iter: 200
12 |
13 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_ssld_pretrained.pdparams
14 |
15 | ResNet:
16 | # index 0 stands for res2
17 | depth: 101
18 | variant: d
19 | norm_type: bn
20 | freeze_at: 0
21 | return_idx: [1, 2, 3]
22 | lr_mult_list: [0.01, 0.01, 0.01, 0.01]
23 | num_stages: 4
24 | freeze_stem_only: True
25 |
26 | HybridEncoder:
27 | hidden_dim: 384
28 | use_encoder_idx: [2]
29 | num_encoder_layers: 1
30 | encoder_layer:
31 | name: TransformerLayer
32 | d_model: 384
33 | nhead: 8
34 | dim_feedforward: 2048
35 | dropout: 0.
36 | activation: 'gelu'
37 | expansion: 1.0
38 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/rtdetr_hgnetv2_x_6x_coco.yml:
--------------------------------------------------------------------------------
1 | _BASE_: [
2 | '../datasets/coco_detection.yml',
3 | '../runtime.yml',
4 | '_base_/optimizer_6x.yml',
5 | '_base_/rtdetr_r50vd.yml',
6 | '_base_/rtdetr_reader.yml',
7 | ]
8 |
9 | weights: output/rtdetr_hgnetv2_x_6x_coco/model_final
10 | pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/PPHGNetV2_X_ssld_pretrained.pdparams
11 | find_unused_parameters: True
12 | log_iter: 200
13 |
14 |
15 |
16 | DETR:
17 | backbone: PPHGNetV2
18 |
19 |
20 | PPHGNetV2:
21 | arch: 'X'
22 | return_idx: [1, 2, 3]
23 | freeze_stem_only: True
24 | freeze_at: 0
25 | freeze_norm: True
26 | lr_mult_list: [0., 0.01, 0.01, 0.01, 0.01]
27 |
28 |
29 | HybridEncoder:
30 | hidden_dim: 384
31 | use_encoder_idx: [2]
32 | num_encoder_layers: 1
33 | encoder_layer:
34 | name: TransformerLayer
35 | d_model: 384
36 | nhead: 8
37 | dim_feedforward: 2048
38 | dropout: 0.
39 | activation: 'gelu'
40 | expansion: 1.0
41 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/misc/visualizer.py:
--------------------------------------------------------------------------------
1 | """"by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.utils.data
6 |
7 | import torchvision
8 | torchvision.disable_beta_transforms_warning()
9 |
10 | import PIL
11 |
12 | __all__ = ['show_sample']
13 |
14 | def show_sample(sample):
15 | """for coco dataset/dataloader
16 | """
17 | import matplotlib.pyplot as plt
18 | from torchvision.transforms.v2 import functional as F
19 | from torchvision.utils import draw_bounding_boxes
20 |
21 | image, target = sample
22 | if isinstance(image, PIL.Image.Image):
23 | image = F.to_image_tensor(image)
24 |
25 | image = F.convert_dtype(image, torch.uint8)
26 | annotated_image = draw_bounding_boxes(image, target["boxes"], colors="yellow", width=3)
27 |
28 | fig, ax = plt.subplots()
29 | ax.imshow(annotated_image.permute(1, 2, 0).numpy())
30 | ax.set(xticklabels=[], yticklabels=[], xticks=[], yticks=[])
31 | fig.tight_layout()
32 | fig.show()
33 | plt.show()
34 |
35 |
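36 | # Usage sketch: show_sample(dataset[0]) -- expects a (PIL image or tensor,
37 | # target dict) pair whose 'boxes' are absolute xyxy, i.e. taken before the
38 | # ConvertBox/normalization transforms.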
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/metrics/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import metrics
16 |
17 | from .metrics import *
18 | from .pose3d_metrics import *
19 |
20 | from . import mot_metrics
21 | from .mot_metrics import *
22 | __all__ = metrics.__all__ + mot_metrics.__all__
23 |
24 | from . import mcmot_metrics
25 | from .mcmot_metrics import *
26 | __all__ += mcmot_metrics.__all__  # extend, don't overwrite (keeps mot_metrics exports)
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r34vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r34vd_6x_coco
12 |
13 |
14 | PResNet:
15 | depth: 34
16 | freeze_at: -1
17 | freeze_norm: False
18 | pretrained: True
19 |
20 |
21 | HybridEncoder:
22 | in_channels: [128, 256, 512]
23 | hidden_dim: 256
24 | expansion: 0.5
25 |
26 |
27 | RTDETRTransformer:
28 | num_decoder_layers: 4
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*(?:norm|bn)).*$'
37 | weight_decay: 0.
38 | lr: 0.00001
39 | -
40 | params: '^(?=.*backbone)(?!.*(?:norm|bn)).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/rtdetr_r18vd_6x_coco.yml:
--------------------------------------------------------------------------------
1 |
2 | __include__: [
3 | '../dataset/coco_detection.yml',
4 | '../runtime.yml',
5 | './include/dataloader.yml',
6 | './include/optimizer.yml',
7 | './include/rtdetr_r50vd.yml',
8 | ]
9 |
10 |
11 | output_dir: ./output/rtdetr_r18vd_6x_coco
12 |
13 | PResNet:
14 | depth: 18
15 | freeze_at: -1
16 | freeze_norm: False
17 | pretrained: True
18 |
19 | HybridEncoder:
20 | in_channels: [128, 256, 512]
21 | hidden_dim: 256
22 | expansion: 0.5
23 |
24 |
25 | RTDETRTransformer:
26 | eval_idx: -1
27 | num_decoder_layers: 3
28 | num_denoising: 100
29 |
30 |
31 |
32 | optimizer:
33 | type: AdamW
34 | params:
35 | -
36 | params: '^(?=.*backbone)(?=.*norm).*$'
37 | lr: 0.00001
38 | weight_decay: 0.
39 | -
40 | params: '^(?=.*backbone)(?!.*norm).*$'
41 | lr: 0.00001
42 | -
43 | params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bias)).*$'
44 | weight_decay: 0.
45 |
46 | lr: 0.0001
47 | betas: [0.9, 0.999]
48 | weight_decay: 0.0001
49 |
50 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import warnings
16 | warnings.filterwarnings(
17 | action='ignore', category=DeprecationWarning, module='ops')
18 |
19 |
20 | from .ops import *
21 | from .backbones import *
22 | from .heads import *
23 | from .losses import *
24 | from .architectures import *
25 | from .post_process import *
26 | from .layers import *
27 | from .transformers import *
28 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/arch/classification.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | from src.core import register
5 |
6 |
7 | __all__ = ['Classification', 'ClassHead']
8 |
9 |
10 | @register
11 | class Classification(nn.Module):
12 | __inject__ = ['backbone', 'head']
13 |
14 | def __init__(self, backbone: nn.Module, head: nn.Module=None):
15 | super().__init__()
16 |
17 | self.backbone = backbone
18 | self.head = head
19 |
20 | def forward(self, x):
21 | x = self.backbone(x)
22 |
23 | if self.head is not None:
24 | x = self.head(x)
25 |
26 | return x
27 |
28 |
29 | @register
30 | class ClassHead(nn.Module):
31 | def __init__(self, hidden_dim, num_classes):
32 | super().__init__()
33 | self.pool = nn.AdaptiveAvgPool2d(1)
34 | self.proj = nn.Linear(hidden_dim, num_classes)
35 |
36 | def forward(self, x):
37 | x = x[0] if isinstance(x, (list, tuple)) else x
38 | x = self.pool(x)
39 | x = x.reshape(x.shape[0], -1)
40 | x = self.proj(x)
41 | return x
42 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from . import (core, data, engine, modeling, optimizer, metrics, utils)
16 |
17 |
18 | try:
19 | from .version import full_version as __version__
20 | from .version import commit as __git_commit__
21 | except ImportError:
22 | import sys
23 | sys.stderr.write("Warning: import ppdet from source directory " \
24 | "without installing, run 'python setup.py install' to " \
25 | "install ppdet firstly\n")
26 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/create_list.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import create_voc_list
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | voc_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | create_voc_list(voc_path)
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/coco/download_coco.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'coco')
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/dataset/voc/download_voc.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import os.path as osp
17 | import logging
18 | # add python path of PaddleDetection to sys.path
19 | parent_path = osp.abspath(osp.join(__file__, *(['..'] * 3)))
20 | if parent_path not in sys.path:
21 | sys.path.append(parent_path)
22 |
23 | from ppdet.utils.download import download_dataset
24 |
25 | logging.basicConfig(level=logging.INFO)
26 |
27 | download_path = osp.split(osp.realpath(sys.argv[0]))[0]
28 | download_dataset(download_path, 'voc')
29 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from .resnet import *
16 | from .darknet import *
17 | from .mobilenet_v1 import *
18 | from .mobilenet_v3 import *
19 | from .shufflenet_v2 import *
20 | from .swin_transformer import *
21 | from .lcnet import *
22 | from .cspresnet import *
23 | from .csp_darknet import *
24 | from .convnext import *
25 | from .vision_transformer import *
26 | from .mobileone import *
27 | from .trans_encoder import *
28 | from .focalnet import *
29 | from .vit_mae import *
30 | from .hgnet_v2 import *
31 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/shape_spec.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | # The code is based on:
16 | # https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/shape_spec.py
17 |
18 | from collections import namedtuple
19 |
20 |
21 | class ShapeSpec(
22 | namedtuple("_ShapeSpec", ["channels", "height", "width", "stride"])):
23 | def __new__(cls, channels=None, height=None, width=None, stride=None):
24 | return super(ShapeSpec, cls).__new__(cls, channels, height, width,
25 | stride)
26 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/dataloader.yml:
--------------------------------------------------------------------------------
1 | # num_classes: 91
2 | # remap_mscoco_category: True
3 |
4 | train_dataloader:
5 | dataset:
6 | return_masks: False
7 | transforms:
8 | ops:
9 | - {type: RandomPhotometricDistort, p: 0.5}
10 | - {type: RandomZoomOut, fill: 0}
11 | - {type: RandomIoUCrop, p: 0.8}
12 | - {type: SanitizeBoundingBox, min_size: 1}
13 | - {type: RandomHorizontalFlip}
14 | - {type: Resize, size: [640, 640], }
15 | # - {type: Resize, size: 639, max_size: 640}
16 | # - {type: PadToSize, spatial_size: 640}
17 | - { type: ToImageTensor }
18 | - { type: ConvertDtype }
19 | - { type: SanitizeBoundingBox, min_size: 1 }
20 | - { type: ConvertBox, out_fmt: 'cxcywh', normalize: True }
21 | shuffle: True
22 | batch_size: 1
23 | num_workers: 1
24 | collate_fn: default_collate_fn
25 |
26 |
27 | val_dataloader:
28 | dataset:
29 | transforms:
30 | ops:
31 | # - {type: Resize, size: 639, max_size: 640}
32 | # - {type: PadToSize, spatial_size: 640}
33 | - { type: Resize, size: [ 640, 640 ] }
34 | - { type: ToImageTensor }
35 | - { type: ConvertDtype }
36 | shuffle: False
37 | batch_size: 1
38 | num_workers: 1
39 | collate_fn: default_collate_fn
40 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/tools/train.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import os
5 | import sys
6 |
7 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
8 | import argparse
9 |
10 | import src.misc.dist as dist
11 | from src.core import YAMLConfig
12 | from src.solver import TASKS
13 |
14 |
15 | def main(args, ) -> None:
16 | '''main
17 | '''
18 | dist.init_distributed()
19 |
20 | assert not all([args.tuning, args.resume]), \
21 |         'Only support from_scratch, resume, or tuning at one time'
22 |
23 | cfg = YAMLConfig(
24 | args.config,
25 | resume=args.resume,
26 | use_amp=args.amp,
27 | tuning=args.tuning
28 | )
29 |
30 | solver = TASKS[cfg.yaml_cfg['task']](cfg)
31 |
32 | if args.test_only:
33 | solver.val()
34 | else:
35 | solver.fit()
36 |
37 |
38 | if __name__ == '__main__':
39 | parser = argparse.ArgumentParser()
40 | parser.add_argument('--config', '-c', type=str, )
41 | parser.add_argument('--resume', '-r', type=str, )
42 | parser.add_argument('--tuning', '-t', type=str, )
43 | parser.add_argument('--test-only', action='store_true', default=False,)
44 | parser.add_argument('--amp', action='store_true', default=False,)
45 |
46 | args = parser.parse_args()
47 |
48 | main(args)
49 |
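50 |
51 | # Example invocations (a sketch; config and checkpoint paths are illustrative
52 | # and depend on which files exist under configs/rtdetr/):
53 | #   train on a single GPU with AMP:
54 | #     python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml --amp
55 | #   evaluate an existing checkpoint only:
56 | #     python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml -r path/to/checkpoint.pth --test-only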
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/rtdetr_reader.yml:
--------------------------------------------------------------------------------
1 | worker_num: 4
2 | TrainReader:
3 | sample_transforms:
4 | - Decode: {}
5 | - RandomDistort: {prob: 0.8}
6 | - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
7 | - RandomCrop: {prob: 0.8}
8 | - RandomFlip: {}
9 | batch_transforms:
10 | - BatchRandomResize: {target_size: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800], random_size: True, random_interp: True, keep_ratio: False}
11 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
12 | - NormalizeBox: {}
13 | - BboxXYXY2XYWH: {}
14 | - Permute: {}
15 | batch_size: 4
16 | shuffle: true
17 | drop_last: true
18 | collate_batch: false
19 | use_shared_memory: false
20 |
21 |
22 | EvalReader:
23 | sample_transforms:
24 | - Decode: {}
25 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2} # target_size: (h, w)
26 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
27 | - Permute: {}
28 | batch_size: 4
29 | shuffle: false
30 | drop_last: false
31 |
32 |
33 | TestReader:
34 | inputs_def:
35 | image_shape: [3, 640, 640]
36 | sample_transforms:
37 | - Decode: {}
38 | - Resize: {target_size: [640, 640], keep_ratio: False, interp: 2}
39 | - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
40 | - Permute: {}
41 | batch_size: 1
42 | shuffle: false
43 | drop_last: false
44 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/rtdetr.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import random
9 | import numpy as np
10 |
11 | from src.core import register
12 |
13 | __all__ = ['RTDETR', ]
14 |
15 |
16 | # the top-level model; only a small amount of code lives here
17 | @register
18 | class RTDETR(nn.Module):
19 | __inject__ = ['backbone', 'encoder', 'decoder', ]
20 |
21 | def __init__(self, backbone: nn.Module, encoder, decoder, multi_scale=None):
22 | super().__init__()
23 | self.backbone = backbone
24 | self.decoder = decoder
25 | self.encoder = encoder
26 |         # candidate input sizes for multi-scale training, e.g. [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
27 | self.multi_scale = multi_scale
28 |
29 | def forward(self, x, targets=None):
30 |         # randomly pick one of the sizes and rescale the input images x by interpolation
31 | if self.multi_scale and self.training:
32 | sz = np.random.choice(self.multi_scale)
33 | x = F.interpolate(x, size=[sz, sz])
34 |         # backbone forward
35 | x = self.backbone(x)
36 |         # encoder forward (HybridEncoder)
37 | x = self.encoder(x)
38 |         # decoder forward (RTDETRTransformer; in fact only the decoder in the conventional sense)
39 | x = self.decoder(x, targets)
40 |
41 | return x
42 |
43 | def deploy(self, ):
44 | self.eval()
45 | for m in self.modules():
46 | if hasattr(m, 'convert_to_deploy'):
47 | m.convert_to_deploy()
48 | return self
49 |
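50 |
51 | if __name__ == '__main__':
52 |     # minimal smoke test (a sketch): stand-in modules replace the real backbone,
53 |     # encoder and decoder, which are normally injected from the yaml config.
54 |     # Run from the repo root so that `src` is importable.
55 |     class _DummyDecoder(nn.Module):
56 |         def forward(self, x, targets=None):
57 |             return x
58 |
59 |     model = RTDETR(nn.Identity(), nn.Identity(), _DummyDecoder(),
60 |                    multi_scale=[480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800])
61 |     model.train()
62 |     out = model(torch.rand(2, 3, 640, 640))
63 |     print(out.shape)  # spatial size equals one of the multi_scale entries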
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/optimizer/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle.nn as nn
20 |
21 | from typing import List
22 |
23 |
24 | def get_bn_running_state_names(model: nn.Layer) -> List[str]:
25 | """Get all bn state full names including running mean and variance
26 | """
27 | names = []
28 | for n, m in model.named_sublayers():
29 | if isinstance(m, (nn.BatchNorm2D, nn.SyncBatchNorm)):
30 | assert hasattr(m, '_mean'), f'assert {m} has _mean'
31 | assert hasattr(m, '_variance'), f'assert {m} has _variance'
32 | running_mean = f'{n}._mean'
33 | running_var = f'{n}._variance'
34 | names.extend([running_mean, running_var])
35 |
36 | return names
37 |
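38 |
39 | if __name__ == '__main__':
40 |     # quick sanity check (a sketch): sublayer '1' in the Sequential is the
41 |     # BatchNorm2D layer, so its running statistics are reported as
42 |     # '1._mean' and '1._variance'
43 |     model = nn.Sequential(nn.Conv2D(3, 8, 3), nn.BatchNorm2D(8))
44 |     print(get_bn_running_state_names(model))  # ['1._mean', '1._variance']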
--------------------------------------------------------------------------------
/benchmark/README.md:
--------------------------------------------------------------------------------
1 | # Some of the code and tools used for the speed tests in the paper
2 |
3 |
4 | ## Measuring the speed of the YOLO series [in progress]
5 | Take [yolov8](https://github.com/ultralytics/ultralytics) as an example.
6 |
7 |
8 | 1. Convert to ONNX
9 |
10 | Run the `export_onnx` function in `yolov8_onnx.py`; the newly added code mainly handles converting the output format.
11 |
12 |
13 | 2. Insert NMS
14 |
15 | Use the `yolo_insert_nms` function in `utils.py`; after exporting the ONNX model, inspect its structure with [Netron](https://netron.app/).
16 |
17 |
18 | 3. Convert to TensorRT
19 |
20 | Convert with the script in `trtexec.md`, or with the Python code in `utils.py` (a Python sketch is given at the end of this file).
21 | ```bash
22 | # trtexec -h
23 | trtexec --onnx=./yolov8l_w_nms.onnx --saveEngine=yolov8l_w_nms.engine --buildOnly --fp16
24 | ```
25 |
26 |
27 | 4. Speed test with trtexec
28 |
29 | Use the script in `trtexec.md` again, with the `--buildOnly` flag removed.
30 |
31 |
32 | 5. Profiling (optional)
33 |
34 | Prefix the command from step 4 with:
35 | ```bash
36 | nsys profile --force-overwrite=true -t 'nvtx,cuda,osrt,cudnn' -c cudaProfilerApi -o yolov8l_w_nms
37 | ```
38 | The result can be visualized and analyzed with nsys.
39 |
40 |
41 | 6. Speed test or deployment in Python
42 |
43 | To measure the average speed of a model on the COCO val set, run inference with the code in `trtinfer.py`.
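44 |
45 |
46 | ### Appendix: building the engine from Python (sketch)
47 |
48 | A minimal sketch of the Python route mentioned in step 3, assuming TensorRT 8.x is installed; names and paths are illustrative, the exact helper in `utils.py` may differ, and the `EfficientNMS_TRT` plugin must be loadable at both build and run time.
49 | ```python
50 | import tensorrt as trt
51 |
52 | def build_engine(onnx_path='yolov8l_w_nms.onnx', engine_path='yolov8l_w_nms.engine', fp16=True):
53 |     logger = trt.Logger(trt.Logger.WARNING)
54 |     trt.init_libnvinfer_plugins(logger, '')  # register TRT plugins, incl. EfficientNMS_TRT
55 |     builder = trt.Builder(logger)
56 |     network = builder.create_network(1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
57 |     parser = trt.OnnxParser(network, logger)
58 |     with open(onnx_path, 'rb') as f:
59 |         assert parser.parse(f.read()), parser.get_error(0)
60 |     config = builder.create_builder_config()
61 |     if fp16:
62 |         config.set_flag(trt.BuilderFlag.FP16)
63 |     engine = builder.build_serialized_network(network, config)
64 |     with open(engine_path, 'wb') as f:
65 |         f.write(engine)
66 |
67 | if __name__ == '__main__':
68 |     build_engine()
69 | ```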
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/cls_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
16 | def _get_class_default_kwargs(cls, *args, **kwargs):
17 | """
18 | Get default arguments of a class in dict format, if args and
19 | kwargs is specified, it will replace default arguments
20 | """
21 | varnames = cls.__init__.__code__.co_varnames
22 | argcount = cls.__init__.__code__.co_argcount
23 | keys = varnames[:argcount]
24 | assert keys[0] == 'self'
25 | keys = keys[1:]
26 |
27 | values = list(cls.__init__.__defaults__)
28 | assert len(values) == len(keys)
29 |
30 | if len(args) > 0:
31 | for i, arg in enumerate(args):
32 | values[i] = arg
33 |
34 | default_kwargs = dict(zip(keys, values))
35 |
36 | if len(kwargs) > 0:
37 | for k, v in kwargs.items():
38 | default_kwargs[k] = v
39 |
40 | return default_kwargs
41 |
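42 |
43 | if __name__ == '__main__':
44 |     # usage sketch with a hypothetical class; note that every __init__
45 |     # parameter must have a default, since this helper zips co_varnames
46 |     # against __defaults__
47 |     class _Conv:
48 |         def __init__(self, in_ch=3, out_ch=64, bias=True):
49 |             pass
50 |
51 |     # positional args fill the defaults left to right, kwargs override by name
52 |     print(_get_class_default_kwargs(_Conv, 8, bias=False))
53 |     # -> {'in_ch': 8, 'out_ch': 64, 'bias': False}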
--------------------------------------------------------------------------------
/rtdetr_pytorch/temp.py:
--------------------------------------------------------------------------------
1 | import json
2 | from collections import defaultdict
3 |
4 | new_file = '/mnt/h/ml_dataset_home/coco/annotations/instances_train2017_remove_1021.json'
5 |
6 | with open('/mnt/h/ml_dataset_home/coco/annotations/instances_train2017.json') as file:
7 | data = json.load(file)
8 | origin = data.copy()
9 | images = data['images']
10 | print("原始images 数量 {}".format(len(images)))
11 | annos = data['annotations']
12 | no_anno = []
13 | id_anno = defaultdict(list)
14 | for anno in annos:
15 | id = anno['image_id']
16 | id_anno[id].append(anno)
17 | bbox = anno.get('bbox', None)
18 |         if bbox is None:
19 |             print("anno {} has no bbox".format(anno['id']))
20 |         elif len(bbox) == 0:
21 |             print("anno {} has an empty bbox".format(anno['id']))
22 |
23 |
24 |
25 | valid_image = []
26 | invalid_image_id = []
27 | for image in images:
28 | id = image['id']
29 | if id in id_anno:
30 | valid_image.append(image)
31 | else:
32 | invalid_image_id.append(id)
33 | print("invalid images: {}".format(len(invalid_image_id)))
34 |
35 | data['images'] = valid_image
36 |
37 | print(len(data['images']))
38 |
39 | # with open(new_file, 'w') as newf:
40 | # json.dump(data, newf, indent=4)
41 |
42 | print("新文件制作完成")
43 |
44 | # Loaded 118287 images in COCO format from /mnt/h/ml_dataset_home/coco/annotations/instances_train2017.json
45 | # Removed 1021 images with no usable annotations. 117266 images left.
46 |
--------------------------------------------------------------------------------
/rtdetr_paddle/configs/rtdetr/_base_/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | architecture: DETR
2 | pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
3 | norm_type: sync_bn
4 | use_ema: True
5 | ema_decay: 0.9999
6 | ema_decay_type: "exponential"
7 | ema_filter_no_grad: True
8 | hidden_dim: 256
9 | use_focal_loss: True
10 | eval_size: [640, 640] # h, w
11 |
12 |
13 | DETR:
14 | backbone: ResNet
15 | neck: HybridEncoder
16 | transformer: RTDETRTransformer
17 | detr_head: DINOHead
18 | post_process: DETRPostProcess
19 |
20 | ResNet:
21 | # index 0 stands for res2
22 | depth: 50
23 | variant: d
24 | norm_type: bn
25 | freeze_at: 0
26 | return_idx: [1, 2, 3]
27 | lr_mult_list: [0.1, 0.1, 0.1, 0.1]
28 | num_stages: 4
29 | freeze_stem_only: True
30 |
31 | HybridEncoder:
32 | hidden_dim: 256
33 | use_encoder_idx: [2]
34 | num_encoder_layers: 1
35 | encoder_layer:
36 | name: TransformerLayer
37 | d_model: 256
38 | nhead: 8
39 | dim_feedforward: 1024
40 | dropout: 0.
41 | activation: 'gelu'
42 | expansion: 1.0
43 |
44 |
45 | RTDETRTransformer:
46 | num_queries: 300
47 | position_embed_type: sine
48 | feat_strides: [8, 16, 32]
49 | num_levels: 3
50 | nhead: 8
51 | num_decoder_layers: 6
52 | dim_feedforward: 1024
53 | dropout: 0.0
54 | activation: relu
55 | num_denoising: 100
56 | label_noise_ratio: 0.5
57 | box_noise_scale: 1.0
58 | learnt_init_query: False
59 |
60 | DINOHead:
61 | loss:
62 | name: DINOLoss
63 | loss_coeff: {class: 1, bbox: 5, giou: 2}
64 | aux_loss: True
65 | use_vfl: True
66 | matcher:
67 | name: HungarianMatcher
68 | matcher_coeff: {class: 2, bbox: 5, giou: 2}
69 |
70 | DETRPostProcess:
71 | num_top_queries: 300
72 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/engine/env.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import random
21 | import numpy as np
22 |
23 | import paddle
24 | from paddle.distributed import fleet
25 |
26 | __all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
27 |
28 |
29 | def init_fleet_env(find_unused_parameters=False):
30 | strategy = fleet.DistributedStrategy()
31 | strategy.find_unused_parameters = find_unused_parameters
32 | fleet.init(is_collective=True, strategy=strategy)
33 |
34 |
35 | def init_parallel_env():
36 | env = os.environ
37 | dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
38 | if dist:
39 | trainer_id = int(env['PADDLE_TRAINER_ID'])
40 | local_seed = (99 + trainer_id)
41 | random.seed(local_seed)
42 | np.random.seed(local_seed)
43 |
44 | paddle.distributed.init_parallel_env()
45 |
46 |
47 | def set_random_seed(seed):
48 | paddle.seed(seed)
49 | random.seed(seed)
50 | np.random.seed(seed)
51 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/configs/rtdetr/include/rtdetr_r50vd.yml:
--------------------------------------------------------------------------------
1 | task: detection
2 |
3 | model: RTDETR
4 | criterion: SetCriterion
5 | postprocessor: RTDETRPostProcessor
6 |
7 |
8 | RTDETR:
9 | backbone: PResNet
10 | encoder: HybridEncoder
11 | decoder: RTDETRTransformer
12 | multi_scale: [ 480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800 ]
13 |
14 | # backbone
15 | PResNet:
16 | depth: 50
17 | variant: d
18 | freeze_at: 0
19 | return_idx: [ 1, 2, 3 ]
20 | num_stages: 4
21 | freeze_norm: True
22 | pretrained: True
23 |
24 | # encoder
25 | HybridEncoder:
26 | in_channels: [ 512, 1024, 2048 ]
27 | feat_strides: [ 8, 16, 32 ]
28 |
29 | # intra
30 | hidden_dim: 256
31 | use_encoder_idx: [ 2 ]
32 | num_encoder_layers: 1
33 | nhead: 8
34 | dim_feedforward: 1024
35 | dropout: 0.
36 | enc_act: 'gelu'
37 | pe_temperature: 10000
38 |
39 | # cross
40 | expansion: 1.0
41 | depth_mult: 1
42 | act: 'silu'
43 |
44 | # eval
45 | eval_spatial_size: [ 640, 640 ]
46 |
47 | # decoder
48 | RTDETRTransformer:
49 | feat_channels: [ 256, 256, 256 ]
50 | feat_strides: [ 8, 16, 32 ]
51 | hidden_dim: 256
52 | num_levels: 3
53 |
54 | num_queries: 300
55 |
56 | num_decoder_layers: 6
57 | num_denoising: 100
58 |
59 | eval_idx: -1
60 | eval_spatial_size: [ 640, 640 ]
61 |
62 |
63 | use_focal_loss: True
64 |
65 | RTDETRPostProcessor:
66 | num_top_queries: 300
67 |
68 |
69 | SetCriterion:
70 | weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
71 | losses: ['vfl', 'boxes', ]
72 | alpha: 0.75
73 | gamma: 2.0
74 |
75 | matcher:
76 | type: HungarianMatcher
77 | weight_dict: { cost_class: 2, cost_bbox: 5, cost_giou: 2 }
78 | # use_focal_loss: True
79 | alpha: 0.25
80 | gamma: 2.0
81 |
82 |
83 |
84 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | https://github.com/pytorch/vision/blob/main/torchvision/models/_utils.py
3 |
4 | by lyuwenyu
5 | """
6 |
7 | from collections import OrderedDict
8 | from typing import Dict, List
9 |
10 |
11 | import torch.nn as nn
12 |
13 |
14 | class IntermediateLayerGetter(nn.ModuleDict):
15 | """
16 | Module wrapper that returns intermediate layers from a model
17 |
18 | It has a strong assumption that the modules have been registered
19 | into the model in the same order as they are used.
20 | This means that one should **not** reuse the same nn.Module
21 | twice in the forward if you want this to work.
22 |
23 | Additionally, it is only able to query submodules that are directly
24 | assigned to the model. So if `model` is passed, `model.feature1` can
25 | be returned, but not `model.feature1.layer2`.
26 | """
27 |
28 | _version = 3
29 |
30 | def __init__(self, model: nn.Module, return_layers: List[str]) -> None:
31 | if not set(return_layers).issubset([name for name, _ in model.named_children()]):
32 |             raise ValueError("some return_layers are not present in the model; "\
33 |                 "available children: {}".format([name for name, _ in model.named_children()]))
34 | orig_return_layers = return_layers
35 | return_layers = {str(k): str(k) for k in return_layers}
36 | layers = OrderedDict()
37 | for name, module in model.named_children():
38 | layers[name] = module
39 | if name in return_layers:
40 | del return_layers[name]
41 | if not return_layers:
42 | break
43 |
44 | super().__init__(layers)
45 | self.return_layers = orig_return_layers
46 |
47 | def forward(self, x):
48 | # out = OrderedDict()
49 | outputs = []
50 | for name, module in self.items():
51 | x = module(x)
52 | if name in self.return_layers:
53 | # out_name = self.return_layers[name]
54 | # out[out_name] = x
55 | outputs.append(x)
56 |
57 | return outputs
58 |
59 |
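60 |
61 | if __name__ == '__main__':
62 |     # usage sketch, assuming torchvision is installed: grab the stride-8/16/32
63 |     # feature maps from a torchvision ResNet by the names of its child modules
64 |     import torch
65 |     from torchvision.models import resnet50
66 |
67 |     body = IntermediateLayerGetter(resnet50(), return_layers=['layer2', 'layer3', 'layer4'])
68 |     feats = body(torch.rand(1, 3, 224, 224))
69 |     print([f.shape for f in feats])
70 |     # -> [torch.Size([1, 512, 28, 28]), torch.Size([1, 1024, 14, 14]), torch.Size([1, 2048, 7, 7])]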
--------------------------------------------------------------------------------
/rtdetr_paddle/tools/slice_image.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import argparse
16 | from tqdm import tqdm
17 |
18 |
19 | def slice_data(image_dir, dataset_json_path, output_dir, slice_size,
20 | overlap_ratio):
21 | try:
22 | from sahi.scripts.slice_coco import slice
23 |     except Exception as e:
24 |         raise RuntimeError(
25 |             'Unable to use sahi to slice images; please install sahi (e.g. `pip install sahi`), see https://github.com/obss/sahi'
26 |         ) from e
27 | tqdm.write(
28 | f" slicing for slice_size={slice_size}, overlap_ratio={overlap_ratio}")
29 | slice(
30 | image_dir=image_dir,
31 | dataset_json_path=dataset_json_path,
32 | output_dir=output_dir,
33 | slice_size=slice_size,
34 | overlap_ratio=overlap_ratio, )
35 |
36 |
37 | def main():
38 | parser = argparse.ArgumentParser()
39 | parser.add_argument(
40 | '--image_dir', type=str, default=None, help="The image folder path.")
41 | parser.add_argument(
42 | '--json_path', type=str, default=None, help="Dataset json path.")
43 | parser.add_argument(
44 | '--output_dir', type=str, default=None, help="Output dir.")
45 | parser.add_argument(
46 | '--slice_size', type=int, default=500, help="slice_size")
47 | parser.add_argument(
48 | '--overlap_ratio', type=float, default=0.25, help="overlap_ratio")
49 | args = parser.parse_args()
50 |
51 | slice_data(args.image_dir, args.json_path, args.output_dir, args.slice_size,
52 | args.overlap_ratio)
53 |
54 |
55 | if __name__ == "__main__":
56 | main()
57 |
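58 |
59 | # Example invocation (a sketch; paths are illustrative):
60 | #   python tools/slice_image.py --image_dir dataset/coco/train2017 \
61 | #     --json_path dataset/coco/annotations/instances_train2017.json \
62 | #     --output_dir dataset/coco_sliced --slice_size 640 --overlap_ratio 0.25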
--------------------------------------------------------------------------------
/benchmark/yolov8_onnx.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import torch
5 | import torchvision
6 |
7 | import numpy as np
8 | import onnxruntime as ort
9 |
10 | from utils import yolo_insert_nms
11 |
12 | class YOLOv8(torch.nn.Module):
13 | def __init__(self, name) -> None:
14 | super().__init__()
15 | from ultralytics import YOLO
16 | # Load a model
17 | # build a new model from scratch
18 | # model = YOLO(f'{name}.yaml')
19 |
20 | # load a pretrained model (recommended for training)
21 | model = YOLO(f'{name}.pt')
22 | self.model = model.model
23 |
24 | def forward(self, x):
25 | '''https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/tasks.py#L216
26 | '''
27 | pred: torch.Tensor = self.model(x)[0] # n 84 8400,
28 | pred = pred.permute(0, 2, 1)
29 | boxes, scores = pred.split([4, 80], dim=-1)
30 | boxes = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
31 |
32 | return boxes, scores
33 |
34 |
35 |
36 | def export_onnx(name='yolov8n'):
37 | '''export onnx
38 | '''
39 | m = YOLOv8(name)
40 |
41 | x = torch.rand(1, 3, 640, 640)
42 | dynamic_axes = {
43 |         'image': {0: 'batch'}  # mark the batch dimension as dynamic
44 | }
45 | torch.onnx.export(m, x, f'{name}.onnx',
46 | input_names=['image'],
47 | output_names=['boxes', 'scores'],
48 | opset_version=13,
49 | dynamic_axes=dynamic_axes)
50 |
51 | data = np.random.rand(1, 3, 640, 640).astype(np.float32)
52 | sess = ort.InferenceSession(f'{name}.onnx')
53 | _ = sess.run(output_names=None, input_feed={'image': data})
54 |
55 |
56 | if __name__ == '__main__':
57 |
58 | import argparse
59 | parser = argparse.ArgumentParser()
60 | parser.add_argument('--name', type=str, default='yolov8l')
61 | parser.add_argument('--score_threshold', type=float, default=0.001)
62 | parser.add_argument('--iou_threshold', type=float, default=0.7)
63 | parser.add_argument('--max_output_boxes', type=int, default=300)
64 | args = parser.parse_args()
65 |
66 | export_onnx(name=args.name)
67 |
68 | yolo_insert_nms(path=f'{args.name}.onnx',
69 | score_threshold=args.score_threshold,
70 | iou_threshold=args.iou_threshold,
71 | max_output_boxes=args.max_output_boxes, )
72 |
73 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/losses/smooth_l1_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | import paddle.nn as nn
21 | import paddle.nn.functional as F
22 | from ppdet.core.workspace import register
23 |
24 | __all__ = ['SmoothL1Loss']
25 |
26 | @register
27 | class SmoothL1Loss(nn.Layer):
28 | """Smooth L1 Loss.
29 | Args:
30 |         beta (float): controls the smooth region; the loss becomes L1 loss when beta=0.0
31 |         loss_weight (float): the final loss will be multiplied by this factor
32 | """
33 | def __init__(self,
34 | beta=1.0,
35 | loss_weight=1.0):
36 | super(SmoothL1Loss, self).__init__()
37 | assert beta >= 0
38 | self.beta = beta
39 | self.loss_weight = loss_weight
40 |
41 | def forward(self, pred, target, reduction='none'):
42 | """forward function, based on fvcore.
43 | Args:
44 | pred (Tensor): prediction tensor
45 | target (Tensor): target tensor, pred.shape must be the same as target.shape
46 | reduction (str): the way to reduce loss, one of (none, sum, mean)
47 | """
48 | assert reduction in ('none', 'sum', 'mean')
49 | target = target.detach()
50 | if self.beta < 1e-5:
51 | loss = paddle.abs(pred - target)
52 | else:
53 | n = paddle.abs(pred - target)
54 | cond = n < self.beta
55 | loss = paddle.where(cond, 0.5 * n ** 2 / self.beta, n - 0.5 * self.beta)
56 | if reduction == 'mean':
57 | loss = loss.mean() if loss.size > 0 else 0.0 * loss.sum()
58 | elif reduction == 'sum':
59 | loss = loss.sum()
60 | return loss * self.loss_weight
61 |
--------------------------------------------------------------------------------
/README_cn.md:
--------------------------------------------------------------------------------
1 | Simplified Chinese | [English](README.md)
2 |
3 | # RT-DETR
4 |
5 | This is the official implementation of the paper "[DETRs Beat YOLOs on Real-time Object Detection](https://arxiv.org/abs/2304.08069)".
6 |
7 |
8 | 
9 |
10 |
11 | ## Latest News
12 |
13 | - Released the RT-DETR-R50 and RT-DETR-R101 models
14 | - Released the RT-DETR-R50-m model (an example of model scaling)
15 | - Released the RT-DETR-R34 and RT-DETR-R18 models
16 | - Released the RT-DETR-L and RT-DETR-X models
17 |
18 |
19 | ## Repositories
20 | - [RT-DETR-paddle](./rtdetr_paddle)
21 | - [RT-DETR-pytorch](./rtdetr_pytorch)
22 |
23 |
24 | ## Introduction
25 |
26 | RT-DETR is the first real-time end-to-end object detector. Specifically, we design an efficient hybrid encoder that processes multi-scale features by decoupling intra-scale interaction and cross-scale fusion, and we propose an IoU-aware query selection mechanism to optimize the initialization of decoder queries. In addition, RT-DETR supports flexible adjustment of inference speed by using different numbers of decoder layers, without retraining, which facilitates the practical application of real-time object detectors. RT-DETR-R50 achieves 53.1% AP on COCO val2017 and 108 FPS on a T4 GPU, and RT-DETR-R101 achieves 54.3% AP and 74 FPS, outperforming all YOLO detectors of the same scale in both speed and accuracy. After pre-training on Objects365, RT-DETR-R50 and RT-DETR-R101 achieve 55.3% and 56.2% AP, respectively.
27 | For more details, please refer to our [paper](https://arxiv.org/abs/2304.08069).
28 |
29 |
30 | 
31 |
32 |
33 | ## Citing RT-DETR
34 | If you use RT-DETR in your research, please cite our paper:
35 | ```
36 | @misc{lv2023detrs,
37 |       title={DETRs Beat YOLOs on Real-time Object Detection},
38 |       author={Yian Zhao and Wenyu Lv and Shangliang Xu and Jinman Wei and Guanzhong Wang and Qingqing Dang and Yi Liu and Jie Chen},
39 |       year={2023},
40 |       eprint={2304.08069},
41 |       archivePrefix={arXiv},
42 |       primaryClass={cs.CV}
43 | }
44 | ```
45 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/logger.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | import os
17 | import sys
18 |
19 | import paddle.distributed as dist
20 |
21 | __all__ = ['setup_logger']
22 |
23 | logger_initialized = []
24 |
25 |
26 | def setup_logger(name="ppdet", output=None):
27 | """
28 | Initialize logger and set its verbosity level to INFO.
29 | Args:
30 | output (str): a file name or a directory to save log. If None, will not save log file.
31 | If ends with ".txt" or ".log", assumed to be a file name.
32 | Otherwise, logs will be saved to `output/log.txt`.
33 | name (str): the root module name of this logger
34 |
35 | Returns:
36 | logging.Logger: a logger
37 | """
38 | logger = logging.getLogger(name)
39 | if name in logger_initialized:
40 | return logger
41 |
42 | logger.setLevel(logging.INFO)
43 | logger.propagate = False
44 |
45 | formatter = logging.Formatter(
46 | "[%(asctime)s] %(name)s %(levelname)s: %(message)s",
47 | datefmt="%m/%d %H:%M:%S")
48 | # stdout logging: master only
49 | local_rank = dist.get_rank()
50 | if local_rank == 0:
51 | ch = logging.StreamHandler(stream=sys.stdout)
52 | ch.setLevel(logging.DEBUG)
53 | ch.setFormatter(formatter)
54 | logger.addHandler(ch)
55 |
56 | # file logging: all workers
57 | if output is not None:
58 | if output.endswith(".txt") or output.endswith(".log"):
59 | filename = output
60 | else:
61 | filename = os.path.join(output, "log.txt")
62 | if local_rank > 0:
63 | filename = filename + ".rank{}".format(local_rank)
64 |         os.makedirs(os.path.dirname(filename), exist_ok=True)
65 | fh = logging.FileHandler(filename, mode='a')
66 | fh.setLevel(logging.DEBUG)
67 |         fh.setFormatter(formatter)
68 | logger.addHandler(fh)
69 | logger_initialized.append(name)
70 | return logger
71 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/name_adapter.py:
--------------------------------------------------------------------------------
1 | class NameAdapter(object):
2 | """Fix the backbones variable names for pretrained weight"""
3 |
4 | def __init__(self, model):
5 | super(NameAdapter, self).__init__()
6 | self.model = model
7 |
8 | @property
9 | def model_type(self):
10 | return getattr(self.model, '_model_type', '')
11 |
12 | @property
13 | def variant(self):
14 | return getattr(self.model, 'variant', '')
15 |
16 | def fix_conv_norm_name(self, name):
17 | if name == "conv1":
18 | bn_name = "bn_" + name
19 | else:
20 | bn_name = "bn" + name[3:]
21 | # the naming rule is same as pretrained weight
22 | if self.model_type == 'SEResNeXt':
23 | bn_name = name + "_bn"
24 | return bn_name
25 |
26 | def fix_shortcut_name(self, name):
27 | if self.model_type == 'SEResNeXt':
28 | name = 'conv' + name + '_prj'
29 | return name
30 |
31 | def fix_bottleneck_name(self, name):
32 | if self.model_type == 'SEResNeXt':
33 | conv_name1 = 'conv' + name + '_x1'
34 | conv_name2 = 'conv' + name + '_x2'
35 | conv_name3 = 'conv' + name + '_x3'
36 | shortcut_name = name
37 | else:
38 | conv_name1 = name + "_branch2a"
39 | conv_name2 = name + "_branch2b"
40 | conv_name3 = name + "_branch2c"
41 | shortcut_name = name + "_branch1"
42 | return conv_name1, conv_name2, conv_name3, shortcut_name
43 |
44 | def fix_basicblock_name(self, name):
45 | if self.model_type == 'SEResNeXt':
46 | conv_name1 = 'conv' + name + '_x1'
47 | conv_name2 = 'conv' + name + '_x2'
48 | shortcut_name = name
49 | else:
50 | conv_name1 = name + "_branch2a"
51 | conv_name2 = name + "_branch2b"
52 | shortcut_name = name + "_branch1"
53 | return conv_name1, conv_name2, shortcut_name
54 |
55 | def fix_layer_warp_name(self, stage_num, count, i):
56 | name = 'res' + str(stage_num)
57 | if count > 10 and stage_num == 4:
58 | if i == 0:
59 | conv_name = name + "a"
60 | else:
61 | conv_name = name + "b" + str(i)
62 | else:
63 | conv_name = name + chr(ord("a") + i)
64 | if self.model_type == 'SEResNeXt':
65 | conv_name = str(stage_num + 2) + '_' + str(i + 1)
66 | return conv_name
67 |
68 | def fix_c1_stage_name(self):
69 | return "res_conv1" if self.model_type == 'ResNeXt' else "conv1"
70 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/shm_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import os
16 |
17 | SIZE_UNIT = ['K', 'M', 'G', 'T']
18 | SHM_QUERY_CMD = 'df -h'
19 | SHM_KEY = 'shm'
20 | SHM_DEFAULT_MOUNT = '/dev/shm'
21 |
22 | # [ shared memory size check ]
23 | # In detection models, image/target data occupies a lot of memory and
24 | # will take up lots of shared memory in a multi-process DataLoader. We
25 | # use the following code to get the shared memory size and perform a
26 | # size check to disable shared memory use if it is not large enough.
27 | # The shared memory size is obtained as follows:
28 | # 1. use `df -h` to get all mount info
29 | # 2. pick the spaces whose mount info contains 'shm'
30 | # 3. if there is only one 'shm' space, return its size
31 | # 4. if there are multiple 'shm' spaces, try to find the default mount
32 | #    directory '/dev/shm' on Linux-like systems, otherwise return the
33 | #    biggest space size.
34 |
35 |
36 | def _parse_size_in_M(size_str):
37 | if size_str[-1] == 'B':
38 | num, unit = size_str[:-2], size_str[-2]
39 | else:
40 | num, unit = size_str[:-1], size_str[-1]
41 | assert unit in SIZE_UNIT, \
42 | "unknown shm size unit {}".format(unit)
43 | return float(num) * \
44 | (1024 ** (SIZE_UNIT.index(unit) - 1))
45 |
46 |
47 | def _get_shared_memory_size_in_M():
48 | try:
49 | df_infos = os.popen(SHM_QUERY_CMD).readlines()
50 |     except Exception:
51 | return None
52 | else:
53 | shm_infos = []
54 | for df_info in df_infos:
55 | info = df_info.strip()
56 | if info.find(SHM_KEY) >= 0:
57 | shm_infos.append(info.split())
58 |
59 | if len(shm_infos) == 0:
60 | return None
61 | elif len(shm_infos) == 1:
62 | return _parse_size_in_M(shm_infos[0][3])
63 | else:
64 | default_mount_infos = [
65 | si for si in shm_infos if si[-1] == SHM_DEFAULT_MOUNT
66 | ]
67 | if default_mount_infos:
68 | return _parse_size_in_M(default_mount_infos[0][3])
69 | else:
70 | return max([_parse_size_in_M(si[3]) for si in shm_infos])
71 |
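72 |
73 | if __name__ == '__main__':
74 |     # sanity check (a sketch): sizes are converted to megabytes,
75 |     # so '16G' -> 16 * 1024 M and '512M' -> 512 M
76 |     print(_parse_size_in_M('16G'))   # 16384.0
77 |     print(_parse_size_in_M('512M'))  # 512.0
78 |     print(_get_shared_memory_size_in_M())  # None if `df -h` is unavailable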
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/box_ops.py:
--------------------------------------------------------------------------------
1 | '''
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | https://github.com/facebookresearch/detr/blob/main/util/box_ops.py
4 | '''
5 |
6 | import torch
7 | from torchvision.ops.boxes import box_area
8 |
9 |
10 | def box_cxcywh_to_xyxy(x):
11 | x_c, y_c, w, h = x.unbind(-1)
12 | b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
13 | (x_c + 0.5 * w), (y_c + 0.5 * h)]
14 | return torch.stack(b, dim=-1)
15 |
16 |
17 | def box_xyxy_to_cxcywh(x):
18 | x0, y0, x1, y1 = x.unbind(-1)
19 | b = [(x0 + x1) / 2, (y0 + y1) / 2,
20 | (x1 - x0), (y1 - y0)]
21 | return torch.stack(b, dim=-1)
22 |
23 |
24 | # modified from torchvision to also return the union
25 | def box_iou(boxes1, boxes2):
26 | area1 = box_area(boxes1)
27 | area2 = box_area(boxes2)
28 |
29 | lt = torch.max(boxes1[:, None, :2], boxes2[:, :2]) # [N,M,2]
30 | rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) # [N,M,2]
31 |
32 | wh = (rb - lt).clamp(min=0) # [N,M,2]
33 | inter = wh[:, :, 0] * wh[:, :, 1] # [N,M]
34 |
35 | union = area1[:, None] + area2 - inter
36 |
37 | iou = inter / union
38 | return iou, union
39 |
40 |
41 | def generalized_box_iou(boxes1, boxes2):
42 | """
43 | Generalized IoU from https://giou.stanford.edu/
44 |
45 | The boxes should be in [x0, y0, x1, y1] format
46 |
47 | Returns a [N, M] pairwise matrix, where N = len(boxes1)
48 | and M = len(boxes2)
49 | """
50 | # degenerate boxes gives inf / nan results
51 | # so do an early check
52 | assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
53 | assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
54 | iou, union = box_iou(boxes1, boxes2)
55 |
56 | lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
57 | rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
58 |
59 | wh = (rb - lt).clamp(min=0) # [N,M,2]
60 | area = wh[:, :, 0] * wh[:, :, 1]
61 |
62 | return iou - (area - union) / area
63 |
64 |
65 | def masks_to_boxes(masks):
66 | """Compute the bounding boxes around the provided masks
67 |
68 | The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
69 |
70 | Returns a [N, 4] tensors, with the boxes in xyxy format
71 | """
72 | if masks.numel() == 0:
73 | return torch.zeros((0, 4), device=masks.device)
74 |
75 | h, w = masks.shape[-2:]
76 |
77 | y = torch.arange(0, h, dtype=torch.float)
78 | x = torch.arange(0, w, dtype=torch.float)
79 | y, x = torch.meshgrid(y, x)
80 |
81 | x_mask = (masks * x.unsqueeze(0))
82 | x_max = x_mask.flatten(1).max(-1)[0]
83 | x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
84 |
85 | y_mask = (masks * y.unsqueeze(0))
86 | y_max = y_mask.flatten(1).max(-1)[0]
87 | y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
88 |
89 | return torch.stack([x_min, y_min, x_max, y_max], 1)
90 |
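91 |
92 | if __name__ == '__main__':
93 |     # quick numeric check (a sketch): for disjoint boxes the IoU is 0 and the
94 |     # enclosing-box penalty term makes generalized_box_iou negative
95 |     b1 = torch.tensor([[0., 0., 1., 1.]])
96 |     b2 = torch.tensor([[2., 2., 3., 3.]])
97 |     print(box_iou(b1, b2)[0])            # tensor([[0.]])
98 |     print(generalized_box_iou(b1, b2))   # tensor([[-0.7778]]), i.e. 0 - (9 - 2) / 9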
--------------------------------------------------------------------------------
/benchmark/utils.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import time
5 | import contextlib
6 | import numpy as np
7 | from PIL import Image
8 | from collections import OrderedDict
9 |
10 | import onnx
11 | import torch
12 | import onnx_graphsurgeon
13 |
14 |
15 | def to_binary_data(path, size=(640, 640), output_name='input_tensor.bin'):
16 | '''--loadInputs='image:input_tensor.bin'
17 | '''
18 | im = Image.open(path).resize(size)
19 | data = np.asarray(im, dtype=np.float32).transpose(2, 0, 1)[None] / 255.
20 | data.tofile(output_name)
21 |
22 |
23 | def yolo_insert_nms(path, score_threshold=0.01, iou_threshold=0.7, max_output_boxes=300, simplify=False):
24 | '''
25 | http://www.xavierdupre.fr/app/onnxcustom/helpsphinx/api/onnxops/onnx__EfficientNMS_TRT.html
26 | https://huggingface.co/spaces/muttalib1326/Punjabi_Character_Detection/blob/3dd1e17054c64e5f6b2254278f96cfa2bf418cd4/utils/add_nms.py
27 | '''
28 | onnx_model = onnx.load(path)
29 |
30 | if simplify:
31 | from onnxsim import simplify
32 | onnx_model, _ = simplify(onnx_model, overwrite_input_shapes={'image': [1, 3, 640, 640]})
33 |
34 | graph = onnx_graphsurgeon.import_onnx(onnx_model)
35 | graph.toposort()
36 | graph.fold_constants()
37 | graph.cleanup()
38 |
39 | topk = max_output_boxes
40 | attrs = OrderedDict(plugin_version='1',
41 | background_class=-1,
42 | max_output_boxes=topk,
43 | score_threshold=score_threshold,
44 | iou_threshold=iou_threshold,
45 | score_activation=False,
46 | box_coding=0, )
47 |
48 | outputs = [onnx_graphsurgeon.Variable('num_dets', np.int32, [-1, 1]),
49 | onnx_graphsurgeon.Variable('det_boxes', np.float32, [-1, topk, 4]),
50 | onnx_graphsurgeon.Variable('det_scores', np.float32, [-1, topk]),
51 | onnx_graphsurgeon.Variable('det_classes', np.int32, [-1, topk])]
52 |
53 | graph.layer(op='EfficientNMS_TRT',
54 | name="batched_nms",
55 | inputs=[graph.outputs[0],
56 | graph.outputs[1]],
57 | outputs=outputs,
58 | attrs=attrs, )
59 |
60 | graph.outputs = outputs
61 | graph.cleanup().toposort()
62 |
63 |     onnx.save(onnx_graphsurgeon.export_onnx(graph), 'yolo_w_nms.onnx')
64 |
65 |
66 | class TimeProfiler(contextlib.ContextDecorator):
67 | def __init__(self, ):
68 | self.total = 0
69 |
70 | def __enter__(self, ):
71 | self.start = self.time()
72 | return self
73 |
74 | def __exit__(self, type, value, traceback):
75 | self.total += self.time() - self.start
76 |
77 | def reset(self, ):
78 | self.total = 0
79 |
80 | def time(self, ):
81 | if torch.cuda.is_available():
82 | torch.cuda.synchronize()
83 | return time.time()
84 |
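85 |
86 | if __name__ == '__main__':
87 |     # usage sketch for TimeProfiler: accumulate GPU-synchronized wall time
88 |     # over repeated runs, then report the average latency
89 |     profiler = TimeProfiler()
90 |     x = torch.rand(1, 3, 640, 640)
91 |     for _ in range(10):
92 |         with profiler:
93 |             _ = torch.nn.functional.conv2d(x, torch.rand(8, 3, 3, 3))
94 |     print(f'avg latency: {profiler.total / 10 * 1000:.3f} ms')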
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/test_resnet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from collections import OrderedDict
6 |
7 |
8 | from src.core import register
9 |
10 |
11 | class BasicBlock(nn.Module):
12 | expansion = 1
13 |
14 | def __init__(self, in_planes, planes, stride=1):
15 | super(BasicBlock, self).__init__()
16 |
17 | self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
18 | self.bn1 = nn.BatchNorm2d(planes)
19 |
20 | self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,stride=1, padding=1, bias=False)
21 | self.bn2 = nn.BatchNorm2d(planes)
22 |
23 | self.shortcut = nn.Sequential()
24 | if stride != 1 or in_planes != self.expansion*planes:
25 | self.shortcut = nn.Sequential(
26 | nn.Conv2d(in_planes, self.expansion*planes,kernel_size=1, stride=stride, bias=False),
27 | nn.BatchNorm2d(self.expansion*planes)
28 | )
29 | def forward(self, x):
30 | out = F.relu(self.bn1(self.conv1(x)))
31 | out = self.bn2(self.conv2(out))
32 | out += self.shortcut(x)
33 | out = F.relu(out)
34 | return out
35 |
36 |
37 |
38 | class _ResNet(nn.Module):
39 | def __init__(self, block, num_blocks, num_classes=10):
40 | super().__init__()
41 | self.in_planes = 64
42 |
43 | self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
44 | self.bn1 = nn.BatchNorm2d(64)
45 |
46 | self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
47 | self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
48 | self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
49 | self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
50 |
51 | self.linear = nn.Linear(512 * block.expansion, num_classes)
52 |
53 | def _make_layer(self, block, planes, num_blocks, stride):
54 | strides = [stride] + [1]*(num_blocks-1)
55 | layers = []
56 | for stride in strides:
57 | layers.append(block(self.in_planes, planes, stride))
58 | self.in_planes = planes * block.expansion
59 | return nn.Sequential(*layers)
60 |
61 | def forward(self, x):
62 | out = F.relu(self.bn1(self.conv1(x)))
63 | out = self.layer1(out)
64 | out = self.layer2(out)
65 | out = self.layer3(out)
66 | out = self.layer4(out)
67 | out = F.avg_pool2d(out, 4)
68 | out = out.view(out.size(0), -1)
69 | out = self.linear(out)
70 | return out
71 |
72 |
73 | @register
74 | class MResNet(nn.Module):
75 | def __init__(self, num_classes=10, num_blocks=[2, 2, 2, 2]) -> None:
76 | super().__init__()
77 | self.model = _ResNet(BasicBlock, num_blocks, num_classes)
78 |
79 | def forward(self, x):
80 | return self.model(x)
81 |
82 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/stats.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import collections
16 | import numpy as np
17 |
18 | __all__ = ['SmoothedValue', 'TrainingStats']
19 |
20 |
21 | class SmoothedValue(object):
22 | """Track a series of values and provide access to smoothed values over a
23 | window or the global series average.
24 | """
25 |
26 | def __init__(self, window_size=20, fmt=None):
27 | if fmt is None:
28 | fmt = "{median:.4f} ({avg:.4f})"
29 | self.deque = collections.deque(maxlen=window_size)
30 | self.fmt = fmt
31 | self.total = 0.
32 | self.count = 0
33 |
34 | def update(self, value, n=1):
35 | self.deque.append(value)
36 | self.count += n
37 | self.total += value * n
38 |
39 | @property
40 | def median(self):
41 | return np.median(self.deque)
42 |
43 | @property
44 | def avg(self):
45 | return np.mean(self.deque)
46 |
47 | @property
48 | def max(self):
49 | return np.max(self.deque)
50 |
51 | @property
52 | def value(self):
53 | return self.deque[-1]
54 |
55 | @property
56 | def global_avg(self):
57 | return self.total / self.count
58 |
59 | def __str__(self):
60 | return self.fmt.format(
61 | median=self.median, avg=self.avg, max=self.max, value=self.value)
62 |
63 |
64 | class TrainingStats(object):
65 | def __init__(self, window_size, delimiter=' '):
66 | self.meters = None
67 | self.window_size = window_size
68 | self.delimiter = delimiter
69 |
70 | def update(self, stats):
71 | if self.meters is None:
72 | self.meters = {
73 | k: SmoothedValue(self.window_size)
74 | for k in stats.keys()
75 | }
76 | for k, v in self.meters.items():
77 | v.update(float(stats[k]))
78 |
79 | def get(self, extras=None):
80 | stats = collections.OrderedDict()
81 | if extras:
82 | for k, v in extras.items():
83 | stats[k] = v
84 | for k, v in self.meters.items():
85 | stats[k] = format(v.median, '.6f')
86 |
87 | return stats
88 |
89 | def log(self, extras=None):
90 | d = self.get(extras)
91 | strs = []
92 | for k, v in d.items():
93 | strs.append("{}: {}".format(k, str(v)))
94 | return self.delimiter.join(strs)
95 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/data/utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import numbers
16 | import numpy as np
17 |
18 | try:
19 | from collections.abc import Sequence, Mapping
20 | except ImportError:
21 | from collections import Sequence, Mapping
22 |
23 |
24 | def default_collate_fn(batch):
25 | """
26 |     Default batch collating function for :code:`paddle.io.DataLoader`.
27 |     The input is a list of samples, where each element holds the data of
28 |     one sample, and a sample may be composed of lists, dictionaries,
29 |     strings, numbers and numpy arrays. This function parses the input
30 |     recursively and stacks numbers, numpy arrays and paddle.Tensor data
31 |     into batched data, e.g. for the
32 |     following input data:
33 | [{'image': np.array(shape=[3, 224, 224]), 'label': 1},
34 | {'image': np.array(shape=[3, 224, 224]), 'label': 3},
35 | {'image': np.array(shape=[3, 224, 224]), 'label': 4},
36 | {'image': np.array(shape=[3, 224, 224]), 'label': 5},]
37 |
38 |
39 |     This default collate function zips each number and numpy array
40 |     field together and stacks each field into a batch field as follows:
41 | {'image': np.array(shape=[4, 3, 224, 224]), 'label': np.array([1, 3, 4, 5])}
42 | Args:
43 | batch(list of sample data): batch should be a list of sample data.
44 |
45 | Returns:
46 | Batched data: batched each number, numpy array and paddle.Tensor
47 | in input data.
48 | """
49 | sample = batch[0]
50 | if isinstance(sample, np.ndarray):
51 | batch = np.stack(batch, axis=0)
52 | return batch
53 | elif isinstance(sample, numbers.Number):
54 | batch = np.array(batch)
55 | return batch
56 | elif isinstance(sample, (str, bytes)):
57 | return batch
58 | elif isinstance(sample, Mapping):
59 | return {
60 | key: default_collate_fn([d[key] for d in batch])
61 | for key in sample
62 | }
63 | elif isinstance(sample, Sequence):
64 | sample_fields_num = len(sample)
65 | if not all(len(sample) == sample_fields_num for sample in iter(batch)):
66 |             raise RuntimeError(
67 |                 "field numbers are not the same among samples in a batch")
68 | return [default_collate_fn(fields) for fields in zip(*batch)]
69 |
70 | raise TypeError("batch data con only contains: tensor, numpy.ndarray, "
71 | "dict, list, number, but got {}".format(type(sample)))
72 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/rtdetr_postprocessor.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
8 | import torchvision
9 |
10 | from src.core import register
11 |
12 | __all__ = ['RTDETRPostProcessor']
13 |
14 |
15 | @register
16 | class RTDETRPostProcessor(nn.Module):
17 | __share__ = ['num_classes', 'use_focal_loss', 'num_top_queries', 'remap_mscoco_category']
18 |
19 | def __init__(self, num_classes=80, use_focal_loss=True, num_top_queries=300, remap_mscoco_category=False) -> None:
20 | super().__init__()
21 | self.use_focal_loss = use_focal_loss
22 | self.num_top_queries = num_top_queries
23 | self.num_classes = num_classes
24 | self.remap_mscoco_category = remap_mscoco_category
25 | self.deploy_mode = False
26 |
27 | def extra_repr(self) -> str:
28 | return f'use_focal_loss={self.use_focal_loss}, num_classes={self.num_classes}, num_top_queries={self.num_top_queries}'
29 |
30 |     # map raw model outputs to per-image detection results
31 | def forward(self, outputs, orig_target_sizes):
32 |
33 | logits, boxes = outputs['pred_logits'], outputs['pred_boxes']
34 | # orig_target_sizes = torch.stack([t["orig_size"] for t in targets], dim=0)
35 |
36 | bbox_pred = torchvision.ops.box_convert(boxes, in_fmt='cxcywh', out_fmt='xyxy')
37 | bbox_pred *= orig_target_sizes.repeat(1, 2).unsqueeze(1)
38 |
39 | if self.use_focal_loss:
40 | scores = F.sigmoid(logits)
41 |             scores, index = torch.topk(scores.flatten(1), self.num_top_queries, dim=-1)  # top-k over flattened (query, class) scores
42 |             labels = index % self.num_classes   # each flat index encodes query_idx * num_classes + class_idx
43 |             index = index // self.num_classes   # recover the query index
44 | boxes = bbox_pred.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, bbox_pred.shape[-1]))
45 |
46 | else:
47 |             scores = F.softmax(logits, dim=-1)[:, :, :-1]
48 | scores, labels = scores.max(dim=-1)
49 | if scores.shape[1] > self.num_top_queries:
50 | scores, index = torch.topk(scores, self.num_top_queries, dim=-1)
51 | labels = torch.gather(labels, dim=1, index=index)
52 | boxes = torch.gather(boxes, dim=1, index=index.unsqueeze(-1).tile(1, 1, boxes.shape[-1]))
53 |
54 | # TODO for onnx export
55 | if self.deploy_mode:
56 | return labels, boxes, scores
57 |
58 | # TODO
59 | if self.remap_mscoco_category:
60 | from ...data.coco import mscoco_label2category
61 | labels = torch.tensor([mscoco_label2category[int(x.item())] for x in labels.flatten()])\
62 | .to(boxes.device).reshape(labels.shape)
63 |
64 | results = []
65 | for lab, box, sco in zip(labels, boxes, scores):
66 | result = dict(labels=lab, boxes=box, scores=sco)
67 | results.append(result)
68 |
69 | return results
70 |
71 | def deploy(self, ):
72 | self.eval()
73 | self.deploy_mode = True
74 | return self
75 |
76 | @property
77 | def iou_types(self, ):
78 | return ('bbox',)
79 |
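80 |
81 | if __name__ == '__main__':
82 |     # sketch of the focal-loss top-k trick used in forward(): take the top-k
83 |     # over the flattened (num_queries * num_classes) scores, then decompose
84 |     # each flat index back into a (query, class) pair.
85 |     # Run from the repo root so that `src` is importable.
86 |     num_queries, num_classes = 4, 3
87 |     scores = torch.rand(1, num_queries, num_classes)
88 |     topk_scores, index = torch.topk(scores.flatten(1), 2, dim=-1)
89 |     labels = index % num_classes    # class index
90 |     queries = index // num_classes  # query index
91 |     print(topk_scores, labels, queries)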
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/colormap.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 | from __future__ import unicode_literals
19 |
20 | import numpy as np
21 |
22 |
23 | def colormap(rgb=False):
24 | """
25 | Get colormap
26 |
27 | The code of this function is copied from https://github.com/facebookresearch/Detectron/blob/main/detectron/utils/colormap.py
28 | """
29 | color_list = np.array([
30 | 0.000, 0.447, 0.741, 0.850, 0.325, 0.098, 0.929, 0.694, 0.125, 0.494,
31 | 0.184, 0.556, 0.466, 0.674, 0.188, 0.301, 0.745, 0.933, 0.635, 0.078,
32 | 0.184, 0.300, 0.300, 0.300, 0.600, 0.600, 0.600, 1.000, 0.000, 0.000,
33 | 1.000, 0.500, 0.000, 0.749, 0.749, 0.000, 0.000, 1.000, 0.000, 0.000,
34 | 0.000, 1.000, 0.667, 0.000, 1.000, 0.333, 0.333, 0.000, 0.333, 0.667,
35 | 0.000, 0.333, 1.000, 0.000, 0.667, 0.333, 0.000, 0.667, 0.667, 0.000,
36 | 0.667, 1.000, 0.000, 1.000, 0.333, 0.000, 1.000, 0.667, 0.000, 1.000,
37 | 1.000, 0.000, 0.000, 0.333, 0.500, 0.000, 0.667, 0.500, 0.000, 1.000,
38 | 0.500, 0.333, 0.000, 0.500, 0.333, 0.333, 0.500, 0.333, 0.667, 0.500,
39 | 0.333, 1.000, 0.500, 0.667, 0.000, 0.500, 0.667, 0.333, 0.500, 0.667,
40 | 0.667, 0.500, 0.667, 1.000, 0.500, 1.000, 0.000, 0.500, 1.000, 0.333,
41 | 0.500, 1.000, 0.667, 0.500, 1.000, 1.000, 0.500, 0.000, 0.333, 1.000,
42 | 0.000, 0.667, 1.000, 0.000, 1.000, 1.000, 0.333, 0.000, 1.000, 0.333,
43 | 0.333, 1.000, 0.333, 0.667, 1.000, 0.333, 1.000, 1.000, 0.667, 0.000,
44 | 1.000, 0.667, 0.333, 1.000, 0.667, 0.667, 1.000, 0.667, 1.000, 1.000,
45 | 1.000, 0.000, 1.000, 1.000, 0.333, 1.000, 1.000, 0.667, 1.000, 0.167,
46 | 0.000, 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000,
47 | 0.000, 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000,
48 | 0.000, 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000,
49 | 0.833, 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.167, 0.000, 0.000,
50 | 0.333, 0.000, 0.000, 0.500, 0.000, 0.000, 0.667, 0.000, 0.000, 0.833,
51 | 0.000, 0.000, 1.000, 0.000, 0.000, 0.000, 0.143, 0.143, 0.143, 0.286,
52 | 0.286, 0.286, 0.429, 0.429, 0.429, 0.571, 0.571, 0.571, 0.714, 0.714,
53 | 0.714, 0.857, 0.857, 0.857, 1.000, 1.000, 1.000
54 | ]).astype(np.float32)
55 | color_list = color_list.reshape((-1, 3)) * 255
56 | if not rgb:
57 | color_list = color_list[:, ::-1]
58 | return color_list.astype('int32')
59 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/voc_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import os.path as osp
21 | import re
22 | import random
23 |
24 | __all__ = ['create_list']
25 |
26 |
27 | def create_list(devkit_dir, years, output_dir):
28 | """
29 |     Create the following lists:
30 | 1. trainval.txt
31 | 2. test.txt
32 | """
33 | trainval_list = []
34 | test_list = []
35 | for year in years:
36 | trainval, test = _walk_voc_dir(devkit_dir, year, output_dir)
37 | trainval_list.extend(trainval)
38 | test_list.extend(test)
39 |
40 | random.shuffle(trainval_list)
41 | with open(osp.join(output_dir, 'trainval.txt'), 'w') as ftrainval:
42 | for item in trainval_list:
43 | ftrainval.write(item[0] + ' ' + item[1] + '\n')
44 |
45 |     with open(osp.join(output_dir, 'test.txt'), 'w') as ftest:
46 |         for item in test_list:
47 |             ftest.write(item[0] + ' ' + item[1] + '\n')
50 |
51 |
52 | def _get_voc_dir(devkit_dir, year, subdir):
53 |     return osp.join(devkit_dir, 'VOC' + year, subdir)
54 |
55 |
56 | def _walk_voc_dir(devkit_dir, year, output_dir):
57 | filelist_dir = _get_voc_dir(devkit_dir, year, 'ImageSets/Main')
58 | annotation_dir = _get_voc_dir(devkit_dir, year, 'Annotations')
59 | img_dir = _get_voc_dir(devkit_dir, year, 'JPEGImages')
60 | trainval_list = []
61 | test_list = []
62 | added = set()
63 |
64 | for _, _, files in os.walk(filelist_dir):
65 | for fname in files:
66 | img_ann_list = []
67 | if re.match(r'[a-z]+_trainval\.txt', fname):
68 | img_ann_list = trainval_list
69 | elif re.match(r'[a-z]+_test\.txt', fname):
70 | img_ann_list = test_list
71 | else:
72 | continue
73 | fpath = osp.join(filelist_dir, fname)
74 |             with open(fpath) as flist:  # close the handle deterministically
75 |                 for line in flist:
76 |                     name_prefix = line.strip().split()[0]
77 |                     if name_prefix in added:
78 |                         continue
79 |                     added.add(name_prefix)
80 |                     ann_path = osp.join(
81 |                         osp.relpath(annotation_dir, output_dir), name_prefix + '.xml')
82 |                     img_path = osp.join(
83 |                         osp.relpath(img_dir, output_dir), name_prefix + '.jpg')
84 |                     img_ann_list.append((img_path, ann_path))
85 |
86 | return trainval_list, test_list
87 |
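88 | 
89 | # --- editorial usage sketch (not part of the original file); the devkit and
90 | # --- output paths below are placeholders for a local VOC download.
91 | if __name__ == '__main__':
92 |     # writes trainval.txt / test.txt with (image path, annotation path) pairs
93 |     create_list('dataset/voc/VOCdevkit', ['2007', '2012'], 'dataset/voc')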
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/ms_deformable_attn_op.cc:
--------------------------------------------------------------------------------
1 | /* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | Licensed under the Apache License, Version 2.0 (the "License");
3 | you may not use this file except in compliance with the License.
4 | You may obtain a copy of the License at
5 | http://www.apache.org/licenses/LICENSE-2.0
6 | Unless required by applicable law or agreed to in writing, software
7 | distributed under the License is distributed on an "AS IS" BASIS,
8 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9 | See the License for the specific language governing permissions and
10 | limitations under the License. */
11 |
12 | #include "paddle/extension.h"
13 |
14 | #include <vector>
15 |
16 | // declare GPU implementation
17 | std::vector<paddle::Tensor>
18 | MSDeformableAttnCUDAForward(const paddle::Tensor &value,
19 | const paddle::Tensor &value_spatial_shapes,
20 | const paddle::Tensor &value_level_start_index,
21 | const paddle::Tensor &sampling_locations,
22 | const paddle::Tensor &attention_weights);
23 |
24 | std::vector<paddle::Tensor> MSDeformableAttnCUDABackward(
25 | const paddle::Tensor &value, const paddle::Tensor &value_spatial_shapes,
26 | const paddle::Tensor &value_level_start_index,
27 | const paddle::Tensor &sampling_locations,
28 | const paddle::Tensor &attention_weights, const paddle::Tensor &grad_out);
29 |
30 | //// CPU not implemented
31 |
32 | std::vector<std::vector<int64_t>>
33 | MSDeformableAttnInferShape(std::vector<int64_t> value_shape,
34 |                            std::vector<int64_t> value_spatial_shapes_shape,
35 |                            std::vector<int64_t> value_level_start_index_shape,
36 |                            std::vector<int64_t> sampling_locations_shape,
37 |                            std::vector<int64_t> attention_weights_shape) {
38 | return {{value_shape[0], sampling_locations_shape[1],
39 | value_shape[2] * value_shape[3]}};
40 | }
41 |
42 | std::vector<paddle::DataType>
43 | MSDeformableAttnInferDtype(paddle::DataType value_dtype,
44 | paddle::DataType value_spatial_shapes_dtype,
45 | paddle::DataType value_level_start_index_dtype,
46 | paddle::DataType sampling_locations_dtype,
47 | paddle::DataType attention_weights_dtype) {
48 | return {value_dtype};
49 | }
50 |
51 | PD_BUILD_OP(ms_deformable_attn)
52 | .Inputs({"Value", "SpatialShapes", "LevelIndex", "SamplingLocations",
53 | "AttentionWeights"})
54 | .Outputs({"Out"})
55 | .SetKernelFn(PD_KERNEL(MSDeformableAttnCUDAForward))
56 | .SetInferShapeFn(PD_INFER_SHAPE(MSDeformableAttnInferShape))
57 | .SetInferDtypeFn(PD_INFER_DTYPE(MSDeformableAttnInferDtype));
58 |
59 | PD_BUILD_GRAD_OP(ms_deformable_attn)
60 | .Inputs({"Value", "SpatialShapes", "LevelIndex", "SamplingLocations",
61 | "AttentionWeights", paddle::Grad("Out")})
62 | .Outputs({paddle::Grad("Value"), paddle::Grad("SpatialShapes"),
63 | paddle::Grad("LevelIndex"), paddle::Grad("SamplingLocations"),
64 | paddle::Grad("AttentionWeights")})
65 | .SetKernelFn(PD_KERNEL(MSDeformableAttnCUDABackward));
66 |
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/README.md:
--------------------------------------------------------------------------------
1 | # Compiling the Multi-Scale Deformable Attention Custom Op
2 | This custom op follows the PaddlePaddle guide on [custom external C++ operators](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/custom_op/new_cpp_op_cn.html).
3 |
4 | ## 1. Requirements
5 | - Paddle >= 2.3.2
6 | - gcc 8.2
7 |
8 | ## 2. Installation
9 | Build and install the op from this directory:
10 | ```
11 | cd rtdetr_paddle/ppdet/modeling/transformers/ext_op/
12 | python setup_ms_deformable_attn_op.py install
13 | ```
14 |
15 | After compilation the op is ready to use. A usage example of `ms_deformable_attn`:
16 | ```
17 | # import the custom op (paddle itself must be imported first)
18 | import paddle
19 | from deformable_detr_ops import ms_deformable_attn
20 | # build fake input tensors
21 | bs, n_heads, c = 2, 8, 8
22 | query_length, n_levels, n_points = 2, 2, 2
23 | spatial_shapes = paddle.to_tensor([(6, 4), (3, 2)], dtype=paddle.int64)
24 | level_start_index = paddle.concat((paddle.to_tensor(
25 | [0], dtype=paddle.int64), spatial_shapes.prod(1).cumsum(0)[:-1]))
26 | value_length = sum([(H * W).item() for H, W in spatial_shapes])
27 |
28 | def get_test_tensors(channels):
29 | value = paddle.rand(
30 | [bs, value_length, n_heads, channels], dtype=paddle.float32) * 0.01
31 | sampling_locations = paddle.rand(
32 | [bs, query_length, n_heads, n_levels, n_points, 2],
33 | dtype=paddle.float32)
34 | attention_weights = paddle.rand(
35 | [bs, query_length, n_heads, n_levels, n_points],
36 | dtype=paddle.float32) + 1e-5
37 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
38 | -2, keepdim=True)
39 | return [value, sampling_locations, attention_weights]
40 |
41 | value, sampling_locations, attention_weights = get_test_tensors(c)
42 |
43 | output = ms_deformable_attn(value,
44 | spatial_shapes,
45 | level_start_index,
46 | sampling_locations,
47 | attention_weights)
48 | ```
49 |
50 | ## 3. Unit test
51 | Correctness of the custom op can be verified by running the unit test:
52 | ```
53 | python test_ms_deformable_attn_op.py
54 | ```
55 | On success, the test prints:
56 | ```
57 | *True check_forward_equal_with_paddle_float: max_abs_err 6.98e-10 max_rel_err 2.03e-07
58 | *tensor1 True check_gradient_numerical(D=30)
59 | *tensor2 True check_gradient_numerical(D=30)
60 | *tensor3 True check_gradient_numerical(D=30)
61 | *tensor1 True check_gradient_numerical(D=32)
62 | *tensor2 True check_gradient_numerical(D=32)
63 | *tensor3 True check_gradient_numerical(D=32)
64 | *tensor1 True check_gradient_numerical(D=64)
65 | *tensor2 True check_gradient_numerical(D=64)
66 | *tensor3 True check_gradient_numerical(D=64)
67 | *tensor1 True check_gradient_numerical(D=71)
68 | *tensor2 True check_gradient_numerical(D=71)
69 | *tensor3 True check_gradient_numerical(D=71)
70 | *tensor1 True check_gradient_numerical(D=128)
71 | *tensor2 True check_gradient_numerical(D=128)
72 | *tensor3 True check_gradient_numerical(D=128)
73 | *tensor1 True check_gradient_numerical(D=1024)
74 | *tensor2 True check_gradient_numerical(D=1024)
75 | *tensor3 True check_gradient_numerical(D=1024)
76 | *tensor1 True check_gradient_numerical(D=1025)
77 | *tensor2 True check_gradient_numerical(D=1025)
78 | *tensor3 True check_gradient_numerical(D=1025)
79 | *tensor1 True check_gradient_numerical(D=2048)
80 | *tensor2 True check_gradient_numerical(D=2048)
81 | *tensor3 True check_gradient_numerical(D=2048)
82 | *tensor1 True check_gradient_numerical(D=3096)
83 | *tensor2 True check_gradient_numerical(D=3096)
84 | *tensor3 True check_gradient_numerical(D=3096)
85 | ```
86 |
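87 | As a quick editorial sanity check (an addition to the original doc), the forward
88 | output shape can be compared against the `InferShape` rule in
89 | `ms_deformable_attn_op.cc`, which yields `[bs, query_length, n_heads * c]`:
90 | ```
91 | # reuses the fake inputs constructed in the usage example above
92 | output = ms_deformable_attn(value, spatial_shapes, level_start_index,
93 |                             sampling_locations, attention_weights)
94 | assert output.shape == [bs, query_length, n_heads * c]
95 | ```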
--------------------------------------------------------------------------------
/benchmark/dataset.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import os
5 | import glob
6 | from PIL import Image
7 |
8 | import torch
9 | import torch.utils.data as data
10 | import torchvision
11 | import torchvision.transforms as T
12 | import torchvision.transforms.functional as F
13 |
14 |
15 | class ToTensor(T.ToTensor):
16 | def __init__(self) -> None:
17 | super().__init__()
18 |
19 | def __call__(self, pic):
20 | if isinstance(pic, torch.Tensor):
21 | return pic
22 | return super().__call__(pic)
23 |
24 | class PadToSize(T.Pad):
25 | def __init__(self, size, fill=0, padding_mode='constant'):
26 | super().__init__(0, fill, padding_mode)
27 | self.size = size
28 | self.fill = fill
29 |
30 | def __call__(self, img):
31 | """
32 | Args:
33 | img (PIL Image or Tensor): Image to be padded.
34 |
35 | Returns:
36 | PIL Image or Tensor: Padded image.
37 | """
38 | w, h = F.get_image_size(img)
39 | padding = (0, 0, self.size[0] - w, self.size[1] - h)
40 | return F.pad(img, padding, self.fill, self.padding_mode)
41 |
42 |
43 | class Dataset(data.Dataset):
44 | def __init__(self, img_dir: str='', preprocess: T.Compose=None, device='cuda:0') -> None:
45 | super().__init__()
46 |
47 | self.device = device
48 | self.size = 640
49 |
50 | self.im_path_list = list(glob.glob(os.path.join(img_dir, '*.jpg')))
51 |
52 | if preprocess is None:
53 | self.preprocess = T.Compose([
54 | T.Resize(size=639, max_size=640),
55 | PadToSize(size=(640, 640), fill=114),
56 | ToTensor(),
57 | T.ConvertImageDtype(torch.float),
58 | ])
59 | else:
60 | self.preprocess = preprocess
61 |
62 | def __len__(self, ):
63 | return len(self.im_path_list)
64 |
65 | def __getitem__(self, index):
66 |         # im = Image.open(self.im_path_list[index]).convert('RGB')
67 | im = torchvision.io.read_file(self.im_path_list[index])
68 | im = torchvision.io.decode_jpeg(im, mode=torchvision.io.ImageReadMode.RGB, device=self.device)
69 | _, h, w = im.shape # c,h,w
70 |
71 | im = self.preprocess(im)
72 |
73 | blob = {
74 | 'image': im,
75 | 'im_shape': torch.tensor([self.size, self.size]).to(im.device),
76 | 'scale_factor': torch.tensor([self.size / h, self.size / w]).to(im.device),
77 | 'orig_size': torch.tensor([w, h]).to(im.device),
78 | }
79 |
80 | return blob
81 |
82 | @staticmethod
83 | def post_process():
84 | pass
85 |
86 | @staticmethod
87 | def collate_fn():
88 | pass
89 |
90 |
91 | def draw_nms_result(blob, outputs, draw_score_threshold=0.25, name=''):
92 | '''show result
93 | Keys:
94 | 'num_dets', 'det_boxes', 'det_scores', 'det_classes'
95 | '''
96 | for i in range(blob['image'].shape[0]):
97 | det_scores = outputs['det_scores'][i]
98 | det_boxes = outputs['det_boxes'][i][det_scores > draw_score_threshold]
99 |
100 | im = (blob['image'][i] * 255).to(torch.uint8)
101 | im = torchvision.utils.draw_bounding_boxes(im, boxes=det_boxes, width=2)
102 | Image.fromarray(im.permute(1, 2, 0).cpu().numpy()).save(f'test_{name}_{i}.jpg')
103 |
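104 | 
105 | # --- editorial usage sketch (not part of the original file); `img_dir` is a
106 | # --- placeholder. Checks the 640x640 letterbox produced by the default
107 | # --- preprocess (resize, pad with value 114, convert to float).
108 | if __name__ == '__main__':
109 |     ds = Dataset(img_dir='path/to/coco/val2017', device='cpu')
110 |     blob = ds[0]
111 |     print(blob['image'].shape)  # torch.Size([3, 640, 640])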
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/zoo/rtdetr/utils.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import math
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 |
9 |
10 | def inverse_sigmoid(x: torch.Tensor, eps: float=1e-5) -> torch.Tensor:
11 |     x = x.clip(min=0., max=1.)  # numerically stable logit; eps guards log(0)
12 |     return torch.log(x.clip(min=eps) / (1 - x).clip(min=eps))
13 |
14 |
15 | def deformable_attention_core_func(value, value_spatial_shapes, sampling_locations, attention_weights):
16 | """
17 | Args:
18 | value (Tensor): [bs, value_length, n_head, c]
19 | value_spatial_shapes (Tensor|List): [n_levels, 2]
20 |         (value_level_start_index is not needed here; it is implied by value_spatial_shapes)
21 | sampling_locations (Tensor): [bs, query_length, n_head, n_levels, n_points, 2]
22 | attention_weights (Tensor): [bs, query_length, n_head, n_levels, n_points]
23 |
24 | Returns:
25 | output (Tensor): [bs, Length_{query}, C]
26 | """
27 | bs, _, n_head, c = value.shape
28 | _, Len_q, _, n_levels, n_points, _ = sampling_locations.shape
29 |
30 | split_shape = [h * w for h, w in value_spatial_shapes]
31 | value_list = value.split(split_shape, dim=1)
32 | sampling_grids = 2 * sampling_locations - 1
33 | sampling_value_list = []
34 | for level, (h, w) in enumerate(value_spatial_shapes):
35 | # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
36 | value_l_ = value_list[level].flatten(2).permute(
37 | 0, 2, 1).reshape(bs * n_head, c, h, w)
38 | # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
39 | sampling_grid_l_ = sampling_grids[:, :, :, level].permute(
40 | 0, 2, 1, 3, 4).flatten(0, 1)
41 | # N_*M_, D_, Lq_, P_
42 | sampling_value_l_ = F.grid_sample(
43 | value_l_,
44 | sampling_grid_l_,
45 | mode='bilinear',
46 | padding_mode='zeros',
47 | align_corners=False)
48 | sampling_value_list.append(sampling_value_l_)
49 | # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
50 | attention_weights = attention_weights.permute(0, 2, 1, 3, 4).reshape(
51 | bs * n_head, 1, Len_q, n_levels * n_points)
52 | output = (torch.stack(
53 | sampling_value_list, dim=-2).flatten(-2) *
54 | attention_weights).sum(-1).reshape(bs, n_head * c, Len_q)
55 |
56 | return output.permute(0, 2, 1)
57 |
58 |
59 | 
60 | def bias_init_with_prob(prior_prob=0.01):
61 |     """Initialize conv/fc bias so that sigmoid(bias) == prior_prob (focal-loss prior)."""
62 |     bias_init = float(-math.log((1 - prior_prob) / prior_prob))
63 |     return bias_init
64 |
65 |
66 |
67 | def get_activation(act: str, inpace: bool=True):
68 |     '''get activation
69 |     '''
70 |     # handle None / nn.Module inputs before any str method is called;
71 |     # previously these branches were unreachable after act.lower()
72 |     if act is None:
73 |         m = nn.Identity()
74 | 
75 |     elif isinstance(act, nn.Module):
76 |         m = act
77 | 
78 |     else:
79 |         act = act.lower()
80 |         if act == 'silu':
81 |             m = nn.SiLU()
82 |         elif act == 'relu':
83 |             m = nn.ReLU()
84 |         elif act == 'leaky_relu':
85 |             m = nn.LeakyReLU()
86 |         elif act == 'gelu':
87 |             m = nn.GELU()
88 |         else:
89 |             raise RuntimeError(f'Unsupported activation: {act}')
90 | 
91 |     if hasattr(m, 'inplace'):
92 |         m.inplace = inpace
93 | 
94 |     return m
95 | 
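96 | 
97 | # --- editorial self-check sketch (not part of the original file). Verifies the
98 | # --- output shape documented in deformable_attention_core_func's docstring,
99 | # --- [bs, query_length, n_head * c]; the sizes below are arbitrary.
100 | if __name__ == '__main__':
101 |     bs, n_head, c, len_q = 2, 8, 16, 10
102 |     spatial_shapes = [(6, 4), (3, 2)]
103 |     value = torch.rand(bs, sum(h * w for h, w in spatial_shapes), n_head, c)
104 |     locations = torch.rand(bs, len_q, n_head, len(spatial_shapes), 4, 2)
105 |     weights = torch.rand(bs, len_q, n_head, len(spatial_shapes), 4)
106 |     out = deformable_attention_core_func(value, spatial_shapes, locations, weights)
107 |     assert out.shape == (bs, len_q, n_head * c)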
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/nn/backbone/common.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 |
8 |
9 | class ConvNormLayer(nn.Module):
10 | def __init__(self, ch_in, ch_out, kernel_size, stride, padding=None, bias=False, act=None):
11 | super().__init__()
12 | self.conv = nn.Conv2d(
13 | ch_in,
14 | ch_out,
15 | kernel_size,
16 | stride,
17 | padding=(kernel_size-1)//2 if padding is None else padding,
18 | bias=bias)
19 | self.norm = nn.BatchNorm2d(ch_out)
20 | self.act = nn.Identity() if act is None else get_activation(act)
21 |
22 | def forward(self, x):
23 | return self.act(self.norm(self.conv(x)))
24 |
25 |
26 | class FrozenBatchNorm2d(nn.Module):
27 | """copy and modified from https://github.com/facebookresearch/detr/blob/master/models/backbone.py
28 | BatchNorm2d where the batch statistics and the affine parameters are fixed.
29 |     Copy-paste from torchvision.misc.ops with added eps before rsqrt,
30 | without which any other models than torchvision.models.resnet[18,34,50,101]
31 | produce nans.
32 | """
33 | def __init__(self, num_features, eps=1e-5):
34 | super(FrozenBatchNorm2d, self).__init__()
35 | n = num_features
36 | self.register_buffer("weight", torch.ones(n))
37 | self.register_buffer("bias", torch.zeros(n))
38 | self.register_buffer("running_mean", torch.zeros(n))
39 | self.register_buffer("running_var", torch.ones(n))
40 | self.eps = eps
41 | self.num_features = n
42 |
43 | def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,
44 | missing_keys, unexpected_keys, error_msgs):
45 | num_batches_tracked_key = prefix + 'num_batches_tracked'
46 | if num_batches_tracked_key in state_dict:
47 | del state_dict[num_batches_tracked_key]
48 |
49 | super(FrozenBatchNorm2d, self)._load_from_state_dict(
50 | state_dict, prefix, local_metadata, strict,
51 | missing_keys, unexpected_keys, error_msgs)
52 |
53 | def forward(self, x):
54 | # move reshapes to the beginning
55 | # to make it fuser-friendly
56 | w = self.weight.reshape(1, -1, 1, 1)
57 | b = self.bias.reshape(1, -1, 1, 1)
58 | rv = self.running_var.reshape(1, -1, 1, 1)
59 | rm = self.running_mean.reshape(1, -1, 1, 1)
60 | scale = w * (rv + self.eps).rsqrt()
61 | bias = b - rm * scale
62 | return x * scale + bias
63 |
64 | def extra_repr(self):
65 | return (
66 | "{num_features}, eps={eps}".format(**self.__dict__)
67 | )
68 |
69 |
70 | def get_activation(act: str, inpace: bool=True):
71 |     '''get activation
72 |     '''
73 |     # handle None / nn.Module inputs before any str method is called;
74 |     # previously these branches were unreachable after act.lower()
75 |     if act is None:
76 |         m = nn.Identity()
77 | 
78 |     elif isinstance(act, nn.Module):
79 |         m = act
80 | 
81 |     else:
82 |         act = act.lower()
83 |         if act == 'silu':
84 |             m = nn.SiLU()
85 |         elif act == 'relu':
86 |             m = nn.ReLU()
87 |         elif act == 'leaky_relu':
88 |             m = nn.LeakyReLU()
89 |         elif act == 'gelu':
90 |             m = nn.GELU()
91 |         else:
92 |             raise RuntimeError(f'Unsupported activation: {act}')
93 | 
94 |     if hasattr(m, 'inplace'):
95 |         m.inplace = inpace
96 | 
97 |     return m
98 | 
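99 | 
100 | # --- editorial usage sketch (not part of the original file). With its default
101 | # --- buffers (mean 0, var 1, weight 1, bias 0), FrozenBatchNorm2d matches an
102 | # --- eval-mode BatchNorm2d: both compute x * (var + eps).rsqrt().
103 | if __name__ == '__main__':
104 |     bn = nn.BatchNorm2d(8).eval()
105 |     fbn = FrozenBatchNorm2d(8)
106 |     x = torch.randn(2, 8, 4, 4)
107 |     assert torch.allclose(bn(x), fbn(x), atol=1e-6)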
--------------------------------------------------------------------------------
/rtdetr_paddle/tools/export_model.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import os
20 | import sys
21 |
22 | # add python path of PaddleDetection to sys.path
23 | parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
24 | sys.path.insert(0, parent_path)
25 |
26 | # ignore warning log
27 | import warnings
28 | warnings.filterwarnings('ignore')
29 |
30 | import paddle
31 | from ppdet.core.workspace import load_config, merge_config
32 | from ppdet.utils.check import check_gpu, check_version, check_config
33 | from ppdet.utils.cli import ArgsParser
34 | from ppdet.engine import Trainer
35 |
36 | from ppdet.utils.logger import setup_logger
37 | logger = setup_logger('export_model')
38 |
39 |
40 | def parse_args():
41 | parser = ArgsParser()
42 | parser.add_argument(
43 | "--output_dir",
44 | type=str,
45 | default="output_inference",
46 | help="Directory for storing the output model files.")
47 | parser.add_argument(
48 | "--export_serving_model",
49 |         action='store_true',
50 | default=False,
51 | help="Whether to export serving model or not.")
52 | parser.add_argument(
53 | "--slim_config",
54 | default=None,
55 | type=str,
56 | help="Configuration file of slim method.")
57 | args = parser.parse_args()
58 | return args
59 |
60 |
61 | def run(FLAGS, cfg):
62 | trainer = Trainer(cfg, mode='test')
63 | # load weights
64 | trainer.load_weights(cfg.weights)
65 |
66 | # export model
67 | trainer.export(FLAGS.output_dir)
68 |
69 | if FLAGS.export_serving_model:
70 | from paddle_serving_client.io import inference_model_to_serving
71 | model_name = os.path.splitext(os.path.split(cfg.filename)[-1])[0]
72 |
73 | inference_model_to_serving(
74 | dirname="{}/{}".format(FLAGS.output_dir, model_name),
75 | serving_server="{}/{}/serving_server".format(FLAGS.output_dir,
76 | model_name),
77 | serving_client="{}/{}/serving_client".format(FLAGS.output_dir,
78 | model_name),
79 | model_filename="model.pdmodel",
80 | params_filename="model.pdiparams")
81 |
82 |
83 | def main():
84 | paddle.set_device("cpu")
85 | FLAGS = parse_args()
86 | cfg = load_config(FLAGS.config)
87 | merge_config(FLAGS.opt)
88 |
89 | # FIXME: Temporarily solve the priority problem of FLAGS.opt
90 | merge_config(FLAGS.opt)
91 | check_config(cfg)
92 | if 'use_gpu' not in cfg:
93 | cfg.use_gpu = False
94 | check_gpu(cfg.use_gpu)
95 | check_version()
96 |
97 | run(FLAGS, cfg)
98 |
99 |
100 | if __name__ == '__main__':
101 | main()
102 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | .idea/
161 |
162 |
163 | .DS_Store
164 | *.ipynb
165 | *.pth
166 | *.pdparams
167 | *.onnx
168 | rtdetr_pytorch/output/*
169 | rtdetr_pytorch/dataset/*
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/transformers/position_encoding.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | #
15 | # Modified from DETR (https://github.com/facebookresearch/detr)
16 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
17 |
18 | from __future__ import absolute_import
19 | from __future__ import division
20 | from __future__ import print_function
21 |
22 | import math
23 | import paddle
24 | import paddle.nn as nn
25 |
26 | from ppdet.core.workspace import register, serializable
27 |
28 |
29 | @register
30 | @serializable
31 | class PositionEmbedding(nn.Layer):
32 | def __init__(self,
33 | num_pos_feats=128,
34 | temperature=10000,
35 | normalize=True,
36 | scale=2 * math.pi,
37 | embed_type='sine',
38 | num_embeddings=50,
39 | offset=0.,
40 | eps=1e-6):
41 | super(PositionEmbedding, self).__init__()
42 | assert embed_type in ['sine', 'learned']
43 |
44 | self.embed_type = embed_type
45 | self.offset = offset
46 | self.eps = eps
47 | if self.embed_type == 'sine':
48 | self.num_pos_feats = num_pos_feats
49 | self.temperature = temperature
50 | self.normalize = normalize
51 | self.scale = scale
52 | elif self.embed_type == 'learned':
53 | self.row_embed = nn.Embedding(num_embeddings, num_pos_feats)
54 | self.col_embed = nn.Embedding(num_embeddings, num_pos_feats)
55 | else:
56 | raise ValueError(f"{self.embed_type} is not supported.")
57 |
58 | def forward(self, mask):
59 | """
60 | Args:
61 | mask (Tensor): [B, H, W]
62 | Returns:
63 | pos (Tensor): [B, H, W, C]
64 | """
65 | if self.embed_type == 'sine':
66 | y_embed = mask.cumsum(1)
67 | x_embed = mask.cumsum(2)
68 | if self.normalize:
69 | y_embed = (y_embed + self.offset) / (
70 | y_embed[:, -1:, :] + self.eps) * self.scale
71 | x_embed = (x_embed + self.offset) / (
72 | x_embed[:, :, -1:] + self.eps) * self.scale
73 |
74 | dim_t = 2 * (paddle.arange(self.num_pos_feats) //
75 | 2).astype('float32')
76 | dim_t = self.temperature**(dim_t / self.num_pos_feats)
77 |
78 | pos_x = x_embed.unsqueeze(-1) / dim_t
79 | pos_y = y_embed.unsqueeze(-1) / dim_t
80 | pos_x = paddle.stack(
81 | (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()),
82 | axis=4).flatten(3)
83 | pos_y = paddle.stack(
84 | (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()),
85 | axis=4).flatten(3)
86 | return paddle.concat((pos_y, pos_x), axis=3)
87 | elif self.embed_type == 'learned':
88 | h, w = mask.shape[-2:]
89 | i = paddle.arange(w)
90 | j = paddle.arange(h)
91 | x_emb = self.col_embed(i)
92 | y_emb = self.row_embed(j)
93 | return paddle.concat(
94 | [
95 | x_emb.unsqueeze(0).tile([h, 1, 1]),
96 | y_emb.unsqueeze(1).tile([1, w, 1]),
97 | ],
98 | axis=-1).unsqueeze(0)
99 | else:
100 | raise ValueError(f"not supported {self.embed_type}")
101 |
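102 | 
103 | # --- editorial usage sketch (not part of the original file). A sine embedding
104 | # --- over an all-ones mask (i.e. no padding); the channel dim of the output is
105 | # --- 2 * num_pos_feats (y and x halves concatenated).
106 | if __name__ == '__main__':
107 |     pe = PositionEmbedding(num_pos_feats=128, embed_type='sine')
108 |     pos = pe(paddle.ones([2, 32, 32]))
109 |     assert pos.shape == [2, 32, 32, 256]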
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/core/config/yaml_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import importlib
16 | import inspect
17 |
18 | import yaml
19 | from .schema import SharedConfig
20 |
21 | __all__ = ['serializable', 'Callable']
22 |
23 |
24 | def represent_dictionary_order(self, dict_data):
25 | return self.represent_mapping('tag:yaml.org,2002:map', dict_data.items())
26 |
27 |
28 | def setup_orderdict():
29 | from collections import OrderedDict
30 | yaml.add_representer(OrderedDict, represent_dictionary_order)
31 |
32 |
33 | def _make_python_constructor(cls):
34 | def python_constructor(loader, node):
35 | if isinstance(node, yaml.SequenceNode):
36 | args = loader.construct_sequence(node, deep=True)
37 | return cls(*args)
38 | else:
39 | kwargs = loader.construct_mapping(node, deep=True)
40 | try:
41 | return cls(**kwargs)
42 | except Exception as ex:
43 | print("Error when construct {} instance from yaml config".
44 | format(cls.__name__))
45 | raise ex
46 |
47 | return python_constructor
48 |
49 |
50 | def _make_python_representer(cls):
51 | # python 2 compatibility
52 |     if hasattr(inspect, 'getfullargspec'):
53 |         argspec = inspect.getfullargspec(cls)
54 |     else:
55 |         argspec = inspect.getargspec(cls.__init__)
56 | argnames = [arg for arg in argspec.args if arg != 'self']
57 |
58 | def python_representer(dumper, obj):
59 | if argnames:
60 | data = {name: getattr(obj, name) for name in argnames}
61 | else:
62 | data = obj.__dict__
63 | if '_id' in data:
64 | del data['_id']
65 | return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
66 |
67 | return python_representer
68 |
69 |
70 | def serializable(cls):
71 | """
72 | Add loader and dumper for given class, which must be
73 | "trivially serializable"
74 |
75 | Args:
76 | cls: class to be serialized
77 |
78 | Returns: cls
79 | """
80 | yaml.add_constructor(u'!{}'.format(cls.__name__),
81 | _make_python_constructor(cls))
82 | yaml.add_representer(cls, _make_python_representer(cls))
83 | return cls
84 |
85 |
86 | yaml.add_representer(SharedConfig,
87 | lambda d, o: d.represent_data(o.default_value))
88 |
89 |
90 | @serializable
91 | class Callable(object):
92 | """
93 | Helper to be used in Yaml for creating arbitrary class objects
94 |
95 | Args:
96 | full_type (str): the full module path to target function
97 | """
98 |
99 |     def __init__(self, full_type, args=None, kwargs=None):
100 |         super(Callable, self).__init__()
101 |         self.full_type = full_type
102 |         self.args = args if args is not None else []
103 |         self.kwargs = kwargs if kwargs is not None else {}
104 |
105 | def __call__(self):
106 | if '.' in self.full_type:
107 | idx = self.full_type.rfind('.')
108 | module = importlib.import_module(self.full_type[:idx])
109 | func_name = self.full_type[idx + 1:]
110 | else:
111 | try:
112 | module = importlib.import_module('builtins')
113 | except Exception:
114 | module = importlib.import_module('__builtin__')
115 | func_name = self.full_type
116 |
117 | func = getattr(module, func_name)
118 | return func(*self.args, **self.kwargs)
119 |
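120 | 
121 | # --- editorial usage sketch (not part of the original file). Callable defers
122 | # --- construction: the target is only imported and invoked when called.
123 | if __name__ == '__main__':
124 |     fn = Callable('math.sqrt', args=[9.0])
125 |     print(fn())  # 3.0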
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/architectures/detr.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import paddle
20 | from .meta_arch import BaseArch
21 | from ppdet.core.workspace import register, create
22 |
23 | __all__ = ['DETR']
24 |
25 |
26 | # Deformable DETR, DINO use the same architecture as DETR
27 |
28 |
29 | @register
30 | class DETR(BaseArch):
31 | __category__ = 'architecture'
32 | __inject__ = ['post_process']
33 | __shared__ = ['with_mask', 'exclude_post_process']
34 |
35 | def __init__(self,
36 | backbone,
37 | transformer='DETRTransformer',
38 | detr_head='DETRHead',
39 | neck=None,
40 | post_process='DETRPostProcess',
41 | with_mask=False,
42 | exclude_post_process=False):
43 | super(DETR, self).__init__()
44 | self.backbone = backbone
45 | self.transformer = transformer
46 | self.detr_head = detr_head
47 | self.neck = neck
48 | self.post_process = post_process
49 | self.with_mask = with_mask
50 | self.exclude_post_process = exclude_post_process
51 |
52 | @classmethod
53 | def from_config(cls, cfg, *args, **kwargs):
54 | # backbone
55 | backbone = create(cfg['backbone'])
56 | # neck
57 | kwargs = {'input_shape': backbone.out_shape}
58 | neck = create(cfg['neck'], **kwargs) if cfg['neck'] else None
59 |
60 | # transformer
61 | if neck is not None:
62 | kwargs = {'input_shape': neck.out_shape}
63 | transformer = create(cfg['transformer'], **kwargs)
64 | # head
65 | kwargs = {
66 | 'hidden_dim': transformer.hidden_dim,
67 | 'nhead': transformer.nhead,
68 | 'input_shape': backbone.out_shape
69 | }
70 | detr_head = create(cfg['detr_head'], **kwargs)
71 |
72 | return {
73 | 'backbone': backbone,
74 | 'transformer': transformer,
75 | "detr_head": detr_head,
76 | "neck": neck
77 | }
78 |
79 | def _forward(self):
80 | # Backbone
81 | body_feats = self.backbone(self.inputs)
82 |
83 | # Neck
84 | if self.neck is not None:
85 | body_feats = self.neck(body_feats)
86 |
87 | # Transformer
88 | pad_mask = self.inputs.get('pad_mask', None)
89 | out_transformer = self.transformer(body_feats, pad_mask, self.inputs)
90 |
91 | # DETR Head
92 | if self.training:
93 | detr_losses = self.detr_head(out_transformer, body_feats,
94 | self.inputs)
95 | detr_losses.update({
96 | 'loss': paddle.add_n(
97 | [v for k, v in detr_losses.items() if 'log' not in k])
98 | })
99 | return detr_losses
100 | else:
101 | preds = self.detr_head(out_transformer, body_feats)
102 | if self.exclude_post_process:
103 | bbox, bbox_num, mask = preds
104 | else:
105 | bbox, bbox_num, mask = self.post_process(
106 | preds, self.inputs['im_shape'], self.inputs['scale_factor'],
107 | paddle.shape(self.inputs['image'])[2:])
108 |
109 | output = {'bbox': bbox, 'bbox_num': bbox_num}
110 | if self.with_mask:
111 | output['mask'] = mask
112 | return output
113 |
114 | def get_loss(self):
115 | return self._forward()
116 |
117 | def get_pred(self):
118 | return self._forward()
119 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/optim/ema.py:
--------------------------------------------------------------------------------
1 | """
2 | reference:
3 | https://github.com/ultralytics/yolov5/blob/master/utils/torch_utils.py#L404
4 |
5 | by lyuwenyu
6 | """
7 |
8 | import torch
9 | import torch.nn as nn
10 |
11 | import math
12 | from copy import deepcopy
13 |
14 |
15 |
16 | from src.core import register
17 | import src.misc.dist as dist
18 |
19 |
20 | __all__ = ['ModelEMA']
21 |
22 |
23 |
24 | @register
25 | class ModelEMA(object):
26 | """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
27 | Keep a moving average of everything in the model state_dict (parameters and buffers).
28 | This is intended to allow functionality like
29 | https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
30 | A smoothed version of the weights is necessary for some training schemes to perform well.
31 | This class is sensitive where it is initialized in the sequence of model init,
32 | GPU assignment and distributed training wrappers.
33 | """
34 | def __init__(self, model: nn.Module, decay: float=0.9999, warmups: int=2000):
35 | super().__init__()
36 |
37 | # Create EMA
38 | self.module = deepcopy(dist.de_parallel(model)).eval() # FP32 EMA
39 |
40 | # if next(model.parameters()).device.type != 'cpu':
41 | # self.module.half() # FP16 EMA
42 |
43 | self.decay = decay
44 | self.warmups = warmups
45 | self.updates = 0 # number of EMA updates
46 | # self.filter_no_grad = filter_no_grad
47 | self.decay_fn = lambda x: decay * (1 - math.exp(-x / warmups)) # decay exponential ramp (to help early epochs)
48 |
49 | for p in self.module.parameters():
50 | p.requires_grad_(False)
51 |
52 | def update(self, model: nn.Module):
53 | # Update EMA parameters
54 | with torch.no_grad():
55 | self.updates += 1
56 | d = self.decay_fn(self.updates)
57 |
58 | msd = dist.de_parallel(model).state_dict()
59 | for k, v in self.module.state_dict().items():
60 | if v.dtype.is_floating_point:
61 | v *= d
62 | v += (1 - d) * msd[k].detach()
63 |
64 | def to(self, *args, **kwargs):
65 | self.module = self.module.to(*args, **kwargs)
66 | return self
67 |
68 | def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
69 | # Update EMA attributes
70 | self.copy_attr(self.module, model, include, exclude)
71 |
72 | @staticmethod
73 | def copy_attr(a, b, include=(), exclude=()):
74 | # Copy attributes from b to a, options to only include [...] and to exclude [...]
75 | for k, v in b.__dict__.items():
76 | if (len(include) and k not in include) or k.startswith('_') or k in exclude:
77 | continue
78 | else:
79 | setattr(a, k, v)
80 |
81 | def state_dict(self, ):
82 | return dict(module=self.module.state_dict(), updates=self.updates, warmups=self.warmups)
83 |
84 | def load_state_dict(self, state):
85 | self.module.load_state_dict(state['module'])
86 | if 'updates' in state:
87 | self.updates = state['updates']
88 |
89 |     def forward(self, ):
90 |         raise RuntimeError('ModelEMA is not callable; use `ema.module` for inference.')
91 |
92 | def extra_repr(self) -> str:
93 | return f'decay={self.decay}, warmups={self.warmups}'
94 |
95 |
96 |
97 |
98 | class ExponentialMovingAverage(torch.optim.swa_utils.AveragedModel):
99 | """Maintains moving averages of model parameters using an exponential decay.
100 | ``ema_avg = decay * avg_model_param + (1 - decay) * model_param``
101 |     `torch.optim.swa_utils.AveragedModel`
102 |     is used to compute the EMA.
103 | """
104 | def __init__(self, model, decay, device="cpu", use_buffers=True):
105 |
106 | self.decay_fn = lambda x: decay * (1 - math.exp(-x / 2000))
107 |
108 | def ema_avg(avg_model_param, model_param, num_averaged):
109 | decay = self.decay_fn(num_averaged)
110 | return decay * avg_model_param + (1 - decay) * model_param
111 |
112 | super().__init__(model, device, ema_avg, use_buffers=use_buffers)
113 |
114 |
115 |
116 |
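117 | 
118 | # --- editorial usage sketch (not part of the original file). Typical training-
119 | # --- loop integration with a toy model; evaluation and export should use the
120 | # --- smoothed weights in `ema.module`.
121 | if __name__ == '__main__':
122 |     net = nn.Linear(4, 2)
123 |     ema = ModelEMA(net, decay=0.9999, warmups=2000)
124 |     opt = torch.optim.SGD(net.parameters(), lr=0.1)
125 |     for _ in range(3):
126 |         loss = net(torch.randn(8, 4)).pow(2).mean()
127 |         opt.zero_grad()
128 |         loss.backward()
129 |         opt.step()
130 |         ema.update(net)  # EMA weights lag the online weights
131 |     print(ema.extra_repr())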
--------------------------------------------------------------------------------
/rtdetr_pytorch/src/solver/det_solver.py:
--------------------------------------------------------------------------------
1 | '''
2 | by lyuwenyu
3 | '''
4 | import time
5 | import json
6 | import datetime
7 |
8 | import torch
9 |
10 | from src.misc import dist
11 | from src.data import get_coco_api_from_dataset
12 |
13 | from .solver import BaseSolver
14 | from .det_engine import train_one_epoch, evaluate
15 |
16 |
17 | class DetSolver(BaseSolver):
18 |
19 | def fit(self, ):
20 | print("Start training")
21 | self.train()
22 |
23 | args = self.cfg
24 |
25 | n_parameters = sum(p.numel() for p in self.model.parameters() if p.requires_grad)
26 | print('number of params:', n_parameters)
27 |
28 | base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset)
29 | # best_stat = {'coco_eval_bbox': 0, 'coco_eval_masks': 0, 'epoch': -1, }
30 | best_stat = {'epoch': -1, }
31 |
32 | start_time = time.time()
33 | for epoch in range(self.last_epoch + 1, args.epoches):
34 | if dist.is_dist_available_and_initialized():
35 | self.train_dataloader.sampler.set_epoch(epoch)
36 |
37 | train_stats = train_one_epoch(
38 | self.model, self.criterion, self.train_dataloader, self.optimizer, self.device, epoch,
39 | args.clip_max_norm, print_freq=args.log_step, ema=self.ema, scaler=self.scaler)
40 |
41 | self.lr_scheduler.step()
42 |
43 | if self.output_dir:
44 | checkpoint_paths = [self.output_dir / 'checkpoint.pth']
45 |                 # extra checkpoint every args.checkpoint_step epochs
46 | if (epoch + 1) % args.checkpoint_step == 0:
47 | checkpoint_paths.append(self.output_dir / f'checkpoint{epoch:04}.pth')
48 | for checkpoint_path in checkpoint_paths:
49 | dist.save_on_master(self.state_dict(epoch), checkpoint_path)
50 |
51 | module = self.ema.module if self.ema else self.model
52 | test_stats, coco_evaluator = evaluate(
53 | module, self.criterion, self.postprocessor, self.val_dataloader, base_ds, self.device, self.output_dir
54 | )
55 |
56 | # TODO
57 | for k in test_stats.keys():
58 | if k in best_stat:
59 | best_stat['epoch'] = epoch if test_stats[k][0] > best_stat[k] else best_stat['epoch']
60 | best_stat[k] = max(best_stat[k], test_stats[k][0])
61 | else:
62 | best_stat['epoch'] = epoch
63 | best_stat[k] = test_stats[k][0]
64 | print('best_stat: ', best_stat)
65 |
66 | log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
67 | **{f'test_{k}': v for k, v in test_stats.items()},
68 | 'epoch': epoch,
69 | 'n_parameters': n_parameters}
70 |
71 | if self.output_dir and dist.is_main_process():
72 | with (self.output_dir / "log.txt").open("a") as f:
73 | f.write(json.dumps(log_stats) + "\n")
74 |
75 | # for evaluation logs
76 | if coco_evaluator is not None:
77 | (self.output_dir / 'eval').mkdir(exist_ok=True)
78 | if "bbox" in coco_evaluator.coco_eval:
79 | filenames = ['latest.pth']
80 | if epoch % 50 == 0:
81 | filenames.append(f'{epoch:03}.pth')
82 | for name in filenames:
83 | torch.save(coco_evaluator.coco_eval["bbox"].eval,
84 | self.output_dir / "eval" / name)
85 |
86 | total_time = time.time() - start_time
87 | total_time_str = str(datetime.timedelta(seconds=int(total_time)))
88 | print('Training time {}'.format(total_time_str))
89 |
90 | def val(self, ):
91 | self.eval()
92 |
93 | base_ds = get_coco_api_from_dataset(self.val_dataloader.dataset)
94 |
95 | module = self.ema.module if self.ema else self.model
96 | test_stats, coco_evaluator = evaluate(module, self.criterion, self.postprocessor,
97 | self.val_dataloader, base_ds, self.device, self.output_dir)
98 |
99 | if self.output_dir:
100 | dist.save_on_master(coco_evaluator.coco_eval["bbox"].eval, self.output_dir / "eval.pth")
101 |
102 | return
103 |
--------------------------------------------------------------------------------
/rtdetr_pytorch/README.md:
--------------------------------------------------------------------------------
1 | ## TODO
2 |
3 |
4 |
5 | - [x] Training
6 | - [x] Evaluation
7 | - [x] Export onnx
8 | - [x] Upload source code
9 | - [x] Upload weights converted from paddle, see [*links*](https://github.com/lyuwenyu/RT-DETR/issues/42)
10 | - [x] Align training details with the [*paddle version*](../rtdetr_paddle/)
11 | - [x] Tuning rtdetr based on [*pretrained weights*](https://github.com/lyuwenyu/RT-DETR/issues/42)
12 |
13 |
14 |
15 |
16 | ## Model Zoo
17 |
18 | | Model | Dataset | Input Size | APval | AP50val | #Params(M) | FPS | checkpoint |
19 | | :---: | :---: | :---: | :---: | :---: | :---: | :---: | :---: |
20 | rtdetr_r18vd | COCO | 640 | 46.4 | 63.7 | 20 | 217 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_dec3_6x_coco_from_paddle.pth)
21 | rtdetr_r34vd | COCO | 640 | 48.9 | 66.8 | 31 | 161 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r34vd_dec4_6x_coco_from_paddle.pth)
22 | rtdetr_r50vd_m | COCO | 640 | 51.3 | 69.5 | 36 | 145 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_m_6x_coco_from_paddle.pth)
23 | rtdetr_r50vd | COCO | 640 | 53.1 | 71.2| 42 | 108 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_6x_coco_from_paddle.pth)
24 | rtdetr_r101vd | COCO | 640 | 54.3 | 72.8 | 76 | 74 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_6x_coco_from_paddle.pth)
25 | rtdetr_r18vd | COCO+Objects365 | 640 | 49.0 | 66.5 | 20 | 217 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r18vd_5x_coco_objects365_from_paddle.pth)
26 | rtdetr_r50vd | COCO+Objects365 | 640 | 55.2 | 73.4 | 42 | 108 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r50vd_2x_coco_objects365_from_paddle.pth)
27 | rtdetr_r101vd | COCO+Objects365 | 640 | 56.2 | 74.5 | 76 | 74 | [url*](https://github.com/lyuwenyu/storage/releases/download/v0.1/rtdetr_r101vd_2x_coco_objects365_from_paddle.pth)
28 |
29 | Notes
30 | - `COCO + Objects365` in the table means the model was finetuned on `COCO` using weights pretrained on `Objects365`.
31 | - `url*` links to pretrained weights converted from the corresponding Paddle models, to save training energy. *Numbers may differ slightly between this table and the paper.*
32 |
33 |
34 | ## Quick start
35 |
36 |
37 | ### Install
38 |
39 | ```bash
40 | pip install -r requirements.txt
41 | ```
42 |
43 |
44 |
45 |
46 |
47 | ### Data
48 |
49 | - Download and extract COCO 2017 train and val images.
50 | ```
51 | path/to/coco/
52 | annotations/ # annotation json files
53 | train2017/ # train images
54 | val2017/ # val images
55 | ```
56 | - Modify config [`img_folder`, `ann_file`](configs/dataset/coco_detection.yml)
57 |
58 |
59 |
60 |
61 |
62 | ### Training & Evaluation
63 |
64 | - Training on a Single GPU:
65 |
66 | ```shell
67 | # training on single-gpu
68 | export CUDA_VISIBLE_DEVICES=0
69 | python tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml
70 | ```
71 |
72 | - Training on Multiple GPUs:
73 |
74 | ```shell
75 | # train on multi-gpu
76 | export CUDA_VISIBLE_DEVICES=0,1,2,3
77 | torchrun --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml
78 | ```
79 |
80 | - Evaluation on Multiple GPUs:
81 |
82 | ```shell
83 | # val on multi-gpu
84 | export CUDA_VISIBLE_DEVICES=0,1,2,3
85 | torchrun --nproc_per_node=4 tools/train.py -c configs/rtdetr/rtdetr_r50vd_6x_coco.yml -r path/to/checkpoint --test-only
86 | ```
87 |
88 |
89 |
90 |
91 |
92 |
93 | ### Export
94 |
95 | ```shell
96 | python tools/export_onnx.py -c configs/rtdetr/rtdetr_r18vd_6x_coco.yml -r path/to/checkpoint --check
97 | ```
98 |
99 |
100 |
101 |
102 |
103 |
104 | ### Train custom data
105 |
106 | 1. Set `remap_mscoco_category: False`. This variable only works for the MS-COCO dataset. If you want to use the `remap_mscoco_category` logic on your own dataset, modify the variable [`mscoco_category2name`](https://github.com/lyuwenyu/RT-DETR/blob/main/rtdetr_pytorch/src/data/coco/coco_dataset.py#L154) accordingly.
107 |
108 | 2. Add `-t path/to/checkpoint` (optional) to tune rtdetr from a pretrained checkpoint. See [training script details](./tools/README.md).
109 |
110 |
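111 | ### Inference with the exported model (sketch)
112 | 
113 | A minimal editorial sketch of running the exported model with `onnxruntime`. It assumes the graph exposes `images` and `orig_target_sizes` inputs and returns `(labels, boxes, scores)`, which may differ depending on your export settings; file paths are placeholders.
114 | 
115 | ```python
116 | import numpy as np
117 | import onnxruntime as ort
118 | from PIL import Image
119 | 
120 | sess = ort.InferenceSession('model.onnx')
121 | im = Image.open('demo.jpg').convert('RGB').resize((640, 640))
122 | images = (np.asarray(im, dtype=np.float32) / 255.).transpose(2, 0, 1)[None]
123 | sizes = np.array([[640, 640]], dtype=np.int64)
124 | labels, boxes, scores = sess.run(None, {'images': images, 'orig_target_sizes': sizes})
125 | ```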
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/modeling/backbones/transformer_utils.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import paddle
16 | import paddle.nn as nn
17 | import paddle.nn.functional as F
18 |
19 | from paddle.nn.initializer import TruncatedNormal, Constant, Assign
20 |
21 | # Common initializations
22 | ones_ = Constant(value=1.)
23 | zeros_ = Constant(value=0.)
24 | trunc_normal_ = TruncatedNormal(std=.02)
25 |
26 |
27 | # Common Layers
28 | def drop_path(x, drop_prob=0., training=False):
29 | """
30 | Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
31 | the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
32 | See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
33 | """
34 | if drop_prob == 0. or not training:
35 | return x
36 | keep_prob = paddle.to_tensor(1 - drop_prob, dtype=x.dtype)
37 | shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
38 | random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
39 | random_tensor = paddle.floor(random_tensor) # binarize
40 | output = x.divide(keep_prob) * random_tensor
41 | return output
42 |
43 |
44 | class DropPath(nn.Layer):
45 | def __init__(self, drop_prob=None):
46 | super(DropPath, self).__init__()
47 | self.drop_prob = drop_prob
48 |
49 | def forward(self, x):
50 | return drop_path(x, self.drop_prob, self.training)
51 |
52 |
53 | class Identity(nn.Layer):
54 | def __init__(self):
55 | super(Identity, self).__init__()
56 |
57 | def forward(self, input):
58 | return input
59 |
60 |
61 | # common funcs
62 |
63 |
64 | def to_2tuple(x):
65 | if isinstance(x, (list, tuple)):
66 | return x
67 | return tuple([x] * 2)
68 |
69 |
70 | def add_parameter(layer, datas, name=None):
71 | parameter = layer.create_parameter(
72 | shape=(datas.shape), default_initializer=Assign(datas))
73 | if name:
74 | layer.add_parameter(name, parameter)
75 | return parameter
76 |
77 |
78 | def window_partition(x, window_size):
79 | """
80 | Partition into non-overlapping windows with padding if needed.
81 | Args:
82 | x (tensor): input tokens with [B, H, W, C].
83 | window_size (int): window size.
84 | Returns:
85 | windows: windows after partition with [B * num_windows, window_size, window_size, C].
86 |         (Hp, Wp): padded height and width; (num_h, num_w): windows per axis.
87 | """
88 | B, H, W, C = paddle.shape(x)
89 |
90 | pad_h = (window_size - H % window_size) % window_size
91 | pad_w = (window_size - W % window_size) % window_size
92 | x = F.pad(x.transpose([0, 3, 1, 2]),
93 | paddle.to_tensor(
94 | [0, int(pad_w), 0, int(pad_h)],
95 | dtype='int32')).transpose([0, 2, 3, 1])
96 | Hp, Wp = H + pad_h, W + pad_w
97 |
98 | num_h, num_w = Hp // window_size, Wp // window_size
99 |
100 | x = x.reshape([B, num_h, window_size, num_w, window_size, C])
101 | windows = x.transpose([0, 1, 3, 2, 4, 5]).reshape(
102 | [-1, window_size, window_size, C])
103 | return windows, (Hp, Wp), (num_h, num_w)
104 |
105 |
106 | def window_unpartition(x, pad_hw, num_hw, hw):
107 | """
108 | Window unpartition into original sequences and removing padding.
109 | Args:
110 | x (tensor): input tokens with [B * num_windows, window_size, window_size, C].
111 | pad_hw (Tuple): padded height and width (Hp, Wp).
112 |         num_hw (Tuple): windows per axis (num_h, num_w); hw (Tuple): original height and width (H, W) before padding.
113 | Returns:
114 | x: unpartitioned sequences with [B, H, W, C].
115 | """
116 | Hp, Wp = pad_hw
117 | num_h, num_w = num_hw
118 | H, W = hw
119 | B, window_size, _, C = paddle.shape(x)
120 | B = B // (num_h * num_w)
121 | x = x.reshape([B, num_h, num_w, window_size, window_size, C])
122 | x = x.transpose([0, 1, 3, 2, 4, 5]).reshape([B, Hp, Wp, C])
123 |
124 | return x[:, :H, :W, :]
125 |
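126 | 
127 | # --- editorial usage sketch (not part of the original file). Stochastic depth:
128 | # --- with drop_prob=0.5 in training mode, each sample is either zeroed or
129 | # --- rescaled by 1/keep_prob so the expectation is preserved.
130 | if __name__ == '__main__':
131 |     x = paddle.ones([8, 4])
132 |     y = drop_path(x, drop_prob=0.5, training=True)
133 |     print(y[:, 0])  # each entry is either 0.0 or 2.0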
--------------------------------------------------------------------------------
/rtdetr_paddle/ppdet/utils/profiler.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import sys
16 | import paddle
17 |
18 | # A global variable to record the number of calling times for profiler
19 | # functions. It is used to specify the tracing range of training steps.
20 | _profiler_step_id = 0
21 |
22 | # A global variable to avoid parsing from string every time.
23 | _profiler_options = None
24 |
25 |
26 | class ProfilerOptions(object):
27 | '''
28 | Use a string to initialize a ProfilerOptions.
29 |     The string should be in the format: "key1=value1;key2=value2;key3=value3".
30 | For example:
31 | "profile_path=model.profile"
32 | "batch_range=[50, 60]; profile_path=model.profile"
33 | "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"
34 |
35 | ProfilerOptions supports following key-value pair:
36 |       batch_range - an integer list, e.g. [100, 110].
37 |       state - a string, the optional values are 'CPU', 'GPU' or 'All'.
38 |       sorted_key - a string, the optional values are 'calls', 'total',
39 |                    'max', 'min' or 'ave'.
40 | tracer_option - a string, the optional values are 'Default', 'OpDetail',
41 | 'AllOpDetail'.
42 | profile_path - a string, the path to save the serialized profile data,
43 | which can be used to generate a timeline.
44 | exit_on_finished - a boolean.
45 | '''
46 |
47 | def __init__(self, options_str):
48 | assert isinstance(options_str, str)
49 |
50 | self._options = {
51 | 'batch_range': [10, 20],
52 | 'state': 'All',
53 | 'sorted_key': 'total',
54 | 'tracer_option': 'Default',
55 | 'profile_path': '/tmp/profile',
56 | 'exit_on_finished': True
57 | }
58 | self._parse_from_string(options_str)
59 |
60 | def _parse_from_string(self, options_str):
61 | for kv in options_str.replace(' ', '').split(';'):
62 | key, value = kv.split('=')
63 | if key == 'batch_range':
64 | value_list = value.replace('[', '').replace(']', '').split(',')
65 | value_list = list(map(int, value_list))
66 | if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
67 | 1] > value_list[0]:
68 | self._options[key] = value_list
69 | elif key == 'exit_on_finished':
70 | self._options[key] = value.lower() in ("yes", "true", "t", "1")
71 | elif key in [
72 | 'state', 'sorted_key', 'tracer_option', 'profile_path'
73 | ]:
74 | self._options[key] = value
75 |
76 | def __getitem__(self, name):
77 | if self._options.get(name, None) is None:
78 | raise ValueError(
79 | "ProfilerOptions does not have an option named %s." % name)
80 | return self._options[name]
81 |
82 |
83 | def add_profiler_step(options_str=None):
84 | '''
85 | Enable the operator-level timing using PaddlePaddle's profiler.
86 |     The profiler uses an independent variable to count the profiler steps.
87 | One call of this function is treated as a profiler step.
88 |
89 | Args:
90 |         options_str - a string to initialize the ProfilerOptions.
91 | Default is None, and the profiler is disabled.
92 | '''
93 | if options_str is None:
94 | return
95 |
96 | global _profiler_step_id
97 | global _profiler_options
98 |
99 | if _profiler_options is None:
100 | _profiler_options = ProfilerOptions(options_str)
101 |
102 | if _profiler_step_id == _profiler_options['batch_range'][0]:
103 | paddle.utils.profiler.start_profiler(_profiler_options['state'],
104 | _profiler_options['tracer_option'])
105 | elif _profiler_step_id == _profiler_options['batch_range'][1]:
106 | paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
107 | _profiler_options['profile_path'])
108 | if _profiler_options['exit_on_finished']:
109 | sys.exit(0)
110 |
111 | _profiler_step_id += 1
112 |
--------------------------------------------------------------------------------
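
A minimal usage sketch for the hook above; the training loop and option values are illustrative:

from ppdet.utils.profiler import add_profiler_step

options = 'batch_range=[10, 20]; state=GPU; profile_path=/tmp/rtdetr.profile'
for step, batch in enumerate(loader):  # loader assumed to exist
    add_profiler_step(options)         # starts profiling at step 10, stops and dumps at step 20
    ...                                # forward / backward / optimizer step
# note: with the default exit_on_finished=True, the process exits right after dumping
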
/rtdetr_pytorch/src/data/transforms.py:
--------------------------------------------------------------------------------
1 | """"by lyuwenyu
2 | """
3 |
4 |
5 | import torch
6 | import torch.nn as nn
7 |
8 | import torchvision
9 | torchvision.disable_beta_transforms_warning()
10 | from torchvision import datapoints
11 |
12 | import torchvision.transforms.v2 as T
13 | import torchvision.transforms.v2.functional as F
14 |
15 | from PIL import Image
16 | from typing import Any, Dict, List, Optional
17 |
18 | from src.core import register, GLOBAL_CONFIG
19 |
20 |
21 | __all__ = ['Compose', ]
22 |
23 |
24 | RandomPhotometricDistort = register(T.RandomPhotometricDistort)
25 | RandomZoomOut = register(T.RandomZoomOut)
26 | # RandomIoUCrop = register(T.RandomIoUCrop)
27 | RandomHorizontalFlip = register(T.RandomHorizontalFlip)
28 | Resize = register(T.Resize)
29 | ToImageTensor = register(T.ToImageTensor)
30 | ConvertDtype = register(T.ConvertDtype)
31 | SanitizeBoundingBox = register(T.SanitizeBoundingBox)
32 | RandomCrop = register(T.RandomCrop)
33 | Normalize = register(T.Normalize)
34 |
35 |
36 |
37 | @register
38 | class Compose(T.Compose):
39 | def __init__(self, ops) -> None:
40 | transforms = []
41 | if ops is not None:
42 | for op in ops:
43 | if isinstance(op, dict):
44 | name = op.pop('type')
45 |                     transform = getattr(GLOBAL_CONFIG[name]['_pymodule'], name)(**op)
46 |                     transforms.append(transform)
47 | # op['type'] = name
48 | elif isinstance(op, nn.Module):
49 | transforms.append(op)
50 |
51 |                 else:
52 |                     raise ValueError(f'unsupported transform op: {op}')
53 | else:
54 |             transforms = [EmptyTransform(), ]
55 |
56 | super().__init__(transforms=transforms)
57 |
58 |
59 | @register
60 | class EmptyTransform(T.Transform):
61 | def __init__(self, ) -> None:
62 | super().__init__()
63 |
64 | def forward(self, *inputs):
65 | inputs = inputs if len(inputs) > 1 else inputs[0]
66 | return inputs
67 |
68 |
69 | @register
70 | class PadToSize(T.Pad):
71 | _transformed_types = (
72 | Image.Image,
73 | datapoints.Image,
74 | datapoints.Video,
75 | datapoints.Mask,
76 | datapoints.BoundingBox,
77 | )
78 | def _get_params(self, flat_inputs: List[Any]) -> Dict[str, Any]:
79 | sz = F.get_spatial_size(flat_inputs[0])
80 | h, w = self.spatial_size[0] - sz[0], self.spatial_size[1] - sz[1]
81 | self.padding = [0, 0, w, h]
82 | return dict(padding=self.padding)
83 |
84 | def __init__(self, spatial_size, fill=0, padding_mode='constant') -> None:
85 | if isinstance(spatial_size, int):
86 | spatial_size = (spatial_size, spatial_size)
87 |
88 | self.spatial_size = spatial_size
89 | super().__init__(0, fill, padding_mode)
90 |
91 | def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
92 | fill = self._fill[type(inpt)]
93 | padding = params['padding']
94 | return F.pad(inpt, padding=padding, fill=fill, padding_mode=self.padding_mode) # type: ignore[arg-type]
95 |
96 | def __call__(self, *inputs: Any) -> Any:
97 | outputs = super().forward(*inputs)
98 | if len(outputs) > 1 and isinstance(outputs[1], dict):
99 | outputs[1]['padding'] = torch.tensor(self.padding)
100 | return outputs
101 |
102 |
103 | @register
104 | class RandomIoUCrop(T.RandomIoUCrop):
105 | def __init__(self, min_scale: float = 0.3, max_scale: float = 1, min_aspect_ratio: float = 0.5, max_aspect_ratio: float = 2, sampler_options: Optional[List[float]] = None, trials: int = 40, p: float = 1.0):
106 | super().__init__(min_scale, max_scale, min_aspect_ratio, max_aspect_ratio, sampler_options, trials)
107 | self.p = p
108 |
109 | def __call__(self, *inputs: Any) -> Any:
110 | if torch.rand(1) >= self.p:
111 | return inputs if len(inputs) > 1 else inputs[0]
112 |
113 | return super().forward(*inputs)
114 |
115 |
116 | @register
117 | class ConvertBox(T.Transform):
118 | _transformed_types = (
119 | datapoints.BoundingBox,
120 | )
121 | def __init__(self, out_fmt='', normalize=False) -> None:
122 | super().__init__()
123 | self.out_fmt = out_fmt
124 | self.normalize = normalize
125 |
126 | self.data_fmt = {
127 | 'xyxy': datapoints.BoundingBoxFormat.XYXY,
128 | 'cxcywh': datapoints.BoundingBoxFormat.CXCYWH
129 | }
130 |
131 | def _transform(self, inpt: Any, params: Dict[str, Any]) -> Any:
132 | if self.out_fmt:
133 | spatial_size = inpt.spatial_size
134 | in_fmt = inpt.format.value.lower()
135 | inpt = torchvision.ops.box_convert(inpt, in_fmt=in_fmt, out_fmt=self.out_fmt)
136 | inpt = datapoints.BoundingBox(inpt, format=self.data_fmt[self.out_fmt], spatial_size=spatial_size)
137 |
138 | if self.normalize:
139 | inpt = inpt / torch.tensor(inpt.spatial_size[::-1]).tile(2)[None]
140 |
141 | return inpt
142 |
143 |
--------------------------------------------------------------------------------
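
A sketch of driving `Compose` with config-style op dicts; the op names must resolve through GLOBAL_CONFIG as registered above, and the kwargs follow torchvision's v2 transforms (values illustrative):

from src.data.transforms import Compose

ops = [
    {'type': 'RandomPhotometricDistort', 'p': 0.8},
    {'type': 'RandomHorizontalFlip', 'p': 0.5},
    {'type': 'Resize', 'size': [640, 640]},
    {'type': 'ToImageTensor'},
    {'type': 'ConvertDtype'},
]
transforms = Compose(ops)
# image, target = transforms(image, target)  # datapoints-aware, like any T.Compose
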
/rtdetr_pytorch/tools/export_onnx.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import os
5 | import sys
6 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), '..'))
7 |
8 | import argparse
9 | import numpy as np
10 |
11 | from src.core import YAMLConfig
12 |
13 | import torch
14 | import torch.nn as nn
15 |
16 |
17 | def main(args, ):
18 | """main
19 | """
20 | cfg = YAMLConfig(args.config, resume=args.resume)
21 |
22 | if args.resume:
23 | checkpoint = torch.load(args.resume, map_location='cpu')
24 | if 'ema' in checkpoint:
25 | state = checkpoint['ema']['module']
26 | else:
27 | state = checkpoint['model']
28 | else:
29 |         raise AttributeError('Only loading model.state_dict via --resume is supported for now.')
30 |
31 | # NOTE load train mode state -> convert to deploy mode
32 | cfg.model.load_state_dict(state)
33 |
34 | class Model(nn.Module):
35 | def __init__(self, ) -> None:
36 | super().__init__()
37 | self.model = cfg.model.deploy()
38 | self.postprocessor = cfg.postprocessor.deploy()
39 | print(self.postprocessor.deploy_mode)
40 |
41 | def forward(self, images, orig_target_sizes):
42 | outputs = self.model(images)
43 | return self.postprocessor(outputs, orig_target_sizes)
44 |
45 |
46 | model = Model()
47 |
48 | dynamic_axes = {
49 | 'images': {0: 'N', },
50 | 'orig_target_sizes': {0: 'N'}
51 | }
52 |
53 | data = torch.rand(1, 3, 640, 640)
54 | size = torch.tensor([[640, 640]])
55 |
56 | torch.onnx.export(
57 | model,
58 | (data, size),
59 | args.file_name,
60 | input_names=['images', 'orig_target_sizes'],
61 | output_names=['labels', 'boxes', 'scores'],
62 | dynamic_axes=dynamic_axes,
63 | opset_version=16,
64 | verbose=False
65 | )
66 |
67 |
68 | if args.check:
69 | import onnx
70 | onnx_model = onnx.load(args.file_name)
71 | onnx.checker.check_model(onnx_model)
72 | print('Check export onnx model done...')
73 |
74 |
75 | if args.simplify:
76 |         import onnx, onnxsim  # import onnx here too, in case --check was not passed
77 | dynamic = True
78 | input_shapes = {'images': data.shape, 'orig_target_sizes': size.shape} if dynamic else None
79 | onnx_model_simplify, check = onnxsim.simplify(args.file_name, input_shapes=input_shapes, dynamic_input_shape=dynamic)
80 | onnx.save(onnx_model_simplify, args.file_name)
81 | print(f'Simplify onnx model {check}...')
82 |
83 |
84 | # import onnxruntime as ort
85 | # from PIL import Image, ImageDraw, ImageFont
86 | # from torchvision.transforms import ToTensor
87 | # from src.data.coco.coco_dataset import mscoco_category2name, mscoco_category2label, mscoco_label2category
88 |
89 | # # print(onnx.helper.printable_graph(mm.graph))
90 |
91 | # # Load the original image without resizing
92 | # original_im = Image.open('./hongkong.jpg').convert('RGB')
93 | # original_size = original_im.size
94 |
95 | # # Resize the image for model input
96 | # im = original_im.resize((640, 640))
97 | # im_data = ToTensor()(im)[None]
98 | # print(im_data.shape)
99 |
100 | # sess = ort.InferenceSession(args.file_name)
101 | # output = sess.run(
102 | # # output_names=['labels', 'boxes', 'scores'],
103 | # output_names=None,
104 | # input_feed={'images': im_data.data.numpy(), "orig_target_sizes": size.data.numpy()}
105 | # )
106 |
107 | # # print(type(output))
108 | # # print([out.shape for out in output])
109 |
110 | # labels, boxes, scores = output
111 |
112 | # draw = ImageDraw.Draw(original_im) # Draw on the original image
113 | # thrh = 0.6
114 |
115 | # for i in range(im_data.shape[0]):
116 |
117 | # scr = scores[i]
118 | # lab = labels[i][scr > thrh]
119 | # box = boxes[i][scr > thrh]
120 |
121 | # print(i, sum(scr > thrh))
122 |
123 | # for b, l in zip(box, lab):
124 | # # Scale the bounding boxes back to the original image size
125 | # b = [coord * original_size[j % 2] / 640 for j, coord in enumerate(b)]
126 | # # Get the category name from the label
127 | # category_name = mscoco_category2name[mscoco_label2category[l]]
128 | # draw.rectangle(list(b), outline='red', width=2)
129 | # font = ImageFont.truetype("Arial.ttf", 15)
130 | # draw.text((b[0], b[1]), text=category_name, fill='yellow', font=font)
131 |
132 | # # Save the original image with bounding boxes
133 | # original_im.save('test.jpg')
134 |
135 |
136 | if __name__ == '__main__':
137 |
138 | parser = argparse.ArgumentParser()
139 | parser.add_argument('--config', '-c', type=str, )
140 | parser.add_argument('--resume', '-r', type=str, )
141 | parser.add_argument('--file-name', '-f', type=str, default='model.onnx')
142 | parser.add_argument('--check', action='store_true', default=False,)
143 | parser.add_argument('--simplify', action='store_true', default=False,)
144 |
145 | args = parser.parse_args()
146 |
147 | main(args)
148 |
--------------------------------------------------------------------------------
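
The commented-out block above already sketches end-to-end image inference; below is a shorter sketch that exercises the dynamic batch axis of the exported model (file name and dtypes are assumptions):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('model.onnx')
images = np.random.rand(2, 3, 640, 640).astype(np.float32)  # N=2 hits the dynamic 'N' axis
sizes = np.array([[640, 640], [640, 640]], dtype=np.int64)
labels, boxes, scores = sess.run(None, {'images': images, 'orig_target_sizes': sizes})
print(labels.shape, boxes.shape, scores.shape)
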
/rtdetr_paddle/ppdet/modeling/architectures/meta_arch.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | import paddle
7 | import paddle.nn as nn
8 | import typing
9 |
10 | from ppdet.core.workspace import register
11 | from ppdet.modeling.post_process import nms
12 |
13 | __all__ = ['BaseArch']
14 |
15 |
16 | @register
17 | class BaseArch(nn.Layer):
18 | def __init__(self, data_format='NCHW', use_extra_data=False):
19 | super(BaseArch, self).__init__()
20 | self.data_format = data_format
21 | self.inputs = {}
22 | self.fuse_norm = False
23 | self.use_extra_data = use_extra_data
24 |
25 | def load_meanstd(self, cfg_transform):
26 | scale = 1.
27 | mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)
28 | std = np.array([0.229, 0.224, 0.225], dtype=np.float32)
29 | for item in cfg_transform:
30 | if 'NormalizeImage' in item:
31 | mean = np.array(
32 | item['NormalizeImage']['mean'], dtype=np.float32)
33 | std = np.array(item['NormalizeImage']['std'], dtype=np.float32)
34 | if item['NormalizeImage'].get('is_scale', True):
35 | scale = 1. / 255.
36 | break
37 | if self.data_format == 'NHWC':
38 | self.scale = paddle.to_tensor(scale / std).reshape((1, 1, 1, 3))
39 | self.bias = paddle.to_tensor(-mean / std).reshape((1, 1, 1, 3))
40 | else:
41 | self.scale = paddle.to_tensor(scale / std).reshape((1, 3, 1, 1))
42 | self.bias = paddle.to_tensor(-mean / std).reshape((1, 3, 1, 1))
43 |
44 | def forward(self, inputs):
45 | if self.data_format == 'NHWC':
46 | image = inputs['image']
47 | inputs['image'] = paddle.transpose(image, [0, 2, 3, 1])
48 |
49 | if self.fuse_norm:
50 | image = inputs['image']
51 | self.inputs['image'] = image * self.scale + self.bias
52 | self.inputs['im_shape'] = inputs['im_shape']
53 | self.inputs['scale_factor'] = inputs['scale_factor']
54 | else:
55 | self.inputs = inputs
56 |
57 | self.model_arch()
58 |
59 | if self.training:
60 | out = self.get_loss()
61 | else:
62 | inputs_list = []
63 | # multi-scale input
64 | if not isinstance(inputs, typing.Sequence):
65 | inputs_list.append(inputs)
66 | else:
67 | inputs_list.extend(inputs)
68 | outs = []
69 | for inp in inputs_list:
70 | if self.fuse_norm:
71 | self.inputs['image'] = inp['image'] * self.scale + self.bias
72 | self.inputs['im_shape'] = inp['im_shape']
73 | self.inputs['scale_factor'] = inp['scale_factor']
74 | else:
75 | self.inputs = inp
76 | outs.append(self.get_pred())
77 |
78 | # multi-scale test
79 | if len(outs) > 1:
80 | out = self.merge_multi_scale_predictions(outs)
81 | else:
82 | out = outs[0]
83 | return out
84 |
85 | def merge_multi_scale_predictions(self, outs):
86 |         # default values for architectures not included in the following list
87 | num_classes = 80
88 | nms_threshold = 0.5
89 | keep_top_k = 100
90 |
91 | if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
92 | num_classes = self.bbox_head.num_classes
93 | keep_top_k = self.bbox_post_process.nms.keep_top_k
94 | nms_threshold = self.bbox_post_process.nms.nms_threshold
95 | else:
96 | raise Exception(
97 | "Multi scale test only supports CascadeRCNN, FasterRCNN and MaskRCNN for now"
98 | )
99 |
100 | final_boxes = []
101 | all_scale_outs = paddle.concat([o['bbox'] for o in outs]).numpy()
102 | for c in range(num_classes):
103 | idxs = all_scale_outs[:, 0] == c
104 | if np.count_nonzero(idxs) == 0:
105 | continue
106 | r = nms(all_scale_outs[idxs, 1:], nms_threshold)
107 | final_boxes.append(
108 | np.concatenate([np.full((r.shape[0], 1), c), r], 1))
109 | out = np.concatenate(final_boxes)
110 | out = np.concatenate(sorted(
111 | out, key=lambda e: e[1])[-keep_top_k:]).reshape((-1, 6))
112 | out = {
113 | 'bbox': paddle.to_tensor(out),
114 | 'bbox_num': paddle.to_tensor(np.array([out.shape[0], ]))
115 | }
116 |
117 | return out
118 |
119 | def build_inputs(self, data, input_def):
120 | inputs = {}
121 | for i, k in enumerate(input_def):
122 | inputs[k] = data[i]
123 | return inputs
124 |
125 | def model_arch(self, ):
126 | pass
127 |
128 | def get_loss(self, ):
129 | raise NotImplementedError("Should implement get_loss method!")
130 |
131 | def get_pred(self, ):
132 | raise NotImplementedError("Should implement get_pred method!")
133 |
--------------------------------------------------------------------------------
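
A minimal sketch of a concrete architecture built on `BaseArch` (the class here is hypothetical; real architectures such as DETR in this repo follow the same get_loss/get_pred contract):

import paddle
from ppdet.core.workspace import register
from ppdet.modeling.architectures.meta_arch import BaseArch

@register
class ToyArch(BaseArch):
    def get_loss(self):
        # training branch: return a dict of named losses
        return {'loss': self.inputs['image'].mean()}

    def get_pred(self):
        # eval branch: return detections as {'bbox': ..., 'bbox_num': ...}
        bbox = paddle.zeros([1, 6])  # rows of [class, score, x1, y1, x2, y2]
        return {'bbox': bbox, 'bbox_num': paddle.to_tensor([1])}
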
/rtdetr_pytorch/src/zoo/rtdetr/matcher.py:
--------------------------------------------------------------------------------
1 | """
2 | Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
3 | Modules to compute the matching cost and solve the corresponding LSAP.
4 |
5 | by lyuwenyu
6 | """
7 |
8 | import torch
9 | import torch.nn.functional as F
10 |
11 | from scipy.optimize import linear_sum_assignment
12 | from torch import nn
13 |
14 | from .box_ops import box_cxcywh_to_xyxy, generalized_box_iou
15 |
16 | from src.core import register
17 |
18 |
19 | @register
20 | class HungarianMatcher(nn.Module):
21 | """This class computes an assignment between the targets and the predictions of the network
22 |
23 | For efficiency reasons, the targets don't include the no_object. Because of this, in general,
24 | there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
25 | while the others are un-matched (and thus treated as non-objects).
26 | """
27 |
28 | __share__ = ['use_focal_loss', ]
29 |
30 | def __init__(self, weight_dict, use_focal_loss=False, alpha=0.25, gamma=2.0):
31 |         """Creates the matcher
32 | 
33 |         Params:
34 |             weight_dict: dict with the relative weights of the matching costs:
35 |                 'cost_class' for the classification error, 'cost_bbox' for the L1 error of the
36 |                 bounding box coordinates, and 'cost_giou' for the giou loss of the bounding box
37 |         """
38 | super().__init__()
39 | self.cost_class = weight_dict['cost_class']
40 | self.cost_bbox = weight_dict['cost_bbox']
41 | self.cost_giou = weight_dict['cost_giou']
42 |
43 | self.use_focal_loss = use_focal_loss
44 | self.alpha = alpha
45 | self.gamma = gamma
46 |
47 |         assert self.cost_class != 0 or self.cost_bbox != 0 or self.cost_giou != 0, "all costs can't be 0"
48 |
49 | @torch.no_grad()
50 | def forward(self, outputs, targets):
51 | """ Performs the matching
52 |
53 | Params:
54 | outputs: This is a dict that contains at least these entries:
55 | "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
56 | "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
57 |
58 | targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
59 | "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
60 | objects in the target) containing the class labels
61 | "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
62 |
63 | Returns:
64 | A list of size batch_size, containing tuples of (index_i, index_j) where:
65 | - index_i is the indices of the selected predictions (in order)
66 | - index_j is the indices of the corresponding selected targets (in order)
67 | For each batch element, it holds:
68 | len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
69 | """
70 | bs, num_queries = outputs["pred_logits"].shape[:2]
71 |
72 | # We flatten to compute the cost matrices in a batch
73 | if self.use_focal_loss:
74 | out_prob = F.sigmoid(outputs["pred_logits"].flatten(0, 1))
75 | else:
76 | out_prob = outputs["pred_logits"].flatten(0, 1).softmax(-1) # [batch_size * num_queries, num_classes]
77 |
78 | out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
79 |
80 | # Also concat the target labels and boxes
81 | tgt_ids = torch.cat([v["labels"] for v in targets])
82 | tgt_bbox = torch.cat([v["boxes"] for v in targets])
83 |
84 | # Compute the classification cost. Contrary to the loss, we don't use the NLL,
85 | # but approximate it in 1 - proba[target class].
86 |         # The 1 is a constant that doesn't change the matching, it can be omitted.
87 | if self.use_focal_loss:
88 | out_prob = out_prob[:, tgt_ids]
89 | neg_cost_class = (1 - self.alpha) * (out_prob**self.gamma) * (-(1 - out_prob + 1e-8).log())
90 | pos_cost_class = self.alpha * ((1 - out_prob)**self.gamma) * (-(out_prob + 1e-8).log())
91 | cost_class = pos_cost_class - neg_cost_class
92 | else:
93 | cost_class = -out_prob[:, tgt_ids]
94 |
95 | # Compute the L1 cost between boxes
96 | cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
97 |
98 |         # Compute the giou cost between boxes
99 | cost_giou = -generalized_box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
100 |
101 | # Final cost matrix
102 | C = self.cost_bbox * cost_bbox + self.cost_class * cost_class + self.cost_giou * cost_giou
103 | C = C.view(bs, num_queries, -1).cpu()
104 |
105 | sizes = [len(v["boxes"]) for v in targets]
106 | indices = [linear_sum_assignment(c[i]) for i, c in enumerate(C.split(sizes, -1))]
107 |
108 | return [(torch.as_tensor(i, dtype=torch.int64), torch.as_tensor(j, dtype=torch.int64)) for i, j in indices]
109 |
--------------------------------------------------------------------------------
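
A usage sketch with dummy tensors (the cost weights 2/5/2 mirror common DETR-style settings but are illustrative; the matcher is constructed directly here):

import torch
from src.zoo.rtdetr.matcher import HungarianMatcher

matcher = HungarianMatcher(
    weight_dict={'cost_class': 2, 'cost_bbox': 5, 'cost_giou': 2},
    use_focal_loss=True)
outputs = {
    'pred_logits': torch.randn(1, 300, 80),  # [bs, num_queries, num_classes]
    'pred_boxes': torch.rand(1, 300, 4),     # cxcywh, normalized to [0, 1]
}
targets = [{'labels': torch.tensor([3, 17]), 'boxes': torch.rand(2, 4)}]
indices = matcher(outputs, targets)
# indices[0] is a (pred_idx, tgt_idx) pair of int64 tensors, each of length 2
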
/rtdetr_paddle/ppdet/utils/check.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import division
17 | from __future__ import print_function
18 |
19 | import sys
20 |
21 | import paddle
22 | import six
23 | import paddle.version as paddle_version
24 |
25 | from .logger import setup_logger
26 | logger = setup_logger(__name__)
27 |
28 | __all__ = [
29 | 'check_gpu', 'check_npu', 'check_xpu', 'check_mlu', 'check_version',
30 | 'check_config'
31 | ]
32 |
33 |
34 | def check_mlu(use_mlu):
35 | """
36 |     Log an error and exit when use_mlu is set to true in the
37 |     paddlepaddle cpu/gpu/xpu/npu version.
38 | """
39 | err = "Config use_mlu cannot be set as true while you are " \
40 | "using paddlepaddle cpu/gpu/xpu/npu version ! \nPlease try: \n" \
41 | "\t1. Install paddlepaddle-mlu to run model on MLU \n" \
42 | "\t2. Set use_mlu as false in config file to run " \
43 | "model on CPU/GPU/XPU/NPU"
44 |
45 | try:
46 | if use_mlu and not paddle.is_compiled_with_mlu():
47 | logger.error(err)
48 | sys.exit(1)
49 | except Exception as e:
50 | pass
51 |
52 |
53 | def check_npu(use_npu):
54 | """
55 |     Log an error and exit when use_npu is set to true in a
56 |     paddlepaddle version without paddle-custom-npu installed.
57 | """
58 | err = "Config use_npu cannot be set as true while you are " \
59 | "using paddlepaddle version without paddle-custom-npu " \
60 | "installed! \nPlease try: \n" \
61 | "\t1. Install paddle-custom-npu to run model on NPU \n" \
62 | "\t2. Set use_npu as false in config file to run " \
63 | "model on other devices supported."
64 |
65 | try:
66 |         if use_npu and 'npu' not in paddle.device.get_all_custom_device_type():
67 | logger.error(err)
68 | sys.exit(1)
69 | except Exception as e:
70 | pass
71 |
72 |
73 | def check_xpu(use_xpu):
74 | """
75 |     Log an error and exit when use_xpu is set to true in the
76 |     paddlepaddle cpu/gpu/npu version.
77 | """
78 | err = "Config use_xpu cannot be set as true while you are " \
79 | "using paddlepaddle cpu/gpu/npu version ! \nPlease try: \n" \
80 | "\t1. Install paddlepaddle-xpu to run model on XPU \n" \
81 | "\t2. Set use_xpu as false in config file to run " \
82 | "model on CPU/GPU/NPU"
83 |
84 | try:
85 | if use_xpu and not paddle.is_compiled_with_xpu():
86 | logger.error(err)
87 | sys.exit(1)
88 | except Exception as e:
89 | pass
90 |
91 |
92 | def check_gpu(use_gpu):
93 | """
94 |     Log an error and exit when use_gpu is set to true in the
95 |     paddlepaddle cpu version.
96 | """
97 | err = "Config use_gpu cannot be set as true while you are " \
98 | "using paddlepaddle cpu version ! \nPlease try: \n" \
99 | "\t1. Install paddlepaddle-gpu to run model on GPU \n" \
100 | "\t2. Set use_gpu as false in config file to run " \
101 | "model on CPU"
102 |
103 | try:
104 | if use_gpu and not paddle.is_compiled_with_cuda():
105 | logger.error(err)
106 | sys.exit(1)
107 | except Exception as e:
108 | pass
109 |
110 |
111 | def check_version(version='2.2'):
112 | """
113 |     Log an error and exit when the installed version of paddlepaddle
114 |     does not satisfy the requirement.
115 | """
116 | err = "PaddlePaddle version {} or higher is required, " \
117 | "or a suitable develop version is satisfied as well. \n" \
118 | "Please make sure the version is good with your code.".format(version)
119 |
120 | version_installed = [
121 | paddle_version.major, paddle_version.minor, paddle_version.patch,
122 | paddle_version.rc
123 | ]
124 |
125 | if version_installed == ['0', '0', '0', '0']:
126 | return
127 |
128 | version_split = version.split('.')
129 |
130 | length = min(len(version_installed), len(version_split))
131 | for i in six.moves.range(length):
132 |         if int(version_installed[i]) > int(version_split[i]):
133 |             return
134 |         if int(version_installed[i]) < int(version_split[i]):
135 |             raise Exception(err)
136 |
137 |
138 | def check_config(cfg):
139 | """
140 |     Check the correctness of the configuration file. Log an error and
141 |     exit when the config is not compliant.
142 | """
143 | err = "'{}' not specified in config file. Please set it in config file."
144 | check_list = ['architecture', 'num_classes']
145 | try:
146 | for var in check_list:
147 |             if var not in cfg:
148 | logger.error(err.format(var))
149 | sys.exit(1)
150 | except Exception as e:
151 | pass
152 |
153 | if 'log_iter' not in cfg:
154 | cfg.log_iter = 20
155 |
156 | return cfg
157 |
--------------------------------------------------------------------------------
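
A sketch of how these guards are typically invoked at startup (config values illustrative):

from ppdet.utils.check import check_config, check_gpu, check_version

check_version('2.4')   # minimum paddle version; '2.4' is an example
check_gpu(True)        # exits with an error on a CPU-only paddle build
# check_config(cfg)    # cfg is the loaded ppdet config, assumed built elsewhere
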
/rtdetr_pytorch/src/misc/dist.py:
--------------------------------------------------------------------------------
1 | """
2 | reference
3 | - https://github.com/pytorch/vision/blob/main/references/detection/utils.py
4 | - https://github.com/facebookresearch/detr/blob/master/util/misc.py#L406
5 |
6 | by lyuwenyu
7 | """
8 |
9 | import random
10 | import numpy as np
11 |
12 | import torch
13 | import torch.nn as nn
14 | import torch.distributed
15 | import torch.distributed as tdist
16 |
17 | from torch.nn.parallel import DistributedDataParallel as DDP
18 |
19 | from torch.utils.data import DistributedSampler
20 | from torch.utils.data.dataloader import DataLoader
21 |
22 |
23 | def init_distributed():
24 |     '''
25 |     distributed setup (env:// init method)
26 |     Returns:
27 |         bool: True if distributed mode was initialized
28 |     '''
29 | try:
30 | # # https://pytorch.org/docs/stable/elastic/run.html
31 | # LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))
32 | # RANK = int(os.getenv('RANK', -1))
33 | # WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
34 |
35 | tdist.init_process_group(init_method='env://', )
36 | torch.distributed.barrier()
37 |
38 | rank = get_rank()
39 | device = torch.device(f'cuda:{rank}')
40 | torch.cuda.set_device(device)
41 |
42 | setup_print(rank == 0)
43 | print('Initialized distributed mode...')
44 |
45 | return True
46 |
47 |     except Exception:
48 | print('Not init distributed mode.')
49 | return False
50 |
51 |
52 | def setup_print(is_main):
53 | '''This function disables printing when not in master process
54 | '''
55 | import builtins as __builtin__
56 | builtin_print = __builtin__.print
57 |
58 | def print(*args, **kwargs):
59 | force = kwargs.pop('force', False)
60 | if is_main or force:
61 | builtin_print(*args, **kwargs)
62 |
63 | __builtin__.print = print
64 |
65 |
66 | def is_dist_available_and_initialized():
67 | if not tdist.is_available():
68 | return False
69 | if not tdist.is_initialized():
70 | return False
71 | return True
72 |
73 |
74 | def get_rank():
75 | if not is_dist_available_and_initialized():
76 | return 0
77 | return tdist.get_rank()
78 |
79 |
80 | def get_world_size():
81 | if not is_dist_available_and_initialized():
82 | return 1
83 | return tdist.get_world_size()
84 |
85 |
86 | def is_main_process():
87 | return get_rank() == 0
88 |
89 |
90 | def save_on_master(*args, **kwargs):
91 | if is_main_process():
92 | torch.save(*args, **kwargs)
93 |
94 |
95 |
96 | def warp_model(model, find_unused_parameters=False, sync_bn=False,):
97 | if is_dist_available_and_initialized():
98 | rank = get_rank()
99 | model = nn.SyncBatchNorm.convert_sync_batchnorm(model) if sync_bn else model
100 | model = DDP(model, device_ids=[rank], output_device=rank, find_unused_parameters=find_unused_parameters)
101 | return model
102 |
103 |
104 | def warp_loader(loader, shuffle=False):
105 | if is_dist_available_and_initialized():
106 | sampler = DistributedSampler(loader.dataset, shuffle=shuffle)
107 | loader = DataLoader(loader.dataset,
108 | loader.batch_size,
109 | sampler=sampler,
110 | drop_last=loader.drop_last,
111 | collate_fn=loader.collate_fn,
112 | pin_memory=loader.pin_memory,
113 | num_workers=loader.num_workers, )
114 | return loader
115 |
116 |
117 |
118 | def is_parallel(model) -> bool:
119 | # Returns True if model is of type DP or DDP
120 | return type(model) in (torch.nn.parallel.DataParallel, torch.nn.parallel.DistributedDataParallel)
121 |
122 |
123 | def de_parallel(model) -> nn.Module:
124 | # De-parallelize a model: returns single-GPU model if model is of type DP or DDP
125 | return model.module if is_parallel(model) else model
126 |
127 |
128 | def reduce_dict(data, avg=True):
129 | '''
130 | Args
131 | data dict: input, {k: v, ...}
132 | avg bool: true
133 | '''
134 | world_size = get_world_size()
135 | if world_size < 2:
136 | return data
137 |
138 | with torch.no_grad():
139 | keys, values = [], []
140 | for k in sorted(data.keys()):
141 | keys.append(k)
142 | values.append(data[k])
143 |
144 | values = torch.stack(values, dim=0)
145 | tdist.all_reduce(values)
146 |
147 | if avg is True:
148 | values /= world_size
149 |
150 | _data = {k: v for k, v in zip(keys, values)}
151 |
152 | return _data
153 |
154 |
155 |
156 | def all_gather(data):
157 | """
158 | Run all_gather on arbitrary picklable data (not necessarily tensors)
159 | Args:
160 | data: any picklable object
161 | Returns:
162 | list[data]: list of data gathered from each rank
163 | """
164 | world_size = get_world_size()
165 | if world_size == 1:
166 | return [data]
167 | data_list = [None] * world_size
168 | tdist.all_gather_object(data_list, data)
169 | return data_list
170 |
171 |
172 | import time
173 | def sync_time():
174 | '''sync_time
175 | '''
176 | if torch.cuda.is_available():
177 | torch.cuda.synchronize()
178 |
179 | return time.time()
180 |
181 |
182 |
183 | def set_seed(seed):
184 | # fix the seed for reproducibility
185 | seed = seed + get_rank()
186 | torch.manual_seed(seed)
187 | np.random.seed(seed)
188 | random.seed(seed)
189 |
190 |
191 |
--------------------------------------------------------------------------------
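
A usage sketch, assuming the process is launched with torchrun so env:// rendezvous variables are set; the import path, `model`, and `loader` are assumed:

from src.misc import dist

dist.init_distributed()   # falls back to single-process mode if rendezvous fails
dist.set_seed(42)         # per-rank seed offset for reproducibility
model = dist.warp_model(model, sync_bn=True)
loader = dist.warp_loader(loader, shuffle=True)
# ... training loop ...
dist.save_on_master(dist.de_parallel(model).state_dict(), 'checkpoint.pth')
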
/rtdetr_paddle/ppdet/modeling/transformers/ext_op/test_ms_deformable_attn_op.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from __future__ import absolute_import
16 | from __future__ import print_function
17 | from __future__ import division
18 |
19 | import os
20 | import sys
21 | import random
22 | import numpy as np
23 | import paddle
24 | # add python path of PaddleDetection to sys.path
25 | parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 5)))
26 | if parent_path not in sys.path:
27 | sys.path.append(parent_path)
28 |
29 | from ppdet.modeling.transformers.utils import deformable_attention_core_func
30 | ms_deform_attn_core_paddle = deformable_attention_core_func
31 |
32 | try:
33 | gpu_index = int(sys.argv[1])
34 | except (IndexError, ValueError):
35 | gpu_index = 0
36 | print(f'Use gpu {gpu_index} to test...')
37 | paddle.set_device(f'gpu:{gpu_index}')
38 |
39 | try:
40 | from deformable_detr_ops import ms_deformable_attn
41 | except Exception as e:
42 | print('import deformable_detr_ops error', e)
43 | sys.exit(-1)
44 |
45 | paddle.seed(1)
46 | random.seed(1)
47 | np.random.seed(1)
48 |
49 | bs, n_heads, c = 2, 8, 8
50 | query_length, n_levels, n_points = 2, 2, 2
51 | spatial_shapes = paddle.to_tensor([(6, 4), (3, 2)], dtype=paddle.int64)
52 | level_start_index = paddle.concat((paddle.to_tensor(
53 | [0], dtype=paddle.int64), spatial_shapes.prod(1).cumsum(0)[:-1]))
54 | value_length = sum([(H * W).item() for H, W in spatial_shapes])
55 |
56 |
57 | def get_test_tensors(channels):
58 | value = paddle.rand(
59 | [bs, value_length, n_heads, channels], dtype=paddle.float32) * 0.01
60 | sampling_locations = paddle.rand(
61 | [bs, query_length, n_heads, n_levels, n_points, 2],
62 | dtype=paddle.float32)
63 | attention_weights = paddle.rand(
64 | [bs, query_length, n_heads, n_levels, n_points],
65 | dtype=paddle.float32) + 1e-5
66 | attention_weights /= attention_weights.sum(-1, keepdim=True).sum(
67 | -2, keepdim=True)
68 |
69 | return [value, sampling_locations, attention_weights]
70 |
71 |
72 | @paddle.no_grad()
73 | def check_forward_equal_with_paddle_float():
74 | value, sampling_locations, attention_weights = get_test_tensors(c)
75 |
76 | output_paddle = ms_deform_attn_core_paddle(
77 | value, spatial_shapes, level_start_index, sampling_locations,
78 | attention_weights).detach().cpu()
79 | output_cuda = ms_deformable_attn(value, spatial_shapes, level_start_index,
80 | sampling_locations,
81 | attention_weights).detach().cpu()
82 | fwdok = paddle.allclose(
83 | output_cuda, output_paddle, rtol=1e-2, atol=1e-3).item()
84 | max_abs_err = (output_cuda - output_paddle).abs().max().item()
85 | max_rel_err = (
86 | (output_cuda - output_paddle).abs() / output_paddle.abs()).max().item()
87 |
88 | print(
89 | f'*{fwdok} check_forward_equal_with_paddle_float: max_abs_err {max_abs_err:.2e} max_rel_err {max_rel_err:.2e}'
90 | )
91 |
92 |
93 | def check_gradient_numerical(channels=4):
94 | value_paddle, sampling_locations_paddle, attention_weights_paddle = get_test_tensors(
95 | channels)
96 | value_paddle.stop_gradient = False
97 | sampling_locations_paddle.stop_gradient = False
98 | attention_weights_paddle.stop_gradient = False
99 |
100 | value_cuda = value_paddle.detach().clone()
101 | sampling_locations_cuda = sampling_locations_paddle.detach().clone()
102 | attention_weights_cuda = attention_weights_paddle.detach().clone()
103 | value_cuda.stop_gradient = False
104 | sampling_locations_cuda.stop_gradient = False
105 | attention_weights_cuda.stop_gradient = False
106 |
107 | output_paddle = ms_deform_attn_core_paddle(
108 | value_paddle, spatial_shapes, level_start_index,
109 | sampling_locations_paddle, attention_weights_paddle)
110 | output_paddle.sum().backward()
111 |
112 | output_cuda = ms_deformable_attn(value_cuda, spatial_shapes,
113 | level_start_index, sampling_locations_cuda,
114 | attention_weights_cuda)
115 | output_cuda.sum().backward()
116 |
117 | res = paddle.allclose(
118 | value_paddle.grad, value_cuda.grad, rtol=1e-2, atol=1e-3).item()
119 | print(f'*tensor1 {res} check_gradient_numerical(D={channels})')
120 |
121 | res = paddle.allclose(
122 | sampling_locations_paddle.grad,
123 | sampling_locations_cuda.grad,
124 | rtol=1e-2,
125 | atol=1e-3).item()
126 | print(f'*tensor2 {res} check_gradient_numerical(D={channels})')
127 |
128 | res = paddle.allclose(
129 | attention_weights_paddle.grad,
130 | attention_weights_cuda.grad,
131 | rtol=1e-2,
132 | atol=1e-3).item()
133 | print(f'*tensor3 {res} check_gradient_numerical(D={channels})')
134 |
135 |
136 | if __name__ == '__main__':
137 | check_forward_equal_with_paddle_float()
138 |
139 | for channels in [30, 32, 64, 71, 128, 1024, 1025, 2048, 3096]:
140 | check_gradient_numerical(channels)
141 |
--------------------------------------------------------------------------------
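
Note that `deformable_detr_ops` only imports after the custom op has been compiled; a hedged sketch of the expected workflow, assuming the setup script in this directory follows the standard Paddle custom-op flow:

# from rtdetr_paddle/ppdet/modeling/transformers/ext_op/
#   python setup_ms_deformable_attn_op.py install   # build and install the CUDA op
#   python test_ms_deformable_attn_op.py 0          # argv[1] selects the GPU index
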
/rtdetr_pytorch/src/core/yaml_config.py:
--------------------------------------------------------------------------------
1 | """by lyuwenyu
2 | """
3 |
4 | import torch
5 | import torch.nn as nn
6 |
7 | import re
8 | import copy
9 |
10 | from .config import BaseConfig
11 | from .yaml_utils import load_config, merge_config, create, merge_dict
12 |
13 |
14 | class YAMLConfig(BaseConfig):
15 | def __init__(self, cfg_path: str, **kwargs) -> None:
16 | super().__init__()
17 |
18 | cfg = load_config(cfg_path)
19 | merge_dict(cfg, kwargs)
20 |
21 | # pprint(cfg)
22 |
23 | self.yaml_cfg = cfg
24 |
25 | self.log_step = cfg.get('log_step', 100)
26 | self.checkpoint_step = cfg.get('checkpoint_step', 1)
27 | self.epoches = cfg.get('epoches', -1)
28 | self.resume = cfg.get('resume', '')
29 | self.tuning = cfg.get('tuning', '')
30 | self.sync_bn = cfg.get('sync_bn', False)
31 | self.output_dir = cfg.get('output_dir', None)
32 |
33 | self.use_ema = cfg.get('use_ema', False)
34 | self.use_amp = cfg.get('use_amp', False)
35 | self.autocast = cfg.get('autocast', dict())
36 | self.find_unused_parameters = cfg.get('find_unused_parameters', None)
37 | self.clip_max_norm = cfg.get('clip_max_norm', 0.)
38 |
39 |
40 | @property
41 | def model(self, ) -> torch.nn.Module:
42 | if self._model is None and 'model' in self.yaml_cfg:
43 | merge_config(self.yaml_cfg)
44 | self._model = create(self.yaml_cfg['model'])
45 | return self._model
46 |
47 | @property
48 | def postprocessor(self, ) -> torch.nn.Module:
49 | if self._postprocessor is None and 'postprocessor' in self.yaml_cfg:
50 | merge_config(self.yaml_cfg)
51 | self._postprocessor = create(self.yaml_cfg['postprocessor'])
52 | return self._postprocessor
53 |
54 | @property
55 | def criterion(self, ):
56 | if self._criterion is None and 'criterion' in self.yaml_cfg:
57 | merge_config(self.yaml_cfg)
58 | self._criterion = create(self.yaml_cfg['criterion'])
59 | return self._criterion
60 |
61 |
62 | @property
63 | def optimizer(self, ):
64 | if self._optimizer is None and 'optimizer' in self.yaml_cfg:
65 | merge_config(self.yaml_cfg)
66 | params = self.get_optim_params(self.yaml_cfg['optimizer'], self.model)
67 | self._optimizer = create('optimizer', params=params)
68 |
69 | return self._optimizer
70 |
71 | @property
72 | def lr_scheduler(self, ):
73 | if self._lr_scheduler is None and 'lr_scheduler' in self.yaml_cfg:
74 | merge_config(self.yaml_cfg)
75 | self._lr_scheduler = create('lr_scheduler', optimizer=self.optimizer)
76 | print('Initial lr: ', self._lr_scheduler.get_last_lr())
77 |
78 | return self._lr_scheduler
79 |
80 | @property
81 | def train_dataloader(self, ):
82 | if self._train_dataloader is None and 'train_dataloader' in self.yaml_cfg:
83 | merge_config(self.yaml_cfg)
84 | self._train_dataloader = create('train_dataloader')
85 | self._train_dataloader.shuffle = self.yaml_cfg['train_dataloader'].get('shuffle', False)
86 |
87 | return self._train_dataloader
88 |
89 | @property
90 | def val_dataloader(self, ):
91 | if self._val_dataloader is None and 'val_dataloader' in self.yaml_cfg:
92 | merge_config(self.yaml_cfg)
93 | self._val_dataloader = create('val_dataloader')
94 | self._val_dataloader.shuffle = self.yaml_cfg['val_dataloader'].get('shuffle', False)
95 |
96 | return self._val_dataloader
97 |
98 |
99 | @property
100 | def ema(self, ):
101 | if self._ema is None and self.yaml_cfg.get('use_ema', False):
102 | merge_config(self.yaml_cfg)
103 | self._ema = create('ema', model=self.model)
104 |
105 | return self._ema
106 |
107 |
108 | @property
109 | def scaler(self, ):
110 | if self._scaler is None and self.yaml_cfg.get('use_amp', False):
111 | merge_config(self.yaml_cfg)
112 | self._scaler = create('scaler')
113 |
114 | return self._scaler
115 |
116 |
117 | @staticmethod
118 | def get_optim_params(cfg: dict, model: nn.Module):
119 | '''
120 | E.g.:
121 | ^(?=.*a)(?=.*b).*$ means including a and b
122 |         ^((?!b).)*a((?!b).)*$ means including a but not b
123 | ^((?!b|c).)*a((?!b|c).)*$ means including a but not (b | c)
124 | '''
125 | assert 'type' in cfg, ''
126 | cfg = copy.deepcopy(cfg)
127 |
128 | if 'params' not in cfg:
129 | return model.parameters()
130 |
131 | assert isinstance(cfg['params'], list), ''
132 |
133 | param_groups = []
134 | visited = []
135 | for pg in cfg['params']:
136 | pattern = pg['params']
137 | params = {k: v for k, v in model.named_parameters() if v.requires_grad and len(re.findall(pattern, k)) > 0}
138 | pg['params'] = params.values()
139 | param_groups.append(pg)
140 | visited.extend(list(params.keys()))
141 |
142 | names = [k for k, v in model.named_parameters() if v.requires_grad]
143 |
144 | if len(visited) < len(names):
145 | unseen = set(names) - set(visited)
146 | params = {k: v for k, v in model.named_parameters() if v.requires_grad and k in unseen}
147 | param_groups.append({'params': params.values()})
148 | visited.extend(list(params.keys()))
149 |
150 | assert len(visited) == len(names), ''
151 |
152 | return param_groups
153 |
--------------------------------------------------------------------------------
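
A sketch of how `get_optim_params` groups parameters by regex (cfg keys illustrative, mirroring the optimizer section of the YAML configs):

import torch.nn as nn
from src.core.yaml_config import YAMLConfig

model = nn.ModuleDict({'backbone': nn.Linear(4, 4), 'decoder': nn.Linear(4, 4)})
cfg = {
    'type': 'AdamW',
    'params': [
        {'params': '^(?=.*backbone).*$', 'lr': 1e-5},  # lower lr for backbone weights
    ],
}
groups = YAMLConfig.get_optim_params(cfg, model)
# -> one group holding the backbone parameters at lr=1e-5, plus a catch-all
#    group for the remaining (decoder) parameters
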
/benchmark/trtinfer.py:
--------------------------------------------------------------------------------
1 | '''by lyuwenyu
2 | '''
3 |
4 | import time
5 | import contextlib
6 | from collections import namedtuple, OrderedDict
7 |
8 | import torch
9 | import numpy as np
10 | import tensorrt as trt
11 | import pycuda.driver as cuda; import pycuda.autoinit  # noqa -- needed when backend == 'cuda'
12 | from utils import TimeProfiler
13 |
14 | class TRTInference(object):
15 | def __init__(self, engine_path, device='cuda:0', backend='torch', max_batch_size=32, verbose=False):
16 | self.engine_path = engine_path
17 | self.device = device
18 | self.backend = backend
19 | self.max_batch_size = max_batch_size
20 |
21 | self.logger = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger(trt.Logger.INFO)
22 |
23 | self.engine = self.load_engine(engine_path)
24 |
25 | self.context = self.engine.create_execution_context()
26 |
27 | self.bindings = self.get_bindings(self.engine, self.context, self.max_batch_size, self.device)
28 | self.bindings_addr = OrderedDict((n, v.ptr) for n, v in self.bindings.items())
29 |
30 | self.input_names = self.get_input_names()
31 | self.output_names = self.get_output_names()
32 |
33 | if self.backend == 'cuda':
34 | self.stream = cuda.Stream()
35 |
36 | self.time_profile = TimeProfiler()
37 |
38 | def init(self, ):
39 | self.dynamic = False
40 |
41 | def load_engine(self, path):
42 | '''load engine
43 | '''
44 | trt.init_libnvinfer_plugins(self.logger, '')
45 | with open(path, 'rb') as f, trt.Runtime(self.logger) as runtime:
46 | return runtime.deserialize_cuda_engine(f.read())
47 |
48 | def get_input_names(self, ):
49 | names = []
50 | for _, name in enumerate(self.engine):
51 | if self.engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
52 | names.append(name)
53 | return names
54 |
55 | def get_output_names(self, ):
56 | names = []
57 | for _, name in enumerate(self.engine):
58 | if self.engine.get_tensor_mode(name) == trt.TensorIOMode.OUTPUT:
59 | names.append(name)
60 | return names
61 |
62 | def get_bindings(self, engine, context, max_batch_size=32, device=None):
63 |         '''build bindings
64 | '''
65 | Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
66 | bindings = OrderedDict()
67 | # max_batch_size = 1
68 |
69 | for i, name in enumerate(engine):
70 | shape = engine.get_tensor_shape(name)
71 | dtype = trt.nptype(engine.get_tensor_dtype(name))
72 |
73 | if shape[0] == -1:
74 |                 self.dynamic = True  # record that the engine has a dynamic batch dim
75 | shape[0] = max_batch_size
76 | if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT: # dynamic
77 | context.set_input_shape(name, shape)
78 |
79 | if self.backend == 'cuda':
80 | if engine.get_tensor_mode(name) == trt.TensorIOMode.INPUT:
81 | data = np.random.randn(*shape).astype(dtype)
82 | ptr = cuda.mem_alloc(data.nbytes)
83 | bindings[name] = Binding(name, dtype, shape, data, ptr)
84 | else:
85 | data = cuda.pagelocked_empty(trt.volume(shape), dtype)
86 | ptr = cuda.mem_alloc(data.nbytes)
87 | bindings[name] = Binding(name, dtype, shape, data, ptr)
88 |
89 | else:
90 | data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
91 | bindings[name] = Binding(name, dtype, shape, data, data.data_ptr())
92 |
93 | return bindings
94 |
95 | def run_torch(self, blob):
96 | '''torch input
97 | '''
98 | for n in self.input_names:
99 | if self.bindings[n].shape != blob[n].shape:
100 | self.context.set_input_shape(n, blob[n].shape)
101 | self.bindings[n] = self.bindings[n]._replace(shape=blob[n].shape)
102 |
103 | self.bindings_addr.update({n: blob[n].data_ptr() for n in self.input_names})
104 | self.context.execute_v2(list(self.bindings_addr.values()))
105 | outputs = {n: self.bindings[n].data for n in self.output_names}
106 |
107 | return outputs
108 |
109 |
110 | def async_run_cuda(self, blob):
111 | '''numpy input
112 | '''
113 | for n in self.input_names:
114 | cuda.memcpy_htod_async(self.bindings_addr[n], blob[n], self.stream)
115 |
116 | bindings_addr = [int(v) for _, v in self.bindings_addr.items()]
117 | self.context.execute_async_v2(bindings=bindings_addr, stream_handle=self.stream.handle)
118 |
119 | outputs = {}
120 | for n in self.output_names:
121 | cuda.memcpy_dtoh_async(self.bindings[n].data, self.bindings[n].ptr, self.stream)
122 | outputs[n] = self.bindings[n].data
123 |
124 | self.stream.synchronize()
125 |
126 | return outputs
127 |
128 | def __call__(self, blob):
129 | if self.backend == 'torch':
130 | return self.run_torch(blob)
131 |
132 | elif self.backend == 'cuda':
133 | return self.async_run_cuda(blob)
134 |
135 | def synchronize(self, ):
136 | if self.backend == 'torch' and torch.cuda.is_available():
137 | torch.cuda.synchronize()
138 |
139 | elif self.backend == 'cuda':
140 | self.stream.synchronize()
141 |
142 | def warmup(self, blob, n):
143 | for _ in range(n):
144 | _ = self(blob)
145 |
146 | def speed(self, blob, n):
147 | self.time_profile.reset()
148 | for _ in range(n):
149 | with self.time_profile:
150 | _ = self(blob)
151 |
152 | return self.time_profile.total / n
153 |
154 |
--------------------------------------------------------------------------------
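
A usage sketch (engine path, input names, and shapes are assumptions matching the ONNX export above; with backend='torch' the inputs live on the GPU as torch tensors):

import torch
from trtinfer import TRTInference

m = TRTInference('rtdetr_r50vd.engine', device='cuda:0', backend='torch')
blob = {
    'images': torch.rand(1, 3, 640, 640, device='cuda:0'),
    'orig_target_sizes': torch.tensor([[640, 640]], device='cuda:0'),  # dtype must match the engine
}
outputs = m(blob)            # dict keyed by the engine's output tensor names
print(m.speed(blob, n=100))  # average seconds per call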