├── __init__.py ├── utils ├── __init__.py ├── upsample.py └── register.py ├── core ├── losses │ ├── ghm_loss.py │ ├── balanced_l1_loss.py │ ├── __init__.py │ ├── l1_loss.py │ ├── cross_entropy.py │ └── generalized_focal_loss.py ├── layers │ ├── csrc │ │ └── CMakeLists.txt │ ├── deformable_roi_pooling.py │ ├── activations.py │ ├── scale.py │ ├── max_in_out.py │ ├── nearest_upsamling.py │ ├── proposal_layer.py │ ├── __init__.py │ ├── position_sensitive_average_pooling.py │ ├── drop_block.py │ └── weight_standardization_conv2d.py ├── anchors │ ├── ssd_anchor_generator.py │ └── __init__.py ├── samplers │ ├── iou_balanced_negative_sampler.py │ ├── instance_balanced_positive_sampler.py │ ├── __init__.py │ ├── combined_sampler.py │ ├── random_sampler.py │ ├── pseudo_sampler.py │ ├── ohem_sampler.py │ └── sampler.py ├── metrics │ ├── __init__.py │ └── no_op_metric.py ├── learning_rate_schedules │ ├── __init__.py │ └── step_decay.py ├── optimizers │ ├── __init__.py │ ├── accum_optimizer.py │ └── lookahead_optimizer.py ├── assigners │ ├── __init__.py │ ├── assigner.py │ ├── uniform_assigner.py │ └── min_cost_assigner.py ├── __init__.py ├── bbox │ ├── __init__.py │ ├── bbox_transform.py │ └── overlaps.py └── builder.py ├── models ├── detectors │ ├── paa.py │ ├── detector.py │ ├── __init__.py │ ├── one_stage.py │ ├── two_stage.py │ └── detr.py ├── backbones │ ├── acnet.py │ ├── repvgg.py │ ├── shufflenet_v1.py │ ├── shufflenet_v2.py │ ├── darknet53.py │ ├── __init__.py │ └── backbone.py ├── heads │ ├── bbox_heads │ │ └── __init__.py │ ├── dense_heads │ │ ├── __init__.py │ │ └── retinanet_head.py │ ├── roi_heads │ │ ├── __init__.py │ │ └── base_roi_head.py │ ├── anchor_free_heads │ │ └── __init__.py │ ├── __init__.py │ └── head.py ├── __init__.py ├── builder.py └── necks │ ├── __init__.py │ ├── feature_fusion_pyramid.py │ ├── path_aggregation_neck.py │ └── dlaup.py ├── data ├── images │ └── panda.jpg ├── __init__.py ├── datasets │ ├── __init__.py │ └── dataset.py ├── builder.py └── augmentations │ ├── __init__.py │ └── mixup.py ├── logs ├── events.out.tfevents.1617251244.bail.141074.7195.v2 ├── events.out.tfevents.1617251266.bail.141421.7195.v2 ├── events.out.tfevents.1617253323.bail.168083.21721.v2 ├── events.out.tfevents.1617253358.bail.168621.21721.v2 ├── events.out.tfevents.1617253447.bail.169883.21721.v2 ├── events.out.tfevents.1617253477.bail.170342.21721.v2 ├── events.out.tfevents.1617253496.bail.170673.21721.v2 ├── events.out.tfevents.1617253615.bail.172245.21721.v2 ├── events.out.tfevents.1617672692.bail.4032193.7195.v2 ├── events.out.tfevents.1617672814.bail.4033287.7372.v2 ├── events.out.tfevents.1617673033.bail.4036164.7374.v2 ├── events.out.tfevents.1617673230.bail.4038971.7374.v2 ├── events.out.tfevents.1617673732.bail.4045563.7374.v2 ├── events.out.tfevents.1617673754.bail.4045938.7374.v2 ├── events.out.tfevents.1617673850.bail.4047306.7374.v2 ├── events.out.tfevents.1617673895.bail.4047984.7374.v2 ├── events.out.tfevents.1617673960.bail.4048987.7374.v2 ├── events.out.tfevents.1617673981.bail.4049390.7374.v2 ├── events.out.tfevents.1617674034.bail.4050216.7374.v2 ├── events.out.tfevents.1617674077.bail.4050915.7374.v2 ├── events.out.tfevents.1617674147.bail.4051983.7374.v2 ├── events.out.tfevents.1617674268.bail.4053636.7376.v2 ├── events.out.tfevents.1617674295.bail.4054108.7377.v2 ├── events.out.tfevents.1617674328.bail.4054652.7382.v2 ├── events.out.tfevents.1617674400.bail.4055796.7376.v2 ├── events.out.tfevents.1617674438.bail.4056411.7376.v2 ├── 
events.out.tfevents.1617674491.bail.4057234.7376.v2 ├── events.out.tfevents.1617674515.bail.4057654.7377.v2 ├── events.out.tfevents.1617674556.bail.4058324.7377.v2 ├── events.out.tfevents.1617674688.bail.4060116.7377.v2 ├── events.out.tfevents.1617674720.bail.4060652.7377.v2 ├── events.out.tfevents.1617674978.bail.4064067.7377.v2 ├── events.out.tfevents.1617675005.bail.4064577.7377.v2 ├── events.out.tfevents.1617675053.bail.4065391.7377.v2 ├── events.out.tfevents.1617675271.bail.4068526.7373.v2 ├── events.out.tfevents.1617675471.bail.4071386.7375.v2 ├── events.out.tfevents.1617675640.bail.4073700.7379.v2 ├── events.out.tfevents.1617675886.bail.4076950.7379.v2 ├── events.out.tfevents.1617675939.bail.4077770.7379.v2 ├── events.out.tfevents.1617675973.bail.4078309.7379.v2 ├── events.out.tfevents.1617676041.bail.4079359.7382.v2 ├── events.out.tfevents.1617676071.bail.4079888.7380.v2 ├── events.out.tfevents.1617676113.bail.4080534.7380.v2 ├── events.out.tfevents.1617676313.bail.4083267.7381.v2 ├── events.out.tfevents.1617676338.bail.4083710.7382.v2 ├── events.out.tfevents.1617676398.bail.4084624.7386.v2 ├── events.out.tfevents.1617676432.bail.4085178.7390.v2 ├── events.out.tfevents.1617676473.bail.4085910.7390.v2 └── events.out.tfevents.1617676528.bail.4086783.7377.v2 ├── trainers └── __init__.py ├── README.md ├── .gitignore ├── create_coco_dataset.py ├── configs ├── __init__.py └── onenet_config.py ├── train.py ├── yamls ├── gfl_x101_32x4d_fpn_mstrain_2x_coco.yaml ├── gfl_r50_fpn_1x_coco.yaml ├── gfl_r101_fpn_mstrain_2x_coco.yaml ├── gfl_r50_fpn_mstrain_2x_coco.yaml ├── gflv2_r50_fpn_1x.yaml ├── retinanet_r101_fpn_2x_coco.yaml ├── retinanet_r50_fpn_2x_coco.yaml ├── retinanet_x101_32x4d_fpn_2x_coco.yaml ├── retinanet_x101_64x4d_fpn_2x_coco.yaml ├── gflv2_r101_fpn_ms2x.yaml ├── gflv2_r50_fpn_ms2x.yaml ├── atss_r50_fpn_1x_coco.yaml ├── atss_r101_fpn_1x_coco.yaml ├── YOLOF_R50_C5_1x.yaml ├── YOLOF_R101_C5_1x.yaml ├── YOLOF_X_101_64x4d_C5_1x.yaml ├── YOLOF_R101_DC5_1x.yaml └── YOLOF_R50_DC5_1x.yaml └── export_saved_model.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/losses/ghm_loss.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/detectors/paa.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/acnet.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/repvgg.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/layers/csrc/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/losses/balanced_l1_loss.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /models/backbones/shufflenet_v1.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/shufflenet_v2.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/anchors/ssd_anchor_generator.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/layers/deformable_roi_pooling.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/bbox_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/dense_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/roi_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/heads/anchor_free_heads/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/samplers/iou_balanced_negative_sampler.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /core/samplers/instance_balanced_positive_sampler.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/backbones/darknet53.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | -------------------------------------------------------------------------------- /data/images/panda.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/data/images/panda.jpg -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617251244.bail.141074.7195.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617251244.bail.141074.7195.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617251266.bail.141421.7195.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617251266.bail.141421.7195.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253323.bail.168083.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253323.bail.168083.21721.v2 
-------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253358.bail.168621.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253358.bail.168621.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253447.bail.169883.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253447.bail.169883.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253477.bail.170342.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253477.bail.170342.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253496.bail.170673.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253496.bail.170673.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617253615.bail.172245.21721.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617253615.bail.172245.21721.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617672692.bail.4032193.7195.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617672692.bail.4032193.7195.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617672814.bail.4033287.7372.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617672814.bail.4033287.7372.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673033.bail.4036164.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673033.bail.4036164.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673230.bail.4038971.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673230.bail.4038971.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673732.bail.4045563.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673732.bail.4045563.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673754.bail.4045938.7374.v2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673754.bail.4045938.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673850.bail.4047306.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673850.bail.4047306.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673895.bail.4047984.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673895.bail.4047984.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673960.bail.4048987.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673960.bail.4048987.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617673981.bail.4049390.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617673981.bail.4049390.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674034.bail.4050216.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674034.bail.4050216.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674077.bail.4050915.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674077.bail.4050915.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674147.bail.4051983.7374.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674147.bail.4051983.7374.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674268.bail.4053636.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674268.bail.4053636.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674295.bail.4054108.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674295.bail.4054108.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674328.bail.4054652.7382.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674328.bail.4054652.7382.v2 -------------------------------------------------------------------------------- 
/logs/events.out.tfevents.1617674400.bail.4055796.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674400.bail.4055796.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674438.bail.4056411.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674438.bail.4056411.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674491.bail.4057234.7376.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674491.bail.4057234.7376.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674515.bail.4057654.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674515.bail.4057654.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674556.bail.4058324.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674556.bail.4058324.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674688.bail.4060116.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674688.bail.4060116.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674720.bail.4060652.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674720.bail.4060652.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617674978.bail.4064067.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617674978.bail.4064067.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675005.bail.4064577.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675005.bail.4064577.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675053.bail.4065391.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675053.bail.4065391.7377.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675271.bail.4068526.7373.v2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675271.bail.4068526.7373.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675471.bail.4071386.7375.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675471.bail.4071386.7375.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675640.bail.4073700.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675640.bail.4073700.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675886.bail.4076950.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675886.bail.4076950.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675939.bail.4077770.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675939.bail.4077770.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617675973.bail.4078309.7379.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617675973.bail.4078309.7379.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676041.bail.4079359.7382.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676041.bail.4079359.7382.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676071.bail.4079888.7380.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676071.bail.4079888.7380.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676113.bail.4080534.7380.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676113.bail.4080534.7380.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676313.bail.4083267.7381.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676313.bail.4083267.7381.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676338.bail.4083710.7382.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676338.bail.4083710.7382.v2 -------------------------------------------------------------------------------- 
/logs/events.out.tfevents.1617676398.bail.4084624.7386.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676398.bail.4084624.7386.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676432.bail.4085178.7390.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676432.bail.4085178.7390.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676473.bail.4085910.7390.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676473.bail.4085910.7390.v2 -------------------------------------------------------------------------------- /logs/events.out.tfevents.1617676528.bail.4086783.7377.v2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wavce/letsdet/HEAD/logs/events.out.tfevents.1617676528.bail.4086783.7377.v2 -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- 1 | from .datasets.coco_dataset import COCODataset 2 | from .builder import build_dataset 3 | 4 | 5 | 6 | __all__ = [ 7 | "COCODataset", 8 | "build_dataset" 9 | ] 10 | 11 | -------------------------------------------------------------------------------- /core/anchors/__init__.py: -------------------------------------------------------------------------------- 1 | from .anchor_generator import AnchorGenerator 2 | from .anchor_generator_v2 import AnchorGeneratorV2 3 | 4 | 5 | __all__ = [ 6 | "AnchorGenerator", "AnchorGeneratorV2" 7 | ] 8 | -------------------------------------------------------------------------------- /trainers/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_gpu_trainer import MultiGPUTrainer 2 | from .single_gpu_trainer import SingleGPUTrainer 3 | 4 | 5 | __all__ = [ 6 | "MultiGPUTrainer", "SingleGPUTrainer" 7 | ] 8 | -------------------------------------------------------------------------------- /core/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from .average_precision import AP 2 | from .wider_face_ap import WiderFaceAP 3 | from .mean_average_precision import mAP 4 | 5 | 6 | __all__ = [ 7 | "AP", "WiderFaceAP", "mAP" 8 | ] 9 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Description 2 | 3 | letsdet is a TensorFlow-based object detection library. Many of its modules are modeled on MMDetection[^1], so it also inherits some of MMDetection's traits, such as its modular design. 4 | 5 | 6 | [^1]: https://github.com/open-mmlab/mmdetection 7 | 8 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * 2 | from .necks import * 3 | from .heads import * 4 | from .detectors import * 5 | from .builder import build_backbone, build_neck, build_head, build_detector 6 | 7 | 8 | __all__ = [ 9 | "build_backbone", "build_neck",
"build_head", "build_detector" 10 | ] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | *.egg 9 | MANIFEST 10 | 11 | # Environments 12 | .env 13 | .venv 14 | env/ 15 | venv/ 16 | ENV/ 17 | env.bak/ 18 | venv.bak/ 19 | 20 | # vscode 21 | .vscode 22 | .idea 23 | .DS_Store 24 | 25 | -------------------------------------------------------------------------------- /core/learning_rate_schedules/__init__.py: -------------------------------------------------------------------------------- 1 | from .step_decay import StepDecay 2 | from .tflr import PolynomialDecay, ExponentialDecay, CosineDecay, LinearCosineDecay, PiecewiseConstantDecay 3 | 4 | 5 | __all__ = [ 6 | "StepDecay", "PolynomialDecay", "ExponentialDecay", "CosineDecay", 7 | "LinearCosineDecay", "PiecewiseConstantDecay" 8 | ] 9 | -------------------------------------------------------------------------------- /data/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .dataset import Dataset 2 | 3 | from .objects365_dataset import Objects365Dataset 4 | 5 | 6 | DATASET = { 7 | "objects365": Objects365Dataset 8 | } 9 | 10 | 11 | def build_dataset(name, **kwargs): 12 | return DATASET[name](**kwargs).dataset() 13 | 14 | 15 | __all__ = [ 16 | "Dataset", 17 | "build_dataset" 18 | ] 19 | -------------------------------------------------------------------------------- /data/builder.py: -------------------------------------------------------------------------------- 1 | from utils.register import Register 2 | 3 | 4 | DATASETS = Register("dataset") 5 | AUGMENTATIONS = Register("augmentations") 6 | 7 | 8 | def build_dataset(dataset, **kwargs): 9 | return DATASETS[dataset](**kwargs).dataset() 10 | 11 | 12 | def build_augmentation(augmentation, **kwargs): 13 | return AUGMENTATIONS[augmentation](**kwargs) 14 | 15 | -------------------------------------------------------------------------------- /core/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .sampler import Sampler 2 | from .ohem_sampler import OHEMSampler 3 | from .pseudo_sampler import PseudoSampler 4 | from .random_sampler import RandomSampler 5 | from .combined_sampler import CombinedSampler 6 | 7 | 8 | __all__ =[ 9 | "Sampler", 10 | "OHEMSampler", 11 | "PseudoSampler", 12 | "RandomSampler", 13 | "CombinedSampler" 14 | ] 15 | 16 | 17 | -------------------------------------------------------------------------------- /core/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .accum_optimizer import AccumOptimizer 2 | from .lookahead_optimizer import LookaheadOptimizer 3 | from .tfoptimizers import SGD, Adadelta, Adagrad, Adam, Adamax, Nadam, RMSprop 4 | from .gradient_centralization import SGDGC, AdamGC 5 | 6 | 7 | __all_ = [ 8 | "SGD", "Adadelta", "Adagrad", "Adam", "Adamax", "Nadam", "RMSprop", 9 | "SGDGC", "AdamGC", "AccumOptimizer", "LookaheadOptimizer" 10 | ] 11 | -------------------------------------------------------------------------------- /core/assigners/__init__.py: -------------------------------------------------------------------------------- 1 | from .fcos_assigner import FCOSAssigner 2 | from .atss_assigner import ATSSAssigner 3 | from 
.max_iou_assigner import MaxIoUAssigner 4 | from .uniform_assigner import UniformAssigner 5 | from .min_cost_assigner import MinCostAssigner 6 | from .center_heatmap_assigner import CenterHeatmapAssigner 7 | 8 | 9 | __all__ = [ 10 | "ATSSAssigner", 11 | "FCOSAssigner", 12 | "MaxIoUAssigner", 13 | "MinCostAssigner", 14 | "UniformAssigner", 15 | "CenterHeatmapAssigner" 16 | ] 17 | -------------------------------------------------------------------------------- /data/augmentations/__init__.py: -------------------------------------------------------------------------------- 1 | from .mosaic import Mosaic 2 | from .mixup import Mixup 3 | from .transforms import Pad 4 | from .transforms import Resize 5 | from .transforms import RandCropOrPad 6 | from .transforms import RandomDistortColor 7 | from .transforms import FlipLeftToRight 8 | from .transforms import SSDCrop 9 | 10 | 11 | __all__ = [ 12 | "Pad", 13 | "Resize", 14 | "Mixup", 15 | "Mosaic", 16 | "RandCropOrPad", 17 | "SSDCrop", 18 | "RandomDistortColor", 19 | "FlipLeftToRight", 20 | ] 21 | -------------------------------------------------------------------------------- /core/metrics/no_op_metric.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import METRICS 3 | 4 | 5 | @METRICS.register 6 | class NoOpMetric(tf.keras.metrics.Metric): 7 | def __init__(self, **kwargs): 8 | super(NoOpMetric, self).__init__(**kwargs) 9 | 10 | self.value = self.add_weight(name="no_op_value", initializer="zeros") 11 | 12 | def update_state(self, value, sample_weight=None): 13 | self.value.assign(tf.cast(value, self.value.dtype)) 14 | 15 | def result(self): 16 | return self.value 17 | -------------------------------------------------------------------------------- /models/detectors/detector.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta 2 | from abc import abstractmethod 3 | import tensorflow as tf 4 | 5 | 6 | class Detector(metaclass=ABCMeta): 7 | def __init__(self, cfg, training=True): 8 | self.cfg = cfg 9 | self.training = training 10 | 11 | @abstractmethod 12 | def compute_losses(self, predictions, image_info): 13 | raise NotImplementedError() 14 | 15 | @abstractmethod 16 | def save_weights(self, name): 17 | raise NotImplementedError() 18 | 19 | -------------------------------------------------------------------------------- /utils/upsample.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def nearest_upsample2d(inputs, factor): 5 | # Instead of broadcasting with a 6-d tensor, we repeat pixels via a reshape and 6 | # a multiply-by-ones, which keeps the op TfLite compatible.
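7 | # Shape walk-through (illustrative), e.g. factor=2: reshaping to [bs, h, 1, w, 1, c] and multiplying by ones([1, 1, 2, 1, 2, 1]) gives [bs, h, 2, w, 2, c], 8 | # which the final reshape turns into [bs, 2h, 2w, c] with every pixel repeated 2x2.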
9 | bs, h, w, c = tf.shape(inputs)[0], tf.shape(inputs)[1], tf.shape(inputs)[2], tf.shape(inputs)[3] 10 | data = tf.reshape(inputs, [bs, h, 1, w, 1, c]) * tf.ones([1, 1, factor, 1, factor, 1], dtype=inputs.dtype) 11 | 12 | return tf.reshape(data, [bs, h * factor, w * factor, c]) 13 | -------------------------------------------------------------------------------- /models/builder.py: -------------------------------------------------------------------------------- 1 | from utils.register import Register 2 | 3 | 4 | BACKBONES = Register("backbones") 5 | NECKS = Register("necks") 6 | HEADS = Register("heads") 7 | DETECTORS = Register("detectors") 8 | 9 | 10 | def build_backbone(backbone, **kwargs): 11 | return BACKBONES[backbone](**kwargs) 12 | 13 | 14 | def build_neck(neck, **kwargs): 15 | return NECKS[neck](**kwargs) 16 | 17 | 18 | def build_head(head, **kwargs): 19 | return HEADS[head](**kwargs) 20 | 21 | 22 | def build_detector(detector, **kwargs): 23 | return DETECTORS[detector](**kwargs) 24 | 25 | -------------------------------------------------------------------------------- /models/necks/__init__.py: -------------------------------------------------------------------------------- 1 | from .fpn import FPN 2 | from .bifpn import BiFPN 3 | from .dlaup import dla_up 4 | from .nas_fpn import nas_fpn 5 | from .dilated_encoder import DilatedEncoder 6 | from .centernet_deconv import centernet_deconv 7 | from .path_aggregation_neck import path_aggregation_neck 8 | from .feature_fusion_pyramid import feature_fusion_pyramid 9 | 10 | 11 | __all__ = [ 12 | "FPN", 13 | "BiFPN", 14 | "dla_up", 15 | "nas_fpn", 16 | "DilatedEncoder", 17 | "centernet_deconv", 18 | "path_aggregation_neck", 19 | "feature_fusion_pyramid", 20 | ] 21 | -------------------------------------------------------------------------------- /core/layers/activations.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | @tf.custom_gradient 5 | def _mish(x): 6 | x1 = tf.nn.tanh(tf.nn.softplus(x)) 7 | 8 | def _grad(dy): 9 | # d/dx [x * tanh(softplus(x))] = tanh(sp(x)) + x * sigmoid(x) * (1 - tanh(sp(x))^2) 10 | dx = x1 + x * tf.nn.sigmoid(x) * (1 - x1 * x1) 11 | 12 | return dx * dy 13 | 14 | return x * x1, _grad 15 | 16 | 17 | class Mish(tf.keras.layers.Layer): 18 | def __init__(self, **kwargs): 19 | super(Mish, self).__init__(**kwargs) 20 | 21 | def call(self, inputs): 22 | # x = inputs * (tf.nn.tanh(tf.nn.softplus(inputs))) 23 | 24 | return _mish(inputs) 25 | 26 | 27 | -------------------------------------------------------------------------------- /core/samplers/combined_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class CombinedSampler(Sampler): 8 | def __init__(self, pos_sampler, neg_sampler, **kwargs): 9 | super(CombinedSampler, self).__init__(**kwargs) 10 | 11 | self.positive_sampler = pos_sampler 12 | self.negative_sampler = neg_sampler 13 | 14 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 15 | raise NotImplementedError 16 | 17 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 18 | raise NotImplementedError -------------------------------------------------------------------------------- /models/detectors/__init__.py: -------------------------------------------------------------------------------- 1 | from .detector import Detector 2 | 3 | from .gfl import GFL 4 | from .atss
import ATSS 5 | from .fcos import FCOS 6 | from .gflv2 import GFLV2 7 | from .onenet import OneNet 8 | from .yolov4 import YOLOv4 9 | from .yolov5 import YOLOv5 10 | from .centernet import CenterNet 11 | from .retinanet import RetinaNet 12 | from .faster_rcnn import FasterRCNN 13 | from .efficientdet import EfficientDet 14 | 15 | 16 | 17 | __all__ = [ 18 | "Detector", 19 | "GFL", 20 | "ATSS", 21 | "FCOS", 22 | "GFLV2", 23 | "OneNet", 24 | "YOLOv4", 25 | "YOLOv5", 26 | "CenterNet", 27 | "RetinaNet", 28 | "FasterRCNN", 29 | "EfficientDet", 30 | ] 31 | 32 | -------------------------------------------------------------------------------- /core/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from .focal_loss import FocalLoss, ModifiedFocalLoss 2 | from .l1_loss import SmoothL1Loss, RegL1Loss 3 | from .cross_entropy import CrossEntropy, BinaryCrossEntropy 4 | from .iou_loss import IoULoss, BoundedIoULoss, GIoULoss, DIoULoss, CIoULoss 5 | from .generalized_focal_loss import DistributionFocalLoss, QualityFocalLoss 6 | 7 | __all__ = [ 8 | "ModifiedFocalLoss", 9 | "FocalLoss", 10 | "RegL1Loss", 11 | "SmoothL1Loss", 12 | "CrossEntropy", 13 | "BinaryCrossEntropy", 14 | "IoULoss", 15 | "BoundedIoULoss", 16 | "GIoULoss", 17 | "DIoULoss", 18 | "CIoULoss", 19 | "QualityFocalLoss", 20 | "DistributionFocalLoss" 21 | ] 22 | 23 | -------------------------------------------------------------------------------- /core/__init__.py: -------------------------------------------------------------------------------- 1 | from core.layers.nms import * 2 | from .assigners import * 3 | from .samplers import * 4 | from .losses import * 5 | from .metrics import * 6 | from .optimizers import * 7 | from .learning_rate_schedules import * 8 | from .builder import ( 9 | build_assigner, build_sampler, 10 | build_loss, build_optimizer, 11 | build_learning_rate_scheduler, 12 | build_metric, build_nms, 13 | build_anchor_generator 14 | ) 15 | 16 | 17 | __all__ = [ 18 | "build_assigner", "build_sampler", "build_loss", "build_optimizer", 19 | "build_learning_rate_scheduler", "build_metric", "build_nms", "build_anchor_generator" 20 | ] 21 | 22 | -------------------------------------------------------------------------------- /core/layers/scale.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class Scale(tf.keras.layers.Layer): 5 | def __init__(self, value, **kwargs): 6 | super(Scale, self).__init__(**kwargs) 7 | 8 | self.value = value 9 | 10 | def build(self, input_shape): 11 | self.scale = self.add_weight(name="scale", 12 | trainable=True, 13 | shape=[], 14 | dtype=self.dtype, 15 | initializer=tf.keras.initializers.Constant(self.value)) 16 | 17 | def call(self, inputs, **kwargs): 18 | return inputs * self.scale 19 | 20 | def compute_output_shape(self, input_shape): 21 | return input_shape 22 | 23 | def get_config(self): 24 | config = {"value": self.value} 25 | 26 | base_config = super(Scale, self).get_config() 27 | 28 | return dict(list(base_config.items()) + list(config.items())) 29 | -------------------------------------------------------------------------------- /models/heads/__init__.py: -------------------------------------------------------------------------------- 1 | from .head import BaseHead 2 | from .dense_heads.anchor_head import AnchorHead 3 | from .dense_heads.atss_head import ATSSHead 4 | from .dense_heads.retinanet_head import
RetinaNetHead 5 | from .dense_heads.fcos_head import FCOSHead 6 | from .dense_heads.rpn_head import RPNHead 7 | from .bbox_heads.bbox_head import BBoxHead 8 | from .roi_heads.standard_roi_head import StandardRoIHead 9 | from .dense_heads.gfl_head import GFLHead 10 | from .dense_heads.gflv2_head import GFLV2Head 11 | from .dense_heads.yolof_head import YOLOFHead 12 | from .anchor_free_heads.center_heatmap_head import CenterHeatmapHead 13 | from .anchor_free_heads.onenet_head import OneNetHead 14 | 15 | 16 | __all__ = [ 17 | "BaseHead", 18 | "AnchorHead", 19 | "ATSSHead", 20 | "RetinaNetHead", 21 | "FCOSHead", 22 | "RPNHead", 23 | "BBoxHead", 24 | "StandardRoIHead", 25 | "GFLHead", 26 | "GFLV2Head", 27 | "CenterHeatmapHead", 28 | "OneNetHead", 29 | "YOLOFHead" 30 | ] 31 | -------------------------------------------------------------------------------- /core/bbox/__init__.py: -------------------------------------------------------------------------------- 1 | from .bbox_transform import Box2Delta 2 | from .bbox_transform import Delta2Box 3 | from .overlaps import compute_iou 4 | from .overlaps import compute_unaligned_iou 5 | from .bbox_transform import Distance2Box 6 | from .bbox_transform import Box2Distance 7 | 8 | 9 | def build_decoder(decoder, **kwargs): 10 | if decoder == "Delta2Box": 11 | return Delta2Box(**kwargs) 12 | 13 | if decoder == "Distance2Box": 14 | return Distance2Box() 15 | 16 | raise TypeError("Could not interpret bbox decoder function identifier: {}".format(repr(decoder))) 17 | 18 | 19 | def build_encoder(encoder, **kwargs): 20 | if encoder == "Box2Delta": 21 | return Box2Delta(**kwargs) 22 | 23 | if encoder == "Box2Distance": 24 | return Box2Distance() 25 | 26 | raise TypeError("Could not interpret bbox encoder function identifier: {}".format(repr(encoder))) 27 | 28 | 29 | __all__ = [ 30 | "build_encoder", 31 | "build_decoder", 32 | "compute_iou", 33 | "compute_unaligned_iou" 34 | ] 35 | -------------------------------------------------------------------------------- /core/learning_rate_schedules/step_decay.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import LR_SCHEDULERS 3 | 4 | 5 | @LR_SCHEDULERS.register 6 | class StepDecay(tf.keras.optimizers.schedules.LearningRateSchedule): 7 | def __init__(self, initial_learning_rate, decay_steps, decay_rate, name="StepDecay"): 8 | super(StepDecay, self).__init__() 9 | 10 | self.lr = initial_learning_rate 11 | self.decay_steps = decay_steps 12 | self.decay_rate = decay_rate 13 | self.name = name 14 | 15 | def __call__(self, global_step): 16 | with tf.name_scope("StepDecay"): 17 | initial_lr = tf.convert_to_tensor(self.lr, name="initial_learning_rate") 18 | dtype = initial_lr.dtype 19 | decay_rate = tf.cast(self.decay_rate, dtype) 20 | # Stateless schedule: lr = initial_lr * decay_rate ** floor(step / decay_steps), 21 | # recomputed from the initial value on every call (graph-mode safe). 22 | num_decays = tf.cast(global_step // self.decay_steps, dtype) 23 | 24 | return initial_lr * tf.pow(decay_rate, num_decays) 25 | 26 | def get_config(self): 27 | return {"initial_learning_rate": self.lr, 28 | "decay_steps": self.decay_steps, 29 | "decay_rate": self.decay_rate, 30 | "name": self.name} 31 | -------------------------------------------------------------------------------- /core/samplers/random_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class RandomSampler(Sampler): 8 | def __init__(self, num_proposals,
pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs): 9 | super(RandomSampler, self).__init__(num_proposals, pos_fraction, neg_pos_ub, add_gt_as_proposals) 10 | 11 | def _random_choice(self, indices, num): 12 | return tf.random.shuffle(indices)[:num] 13 | 14 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 15 | pos_inds = tf.squeeze(tf.where(assigned_labels >= 1), 1) 16 | 17 | if tf.size(pos_inds) <= num_expected_proposals: 18 | return pos_inds 19 | 20 | return self._random_choice(pos_inds, num_expected_proposals) 21 | 22 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 23 | neg_inds = tf.squeeze(tf.where(assigned_labels == 0), 1) 24 | if tf.size(neg_inds) <= num_expected_proposals: 25 | return neg_inds 26 | 27 | return self._random_choice(neg_inds, num_expected_proposals) 28 | -------------------------------------------------------------------------------- /core/builder.py: -------------------------------------------------------------------------------- 1 | from utils.register import Register 2 | 3 | 4 | ASSIGNERS = Register(name="assigners") 5 | 6 | SAMPLERS = Register(name="samplers") 7 | 8 | LOSSES = Register(name="losses") 9 | 10 | OPTIMIZERS = Register(name="optimizers") 11 | 12 | LR_SCHEDULERS = Register(name="lr_schedulers") 13 | 14 | METRICS = Register(name="metrics") 15 | 16 | ANCHOR_GENERATORS = Register(name="anchor_generator") 17 | 18 | NMS = Register(name="nms") 19 | 20 | 21 | def build_assigner(assigner, **kwargs): 22 | return ASSIGNERS[assigner](**kwargs) 23 | 24 | 25 | def build_sampler(sampler, **kwargs): 26 | return SAMPLERS[sampler](**kwargs) 27 | 28 | 29 | def build_loss(loss, **kwargs): 30 | return LOSSES[loss](**kwargs) 31 | 32 | 33 | def build_learning_rate_scheduler(scheduler, **kwargs): 34 | return LR_SCHEDULERS[scheduler](**kwargs) 35 | 36 | 37 | def build_metric(metric, **kwargs): 38 | return METRICS[metric](**kwargs) 39 | 40 | 41 | def build_optimizer(optimizer, **kwargs): 42 | return OPTIMIZERS[optimizer](**kwargs) 43 | 44 | 45 | def build_nms(nms, **kwargs): 46 | return NMS[nms](**kwargs) 47 | 48 | 49 | def build_anchor_generator(generator, **kwargs): 50 | return ANCHOR_GENERATORS[generator](**kwargs) 51 | -------------------------------------------------------------------------------- /models/heads/roi_heads/base_roi_head.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..head import BaseHead 3 | 4 | 5 | class BaseRoIHead(BaseHead): 6 | def __init__(self, cfg, test_cfg, num_classes=80, is_training=True, **kwargs): 7 | super(BaseRoIHead, self).__init__(cfg=cfg, test_cfg=test_cfg, num_classes=num_classes, is_training=is_training, **kwargs) 8 | 9 | if cfg.get("bbox_head"): 10 | self.pooled_size = cfg.bbox_head.roi_pooling.pooled_size 11 | self._make_bbox_head(cfg.bbox_head) 12 | 13 | if cfg.get("mask_head"): 14 | self._make_mask_head(cfg.mask_head) 15 | 16 | @property 17 | def min_level(self): 18 | return self.cfg.get("min_level") 19 | 20 | @property 21 | def max_level(self): 22 | return self.cfg.get("max_level") 23 | 24 | def _make_bbox_head(self, bbox_head_cfg): 25 | raise NotImplementedError() 26 | 27 | def _make_mask_head(self, mask_head_cfg): 28 | raise NotImplementedError() 29 | 30 | @property 31 | def has_bbox_head(self): 32 | return hasattr(self, "bbox_head") 33 | 34 | @property 35 | def has_mask_head(self): 36 | return hasattr(self, "mask_head") 37 | 38 | 
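39 | # A concrete RoI head (e.g. StandardRoIHead in models/heads/roi_heads) overrides the 40 | # two factory methods above. A minimal sketch, with hypothetical head constructors 41 | # (the real wiring lives in the subclass, not here): 42 | # 43 | #   class StandardRoIHead(BaseRoIHead): 44 | #       def _make_bbox_head(self, bbox_head_cfg): 45 | #           self.bbox_head = BBoxHead(bbox_head_cfg)    # assumed constructor 46 | # 47 | #       def _make_mask_head(self, mask_head_cfg): 48 | #           self.mask_head = MaskHead(mask_head_cfg)    # assumed constructor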
-------------------------------------------------------------------------------- /utils/register.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | 4 | class Register: 5 | """Module register: maps a string key to a callable. 6 | 7 | Usage (illustrative): 8 | MODELS = Register("models") 9 | 10 | @MODELS.register # registered under its own name, "ResNet" 11 | class ResNet: ... 12 | 13 | @MODELS.register("r50") # registered under an alias 14 | class ResNet50: ... 15 | """ 16 | 17 | def __init__(self, name): 18 | self._dict = {} 19 | self._name = name 20 | 21 | def __setitem__(self, key, value): 22 | if not callable(value): 23 | raise TypeError("Value of a Registry must be a callable.") 24 | if key is None: 25 | key = value.__name__ 26 | if key in self._dict: 27 | logging.warning("Key %s already in registry %s." % (key, self._name)) 28 | 29 | self._dict[key] = value 30 | 31 | def register(self, param): 32 | """Decorator to register a function or class.""" 33 | 34 | def decorator(key, value): 35 | self[key] = value 36 | return value 37 | 38 | if callable(param): 39 | # @reg.register 40 | return decorator(None, param) 41 | # @reg.register('alias') 42 | return lambda x: decorator(param, x) 43 | 44 | def __getitem__(self, key): 45 | try: 46 | return self._dict[key] 47 | except Exception as e: 48 | logging.error(f"module {key} not found: {e}") 49 | raise e 50 | 51 | def __contains__(self, key): 52 | return key in self._dict 53 | 54 | def keys(self): 55 | """Return all registered keys.""" 56 | return self._dict.keys() 57 | 58 | -------------------------------------------------------------------------------- /core/samplers/pseudo_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class PseudoSampler(Sampler): 8 | def __init__(self, **kwargs): 9 | # Intentionally skips Sampler.__init__: a pseudo sampler keeps every 10 | # proposal, so it needs none of the sampling parameters. 11 | pass 12 | 13 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 14 | raise NotImplementedError 15 | 16 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 17 | raise NotImplementedError 18 | 19 | def sample(self, assigned_boxes, assigned_labels, **kwargs): 20 | """Sample positive and negative boxes. 21 | 22 | Args: 23 | assigned_boxes (Tensor): The assigned boxes in assigner. 24 | assigned_labels (Tensor): The assigned labels in assigner.
25 | 26 | Returns: 27 | A tuple: (target_boxes, target_labels, box_weights, label_weights). 28 | """ 29 | pos_mask = assigned_labels >= 1 30 | box_weights = tf.cast(pos_mask, tf.float32) 31 | 32 | valid_mask = assigned_labels >= 0 33 | target_labels = tf.where(valid_mask, tf.cast(assigned_labels, tf.int64), tf.zeros_like(assigned_labels, tf.int64)) 34 | label_weights = tf.cast(valid_mask, tf.float32) 35 | 36 | return assigned_boxes, target_labels, box_weights, label_weights 37 | -------------------------------------------------------------------------------- /core/samplers/ohem_sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .sampler import Sampler 3 | from ..builder import SAMPLERS 4 | 5 | 6 | @SAMPLERS.register 7 | class OHEMSampler(Sampler): 8 | def __init__(self, num_proposals, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=True, **kwargs): 9 | super(OHEMSampler, self).__init__(num_proposals, pos_fraction, neg_pos_ub, add_gt_as_proposals) 10 | 11 | def _hard_mining(self, losses, indices, num): 12 | valid_losses = tf.gather(losses, indices) 13 | 14 | _, top_k_inds = tf.nn.top_k(valid_losses, k=num) 15 | # top_k_inds index into valid_losses, so map them back to the original proposal indices. 16 | return tf.stop_gradient(tf.gather(indices, top_k_inds)) 17 | 18 | def _sample_positive(self, assigned_labels, losses, num_expected_proposals, **kwargs): 19 | pos_inds = tf.where(assigned_labels >= 1) 20 | pos_inds = tf.squeeze(pos_inds, 1) 21 | if tf.size(pos_inds) <= num_expected_proposals: 22 | return pos_inds 23 | 24 | return self._hard_mining(losses, pos_inds, num_expected_proposals) 25 | 26 | def _sample_negative(self, assigned_labels, losses, num_expected_proposals, **kwargs): 27 | neg_inds = tf.where(assigned_labels == 0) 28 | neg_inds = tf.squeeze(neg_inds, 1) 29 | if tf.size(neg_inds) <= num_expected_proposals: 30 | return neg_inds 31 | 32 | return self._hard_mining(losses, neg_inds, num_expected_proposals) 33 | -------------------------------------------------------------------------------- /create_coco_dataset.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import argparse 3 | from data.datasets.coco_dataset import COCODataset 4 | 5 | 6 | parser = argparse.ArgumentParser() 7 | parser.add_argument("--dataset_dir", default=None, type=str, 8 | help="""The directory containing images and annotation files, e.g.: 9 | └─COCO 10 | ├─train2017 (training images) 11 | ├─val2017 (val images) 12 | └─annotations""") 13 | parser.add_argument("--phase", default="train", type=str, 14 | help="The phase of dataset, e.g. for `train2017` the value should be `train`," 15 | " and for `val2017` it should be `val`.") 16 | parser.add_argument("--version", default=2017, type=int, 17 | help="The version of dataset, e.g. for `train2017`, the value should be `2017`," 18 | " for `val2017`, the value should be `2017`.") 19 | parser.add_argument("--max_images_per_tfrecord", default=20000, type=int, 20 | help="The maximum images per tfrecord.") 21 | 22 | args = parser.parse_args() 23 | 24 | assert args.dataset_dir is not None, "Must provide dataset directory."
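25 | 26 | # Example invocation (hypothetical paths; adjust to your own layout): 27 | #   python create_coco_dataset.py --dataset_dir /data/COCO --phase train --version 2017 28 | # This assumes /data/COCO/train2017 and /data/COCO/annotations exist.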
29 | 30 | coco = COCODataset(args.dataset_dir, training=True) 31 | coco.create_tf_record(phase=args.phase, version=args.version, max_imgs_per_tfrecord=args.max_images_per_tfrecord) 32 | -------------------------------------------------------------------------------- /core/layers/max_in_out.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | class MaxInOut(tf.keras.layers.Layer): 5 | def __init__(self, num_negative, num_positive, axis=-1, **kwargs): 6 | super(MaxInOut, self).__init__(**kwargs) 7 | self.num_pos = num_positive 8 | self.num_neg = num_negative 9 | self.axis = axis 10 | 11 | self._max_in = num_negative > 1 12 | 13 | def build(self, input_shape): 14 | super(MaxInOut, self).build(input_shape) 15 | 16 | def call(self, inputs): 17 | neg, pos = tf.split(inputs, [self.num_neg, self.num_pos], self.axis) 18 | if self._max_in: 19 | neg = tf.reduce_max(neg, axis=self.axis, keepdims=True) 20 | else: 21 | pos = tf.reduce_max(pos, axis=self.axis, keepdims=True) 22 | 23 | outputs = tf.concat([neg, pos], axis=self.axis) 24 | 25 | return outputs 26 | 27 | def compute_output_shape(self, input_shape): 28 | if self.axis == -1 or self.axis == 3: 29 | return tf.TensorShape([input_shape[0], input_shape[1], input_shape[2], 2]) 30 | else: 31 | return tf.TensorShape([input_shape[0], 2, input_shape[2], input_shape[3]]) 32 | 33 | def get_config(self): 34 | config = { 35 | 'num_positive': self.num_pos, 36 | "num_negative": self.num_neg, 37 | "axis": self.axis 38 | } 39 | 40 | base_config = super(MaxInOut, self).get_config() 41 | 42 | return dict(list(base_config.items()) + list(config.items())) 43 | -------------------------------------------------------------------------------- /core/losses/l1_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import LOSSES 3 | 4 | 5 | # Implemented via tf.keras.losses.Huber; with delta=1.0 this matches the standard smooth L1 loss. 6 | @LOSSES.register 7 | class SmoothL1Loss(tf.keras.losses.Huber): 8 | def __init__(self, delta=1.0, weight=1., reduction=tf.keras.losses.Reduction.NONE): 9 | super(SmoothL1Loss, self).__init__(delta=delta, reduction=reduction) 10 | 11 | self.weight = weight 12 | self.delta = delta 13 | 14 | def call(self, y_true, y_pred): 15 | loss = super(SmoothL1Loss, self).call(y_true, y_pred) 16 | 17 | return loss * self.weight 18 | 19 | 20 | @LOSSES.register 21 | class RegL1Loss(tf.keras.losses.Loss): 22 | def __init__(self, weight=1., reduction=tf.keras.losses.Reduction.NONE): 23 | super(RegL1Loss, self).__init__(reduction=reduction) 24 | 25 | self.weight = weight 26 | 27 | def call(self, y_true, y_pred): 28 | loss = tf.math.abs(y_true - y_pred) * self.weight 29 | 30 | return loss 31 | -------------------------------------------------------------------------------- /core/layers/nearest_upsamling.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | class NearestUpsampling2D(tf.keras.layers.Layer):
5 |     """Nearest neighbor upsampling implementation.
6 | 
7 |     Args:
8 |         scale: An integer multiple to scale resolution of input data.
9 |     """
10 |     def __init__(self, scale, **kwargs):
11 |         super(NearestUpsampling2D, self).__init__(**kwargs)
12 |         if "data_format" in kwargs:
13 |             data_format = kwargs.pop("data_format")
14 |             assert data_format in {"channels_first", "channels_last"}
15 |             self.data_format = data_format
16 | 
17 |         self.scale = scale
18 | 
19 |     def build(self, input_shape):
20 |         super(NearestUpsampling2D, self).build(input_shape)
21 | 
22 |     def call(self, inputs, **kwargs):
23 |         # Instead of broadcasting with a 6-d tensor, we're using stacking here
24 |         # for TFLite compatibility.
25 |         bs, h, w, c = tf.shape(inputs)[0], tf.shape(inputs)[1], tf.shape(inputs)[2], tf.shape(inputs)[3]
26 |         # bs, h, w, c = inputs.get_shape().as_list()
27 |         # bs = -1 if bs is None else bs
28 |         # outputs = tf.stack([inputs] * self.scale, axis=3)
29 |         # outputs = tf.stack([outputs] * self.scale, axis=2)
30 |         scale = self.scale
31 |         data = tf.reshape(inputs, [bs, h, 1, w, 1, c]) * tf.ones([1, 1, scale, 1, scale, 1], dtype=inputs.dtype)
32 |         return tf.reshape(data, [bs, h * scale, w * scale, c])
33 | 
34 |     def compute_output_shape(self, input_shape):
35 |         batch_size, h, w, c = input_shape[0], input_shape[1], input_shape[2], input_shape[3]
36 |         return tf.TensorShape([batch_size, h * self.scale, w * self.scale, c])
37 | 
38 | 
--------------------------------------------------------------------------------
/core/bbox/bbox_transform.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from utils import box_utils
3 | 
4 | 
5 | class Box2Delta(object):
6 |     def __init__(self, weights=None):
7 |         self.weights = weights
8 | 
9 |     def __call__(self, proposals, boxes):
10 |         return box_utils.encode_boxes(boxes, proposals, self.weights)
11 | 
12 | 
13 | class Delta2Box(object):
14 |     def __init__(self, weights=None):
15 |         self.weights = weights
16 | 
17 |     def __call__(self, proposals, delta):
18 |         return box_utils.decode_boxes(delta, proposals, self.weights)
19 | 
20 | 
21 | class Distance2Box(object):
22 |     def __call__(self, distances, grid_y, grid_x):
23 |         with tf.name_scope("distance2box"):
24 |             grid_y = tf.cast(tf.expand_dims(grid_y, 0), distances.dtype)
25 |             grid_x = tf.cast(tf.expand_dims(grid_x, 0), distances.dtype)
26 | 
27 |             boxes = tf.stack([grid_y - distances[..., 0],
28 |                               grid_x - distances[..., 1],
29 |                               grid_y + distances[..., 2],
30 |                               grid_x + distances[..., 3]], axis=-1)
31 | 
32 |             return boxes
33 | 
34 | 
35 | class Box2Distance(object):
36 |     def __call__(self, boxes, grid_y, grid_x):
37 |         with tf.name_scope("box2distance"):
38 |             grid_y = tf.cast(tf.expand_dims(grid_y, 0), boxes.dtype)
39 |             grid_x = tf.cast(tf.expand_dims(grid_x, 0), boxes.dtype)
40 | 
41 |             dist = tf.stack([grid_y - boxes[..., 0],
42 |                              grid_x - boxes[..., 1],
43 |                              boxes[..., 2] - grid_y,
44 |                              boxes[..., 3] - grid_x], axis=-1)
45 | 
46 |             return dist
47 | 
--------------------------------------------------------------------------------
/core/layers/proposal_layer.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | def _single_level_rois_select(boxes, scores, topk, max_nms_outputs, nms_threshold):
5 |     boxes = tf.cast(boxes, tf.float32)
6 |     scores = tf.cast(scores, tf.float32)
7 |     scores = tf.squeeze(scores, -1)
8 | 
9 |     topk_scores, topk_indices = tf.nn.top_k(scores, k=topk)
10 |     topk_indices = tf.stack(
11 |         [tf.tile(tf.range(tf.shape(boxes)[0])[:, None], [1, tf.shape(topk_scores)[1]]), topk_indices], -1)
12 |     topk_boxes = tf.gather_nd(boxes, topk_indices)
13 | 
14 |     nmsed_boxes, nmsed_scores, _, _ = tf.image.combined_non_max_suppression(
15 |         tf.expand_dims(topk_boxes, -2),
16 |         tf.expand_dims(topk_scores, -1),
17 |         max_nms_outputs,
18 |         max_nms_outputs,
19 |         nms_threshold)
20 | 
21 |     return nmsed_boxes, nmsed_scores
22 | 
23 | 
24 | class ProposalLayer(tf.keras.layers.Layer):
25 |     def __init__(self, pre_nms_size=12000, post_nms_size=2000, max_total_size=2000, iou_threshold=0.7, min_size=0, **kwargs):
26 |         super(ProposalLayer, self).__init__(**kwargs)
27 | 
28 |         self.min_size = min_size
29 |         self.nms_pre = pre_nms_size
30 |         self.nms_post = post_nms_size
31 |         self.iou_threshold = iou_threshold
32 |         self.max_total_size = max_total_size
33 | 
34 |     def call(self, boxes, scores):
35 |         selected_boxes, selected_scores = _single_level_rois_select(
36 |             boxes, scores, self.nms_pre, self.max_total_size, self.iou_threshold)
37 | 
38 |         return selected_boxes[:, :self.nms_post], selected_scores[:, :self.nms_post]
39 | 
40 |     def compute_output_shape(self, input_shape):
41 |         return tf.TensorShape([input_shape[0], self.nms_post, 4])
42 | 
--------------------------------------------------------------------------------
/configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .base_config import Config
2 | from .yolov5_config import get_yolov5_config
3 | from .atss_config import get_atss_config
4 | from .fcos_config import get_fcos_config
5 | from .faster_rcnn_config import get_faster_rcnn_config
6 | from .efficientdet_config import get_efficientdet_config
7 | from .gfl_config import get_gfl_config
8 | from .centernet_config import get_centernet_config
9 | from .retinanet_config import get_retinanet_config
10 | from .onenet_config import get_onenet_config
11 | 
12 | CONFIG_DICT = {
13 |     "EfficientDetD0": lambda x: get_efficientdet_config("EfficientDetD0", x),
14 |     "EfficientDetD1": lambda x: get_efficientdet_config("EfficientDetD1", x),
15 |     "EfficientDetD2": lambda x: get_efficientdet_config("EfficientDetD2", x),
16 |     "EfficientDetD3": lambda x: get_efficientdet_config("EfficientDetD3", x),
17 |     "EfficientDetD4": lambda x: get_efficientdet_config("EfficientDetD4", x),
18 |     "EfficientDetD5": lambda x: get_efficientdet_config("EfficientDetD5", x),
19 |     "EfficientDetD6": lambda x: get_efficientdet_config("EfficientDetD6", x),
20 |     "EfficientDetD7": lambda x: get_efficientdet_config("EfficientDetD7", x),
21 |     "FasterRCNN": lambda x: get_faster_rcnn_config(x),
22 |     "FCOS": lambda x: get_fcos_config(x),
23 |     "ATSS": lambda x: get_atss_config(x),
24 |     "GFL": lambda x: get_gfl_config(x),
25 |     "YOLOv5s": lambda x: get_yolov5_config(x, .33, .50, "yolov5s"),
26 |     "YOLOv5m": lambda x: get_yolov5_config(x, .67, .75, "yolov5m"),
27 |     "YOLOv5l": lambda x: get_yolov5_config(x, 1., 1., "yolov5l"),
28 |     "YOLOv5x": lambda x: get_yolov5_config(x, 1.22, 1.25, "yolov5x"),
29 |     "CenterNet": lambda x: get_centernet_config(x),
30 |     "RetinaNet": lambda x: get_retinanet_config(x),
31 |     "OneNet": lambda x: get_onenet_config(x),
32 | }
33 | 
34 | 
35 | def build_configs(name):
36 |     return CONFIG_DICT[name]
37 | 
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
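# A lookup sketch for the backbone registry defined below (assumes the project
# root is on PYTHONPATH):
#
#   from models.backbones import ResNet50  # any name listed in __all__ works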
-------------------------------------------------------------------------------- 1 | from .backbone import Backbone 2 | 3 | from .vgg import VGG16, VGG19 4 | from .densenet import DenseNet121, DenseNet169, DenseNet201 5 | from .resnet import ResNet50, ResNet101, ResNet152, CaffeResNet50, CaffeResNet101, CaffeResNet152 6 | from .resnet_v2 import ResNet50V2, ResNet101V2, ResNet152V2 7 | from .efficientnet import ( 8 | EfficientNetB0, 9 | EfficientNetB1, 10 | EfficientNetB2, 11 | EfficientNetB3, 12 | EfficientNetB4, 13 | EfficientNetB5, 14 | EfficientNetB6, 15 | EfficientNetB7 16 | ) 17 | from .resnext import ResNeXt50_32X4D, ResNeXt101_32X4D, ResNeXt101_64X4D, ResNeXt101B_64X4D 18 | from .dla import DLA34, DLA46C, DLA46XC, DLA60, DLA60C, DLA60X, DLA60XC, DLA102, DLA102X, DLA102X2, DLA169 19 | from .resnet_v1b import ( 20 | ResNet50V1D, ResNet101V1D, ResNet152V1D, 21 | ResNet50V1E, ResNet101V1E, ResNet152V1E 22 | ) 23 | from .hourglass import HourglassNet 24 | 25 | 26 | __all__ = [ 27 | "VGG16", 28 | "VGG19", 29 | "HourglassNet", 30 | "ResNet50", 31 | "ResNet101", 32 | "ResNet152", 33 | "CaffeResNet50", 34 | "CaffeResNet101", 35 | "CaffeResNet152", 36 | "ResNet50V2", 37 | "ResNet101V2", 38 | "ResNet152V2", 39 | "DenseNet121", 40 | "DenseNet169", 41 | "DenseNet201", 42 | "EfficientNetB0", 43 | "EfficientNetB1", 44 | "EfficientNetB2", 45 | "EfficientNetB3", 46 | "EfficientNetB4", 47 | "EfficientNetB5", 48 | "EfficientNetB6", 49 | "EfficientNetB7", 50 | "DLA34", 51 | "DLA46C", 52 | "DLA46XC", 53 | "DLA60", 54 | "DLA60C", 55 | "DLA60X", 56 | "DLA60XC", 57 | "DLA102", 58 | "DLA102X", 59 | "DLA102X2", 60 | "DLA169", 61 | "ResNet50V1D", 62 | "ResNet101V1D", 63 | "ResNet152V1D", 64 | "ResNet50V1E", 65 | "ResNet101V1E", 66 | "ResNet152V1E", 67 | "ResNeXt50_32X4D", 68 | "ResNeXt101_32X4D", 69 | "ResNeXt101_64X4D", 70 | "ResNeXt101B_64X4D" 71 | ] 72 | 73 | -------------------------------------------------------------------------------- /core/losses/cross_entropy.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from ..builder import LOSSES 3 | 4 | 5 | @LOSSES.register 6 | class BinaryCrossEntropy(tf.keras.losses.Loss): 7 | def __init__(self, 8 | from_logits=True, 9 | label_smoothing=0.0, 10 | weight=1., 11 | reduction=tf.keras.losses.Reduction.NONE, 12 | name="BinaryCrossEntropy"): 13 | super(BinaryCrossEntropy, self).__init__(reduction=reduction, name=name) 14 | 15 | assert from_logits 16 | self.weight = weight 17 | self.from_logits = from_logits 18 | self.label_smoothing = label_smoothing 19 | 20 | def call(self, y_true, y_pred): 21 | smooth_y_true = tf.cond( 22 | tf.greater(self.label_smoothing, 0.), 23 | lambda: (y_true * (1. - self.label_smoothing) + 24 | self.label_smoothing / (tf.cast(tf.shape(y_true)[-1], y_true.dtype) - 1.)), 25 | lambda: y_true) 26 | 27 | return tf.nn.sigmoid_cross_entropy_with_logits(labels=smooth_y_true, logits=y_pred) * self.weight 28 | 29 | 30 | @LOSSES.register 31 | class CrossEntropy(tf.keras.losses.Loss): 32 | def __init__(self, 33 | from_logits=True, 34 | label_smoothing=0.01, 35 | weight=1., 36 | reduction=tf.keras.losses.Reduction.NONE, 37 | name="CrossEntropy"): 38 | super(CrossEntropy, self).__init__(reduction=reduction, name=name) 39 | 40 | self.weight = weight 41 | self.from_logits = from_logits 42 | self.label_smoothing = label_smoothing 43 | 44 | def call(self, y_true, y_pred): 45 | smooth_y_true = tf.cond( 46 | tf.greater(self.label_smoothing, 0.), 47 | lambda: ((y_true * (1. 
- self.label_smoothing) +
48 |                      self.label_smoothing / (tf.cast(tf.shape(y_true)[-1], y_true.dtype) - 1.))),
49 |             lambda: y_true)
50 | 
51 |         return tf.nn.softmax_cross_entropy_with_logits(labels=smooth_y_true, logits=y_pred) * self.weight
52 | 
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | import argparse
4 | import tensorflow as tf
5 | from configs import Config
6 | from configs import build_configs
7 | from trainers import MultiGPUTrainer
8 | from trainers import SingleGPUTrainer
9 | 
10 | 
11 | def main():
12 |     parser = argparse.ArgumentParser()
13 |     parser.add_argument("--detector",
14 |                         type=str,
15 |                         default="CenterNet",
16 |                         help="The detector name, e.g. `efficientdet`, `efficient_fcos`.")
17 |     parser.add_argument("--gpus",
18 |                         type=str,
19 |                         default="0,1,2,3",
20 |                         help="The GPU ids to use, e.g. `0,1,2,3`; more than one id enables multi-GPU training.")
21 |     parser.add_argument("--cfg",
22 |                         type=str,
23 |                         default=None,
24 |                         help="The config file (yaml); if None, the default config is used.")
25 |     parser.add_argument("--num_classes",
26 |                         type=int,
27 |                         default=80,
28 |                         help="The number of classes, default 80 (COCO).")
29 | 
30 |     args = parser.parse_args()
31 | 
32 |     os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
33 | 
34 |     tf.random.set_seed(2333)
35 |     # tf.config.optimizer.set_jit(True)
36 | 
37 |     logger = logging.getLogger()
38 |     logger.setLevel(logging.INFO)
39 |     logging.basicConfig(format="%(asctime)s %(levelname)s - %(message)s",
40 |                         datefmt="%Y-%m-%d %H:%M:%S")
41 | 
42 |     physical_devices = tf.config.experimental.list_physical_devices("GPU")
43 |     for device in physical_devices:
44 |         tf.config.experimental.set_memory_growth(device, True)
45 | 
46 |     if args.cfg is None:
47 |         cfg = build_configs(args.detector)(args.num_classes)
48 |     else:
49 |         cfg = Config()
50 |         cfg.parse_from_yaml(args.cfg)
51 | 
52 |     num_gpus = len(args.gpus.strip().split(","))
53 |     if num_gpus > 1:
54 |         trainer = MultiGPUTrainer(cfg=cfg, logger=logger)
55 |     else:
56 |         trainer = SingleGPUTrainer(cfg=cfg, logger=logger)
57 | 
58 |     trainer.run()
59 | 
60 | 
61 | if __name__ == '__main__':
62 |     main()
63 | 
--------------------------------------------------------------------------------
/core/assigners/assigner.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | 
3 | 
4 | class Assigner(object):
5 |     def __init__(self, dtype=tf.float32):
6 | 
7 |         self.dtype = dtype
8 | 
9 |     @property
10 |     def _param_dtype(self):
11 |         if self.dtype == tf.float16 or self.dtype == tf.bfloat16:
12 |             return tf.float32
13 | 
14 |         return self.dtype or tf.float32
15 | 
16 |     def assign(self, gt_boxes, gt_labels, proposals):
17 |         """Assign gt to boxes.
18 | 
19 |         This method assigns a gt box to every box (proposal/anchor); each box
20 |         will be assigned with -1, 0 or a positive number. -1 means don't care,
21 |         0 means negative sample, and a positive number is the index (1-based) of
22 |         the assigned gt.
23 | 
24 |         The assignment is done in the following steps (the order matters):
25 |             1. initialize target boxes and labels.
26 |             2. assign proposals whose iou with all gts < neg_iou_thresh to 0.
27 |             3. for each box, if the iou with its nearest gt >= pos_iou_thresh,
28 |                assign it to that gt.
29 |             4. for each gt box, assign its best proposals (may be more than
30 |                one) to itself.
31 | 
32 |         Args:
33 |             proposals (Tensor): Bounding boxes to be assigned, shape (n, 4).
34 | gt_boxes (Tensor): Ground-truth boxes, shape (k, 4). 35 | gt_labels (Tensor): Ground-truth labels, shape (k, ). 36 | 37 | Returns: 38 | target_boxes (Tensor), target_labels (Tensor). 39 | """ 40 | raise NotImplementedError() 41 | 42 | def assign_wrt_overlaps(self, overlaps, gt_boxes, gt_labels): 43 | """Assign w.r.t. the overlaps of boxes with gts. 44 | 45 | Args: 46 | overlaps (Tensor): Overlaps between k gt_boxes and n proposals, 47 | shape (k, n). 48 | gt_boxes (Tensor): Ground-truth boxes, shape (k, 4). 49 | gt_labels (Tensor): Ground-truth labels, shape (k, ). 50 | 51 | Returns: 52 | target_boxes (Tensor), target_labels (Tensor). 53 | """ 54 | raise NotImplementedError() 55 | 56 | def __call__(self, gt_boxes, gt_labels, proposals): 57 | with tf.name_scope("max_iou_assigner"): 58 | return self.assign(gt_boxes, gt_labels, proposals) -------------------------------------------------------------------------------- /models/heads/dense_heads/retinanet_head.py: -------------------------------------------------------------------------------- 1 | import math 2 | import tensorflow as tf 3 | from ...builder import HEADS 4 | from .anchor_head import AnchorHead 5 | from core.layers import build_activation 6 | from core.layers import build_convolution 7 | from core.layers import build_normalization 8 | 9 | 10 | @HEADS.register 11 | class RetinaNetHead(AnchorHead): 12 | def __init__(self, **kwargs): 13 | super(RetinaNetHead, self).__init__(**kwargs) 14 | 15 | self._make_shared_convs() 16 | self._make_init_layers() 17 | self._init_anchor_generators() 18 | 19 | def _make_init_layers(self): 20 | self.classifier = tf.keras.layers.Conv2D( 21 | filters=self.num_anchors * self.num_classes, 22 | kernel_size=(3, 3), 23 | strides=(1, 1), 24 | padding="same", 25 | kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), 26 | bias_initializer=tf.keras.initializers.Constant(-math.log((1. - self.cfg.prior) / self.cfg.prior)), 27 | name="predicted_class") 28 | 29 | self.regressor = tf.keras.layers.Conv2D( 30 | filters=self.num_anchors * 4, 31 | kernel_size=(3, 3), 32 | strides=(1, 1), 33 | padding="same", 34 | kernel_initializer=tf.keras.initializers.RandomNormal(stddev=0.01), 35 | name="predicted_box") 36 | 37 | def call(self, inputs, training=None): 38 | predicted_boxes = dict() 39 | predicted_labels = dict() 40 | total_anchors = dict() 41 | for i, level in enumerate(range(self.min_level, self.max_level + 1)): 42 | box_feat = self.box_shared_convs(inputs[i], training=training) 43 | label_feat = self.class_shared_convs(inputs[i], training=training) 44 | 45 | pred_boxes = self.regressor(box_feat) 46 | pred_labels = self.classifier(label_feat) 47 | 48 | h, w = tf.shape(box_feat)[1], tf.shape(box_feat)[2] 49 | anchors = self.anchor_generators[i](h, w) 50 | 51 | predicted_boxes["level%d" % level] = pred_boxes 52 | predicted_labels["level%d" % level] = pred_labels 53 | total_anchors["level%d" % level] = anchors 54 | 55 | outputs = dict(boxes=predicted_boxes, 56 | labels=predicted_labels, 57 | total_anchors=total_anchors) 58 | 59 | if self.is_training: 60 | return outputs 61 | 62 | return self.get_boxes(outputs) 63 | -------------------------------------------------------------------------------- /models/detectors/one_stage.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from . 
import Detector 4 | from ..builder import DETECTORS 5 | from ..builder import build_head 6 | from ..builder import build_neck 7 | from ..builder import build_backbone 8 | 9 | 10 | class OneStageDetector(Detector): 11 | def __init__(self, cfg, training=True): 12 | super(OneStageDetector, self).__init__(cfg, training=training) 13 | 14 | self.data_format = cfg.data_format 15 | 16 | inputs = tf.keras.Input(shape=(None, None, 3)) 17 | self.backbone = build_backbone(input_tensor=inputs, **cfg.backbone.as_dict()) 18 | x = self.backbone(inputs) 19 | 20 | if cfg.get("neck"): 21 | if isinstance(x, (list, tuple)): 22 | input_shapes = [i.shape.as_list()[1:] for i in x] 23 | if cfg.neck.get("downsample_ratio"): 24 | first_level = int(np.log2(cfg.neck.downsample_ratio)) ## for centernet 25 | x = x[first_level:] 26 | else: 27 | input_shapes = x.shape.as_list()[1:] 28 | self.neck = build_neck(input_shapes=input_shapes, name="neck", **cfg.neck.as_dict()) 29 | x = self.neck(x) 30 | 31 | if cfg.get("anchors"): 32 | self.head = build_head(cfg.head.head, 33 | cfg=cfg.head, 34 | test_cfg=cfg.test, 35 | anchor_cfg=cfg.anchors, 36 | num_classes=cfg.num_classes, 37 | is_training=training, 38 | name="head") 39 | else: 40 | self.head = build_head(cfg.head.head, 41 | cfg=cfg.head, 42 | test_cfg=cfg.test, 43 | num_classes=cfg.num_classes, 44 | is_training=training, 45 | name="head") 46 | x = self.head(x) 47 | self.detector = tf.keras.Model(inputs=inputs, outputs=x) 48 | 49 | def load_pretrained_weights(self, pretrained_weights_path=None): 50 | if pretrained_weights_path: 51 | self.backbone.load_weights(pretrained_weights_path, by_name=True, skip_mismatch=True) 52 | print("Restored pre-trained weights from %s." % pretrained_weights_path) 53 | 54 | else: 55 | print("Train model from scratch.") 56 | 57 | def compute_losses(self, predictions, image_info): 58 | return self.head.compute_losses(predictions, image_info) 59 | 60 | def save_weights(self, name): 61 | self.detector.save_weights(name) 62 | 63 | @tf.function 64 | def __call__(self, inputs, training): 65 | x = self.detector(inputs, training=training) 66 | return x 67 | 68 | -------------------------------------------------------------------------------- /models/detectors/two_stage.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from . 
import Detector
3 | from utils import box_utils
4 | from ..builder import build_neck
5 | from ..builder import build_head
6 | from core.bbox import build_decoder
7 | from ..builder import build_backbone
8 | from core.layers import ProposalLayer
9 | 
10 | 
11 | class TwoStageDetector(Detector):
12 |     def __init__(self, cfg, training=True, **kwargs):
13 |         super(TwoStageDetector, self).__init__(cfg, training=training, **kwargs)
14 | 
15 |         self.data_format = cfg.data_format
16 | 
17 |         inputs = tf.keras.Input(shape=(None, None, 3))
18 |         self.backbone = build_backbone(input_tensor=inputs, **cfg.backbone.as_dict())
19 |         x = self.backbone(inputs)
20 | 
21 |         if cfg.get("neck"):
22 |             if isinstance(x, (list, tuple)):
23 |                 input_shapes = [i.shape.as_list()[1:] for i in x]
24 |             else:
25 |                 input_shapes = x.shape.as_list()[1:]
26 |             self.neck = build_neck(input_shapes=input_shapes, name="neck", **cfg.neck.as_dict())
27 |             x = self.neck(x)
28 | 
29 |         if cfg.get("anchors"):
30 |             self.rpn_head = build_head(cfg.rpn_head.head,
31 |                                        cfg=cfg.rpn_head,
32 |                                        anchor_cfg=cfg.anchors,
33 |                                        is_training=training,
34 |                                        name="rpn_head")
35 |         else:
36 |             self.rpn_head = build_head(cfg.rpn_head.head,
37 |                                        cfg=cfg.rpn_head,
38 |                                        is_training=training,
39 |                                        name="rpn_head")
40 |         rpn_outputs, proposals = self.rpn_head(x)
41 |         x = build_head(cfg.roi_head.head,
42 |                        cfg=cfg.roi_head,
43 |                        test_cfg=cfg.test,
44 |                        num_classes=cfg.num_classes,
45 |                        is_training=training,
46 |                        name="roi_heads")([x, proposals])
47 | 
48 |         self.detector = tf.keras.Model(inputs=inputs, outputs=[proposals, x])
49 | 
50 |     def load_pretrained_weights(self, pretrained_weights_path=None):
51 |         if pretrained_weights_path:
52 |             self.backbone.load_weights(pretrained_weights_path, by_name=True, skip_mismatch=True)
53 |             print("Restored pre-trained weights from %s." % pretrained_weights_path)
54 | 
55 |         else:
56 |             print("Train model from scratch.")
57 | 
58 |     def compute_losses(self, rpn_outputs, rcnn_outputs, image_info):
59 |         return self.rpn_head.compute_losses(rpn_outputs, image_info)
60 | 
61 |     def save_weights(self, name):
62 |         self.detector.save_weights(name)
63 | 
64 |     @tf.function
65 |     def __call__(self, inputs, training):
66 |         x = self.detector(inputs, training=training)
67 |         return x
68 | 
69 | 
70 | 
71 | 
--------------------------------------------------------------------------------
/data/augmentations/mixup.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow_probability as tfp
3 | from ..builder import AUGMENTATIONS
4 | 
5 | 
6 | @AUGMENTATIONS.register
7 | class Mixup(object):
8 |     def __init__(self, batch_size, alpha, prob=0.5, max_boxes=200):
9 |         self.alpha = alpha
10 |         self.batch_size = batch_size
11 |         self.prob = prob
12 |         self.max_boxes = max_boxes
13 | 
14 |     def _mixup(self, images, boxes, labels):
15 |         """Applies Mixup regularization to a batch of images, boxes and labels.
16 | 
17 |         [1] Hongyi Zhang, Moustapha Cisse, Yann N. Dauphin, David Lopez-Paz
18 |             Mixup: Beyond Empirical Risk Minimization.
19 |             ICLR'18, https://arxiv.org/abs/1710.09412
20 | 
21 |         Args:
22 |             images: A batch of images of shape [batch_size, ...].
23 |             boxes, labels: Batches of padded boxes/labels of shape [batch_size, max_boxes, 4] / [batch_size, max_boxes].
24 | 
25 |         Returns:
26 |             A tuple of (images, boxes, labels) with the same dimensions as the input
27 |             with Mixup regularization applied.
28 |         """
29 |         mix_weight = tfp.distributions.Beta(self.alpha, self.alpha).sample([self.batch_size, 1])
30 |         mix_weight = tf.maximum(mix_weight, 1.
- mix_weight) 31 | images_mix_weight = tf.reshape(mix_weight, [self.batch_size, 1, 1, 1]) 32 | # Mixup on a single batch is implemented by taking a weighted sum with the same batch in reverse. 33 | image_dtype = images.dtype 34 | images = tf.cast(images, mix_weight.dtype) 35 | images_mix = images * images_mix_weight + images[::-1] * (1. - images_mix_weight) 36 | 37 | boxes_mix = tf.concat([boxes, boxes[::-1]], 1) 38 | labels_mix = tf.concat([labels, labels[::-1]], 1) 39 | 40 | def _fn(b, l): 41 | valid = l != 0 42 | l = tf.boolean_mask(l, valid) 43 | b = tf.boolean_mask(b, valid) 44 | num = tf.size(l) 45 | if num < self.max_boxes: 46 | l = tf.concat([l, tf.zeros([self.max_boxes - num], l.dtype)], 0) 47 | b = tf.concat([b, tf.zeros([self.max_boxes - num, 4], b.dtype)], 0) 48 | else: 49 | l = l[:self.max_boxes] 50 | b = b[:self.max_boxes] 51 | 52 | return b, l 53 | 54 | boxes_mix, labels_mix = tf.map_fn( 55 | lambda inp: _fn(*inp), 56 | elems=(boxes_mix, labels_mix), 57 | fn_output_signature=(boxes_mix.dtype, labels_mix.dtype)) 58 | 59 | images_mix = tf.cast(images_mix, image_dtype) 60 | 61 | return images_mix, boxes_mix, labels_mix 62 | 63 | def __call__(self, images, images_info): 64 | with tf.name_scope("mixup"): 65 | images = tf.cast(images, tf.uint8) 66 | images, images_info["boxes"], images_info["labels"] = tf.cond( 67 | tf.random.uniform([]) >= self.prob, 68 | lambda: self._mixup(images, images_info["boxes"], images_info["labels"]), 69 | lambda: (images, images_info["boxes"], images_info["labels"])) 70 | 71 | return images, images_info 72 | -------------------------------------------------------------------------------- /core/samplers/sampler.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from abc import ABCMeta 3 | from abc import abstractmethod 4 | 5 | 6 | class Sampler(metaclass=ABCMeta): 7 | def __init__(self, num_proposals, pos_fraction, neg_pos_ub=-1, add_gt_as_proposals=False, **kwargs): 8 | self.num_proposals = num_proposals 9 | self.pos_fraction = pos_fraction 10 | self.neg_pos_ub = neg_pos_ub 11 | self.add_gt_as_proposals = add_gt_as_proposals 12 | 13 | self.positive_sampler = self 14 | self.negative_sampler = self 15 | 16 | @abstractmethod 17 | def _sample_positive(self, assigned_labels, num_expected_proposals, **kwargs): 18 | pass 19 | 20 | @abstractmethod 21 | def _sample_negative(self, assigned_labels, num_expected_proposals, **kwargs): 22 | pass 23 | 24 | def sample(self, assigned_boxes, assigned_labels, gt_boxes=None, gt_labels=None, **kwargs): 25 | """Sample positive and negative boxes. 26 | 27 | Args: 28 | assigned_boxes (Tensor): The assigned boxes in assigner. 29 | assigned_labels (Tensor): The assigned labels in assigner. 30 | gt_boxes (Tensor): ground truth boxes. 31 | gt_labels (Tensor): ground truth labels. 
32 | 
33 |         Returns:
34 |             A tuple -> (target_boxes, target_labels, box_weights, label_weights).
35 |         """
36 |         if self.add_gt_as_proposals and gt_boxes is not None:
37 |             assigned_boxes = tf.concat([gt_boxes, assigned_boxes], 0)
38 |             assigned_labels = tf.concat([gt_labels, assigned_labels], 0)
39 | 
40 |         num_expected_pos = int(self.num_proposals * self.pos_fraction)
41 |         pos_inds = self.positive_sampler._sample_positive(assigned_labels, num_expected_pos, **kwargs)
42 |         num_sampled_pos = tf.size(pos_inds)
43 |         num_expected_neg = self.num_proposals - num_sampled_pos
44 | 
45 |         if self.neg_pos_ub >= 0:
46 |             _pos = tf.maximum(1, num_expected_pos)
47 |             neg_upper_bound = self.neg_pos_ub * _pos
48 | 
49 |             if num_expected_neg > neg_upper_bound:
50 |                 num_expected_neg = neg_upper_bound
51 | 
52 |         neg_inds = self.negative_sampler._sample_negative(assigned_labels, num_expected_neg, **kwargs)
53 | 
54 |         box_weights = tf.zeros_like(assigned_labels, dtype=tf.float32)
55 |         box_weights = tf.tensor_scatter_nd_update(
56 |             box_weights, pos_inds[:, None], tf.ones_like(pos_inds, box_weights.dtype))
57 |         label_weights = tf.tensor_scatter_nd_update(
58 |             box_weights, neg_inds[:, None], tf.ones_like(neg_inds, box_weights.dtype))
59 | 
60 |         # target_labels = tf.where(label_weights >= 1, assigned_labels, tf.zeros_like(assigned_labels))
61 |         # box_weights = tf.expand_dims(box_weights, -1)
62 | 
63 |         return assigned_boxes, assigned_labels, box_weights, label_weights
64 | 
65 |     def __call__(self, assigned_boxes, assigned_labels, gt_boxes=None, gt_labels=None, **kwargs):
66 |         with tf.name_scope("sample"):
67 |             return self.sample(assigned_boxes, assigned_labels, gt_boxes=gt_boxes, gt_labels=gt_labels, **kwargs)
68 | 
69 | 
--------------------------------------------------------------------------------
/core/assigners/uniform_assigner.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .assigner import Assigner
3 | from ..builder import ASSIGNERS
4 | from utils import box_utils
5 | 
6 | 
7 | @ASSIGNERS.register
8 | class UniformAssigner(Assigner):
9 |     """
10 |     Uniform matching between anchors and gt boxes, which yields a balanced
11 |     number of positive anchors across the gt boxes.
12 | 
13 |     Args:
14 |         match_times(int): Number of positive anchors for each gt box.
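
    Example (mirrors the smoke test at the bottom of this file):
        assigner = UniformAssigner(match_times=8)
        tgt_boxes, tgt_labels = assigner(gt_boxes, gt_labels, anchors, pred_boxes)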
15 |     """
16 |     def __init__(self,
17 |                  pos_ignore_thresh: float = 0.7,
18 |                  neg_ignore_thresh: float = 0.15,
19 |                  match_times: int = 4, **kwargs):
20 |         super(UniformAssigner, self).__init__(**kwargs)
21 | 
22 |         self.match_times = match_times
23 |         self.pos_ignore_thresh = pos_ignore_thresh
24 |         self.neg_ignore_thresh = neg_ignore_thresh
25 | 
26 |     def _cdist(self, x, y):
27 |         with tf.name_scope("cdist"):
28 |             x = tf.expand_dims(x, 0)
29 |             y = tf.expand_dims(y, 1)
30 | 
31 |             dist = tf.sqrt(tf.reduce_sum(tf.square(x - y), -1))
32 | 
33 |             return dist
34 | 
35 |     def assign(self, gt_boxes, gt_labels, anchors, predicted_boxes):
36 |         with tf.name_scope("assign"):
37 |             # Compute the distance cost between boxes (Euclidean distance here).
38 |             # Note that we use the anchors and the predicted boxes both.
39 |             C = self._cdist(predicted_boxes, gt_boxes)
40 |             C1 = self._cdist(anchors, gt_boxes)
41 | 
42 |             _, indices = tf.nn.top_k(C, k=self.match_times)
43 |             _, indices2 = tf.nn.top_k(C1, k=self.match_times)
44 | 
45 |             indices = tf.transpose(indices)
46 |             indices2 = tf.transpose(indices2)
47 |             indices = tf.reshape(indices, [-1, 1])
48 |             indices2 = tf.reshape(indices2, [-1, 1])
49 |             indices = tf.concat([indices, indices2], 0)
50 |             gt_boxes = tf.tile(gt_boxes, [self.match_times * 2, 1])
51 |             gt_labels = tf.tile(gt_labels, [self.match_times * 2])
52 | 
53 |             anchor_ious = box_utils.bbox_overlap(anchors, gt_boxes)
54 |             pos_anchor_ious = tf.gather_nd(anchor_ious, tf.concat([indices, tf.range(tf.shape(indices)[0])[:, None]], -1))
55 |             pos_ignore_mask = pos_anchor_ious < self.pos_ignore_thresh
56 | 
57 |             gt_labels = tf.where(pos_ignore_mask, 0 - tf.ones_like(gt_labels), gt_labels)
58 | 
59 |             tgt_boxes = tf.scatter_nd(indices, gt_boxes, tf.shape(predicted_boxes))
60 |             tgt_labels = tf.scatter_nd(indices, gt_labels, tf.shape(predicted_boxes[:, 0]))
61 | 
62 |             pred_ious = box_utils.bbox_overlap(predicted_boxes, gt_boxes)
63 |             pred_max_ious = tf.reduce_max(pred_ious, 1)
64 | 
65 |             neg_ignore_mask = pred_max_ious > self.neg_ignore_thresh
66 |             tgt_labels = tf.where(neg_ignore_mask, 0 - tf.ones_like(tgt_labels), tgt_labels)
67 | 
68 |             return tgt_boxes, tgt_labels
69 | 
70 |     def __call__(self, gt_boxes, gt_labels, anchors, pred_boxes):
71 |         return self.assign(gt_boxes, gt_labels, anchors, pred_boxes)
72 | 
73 | 
74 | 
75 | if __name__ == "__main__":
76 |     pboxes = tf.random.uniform([100, 4])
77 |     anchors = tf.random.uniform([100, 4])
78 | 
79 |     gt_boxes = tf.random.uniform([2, 4])
80 |     gt_labels = tf.constant([2, 3])
81 | 
82 |     assigner = UniformAssigner(match_times=8)
83 |     assigner.assign(gt_boxes, gt_labels, anchors, pboxes)
--------------------------------------------------------------------------------
/core/assigners/min_cost_assigner.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from .assigner import Assigner
3 | from ..builder import ASSIGNERS
4 | from core.bbox import compute_unaligned_iou
5 | 
6 | 
7 | @ASSIGNERS.register
8 | class MinCostAssigner(Assigner):
9 |     def __init__(self, class_weight=1., l1_weight=1., iou_weight=1., iou_type="giou", alpha=0.25, gamma=2., **kwargs):
10 |         super(MinCostAssigner, self).__init__(**kwargs)
11 | 
12 |         self._class_weights = class_weight
13 |         self._l1_weight = l1_weight
14 |         self._iou_weight = iou_weight
15 |         self._iou_type = iou_type
16 | 
17 |         self._gamma = gamma
18 |         self._alpha = alpha
19 | 
20 |     def assign(self, gt_boxes, gt_labels, pred_boxes, pred_labels):
21 |         with tf.name_scope("assign"):
22 |             valid_mask = gt_labels > 0
23 |             gt_labels = tf.boolean_mask(gt_labels, valid_mask) - 1
24 |             gt_boxes = tf.boolean_mask(gt_boxes, valid_mask)
25 | 
26 |             # Compute the classification cost.
27 |             num_classes = tf.shape(pred_labels)[-1]
28 |             hw = tf.shape(pred_boxes)[:2]
29 |             hwhw = tf.tile(tf.cast(hw, tf.float32), [2])
30 |             pred_boxes = tf.reshape(pred_boxes, [hw[0] * hw[1], 4])
31 |             pred_labels = tf.reshape(pred_labels, [hw[0] * hw[1], num_classes])
32 | 
33 |             pred_probs = tf.nn.sigmoid(pred_labels)
34 | 
35 |             neg_label_cost = (1 - self._alpha) * tf.pow(pred_probs, self._gamma) * (-tf.math.log(1 - pred_probs + 1e-8))
36 |             pos_label_cost = self._alpha * tf.pow(1 - pred_probs, self._gamma) * (-tf.math.log(pred_probs + 1e-8))
37 |             label_cost = tf.gather(pos_label_cost, gt_labels, axis=-1) - tf.gather(neg_label_cost, gt_labels, axis=-1)
38 | 
39 |             # Compute the L1 cost between boxes
40 |             bbox_cost = tf.reduce_sum(tf.abs(tf.expand_dims(pred_boxes, 1) / hwhw - tf.expand_dims(gt_boxes, 0) / hwhw), 2)
41 | 
42 |             # Compute the IoU cost between boxes
43 |             giou_cost = compute_unaligned_iou(gt_boxes, pred_boxes, self._iou_type)
44 | 
45 |             cost = self._class_weights * label_cost + self._l1_weight * bbox_cost + self._iou_weight * giou_cost
46 | 
47 |             inds = tf.argmin(cost, 0)
48 | 
49 |             tgt_boxes = tf.zeros_like(pred_boxes)
50 |             tgt_labels = tf.zeros_like(pred_labels)
51 | 
52 |             tgt_boxes = tf.tensor_scatter_nd_update(tgt_boxes, inds[:, None], gt_boxes)
53 |             tgt_labels = tf.tensor_scatter_nd_update(tgt_labels, inds[:, None], tf.one_hot(gt_labels, num_classes))
54 | 
55 |             return tgt_boxes, tgt_labels
56 | 
57 |     def __call__(self, gt_boxes, gt_labels, pred_boxes, pred_labels):
58 |         return self.assign(gt_boxes, gt_labels, pred_boxes, pred_labels)
59 | 
60 | 
61 | def test():
62 |     import numpy as np
63 | 
64 |     pred_boxes = tf.random.uniform([64, 64, 4], 0, 255)
65 |     pred_labels = tf.random.uniform([64, 64, 80], -5., 5.)
66 | 
67 |     gt_boxes = tf.constant([[32, 120, 120, 256], [200, 201, 434, 472]], tf.float32)
68 |     gt_labels = tf.constant([1, 23], tf.int32)
69 | 
70 |     assigner = MinCostAssigner()
71 |     boxes, labels = assigner(gt_boxes, gt_labels, pred_boxes, pred_labels)
72 |     print(gt_boxes)
73 |     print(tf.gather_nd(boxes, tf.where(boxes > 0)))
74 |     print(tf.where(labels == 1))
75 | 
76 | 
77 | if __name__ == "__main__":
78 |     test()
79 | 
80 | 
--------------------------------------------------------------------------------
/data/datasets/dataset.py:
--------------------------------------------------------------------------------
1 | import math
2 | import tensorflow as tf
3 | from ..augmentations import Mixup
4 | from ..augmentations import Mosaic
5 | from ..builder import build_augmentation
6 | 
7 | 
8 | class Dataset(object):
9 |     def __init__(self,
10 |                  dataset_dir,
11 |                  training=True,
12 |                  batch_size=32,
13 |                  augmentations=[],
14 |                  max_boxes=200,
15 |                  skip_crowd=True,
16 |                  mosaic=None,
17 |                  mixup=None,
18 |                  dtype=tf.float32,
19 |                  **kwargs):
20 |         self.dataset_dir = dataset_dir
21 |         self.training = training
22 |         self.batch_size = batch_size
23 |         self.max_boxes = max_boxes
24 |         self.skip_crowd = skip_crowd
25 |         self.dtype = dtype
26 | 
27 |         if mosaic is not None:
28 |             self.mosaic = Mosaic(max_boxes=max_boxes, **mosaic)
29 |             assert "ResizeV2" in [list(n.keys())[0] for n in augmentations], "When using Mosaic, ResizeV2 should be in the augmentations."
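            # Note: the yamls in this repo spell augmentation entries in two shapes,
            # e.g. `- FlipLeftToRight: {probability: 0.5}` (the gfl_*_mstrain yamls) vs.
            # `- augmentation: FlipLeftToRight` (the gfl_r50_fpn_1x yaml). The Mosaic
            # check above assumes the first, single-key-dict shape.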
30 |         if mixup is not None:
31 |             self.mixup = Mixup(batch_size=batch_size, max_boxes=max_boxes, **mixup)
32 | 
33 |         self.augmentations = [build_augmentation(**kw) for kw in augmentations]
34 | 
35 |     def compose(self, image, image_info):
36 |         for aug in self.augmentations:
37 |             image, image_info = aug(image, image_info)
38 | 
39 |         return image, image_info
40 | 
41 |     def is_valid_jpg(self, jpg_file):
42 |         with open(jpg_file, 'rb') as f:
43 |             f.seek(-2, 2)
44 |             buf = f.read()
45 | 
46 |         return buf == b'\xff\xd9'  # check that the JPEG ends with the EOI marker (0xFFD9)
47 | 
48 |     def _bytes_list_feature(self, value):
49 |         if not isinstance(value, list):
50 |             value = [value]
51 |         return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
52 | 
53 |     def _int64_list_feature(self, value):
54 |         if not isinstance(value, list):
55 |             value = [value]
56 | 
57 |         return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
58 | 
59 |     def _float_list_feature(self, value):
60 |         if not isinstance(value, list):
61 |             value = [value]
62 | 
63 |         return tf.train.Feature(float_list=tf.train.FloatList(value=value))
64 | 
65 |     def create_tfrecord(self, image_dir, image_info_file, output_dir, num_shards):
66 |         raise NotImplementedError()
67 | 
68 |     def parser(self, serialized):
69 |         raise NotImplementedError()
70 | 
71 |     def dataset(self):
72 |         with tf.device("/cpu:0"):
73 |             dataset = tf.data.TFRecordDataset(self.tf_record_sources)
74 |             dataset = dataset.map(map_func=self.parser)
75 | 
76 |             if hasattr(self, "mosaic"):
77 |                 self.batch_size *= 4
78 |             if self.training:
79 |                 dataset = dataset.shuffle(buffer_size=self.batch_size * 10)
80 |             dataset = dataset.batch(batch_size=self.batch_size, drop_remainder=True)
81 | 
82 |             if hasattr(self, "mosaic"):
83 |                 dataset = dataset.map(self.mosaic)
84 | 
85 |             # Mixup must be applied after Mosaic.
86 |             if hasattr(self, "mixup"):
87 |                 dataset = dataset.map(self.mixup)
88 | 
89 |             return dataset.prefetch(tf.data.experimental.AUTOTUNE)
--------------------------------------------------------------------------------
/core/optimizers/accum_optimizer.py:
--------------------------------------------------------------------------------
1 | from tensorflow.python.keras import backend as K
2 | from tensorflow.python.keras.optimizers import Optimizer
3 | from ..builder import OPTIMIZERS
4 | 
5 | 
6 | @OPTIMIZERS.register
7 | class AccumOptimizer(Optimizer):
8 |     """Inherits the Optimizer class, wrapping an existing optimizer to
9 |     implement gradient accumulation.
10 | 
11 |     Gradients are accumulated for `steps_per_update` steps, and the wrapped
12 |     optimizer performs one real update per accumulation cycle.
13 | 
14 |     # Arguments
15 |         optimizer: an instance of a keras optimizer (supporting
16 |             all keras optimizers currently available);
17 |         steps_per_update: the steps of gradient accumulation.
18 | 
19 |     # Returns
20 |         a new keras optimizer.
21 | 
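    Usage (a sketch; this wrapper targets the v1-style keras optimizer API,
    i.e. optimizers that expose `get_updates`):
        opt = AccumOptimizer(SGD(0.01), steps_per_update=4)
        model.compile(optimizer=opt, loss="mse")  # effective batch = 4 * batch_size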
22 |     """
23 |     def __init__(self, optimizer, steps_per_update=1, **kwargs):
24 |         super(AccumOptimizer, self).__init__(**kwargs)
25 | 
26 |         self.optimizer = optimizer
27 |         with K.name_scope(self.__class__.__name__):
28 |             self.steps_per_update = steps_per_update
29 |             self.iterations = K.variable(0, "int64", "iteration")
30 |             self.cond = K.equal(self.iterations % steps_per_update, 0)
31 |             self.lr = self.optimizer.lr
32 | 
33 |             self.accum_grads = None
34 | 
35 |             self.optimizer.lr = K.switch(self.cond, self.lr, 0)
36 |             for attr in ["momentum", "rho", "beta_1", "beta_2"]:
37 |                 if hasattr(self.optimizer, attr):
38 |                     value = getattr(self.optimizer, attr)
39 |                     setattr(self, attr, value)
40 |                     setattr(self.optimizer, attr, 1. - 1e-7)
41 | 
42 |             for cfg in self.optimizer.get_config():
43 |                 if not hasattr(self, cfg):
44 |                     value = getattr(self.optimizer, cfg)
45 |                     setattr(self, cfg, value)
46 | 
47 |             # Override the original get_gradients method with the accumulated gradients.
48 |             def get_gradients(loss, params):
49 |                 return [ag / self.steps_per_update for ag in self.accum_grads]
50 | 
51 |             self.optimizer.get_gradients = get_gradients
52 | 
53 |     def get_updates(self, loss, params):
54 |         self.updates = [
55 |             K.update_add(self.iterations, 1),
56 |             K.update_add(self.optimizer.iterations, K.cast(self.cond, "int64"))
57 |         ]
58 | 
59 |         # accumulate gradients
60 |         self.accum_grads = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
61 |         grads = self.get_gradients(loss, params)
62 |         for g, ag in zip(grads, self.accum_grads):
63 |             self.updates.append(K.update(ag, K.switch(self.cond, ag * 0, ag + g)))
64 | 
65 |         self.updates.extend(self.optimizer.get_updates(loss, params)[1:])
66 |         self.weights.extend(self.optimizer.weights)
67 | 
68 |         return self.updates
69 | 
70 |     def get_config(self):
71 |         iterations = K.eval(self.iterations)
72 |         K.set_value(self.iterations, 0)
73 |         config = self.optimizer.get_config()
74 |         K.set_value(self.iterations, iterations)
75 | 
76 |         return config
77 | 
78 |     @property
79 |     def learning_rate(self):
80 |         return self.optimizer.learning_rate
81 | 
82 |     @learning_rate.setter
83 |     def learning_rate(self, value):
84 |         self.optimizer.learning_rate = value
85 | 
86 | 
--------------------------------------------------------------------------------
/models/necks/feature_fusion_pyramid.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from core.layers import build_activation
3 | from core.layers import build_convolution
4 | from core.layers import build_normalization
5 | from models.builder import NECKS
6 | 
7 | 
8 | @NECKS.register("FeatureFusionPyramid")
9 | def feature_fusion_pyramid(inputs,
10 |                            convolution="conv2d",
11 |                            normalization="batch_norm",
12 |                            activation="relu",
13 |                            output_filters=(),
14 |                            num_outputs=6,
15 |                            group=32,
16 |                            weight_decay=0.,
17 |                            add_extra_conv=False,
18 |                            use_multiplication=False):
19 |     num_inputs = len(inputs)
20 |     output_filters = [output_filters] * num_inputs \
21 |         if isinstance(output_filters, (int, float)) else output_filters
22 |     assert len(output_filters) == num_inputs
23 | 
24 | 
25 |     # build top-down path
26 |     kernel_regularizer = tf.keras.regularizers.l2(weight_decay)
27 |     for i in range(num_inputs - 1, 0, -1):
28 |         top = tf.keras.layers.Conv2DTranspose(filters=output_filters[i-1],
29 |                                               kernel_size=(4, 4),
30 |                                               strides=(2, 2),
31 |                                               padding="same",
32 |                                               kernel_regularizer=kernel_regularizer)(inputs[i])
33 |         if use_multiplication:
34 |             inputs[i-1] = tf.keras.layers.Multiply()([top, inputs[i-1]])
35 |         else:
36 |             inputs[i-1] = tf.keras.layers.Add()([top, inputs[i-1]])
37 |         inputs[i-1] = conv_block(convolution,
38 |                                  filters=256,
39 |                                  kernel_size=(1, 1),
40 |                                  strides=(1, 1),
41 |                                  kernel_regularizer=kernel_regularizer,
42 |                                  normalization=normalization,
43 |                                  group=group,
44 |                                  activation=activation,
45 |                                  name="reduced_conv2d_" + str(i))(inputs[i-1])
46 | 
47 |     inputs[-1] = conv_block(convolution,
48 |                             filters=256,
49 |                             kernel_size=(1, 1),
50 |                             strides=(1, 1),
51 |                             kernel_regularizer=kernel_regularizer,
52 |                             normalization=normalization,
53 |                             group=group,
54 |                             activation=activation,
55 |                             name="reduced_conv2d_" + str(num_inputs))(inputs[-1])
56 | 
57 |     for i in range(num_inputs, num_outputs):
58 |         if add_extra_conv:
59 |             inputs.append(conv_block(convolution,
60 |                                      filters=256,
61 |                                      kernel_size=(3, 3),
62 |                                      strides=(2, 2),
63 |                                      kernel_regularizer=kernel_regularizer,
64 |                                      normalization=normalization,
65 |                                      group=group,
66 |                                      activation=activation,
67 |                                      name="extra_conv2d_" + str(i + 1))(inputs[-1]))
68 |         else:
69 |             inputs.append(tf.keras.layers.MaxPool2D(
70 |                 (2, 2), (2, 2), "same", name="extra_max_pool_" + str(i+1))(inputs[-1]))
71 | 
72 |     return inputs
73 | 
74 | 
--------------------------------------------------------------------------------
/models/backbones/backbone.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | 
4 | 
5 | class Backbone(object):
6 |     def __init__(self,
7 |                  name,
8 |                  convolution='conv2d',
9 |                  kernel_initializer=tf.keras.initializers.VarianceScaling(),
10 |                  normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-5, axis=-1, trainable=True),
11 |                  activation=dict(activation="relu"),
12 |                  output_indices=(3, 4),
13 |                  strides=(2, 2, 2, 2, 2),
14 |                  dilation_rates=(1, 1, 1, 1, 1),
15 |                  frozen_stages=(-1,),
16 |                  data_format="channels_last",
17 |                  input_shape=None,
18 |                  input_tensor=None,
19 |                  dropblock=None,
20 |                  num_classes=1000,
21 |                  drop_rate=0.5):
22 |         """The backbone base class.
23 | 
24 |         Args:
25 |             convolution: (str) the convolution type used in the backbone.
26 |             normalization: (dict) the normalization layer config; if None, no normalization is used.
27 |             activation: (dict) the activation config.
28 |             output_indices: (list/tuple) the indices of the stages to output, e.g. [3, 4, 5] means
29 |                 output stage 3, stage 4 and stage 5 of the backbone.
30 |             strides: (list/tuple) the stride of every stage in the backbone, e.g. [1, 1, 1, 1, 1].
31 |             dilation_rates: (list/tuple) the dilation rate of every stage in the backbone.
32 |             frozen_stages: (list/tuple) the indices of the stages to freeze,
33 |                 e.g. [1, 2, 3] means freeze stage 1, stage 2 and stage 3.
34 |             frozen_batch_normalization: (bool) whether to freeze the batch normalization layers.
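
        Example (a config sketch; the argument values are illustrative,
        not defaults from this repo):
            backbone = ResNet50(input_shape=(1024, 1024, 3),
                                output_indices=(3, 4, 5),
                                frozen_stages=(1,))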
35 | """ 36 | assert isinstance(output_indices, (list, tuple)) or output_indices is None 37 | assert isinstance(strides, (list, tuple)) or strides is None 38 | assert isinstance(frozen_stages, (list, tuple)) or frozen_stages is None 39 | assert isinstance(dilation_rates, (list, tuple)) or dilation_rates is None 40 | 41 | self.name = name 42 | self.output_indices = output_indices 43 | self.strides = strides 44 | self.frozen_stages = frozen_stages 45 | self.dilation_rates = dilation_rates 46 | self.normalization = normalization 47 | self.convolution = convolution 48 | self.activation = activation 49 | self.dropblock = dropblock 50 | self.num_classes = num_classes 51 | self.drop_rate = drop_rate 52 | self.kernel_initializer = kernel_initializer 53 | self.data_format = data_format 54 | 55 | self._rgb_mean = np.array([[[[0.485, 0.456, 0.406]]]]) * 255. 56 | self._rgb_std = np.array([[[[0.229, 0.224, 0.225]]]]) * 255. 57 | 58 | if input_tensor is None: 59 | img_input = tf.keras.layers.Input(shape=input_shape) 60 | else: 61 | if not tf.keras.backend.is_keras_tensor(input_tensor): 62 | img_input = tf.keras.layers.Input(tensor=input_tensor, shape=input_shape) 63 | else: 64 | img_input = input_tensor 65 | 66 | self.img_input = img_input 67 | self.input_shape = input_shape 68 | self.input_tensor = input_tensor 69 | if output_indices: 70 | self._is_classifier = -1 in self.output_indices 71 | 72 | def build_model(self): 73 | raise NotImplementedError() 74 | 75 | def init_weights(self, pre_trained_weights_path): 76 | pass 77 | 78 | def load_pre_trained_weights(self, pre_trained_weights_path): 79 | pass 80 | -------------------------------------------------------------------------------- /core/layers/__init__.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow_addons as tfa 3 | from .activations import Mish 4 | from .scale import Scale 5 | from .max_in_out import MaxInOut 6 | from .drop_block import DropBlock2D 7 | from .nms import FastNonMaxSuppression 8 | from .nms import NonMaxSuppression 9 | from .nms import CombinedNonMaxSuppression 10 | from .nms import SoftNonMaxSuppression 11 | # from .normalizations import L2Normalization 12 | from .dcnv2 import DCNv2 13 | from .normalizations import GroupNormalization 14 | from .nearest_upsamling import NearestUpsampling2D 15 | from .weight_standardization_conv2d import WSConv2D 16 | from .normalizations import FilterResponseNormalization 17 | from .normalizations import InstanceNormalization 18 | from .normalizations import FrozenBatchNormalization 19 | from .proposal_layer import ProposalLayer 20 | from .position_sensitive_roi_pooling import PSRoIPooling 21 | from .position_sensitive_average_pooling import PSAvgPooling 22 | from .roi_pooling import SingleLevelAlignedRoIPooling, MultiLevelAlignedRoIPooling 23 | 24 | 25 | def build_convolution(convolution, **kwargs): 26 | if convolution == "depthwise_conv2d": 27 | return tf.keras.layers.DepthwiseConv2D(**kwargs) 28 | elif convolution == "wsconv2d": 29 | return WSConv2D(**kwargs) 30 | elif convolution == "conv2d": 31 | return tf.keras.layers.Conv2D(**kwargs) 32 | elif convolution == "separable_conv2d": 33 | return tf.keras.layers.SeparableConv2D(**kwargs) 34 | elif convolution == "dcnv2": 35 | return DCNv2(**kwargs) 36 | else: 37 | raise TypeError("Could not interpret convolution function identifier: {}".format(repr(convolution))) 38 | 39 | 40 | def build_normalization(normalization, **kwargs): 41 | if normalization == 
"group_norm": 42 | return GroupNormalization(**kwargs) 43 | elif normalization == "batch_norm": 44 | return tf.keras.layers.BatchNormalization(**kwargs) 45 | elif normalization == "frozen_batch_norm": 46 | return FrozenBatchNormalization(**kwargs) 47 | # elif normalization == "switchable_norm": 48 | # return SwitchableNormalization(**kwargs) 49 | elif normalization == "filter_response_norm": 50 | return FilterResponseNormalization(**kwargs) 51 | elif normalization == "sync_batch_norm": 52 | return tf.keras.layers.experimental.SyncBatchNormalization(**kwargs) 53 | else: 54 | raise TypeError("Could not interpret normalization function identifier: {}".format( 55 | repr(normalization))) 56 | 57 | 58 | def build_activation(**kwargs): 59 | if kwargs["activation"] == "leaky_relu": 60 | kwargs.pop("activation") 61 | return tf.keras.layers.LeakyReLU(**kwargs) 62 | if kwargs["activation"] == "mish": 63 | kwargs.pop("activation") 64 | return Mish(**kwargs) 65 | 66 | return tf.keras.layers.Activation(**kwargs) 67 | 68 | 69 | def build_roi_pooling(roi_pooling, **kwargs): 70 | if roi_pooling == "SingleLevelAlignedRoIPooling": 71 | return SingleLevelAlignedRoIPooling(**kwargs) 72 | 73 | if roi_pooling == "MultiLevelAlignedRoIPooling": 74 | return MultiLevelAlignedRoIPooling(**kwargs) 75 | 76 | if roi_pooling == "PSRoIPooling": 77 | return PSRoIPooling(**kwargs) 78 | 79 | if roi_pooling == "PSAvgPooling": 80 | return PSAvgPooling(**kwargs) 81 | 82 | raise TypeError("Could not interpret roi_pooling function identifier: {}".format(repr(roi_pooling))) 83 | 84 | 85 | __all__ = [ 86 | "Scale", 87 | "MaxInOut", 88 | "DropBlock2D", 89 | "L2Normalization", 90 | "build_activation", 91 | "build_convolution", 92 | "build_normalization", 93 | "NearestUpsampling2D", 94 | "build_roi_pooling", 95 | "ProposalLayer" 96 | ] 97 | -------------------------------------------------------------------------------- /yamls/gfl_x101_32x4d_fpn_mstrain_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNeXt101_32X4D 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | 
  repeats: 4
86 |   sampler:
87 |     sampler: PseudoSampler
88 |   use_sigmoid: true
89 | input_shape: !!python/tuple
90 | - 1024
91 | - 1024
92 | - 3
93 | neck:
94 |   add_extra_convs: true
95 |   feat_dims: 256
96 |   max_level: 5
97 |   min_level: 3
98 |   neck: FPN
99 |   num_output_levels: 5
100 |   relu_before_extra_convs: true
101 | num_classes: 80
102 | test:
103 |   iou_threshold: 0.6
104 |   nms: CombinedNonMaxSuppression
105 |   post_nms_size: 100
106 |   pre_nms_size: 5000
107 |   score_threshold: 0.3
108 | train:
109 |   checkpoint_dir: checkpoints/gfl
110 |   dataset:
111 |     augmentations:
112 |     - FlipLeftToRight:
113 |         probability: 0.5
114 |     - RandomDistortColor:
115 |         probability: 1.0
116 |     - Resize:
117 |         max_scale: 2.0
118 |         min_scale: 0.5
119 |         size: &id001 !!python/tuple
120 |         - 1024
121 |         - 1024
122 |         strides: 32
123 |     batch_size: 4
124 |     dataset: COCODataset
125 |     dataset_dir: /data/bail/COCO
126 |     num_samples: 118287
127 |     training: true
128 |   gradient_clip_norm: 10.0
129 |   log_every_n_steps: 100
130 |   mixed_precision:
131 |     loss_scale: null
132 |   optimizer:
133 |     momentum: 0.9
134 |     optimizer: SGD
135 |   pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt
136 |   save_ckpt_steps: 5000
137 |   scheduler:
138 |     learning_rate_scheduler:
139 |       boundaries:
140 |       - 16
141 |       - 22
142 |       scheduler: PiecewiseConstantDecay
143 |       values:
144 |       - 0.02
145 |       - 0.002
146 |       - 0.0002
147 |     train_epochs: 24
148 |     warmup:
149 |       steps: 800
150 |       warmup_learning_rate: 0.001
151 |   summary_dir: logs/gfl
152 | val:
153 |   dataset:
154 |     augmentations:
155 |     - Resize:
156 |         max_scale: 1.0
157 |         min_scale: 1.0
158 |         size: *id001
159 |         strides: 32
160 |     batch_size: 4
161 |     dataset: COCODataset
162 |     dataset_dir: /data/bail/COCO
163 |     training: false
164 |   samples: 5000
165 | weight_decay: 0.0001
166 | 
--------------------------------------------------------------------------------
/models/necks/path_aggregation_neck.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from core.layers import build_activation
3 | from core.layers import build_convolution
4 | from core.layers import build_normalization
5 | from models.builder import NECKS
6 | 
7 | 
8 | @NECKS.register("PAN")
9 | def path_aggregation_neck(inputs,
10 |                           convolution="conv2d",
11 |                           normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-3, axis=-1, trainable=True),
12 |                           activation=dict(activation="relu"),
13 |                           feat_dims=64,
14 |                           min_level=3,
15 |                           max_level=7,
16 |                           add_extra_conv=False,
17 |                           dropblock=None,
18 |                           weight_decay=0.,
19 |                           use_multiplication=False,
20 |                           name="path_aggregation_neck"):
21 |     kernel_regularizer = (tf.keras.regularizers.l2(weight_decay)
22 |                           if weight_decay is not None and weight_decay > 0 else None)
23 |     num_outputs = max_level - min_level + 1
24 |     output_filters = [feat_dims] * num_outputs
25 | 
26 |     features = []
27 |     num_inputs = len(inputs)
28 |     for i, feat in enumerate(inputs):
29 |         x = conv_block(convolution="conv2d",
30 |                        filters=feat_dims,
31 |                        kernel_size=(1, 1),
32 |                        strides=(1, 1),
33 |                        kernel_regularizer=kernel_regularizer,
34 |                        normalization=normalization,
35 |                        activation=activation,
36 |                        dropblock=dropblock,
37 |                        name="top_down_conv2d_%d" % (i+1))(feat)
38 |         features.append(x)
39 | 
40 |     for i in range(num_inputs - 2, -1, -1):
41 |         top = tf.keras.layers.UpSampling2D((2, 2), interpolation="nearest")(features[i+1])
42 |         if use_multiplication:
43 |             features[i] = 
tf.keras.layers.Multiply()([features[i], top]) 44 | else: 45 | features[i] = tf.keras.layers.Add()([features[i], top]) 46 | 47 | for i in range(1, num_inputs): 48 | x = conv_block(convolution="conv2d", 49 | filters=feat_dims, 50 | kernel_size=(3, 3), 51 | strides=(2, 2), 52 | kernel_regularizer=kernel_regularizer, 53 | normalization=normalization, 54 | activation=activation, 55 | dropblock=dropblock, 56 | name="bottom_up_conv2d_%d" % (i+1))(features[i-1]) 57 | if use_multiplication: 58 | features[i] = tf.keras.layers.Multiply()([x, features[i]]) 59 | else: 60 | features[i] = tf.keras.layers.Add()([x, features[i]]) 61 | 62 | for i in range(num_inputs, num_outputs): 63 | if add_extra_conv: 64 | features.append((conv_block(convolution, 65 | filters=output_filters[i], 66 | kernel_size=(3, 3), 67 | strides=(2, 2), 68 | kernel_regularizer=kernel_regularizer, 69 | normalization=normalization, 70 | group=group, 71 | activation=activation, 72 | name="extra_conv2d_%d" % (i + 1))(features[-1]))) 73 | else: 74 | features.append(tf.keras.layers.MaxPool2D(pool_size=(2, 2), 75 | strides=(2, 2))(features[-1])) 76 | 77 | return features 78 | -------------------------------------------------------------------------------- /yamls/gfl_r50_fpn_1x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | repeats: 4 86 | sampler: 87 | sampler: PseudoSampler 88 | use_sigmoid: true 89 | input_shape: !!python/tuple 90 | - 1024 91 | - 1024 92 | - 3 93 | neck: 94 | add_extra_convs: true 95 | feat_dims: 256 96 | max_level: 5 97 | min_level: 3 98 | neck: FPN 99 | num_output_levels: 5 100 | relu_before_extra_convs: true 101 | num_classes: 80 102 | test: 103 | iou_threshold: 0.6 104 | nms: CombinedNonMaxSuppression 105 | post_nms_size: 100 106 | pre_nms_size: 5000 107 | score_threshold: 0.3 108 | train: 109 | checkpoint_dir: checkpoints/gfl 110 | dataset: 111 | augmentations: 112 | - augmentation: FlipLeftToRight 113 | probability: 0.5 114 | - augmentation: RandomDistortColor 115 | - augmentation: Resize 116 | 
img_scale: 117 | - &id001 !!python/tuple 118 | - 1333 119 | - 800 120 | keep_ratio: true 121 | - augmentation: Pad 122 | size_divisor: 32 123 | batch_size: 4 124 | dataset: COCODataset 125 | dataset_dir: /data/bail/COCO 126 | num_samples: 118287 127 | training: true 128 | gradient_clip_norm: 10.0 129 | log_every_n_steps: 100 130 | mixed_precision: 131 | loss_scale: null 132 | optimizer: 133 | momentum: 0.9 134 | optimizer: SGD 135 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 136 | save_ckpt_steps: 5000 137 | scheduler: 138 | learning_rate_scheduler: 139 | boundaries: 140 | - 16 141 | - 22 142 | scheduler: PiecewiseConstantDecay 143 | values: 144 | - 0.02 145 | - 0.002 146 | - 0.0002 147 | train_epochs: 24 148 | warmup: 149 | steps: 800 150 | warmup_learning_rate: 0.001 151 | summary_dir: logs/gfl 152 | val: 153 | dataset: 154 | augmentations: 155 | - augmentation: Resize 156 | img_scale: 157 | - *id001 158 | keep_ratio: true 159 | - augmentation: Pad 160 | size_divisor: 32 161 | batch_size: 4 162 | dataset: COCODataset 163 | dataset_dir: /data/bail/COCO 164 | training: false 165 | samples: 5000 166 | weight_decay: 0.0001 167 | -------------------------------------------------------------------------------- /yamls/gfl_r101_fpn_mstrain_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | repeats: 4 86 | sampler: 87 | sampler: PseudoSampler 88 | use_sigmoid: true 89 | input_shape: !!python/tuple 90 | - 1024 91 | - 1024 92 | - 3 93 | neck: 94 | add_extra_convs: true 95 | feat_dims: 256 96 | max_level: 5 97 | min_level: 3 98 | neck: FPN 99 | num_output_levels: 5 100 | relu_before_extra_convs: true 101 | num_classes: 80 102 | test: 103 | iou_threshold: 0.6 104 | nms: CombinedNonMaxSuppression 105 | post_nms_size: 100 106 | pre_nms_size: 5000 107 | score_threshold: 0.3 108 | train: 109 | checkpoint_dir: checkpoints/gfl 110 | dataset: 111 | augmentations: 112 | - augmentation: FlipLeftToRight 113 | probability: 0.5 114 | - augmentation: 
RandomDistortColor 115 | - augmentation: Resize 116 | img_scale: 117 | - &id001 !!python/tuple 118 | - 1333 119 | - 800 120 | keep_ratio: true 121 | - augmentation: Pad 122 | size_divisor: 32 123 | batch_size: 4 124 | dataset: COCODataset 125 | dataset_dir: /data/bail/COCO 126 | num_samples: 118287 127 | training: true 128 | gradient_clip_norm: 10.0 129 | log_every_n_steps: 100 130 | mixed_precision: 131 | loss_scale: null 132 | optimizer: 133 | momentum: 0.9 134 | optimizer: SGD 135 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 136 | save_ckpt_steps: 5000 137 | scheduler: 138 | learning_rate_scheduler: 139 | boundaries: 140 | - 16 141 | - 22 142 | scheduler: PiecewiseConstantDecay 143 | values: 144 | - 0.02 145 | - 0.002 146 | - 0.0002 147 | train_epochs: 24 148 | warmup: 149 | steps: 800 150 | warmup_learning_rate: 0.001 151 | summary_dir: logs/gfl 152 | val: 153 | dataset: 154 | augmentations: 155 | - augmentation: Resize 156 | img_scale: 157 | - *id001 158 | keep_ratio: true 159 | - augmentation: Pad 160 | size_divisor: 32 161 | batch_size: 4 162 | dataset: COCODataset 163 | dataset_dir: /data/bail/COCO 164 | training: false 165 | samples: 5000 166 | weight_decay: 0.0001 167 | -------------------------------------------------------------------------------- /yamls/gfl_r50_fpn_mstrain_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFL 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | assigner: 52 | assigner: ATSSAssigner 53 | topk: 9 54 | bbox_decoder: 55 | decoder: Distance2Box 56 | weights: null 57 | bbox_encoder: 58 | encoder: Box2Distance 59 | weights: null 60 | bbox_loss: 61 | loss: GIoULoss 62 | reduction: sum 63 | weight: 2.0 64 | dfl_loss: 65 | loss: DistributionFocalLoss 66 | reduction: sum 67 | weight: 0.25 68 | dropblock: null 69 | feat_dims: 256 70 | head: GFLHead 71 | label_loss: 72 | beta: 2.0 73 | from_logits: true 74 | loss: QualityFocalLoss 75 | reduction: sum 76 | weight: 1.0 77 | max_level: 7 78 | min_level: 3 79 | normalization: 80 | groups: 32 81 | normalization: group_norm 82 | num_classes: 80 83 | prior: 0.01 84 | reg_max: 16 85 | repeats: 4 86 | sampler: 87 | sampler: PseudoSampler 88 | use_sigmoid: true 89 | input_shape: !!python/tuple 90 | - 1024 91 | - 1024 92 | - 3 93 | neck: 94 | add_extra_convs: true 95 | feat_dims: 256 96 | max_level: 5 97 | min_level: 3 98 | neck: FPN 99 | num_output_levels: 5 100 | relu_before_extra_convs: true 101 | num_classes: 80 102 | test: 103 | iou_threshold: 0.6 104 | nms: CombinedNonMaxSuppression 105 | post_nms_size: 100 106 | pre_nms_size: 5000 107 | score_threshold: 0.3 108 | train: 109 | checkpoint_dir: checkpoints/gfl 110 | dataset: 111 | augmentations: 112 | - augmentation: FlipLeftToRight 
113 | probability: 0.5 114 | - augmentation: RandomDistortColor 115 | - augmentation: Resize 116 | img_scale: 117 | - &id001 !!python/tuple 118 | - 1333 119 | - 800 120 | keep_ratio: true 121 | - augmentation: Pad 122 | size_divisor: 32 123 | batch_size: 4 124 | dataset: COCODataset 125 | dataset_dir: /data/bail/COCO 126 | num_samples: 118287 127 | training: true 128 | gradient_clip_norm: 10.0 129 | log_every_n_steps: 100 130 | mixed_precision: 131 | loss_scale: null 132 | optimizer: 133 | momentum: 0.9 134 | optimizer: SGD 135 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 136 | save_ckpt_steps: 5000 137 | scheduler: 138 | learning_rate_scheduler: 139 | boundaries: 140 | - 16 141 | - 22 142 | scheduler: PiecewiseConstantDecay 143 | values: 144 | - 0.02 145 | - 0.002 146 | - 0.0002 147 | train_epochs: 24 148 | warmup: 149 | steps: 800 150 | warmup_learning_rate: 0.001 151 | summary_dir: logs/gfl 152 | val: 153 | dataset: 154 | augmentations: 155 | - augmentation: Resize 156 | img_scale: 157 | - *id001 158 | keep_ratio: true 159 | - augmentation: Pad 160 | size_divisor: 32 161 | batch_size: 4 162 | dataset: COCODataset 163 | dataset_dir: /data/bail/COCO 164 | training: false 165 | samples: 5000 166 | weight_decay: 0.0001 167 | -------------------------------------------------------------------------------- /yamls/gflv2_r50_fpn_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFLV2 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | add_mean: true 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Distance2Box 57 | weights: null 58 | bbox_encoder: 59 | encoder: Box2Distance 60 | weights: null 61 | bbox_loss: 62 | loss: GIoULoss 63 | reduction: sum 64 | weight: 2.0 65 | dfl_loss: 66 | loss: DistributionFocalLoss 67 | reduction: sum 68 | weight: 0.25 69 | dropblock: null 70 | feat_dims: 256 71 | head: GFLV2Head 72 | label_loss: 73 | beta: 2.0 74 | from_logits: false 75 | loss: QualityFocalLoss 76 | reduction: sum 77 | weight: 1.0 78 | max_level: 7 79 | min_level: 3 80 | normalization: 81 | groups: 32 82 | normalization: group_norm 83 | num_classes: 80 84 | prior: 0.01 85 | quality_filters: 64 86 | reg_max: 16 87 | reg_topk: 4 88 | repeats: 4 89 | sampler: 90 | sampler: PseudoSampler 91 | use_sigmoid: true 92 | input_shape: !!python/tuple 93 | - 1024 94 | - 1024 95 | - 3 96 | neck: 97 | add_extra_convs: true 98 | feat_dims: 256 99 | max_level: 5 100 | min_level: 3 101 | neck: FPN 102 | num_output_levels: 5 103 | relu_before_extra_convs: true 104 | num_classes: 80 105 | test: 106 | iou_threshold: 0.6 107 | nms: CombinedNonMaxSuppression 108 | post_nms_size: 100 109 | pre_nms_size: 1000 110 | score_threshold: 0.3 111 | train: 112 | 
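# NOTE: despite the 1x in the file name, this uses the 2x-style step schedule
# below: LR 0.02 decayed 10x at boundaries 16 and 22 (epochs, against
# train_epochs: 24), after an 800-step warmup from 0.001.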
checkpoint_dir: checkpoints/gfl 113 | dataset: 114 | augmentations: 115 | - FlipLeftToRight: 116 | probability: 0.5 117 | - RandomDistortColor: 118 | probability: 1.0 119 | - Resize: 120 | max_scale: 2.0 121 | min_scale: 0.5 122 | size: &id001 !!python/tuple 123 | - 1024 124 | - 1024 125 | strides: 32 126 | batch_size: 4 127 | dataset: COCODataset 128 | dataset_dir: /data/bail/COCO 129 | num_samples: 118287 130 | training: true 131 | gradient_clip_norm: 10.0 132 | log_every_n_steps: 100 133 | mixed_precision: 134 | loss_scale: null 135 | optimizer: 136 | momentum: 0.9 137 | optimizer: SGD 138 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 139 | save_ckpt_steps: 5000 140 | scheduler: 141 | learning_rate_scheduler: 142 | boundaries: 143 | - 16 144 | - 22 145 | scheduler: PiecewiseConstantDecay 146 | values: 147 | - 0.02 148 | - 0.002 149 | - 0.0002 150 | train_epochs: 24 151 | warmup: 152 | steps: 800 153 | warmup_learning_rate: 0.001 154 | summary_dir: logs/gfl 155 | val: 156 | dataset: 157 | augmentations: 158 | - Resize: 159 | max_scale: 1.0 160 | min_scale: 1.0 161 | size: *id001 162 | strides: 32 163 | batch_size: 4 164 | dataset: COCODataset 165 | dataset_dir: /data/bail/COCO 166 | training: false 167 | samples: 5000 168 | weight_decay: 0.0001 169 | -------------------------------------------------------------------------------- /yamls/retinanet_r101_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNet101 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 
| train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/retinanet_r50_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNet50 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | 
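# Inference filtering: candidates below score_threshold are dropped, at most
# pre_nms_size boxes enter NMS at iou_threshold, and at most post_nms_size
# detections are returned per image.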
post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 | train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/retinanet_x101_32x4d_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNeXt101_32X4D 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: 
PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 | train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/retinanet_x101_64x4d_fpn_2x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 0.5 4 | - 1.0 5 | - 2.0 6 | generator: AnchorGeneratorV2 7 | num_anchors: 9 8 | octave_base_scale: 4 9 | scales_per_octave: 3 10 | strides: 11 | - 8 12 | - 16 13 | - 32 14 | - 64 15 | - 128 16 | assigner: 17 | assigner: MaxIoUAssigner 18 | neg_iou_thresh: 0.4 19 | pos_iou_thresh: 0.5 20 | backbone: 21 | activation: 22 | activation: relu 23 | backbone: ResNeXt101_64X4D 24 | dilation_rates: 25 | - 1 26 | - 1 27 | - 1 28 | - 1 29 | - 1 30 | dropblock: null 31 | frozen_stages: 32 | - 1 33 | normalization: 34 | epsilon: 0.0001 35 | momentum: 0.997 36 | normalization: batch_norm 37 | trainable: false 38 | output_indices: 39 | - 3 40 | - 4 41 | - 5 42 | strides: 43 | - 2 44 | - 2 45 | - 2 46 | - 2 47 | - 2 48 | bbox_decoder: 49 | decoder: Delta2Box 50 | weights: 51 | - 1.0 52 | - 1.0 53 | - 1.0 54 | - 1.0 55 | bbox_encoder: 56 | encoder: Box2Delta 57 | weights: 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | - 1.0 62 | bbox_loss: 63 | loss: SmoothL1Loss 64 | reduction: sum 65 | weight: 1.0 66 | data_format: channels_last 67 | detector: GFL 68 | dtype: float16 69 | excluding_weight_names: 70 | - predicted_box 71 | - predicted_class 72 | head: 73 | activation: 74 | activation: relu 75 | dropblock: null 76 | feat_dims: 256 77 | head: RetinaNetHead 78 | max_level: 7 79 | min_level: 3 80 | normalization: null 81 | num_classes: 80 82 | prior: 0.01 83 | repeats: 4 84 | use_sigmoid: true 85 | input_shape: !!python/tuple 86 | - 1024 87 | - 1024 88 | - 3 89 | label_loss: 90 | alpha: 0.25 91 | from_logits: true 92 | gamma: 2.0 93 | loss: FocalLoss 94 | reduction: sum 95 | weight: 1.0 96 | neck: 97 | add_extra_convs: true 98 | add_extra_convs_on_c5: true 99 | feat_dims: 256 100 | max_level: 5 101 | min_level: 3 102 | neck: FPN 103 | num_output_levels: 5 
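# The neck fuses backbone levels 3-5 and emits num_output_levels: 5 maps; with
# add_extra_convs_on_c5, stride-2 convs on C5 add the two extra levels so the
# RetinaNetHead covers min_level 3 to max_level 7.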
104 | relu_before_extra_convs: false 105 | num_classes: 80 106 | sampler: 107 | sampler: PseudoSampler 108 | test: 109 | iou_threshold: 0.6 110 | nms: CombinedNonMaxSuppression 111 | post_nms_size: 100 112 | pre_nms_size: 5000 113 | score_threshold: 0.35 114 | train: 115 | checkpoint_dir: checkpoints/retinanet 116 | dataset: 117 | augmentations: 118 | - FlipLeftToRight: 119 | probability: 0.5 120 | - RandomDistortColor: 121 | probability: 1.0 122 | - Resize: 123 | max_scale: 2.0 124 | min_scale: 0.5 125 | size: &id001 !!python/tuple 126 | - 1024 127 | - 1024 128 | strides: 32 129 | batch_size: 4 130 | dataset: COCODataset 131 | dataset_dir: /data/bail/COCO 132 | num_samples: 118287 133 | training: true 134 | gradient_clip_norm: 10.0 135 | input_size: *id001 136 | log_every_n_steps: 100 137 | mixed_precision: 138 | loss_scale: null 139 | optimizer: 140 | momentum: 0.9 141 | optimizer: SGD 142 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 143 | save_ckpt_steps: 5000 144 | scheduler: 145 | learning_rate_scheduler: 146 | boundaries: 147 | - 16 148 | - 22 149 | scheduler: PiecewiseConstantDecay 150 | values: 151 | - 0.02 152 | - 0.002 153 | - 0.0002 154 | train_epochs: 24 155 | warmup: 156 | steps: 800 157 | warmup_learning_rate: 0.001 158 | summary_dir: logs/retinanet 159 | val: 160 | dataset: 161 | augmentations: 162 | - Resize: 163 | max_scale: 1.0 164 | min_scale: 1.0 165 | size: *id001 166 | strides: 32 167 | batch_size: 4 168 | dataset: COCODataset 169 | dataset_dir: /data/bail/COCO 170 | training: false 171 | input_size: *id001 172 | samples: 5000 173 | weight_decay: 0.0001 174 | -------------------------------------------------------------------------------- /yamls/gflv2_r101_fpn_ms2x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFLV2 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | add_mean: true 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Distance2Box 57 | weights: null 58 | bbox_encoder: 59 | encoder: Box2Distance 60 | weights: null 61 | bbox_loss: 62 | loss: GIoULoss 63 | reduction: sum 64 | weight: 2.0 65 | dfl_loss: 66 | loss: DistributionFocalLoss 67 | reduction: sum 68 | weight: 0.25 69 | dropblock: null 70 | feat_dims: 256 71 | head: GFLV2Head 72 | label_loss: 73 | beta: 2.0 74 | from_logits: false 75 | loss: QualityFocalLoss 76 | reduction: sum 77 | weight: 1.0 78 | max_level: 7 79 | min_level: 3 80 | normalization: 81 | groups: 32 82 | normalization: group_norm 83 | num_classes: 80 84 | prior: 0.01 85 | quality_filters: 64 86 | reg_max: 16 87 | reg_topk: 4 88 | repeats: 4 89 | sampler: 90 | sampler: PseudoSampler 91 | use_sigmoid: true 92 | input_shape: !!python/tuple 93 | - 1024 94 | - 1024 95 | - 3 96 
| neck: 97 | add_extra_convs: true 98 | feat_dims: 256 99 | max_level: 5 100 | min_level: 3 101 | neck: FPN 102 | num_output_levels: 5 103 | relu_before_extra_convs: true 104 | num_classes: 80 105 | test: 106 | iou_threshold: 0.6 107 | nms: CombinedNonMaxSuppression 108 | post_nms_size: 100 109 | pre_nms_size: 1000 110 | score_threshold: 0.35 111 | train: 112 | checkpoint_dir: checkpoints/gfl 113 | dataset: 114 | augmentations: 115 | - augmentation: FlipLeftToRight 116 | probability: 0.5 117 | - augmentation: RandomDistortColor 118 | - augmentation: Resize 119 | img_scale: 120 | - &id001 !!python/tuple 121 | - 1333 122 | - 800 123 | keep_ratio: true 124 | - augmentation: Pad 125 | size_divisor: 32 126 | batch_size: 4 127 | dataset: COCODataset 128 | dataset_dir: /data/bail/COCO 129 | num_samples: 118287 130 | training: true 131 | gradient_clip_norm: 10.0 132 | log_every_n_steps: 100 133 | mixed_precision: 134 | loss_scale: null 135 | optimizer: 136 | momentum: 0.9 137 | optimizer: SGD 138 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 139 | save_ckpt_steps: 5000 140 | scheduler: 141 | learning_rate_scheduler: 142 | boundaries: 143 | - 16 144 | - 22 145 | scheduler: PiecewiseConstantDecay 146 | values: 147 | - 0.02 148 | - 0.002 149 | - 0.0002 150 | train_epochs: 24 151 | warmup: 152 | steps: 800 153 | warmup_learning_rate: 0.001 154 | summary_dir: logs/gfl 155 | val: 156 | dataset: 157 | augmentations: 158 | - augmentation: Resize 159 | img_scale: 160 | - *id001 161 | keep_ratio: true 162 | - augmentation: Pad 163 | size_divisor: 32 164 | batch_size: 4 165 | dataset: COCODataset 166 | dataset_dir: /data/bail/COCO 167 | training: false 168 | samples: 5000 169 | weight_decay: 0.0001 170 | -------------------------------------------------------------------------------- /yamls/gflv2_r50_fpn_ms2x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: GFLV2 44 | dtype: float16 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | head: 49 | activation: 50 | activation: relu 51 | add_mean: true 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Distance2Box 57 | weights: null 58 | bbox_encoder: 59 | encoder: Box2Distance 60 | weights: null 61 | bbox_loss: 62 | loss: GIoULoss 63 | reduction: sum 64 | weight: 2.0 65 | dfl_loss: 66 | loss: DistributionFocalLoss 67 | reduction: sum 68 | weight: 0.25 69 | dropblock: null 70 | feat_dims: 256 71 | head: GFLV2Head 72 | label_loss: 73 | beta: 2.0 74 | from_logits: false 75 | loss: QualityFocalLoss 76 | reduction: sum 77 | weight: 1.0 78 | max_level: 7 79 | min_level: 3 80 | normalization: 81 | groups: 32 82 | normalization: group_norm 83 | num_classes: 80 84 | prior: 0.01 85 | quality_filters: 64 86 | reg_max: 16 87 | reg_topk: 4 88 | repeats: 4 89 | sampler: 90 | sampler: 
PseudoSampler 91 | use_sigmoid: true 92 | input_shape: !!python/tuple 93 | - 1024 94 | - 1024 95 | - 3 96 | neck: 97 | add_extra_convs: true 98 | feat_dims: 256 99 | max_level: 5 100 | min_level: 3 101 | neck: FPN 102 | num_output_levels: 5 103 | relu_before_extra_convs: true 104 | num_classes: 80 105 | test: 106 | iou_threshold: 0.6 107 | nms: CombinedNonMaxSuppression 108 | post_nms_size: 100 109 | pre_nms_size: 1000 110 | score_threshold: 0.35 111 | train: 112 | checkpoint_dir: checkpoints/gfl 113 | dataset: 114 | augmentations: 115 | - augmentation: FlipLeftToRight 116 | probability: 0.5 117 | - augmentation: RandomDistortColor 118 | - augmentation: Resize 119 | img_scale: 120 | - &id001 !!python/tuple 121 | - 1333 122 | - 800 123 | keep_ratio: true 124 | - augmentation: Pad 125 | size_divisor: 32 126 | batch_size: 4 127 | dataset: COCODataset 128 | dataset_dir: /data/bail/COCO 129 | num_samples: 118287 130 | training: true 131 | gradient_clip_norm: 10.0 132 | log_every_n_steps: 100 133 | mixed_precision: 134 | loss_scale: null 135 | optimizer: 136 | momentum: 0.9 137 | optimizer: SGD 138 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 139 | save_ckpt_steps: 5000 140 | scheduler: 141 | learning_rate_scheduler: 142 | boundaries: 143 | - 16 144 | - 22 145 | scheduler: PiecewiseConstantDecay 146 | values: 147 | - 0.02 148 | - 0.002 149 | - 0.0002 150 | train_epochs: 24 151 | warmup: 152 | steps: 800 153 | warmup_learning_rate: 0.001 154 | summary_dir: logs/gfl 155 | val: 156 | dataset: 157 | augmentations: 158 | - augmentation: Resize 159 | img_scale: 160 | - *id001 161 | keep_ratio: true 162 | - augmentation: Pad 163 | size_divisor: 32 164 | batch_size: 4 165 | dataset: COCODataset 166 | dataset_dir: /data/bail/COCO 167 | training: false 168 | samples: 5000 169 | weight_decay: 0.0001 170 | -------------------------------------------------------------------------------- /models/heads/head.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from utils import box_utils 3 | from core import build_loss 4 | from core import build_sampler 5 | from core import build_assigner 6 | from core.builder import build_nms 7 | from core.bbox import build_decoder 8 | from core.bbox import build_encoder 9 | from ..common import ConvNormActBlock 10 | 11 | 12 | class BaseHead(tf.keras.Model): 13 | def __init__(self, cfg, test_cfg, anchor_cfg=None, num_classes=80, is_training=True, data_format="channels_last", **kwargs): 14 | super(BaseHead, self).__init__(**kwargs) 15 | 16 | self.num_classes = num_classes 17 | self.cfg = cfg 18 | self.anchor_cfg = anchor_cfg 19 | self.test_cfg = test_cfg 20 | self.is_training = is_training 21 | self.data_format = data_format 22 | 23 | if test_cfg and test_cfg.get("nms") is not None: 24 | self.nms = build_nms(**test_cfg.as_dict()) 25 | 26 | self.use_sigmoid = True 27 | if cfg.get("use_sigmoid") is not None: 28 | self.use_sigmoid = cfg.use_sigmoid 29 | self._label_dims = num_classes if self.use_sigmoid else num_classes + 1 30 | 31 | self.bbox_loss_func = build_loss(**cfg.bbox_loss.as_dict()) if cfg.get("bbox_loss") is not None else None 32 | self._use_iou_loss = False 33 | if self.bbox_loss_func is not None: 34 | self._use_iou_loss = "IoU" in cfg.bbox_loss.loss 35 | self.label_loss_func = build_loss(**cfg.label_loss.as_dict()) if cfg.get("label_loss") is not None else None 36 | 37 | self.sampler = build_sampler(**cfg.sampler.as_dict()) if cfg.get("sampler") is not None else None 38 
| self.assigner = build_assigner(**cfg.assigner.as_dict()) if cfg.get("assigner") is not None else None 39 | 40 | self.bbox_decoder = build_decoder(**cfg.bbox_decoder.as_dict()) if cfg.get("bbox_decoder") is not None else None 41 | self.bbox_encoder = build_encoder(**cfg.bbox_encoder.as_dict()) if cfg.get("bbox_encoder") is not None else None 42 | 43 | @property 44 | def min_level(self): 45 | if self.cfg.get("min_level"): 46 | return self.cfg.min_level 47 | 48 | return None 49 | 50 | @property 51 | def max_level(self): 52 | if self.cfg.get("max_level"): 53 | return self.cfg.max_level 54 | return None 55 | 56 | def _make_shared_convs(self): 57 | self.box_shared_convs = tf.keras.Sequential(name="box_net") 58 | self.class_shared_convs = tf.keras.Sequential(name="cls_net") 59 | 60 | for i in range(self.cfg.repeats): 61 | self.box_shared_convs.add( 62 | ConvNormActBlock(filters=self.cfg.feat_dims, 63 | kernel_size=(3, 3), 64 | padding="same", 65 | strides=(1, 1), 66 | normalization=self.cfg.normalization.as_dict() if self.cfg.normalization else None, 67 | activation=self.cfg.activation.as_dict(), 68 | name="%d" % i)) 69 | self.class_shared_convs.add( 70 | ConvNormActBlock(filters=self.cfg.feat_dims, 71 | kernel_size=(3, 3), 72 | strides=(1, 1), 73 | padding="same", 74 | normalization=self.cfg.normalization.as_dict() if self.cfg.normalization else None, 75 | activation=self.cfg.activation.as_dict(), 76 | name="%d" % i)) 77 | 78 | def get_targets(self, gt_boxes, gt_labels, total_anchors): 79 | raise NotImplementedError() 80 | 81 | def compute_losses(self, predictions, image_info): 82 | raise NotImplementedError() 83 | 84 | def get_boxes(self, outputs): 85 | raise NotImplementedError() 86 | 87 | -------------------------------------------------------------------------------- /export_saved_model.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import tensorflow as tf 3 | from models import build_detector 4 | from configs import build_configs 5 | from core import build_optimizer 6 | 7 | 8 | parser = argparse.ArgumentParser() 9 | parser.add_argument("--detector", required=True, type=str) 10 | parser.add_argument("--config", type=str, default=None, help="The yaml file, default None.") 11 | parser.add_argument("--saved_model_dir", required=True, default=None, type=str) 12 | parser.add_argument("--ckpt", type=str, default=None, help="The checkpoint dir or h5 file.") 13 | 14 | parser.add_argument("--nms", type=str, default="CombinedNonMaxSuppression", help="The NMS type.") 15 | parser.add_argument("--iou_threshold", type=float, default=0.5, help="The iou threshold for NMS.") 16 | parser.add_argument("--score_threshold", type=float, default=0.3, help="The score threshold for NMS.") 17 | parser.add_argument("--update_threshold", type=float, default=0.1, help="The update threshold for MatrixNMS.") 18 | parser.add_argument("--pre_nms_size", type=int, default=4000, help="The number of detections before NMS.") 19 | parser.add_argument("--post_nms_size", type=int, default=100, help="The number of detections after NMS.") 20 | parser.add_argument("--nms_kernel", default="gaussian", type=str, help="The kernel type of MatrixNMS.") 21 | parser.add_argument("--nms_sigma", default=2.0, type=float, help="The sigma for MatrixNMS or SoftNMS.") 22 | parser.add_argument("--nms_type", type=str, default=None, 23 | help="If [--nms] is NonMaxSuppressionWithQuality, the [--nms_type] is necessary.") 24 | 25 | args = parser.parse_args() 26 | 27 | 28 | cfg = 
build_configs(args.detector) 29 | 30 | if args.config is None: 31 | cfg.test.nms = args.nms 32 | cfg.test.iou_threshold = args.iou_threshold 33 | cfg.test.score_threshold = args.score_threshold 34 | cfg.test.pre_nms_size = args.pre_nms_size 35 | cfg.test.post_nms_size = args.post_nms_size 36 | 37 | if args.nms == "MatrixNonMaxSuppression": 38 | cfg.test.update_threshold = args.update_threshold 39 | cfg.test.kernel = args.nms_kernel 40 | 41 | if args.nms == "NonMaxSuppressionWithQuality": 42 | assert args.nms_type is not None, "When [--nms] is `NonMaxSuppressionWithQuality`, [--nms_type] is necessary." 43 | 44 | if args.nms in ["MatrixNonMaxSuppression", "SoftNonMaxSuppression"]: 45 | cfg.test.sigma = args.nms_sigma 46 | 47 | if args.nms == "NonMaxSuppressionWithQuality": 48 | cfg.test.nms_type = args.nms_type 49 | if args.nms_type in ["soft_nms", "matrix_nms"]: 50 | cfg.test.sigma = args.nms_sigma 51 | else: 52 | cfg.override(args.config) 53 | 54 | detector = build_detector(cfg.detector, return_loss=False, cfg=cfg) 55 | images = tf.random.uniform([1, cfg.train.input_size[0], cfg.train.input_size[1], 3]) 56 | images = tf.cast(images, tf.uint8) 57 | detector(images) 58 | 59 | if args.ckpt is not None and ".h5" in args.ckpt: 60 | detector.load_weights(args.ckpt) 61 | else: 62 | optimizer = build_optimizer(**cfg.train.optimizer.as_dict()) 63 | 64 | checkpoint = tf.train.Checkpoint(optimizer=optimizer, detector=detector) 65 | manager = tf.train.CheckpointManager( 66 | checkpoint=checkpoint, directory=cfg.train.checkpoint_dir, max_to_keep=10) 67 | latest_checkpoint = manager.latest_checkpoint 68 | checkpoint.restore(latest_checkpoint) 69 | 70 | 71 | saved_model_dir = args.saved_model_dir or "./saved_model/" + args.detector 72 | 73 | tf.saved_model.save(detector, saved_model_dir) 74 | print("saved model to %s" % saved_model_dir) 75 | 76 | # images = tf.random.uniform([1, cfg.train.input_size[0], cfg.train.input_size[1], 3]) 77 | # image_info = {"valid_size": tf.constant([[cfg.train.input_size[0], cfg.train.input_size[1]]]), 78 | # "input_size": tf.constant([[cfg.train.input_size[0], cfg.train.input_size[1]]]), 79 | # "scale_factor": 1.} 80 | # print(detector((images, image_info), training=False)) 81 | -------------------------------------------------------------------------------- /yamls/atss_r50_fpn_1x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - -1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: ATSS 44 | dtype: float32 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | - predicted_centerness 49 | head: 50 | activation: 51 | activation: relu 52 | assigner: 53 | assigner: ATSSAssigner 54 | topk: 9 55 | bbox_decoder: 56 | decoder: Delta2Box 57 | weights: 58 | - 10.0 59 | - 10.0 60 | - 5.0 61 | - 5.0 62 | bbox_encoder: 63 | encoder: Box2Delta 64 | weights: 65 | - 10.0 66 | - 10.0 67 | - 5.0 68 | - 5.0 69 | bbox_loss: 
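# The ATSS head trains three branches: GIoULoss for box regression (below),
# BinaryCrossEntropy on its centerness prediction, and FocalLoss for
# classification.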
70 | loss: GIoULoss 71 | reduction: sum 72 | weight: 2.0 73 | centerness_loss: 74 | from_logits: true 75 | loss: BinaryCrossEntropy 76 | reduction: sum 77 | weight: 1.0 78 | convolution: conv2d 79 | dropblock: null 80 | feat_dims: 256 81 | head: ATSSHead 82 | label_loss: 83 | alpha: 0.25 84 | from_logits: true 85 | gamma: 2.0 86 | label_smoothing: 0.0 87 | loss: FocalLoss 88 | reduction: sum 89 | weight: 1.0 90 | max_level: 7 91 | min_level: 3 92 | normalization: 93 | groups: 32 94 | normalization: group_norm 95 | num_classes: 80 96 | prior: 0.01 97 | repeats: 4 98 | sampler: 99 | sampler: PseudoSampler 100 | use_sigmoid: true 101 | input_shape: !!python/tuple 102 | - 1024 103 | - 1024 104 | - 3 105 | neck: 106 | add_extra_convs: true 107 | feat_dims: 256 108 | max_level: 5 109 | min_level: 3 110 | neck: FPN 111 | num_output_levels: 5 112 | relu_before_extra_convs: true 113 | num_classes: 80 114 | test: 115 | iou_threshold: 0.6 116 | nms: CombinedNonMaxSuppression 117 | nms_type: nms 118 | post_nms_size: 100 119 | pre_nms_size: 1000 120 | score_threshold: 0.35 121 | train: 122 | checkpoint_dir: checkpoints/atss 123 | dataset: 124 | augmentations: 125 | - augmentation: FlipLeftToRight 126 | probability: 0.5 127 | - augmentation: RandomDistortColor 128 | - augmentation: Resize 129 | img_scale: !!python/tuple 130 | - 0.2 131 | - 2 132 | keep_ratio: true 133 | multiscale_mode: range 134 | - augmentation: Pad 135 | size_divisor: 32 136 | batch_size: 4 137 | dataset: COCODataset 138 | dataset_dir: /data/bail/COCO 139 | num_classes: 80 140 | num_samples: 118287 141 | training: true 142 | gradient_clip_norm: 0.0 143 | log_every_n_steps: 100 144 | mixed_precision: 145 | loss_scale: null 146 | optimizer: 147 | momentum: 0.9 148 | optimizer: SGD 149 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 150 | save_ckpt_steps: 5000 151 | scheduler: 152 | learning_rate_scheduler: 153 | initial_learning_rate: 0.02 154 | scheduler: CosineDecay 155 | train_epochs: 36 156 | warmup: 157 | steps: 800 158 | warmup_learning_rate: 0.001 159 | summary_dir: logs/atss 160 | val: 161 | dataset: 162 | augmentations: 163 | - augmentation: Resize 164 | img_scale: 165 | - !!python/tuple 166 | - 1333 167 | - !!python/tuple 168 | - 1024 169 | - 1024 170 | keep_ratio: true 171 | batch_size: 4 172 | dataset: COCODataset 173 | dataset_dir: /data/bail/COCO 174 | training: false 175 | samples: 5000 176 | weight_decay: 0.0001 177 | -------------------------------------------------------------------------------- /yamls/atss_r101_fpn_1x_coco.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGeneratorV2 5 | num_anchors: 1 6 | octave_base_scale: 8 7 | scales_per_octave: 1 8 | strides: 9 | - 8 10 | - 16 11 | - 32 12 | - 64 13 | - 128 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: ResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 1 24 | dropblock: null 25 | frozen_stages: 26 | - -1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 3 34 | - 4 35 | - 5 36 | strides: 37 | - 2 38 | - 2 39 | - 2 40 | - 2 41 | - 2 42 | data_format: channels_last 43 | detector: ATSS 44 | dtype: float32 45 | excluding_weight_names: 46 | - predicted_box 47 | - predicted_class 48 | - predicted_centerness 49 | head: 50 | activation: 51 | activation: relu 52 | assigner: 53 | assigner: ATSSAssigner 54 | 
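# ATSSAssigner: per pyramid level it takes the topk (9) anchors closest to each
# gt center, then derives an adaptive IoU threshold from the mean and standard
# deviation of those candidates' IoUs to select positives.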
topk: 9 55 | bbox_decoder: 56 | decoder: Delta2Box 57 | weights: 58 | - 10.0 59 | - 10.0 60 | - 5.0 61 | - 5.0 62 | bbox_encoder: 63 | encoder: Box2Delta 64 | weights: 65 | - 10.0 66 | - 10.0 67 | - 5.0 68 | - 5.0 69 | bbox_loss: 70 | loss: GIoULoss 71 | reduction: sum 72 | weight: 2.0 73 | centerness_loss: 74 | from_logits: true 75 | loss: BinaryCrossEntropy 76 | reduction: sum 77 | weight: 1.0 78 | convolution: conv2d 79 | dropblock: null 80 | feat_dims: 256 81 | head: ATSSHead 82 | label_loss: 83 | alpha: 0.25 84 | from_logits: true 85 | gamma: 2.0 86 | label_smoothing: 0.0 87 | loss: FocalLoss 88 | reduction: sum 89 | weight: 1.0 90 | max_level: 7 91 | min_level: 3 92 | normalization: 93 | groups: 32 94 | normalization: group_norm 95 | num_classes: 80 96 | prior: 0.01 97 | repeats: 4 98 | sampler: 99 | sampler: PseudoSampler 100 | use_sigmoid: true 101 | input_shape: !!python/tuple 102 | - 1024 103 | - 1024 104 | - 3 105 | neck: 106 | add_extra_convs: true 107 | feat_dims: 256 108 | max_level: 5 109 | min_level: 3 110 | neck: FPN 111 | num_output_levels: 5 112 | relu_before_extra_convs: true 113 | num_classes: 80 114 | test: 115 | iou_threshold: 0.6 116 | nms: CombinedNonMaxSuppression 117 | nms_type: nms 118 | post_nms_size: 100 119 | pre_nms_size: 1000 120 | score_threshold: 0.35 121 | train: 122 | checkpoint_dir: checkpoints/atss 123 | dataset: 124 | augmentations: 125 | - augmentation: FlipLeftToRight 126 | probability: 0.5 127 | - augmentation: RandomDistortColor 128 | - augmentation: Resize 129 | img_scale: !!python/tuple 130 | - 0.2 131 | - 2 132 | keep_ratio: true 133 | multiscale_mode: range 134 | - augmentation: Pad 135 | size_divisor: 32 136 | batch_size: 4 137 | dataset: COCODataset 138 | dataset_dir: /data/bail/COCO 139 | num_classes: 80 140 | num_samples: 118287 141 | training: true 142 | gradient_clip_norm: 0.0 143 | log_every_n_steps: 100 144 | mixed_precision: 145 | loss_scale: null 146 | optimizer: 147 | momentum: 0.9 148 | optimizer: SGD 149 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 150 | save_ckpt_steps: 5000 151 | scheduler: 152 | learning_rate_scheduler: 153 | initial_learning_rate: 0.02 154 | scheduler: CosineDecay 155 | train_epochs: 36 156 | warmup: 157 | steps: 800 158 | warmup_learning_rate: 0.001 159 | summary_dir: logs/atss 160 | val: 161 | dataset: 162 | augmentations: 163 | - augmentation: Resize 164 | img_scale: 165 | - !!python/tuple 166 | - 1333 167 | - !!python/tuple 168 | - 1024 169 | - 1024 170 | keep_ratio: true 171 | batch_size: 4 172 | dataset: COCODataset 173 | dataset_dir: /data/bail/COCO 174 | training: false 175 | samples: 5000 176 | weight_decay: 0.0001 177 | -------------------------------------------------------------------------------- /configs/onenet_config.py: -------------------------------------------------------------------------------- 1 | from configs import Config 2 | 3 | 4 | def get_onenet_config(num_classes=80): 5 | h = Config() 6 | 7 | input_size = (512, 512) 8 | h.detector = "OneNet" 9 | h.dtype = "float16" 10 | h.data_format = "channels_last" 11 | h.input_shape = (input_size[0], input_size[1], 3) 12 | h.num_classes = num_classes 13 | h.backbone = dict(backbone="ResNet18", 14 | dropblock=None, 15 | normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False), 16 | activation=dict(activation="relu"), 17 | strides=[2, 2, 2, 2, 2], 18 | dilation_rates=[1, 1, 1, 1, 1], 19 | output_indices=[2, 3, 4, 5], 20 | frozen_stages=[1, ]) 21 | 22 | 
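# Each sub-config names a registered component (backbone/neck/head/losses) plus
# its constructor kwargs, and the registry builders assemble the detector from
# them. Hypothetical usage, mirroring export_saved_model.py:
#   from configs import build_configs
#   from models import build_detector
#   cfg = build_configs("OneNet")
#   detector = build_detector(cfg.detector, return_loss=False, cfg=cfg)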
h.neck=dict(neck="CenterNetDeconv", 23 | normalization=dict(normalization="batch_norm", momentum=0.997, epsilon=1e-4, trainable=False), 24 | activation=dict(activation="relu")) 25 | h.head=dict(head="OneNetHead", 26 | activation=dict(activation="relu"), 27 | feat_dims=64, 28 | dropblock=None, 29 | num_classes=num_classes, 30 | strides=4, 31 | prior=0.01, 32 | use_sigmoid=True, 33 | assigner = dict(assigner="MinCostAssigner", class_weight=2., l1_weight=2., iou_weight=5., iou_type="giou", alpha=0.25, gamma=2.), 34 | label_loss = dict(loss="FocalLoss", alpha=0.25, gamma=2., reduction="sum"), 35 | bbox_loss = dict(loss="RegL1Loss", weight=1., reduction="sum")) 36 | 37 | h.weight_decay = 1e-4 38 | h.excluding_weight_names = ["predicted_box", "predicted_class"] 39 | h.train=dict(dataset=dict(dataset="COCODataset", 40 | batch_size=4, 41 | dataset_dir="/data/bail/COCO", 42 | training=True, 43 | augmentations=[ 44 | dict(augmentation="FlipLeftToRight", probability=0.5), 45 | dict(augmentation="RandomDistortColor"), 46 | dict(augmentation="Resize", img_scale=(0.2, 2), multiscale_mode="range", keep_ratio=True), 47 | dict(augmentation="RandCropOrPad", size=input_size, clip_box_base_center=False),  # input_size is already an (H, W) tuple 48 | ], 49 | num_samples=118287), 50 | pretrained_weights_path="/data/bail/pretrained_weights/resnet50/resnet50.ckpt", 51 | 52 | optimizer=dict(optimizer="SGD", momentum=0.9), 53 | mixed_precision=dict(loss_scale=None), # The loss scale in mixed precision training. If None, use dynamic. 54 | gradient_clip_norm=10.0, 55 | 56 | scheduler=dict(train_epochs=24, 57 | learning_rate_scheduler=dict(scheduler="PiecewiseConstantDecay", 58 | boundaries=[16, 22], 59 | values=[0.02, 0.002, 0.0002]), 60 | warmup=dict(warmup_learning_rate=0.001, steps=800)), 61 | checkpoint_dir="checkpoints/onenet", 62 | summary_dir="logs/onenet", 63 | log_every_n_steps=100, 64 | save_ckpt_steps=5000) 65 | h.val=dict(dataset=dict(dataset="COCODataset", 66 | batch_size=4, 67 | dataset_dir="/data/bail/COCO", 68 | training=False, 69 | augmentations=[ 70 | dict(augmentation="Resize", img_scale=[(1333, input_size)], keep_ratio=True), 71 | dict(augmentation="Pad", size_divisor=32) 72 | ]), 73 | samples=5000) 74 | h.test=dict(topk=100, score_threshold=0.3) 75 | 76 | return h 77 | -------------------------------------------------------------------------------- /yamls/YOLOF_R50_C5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 5 6 | scales: 7 | - 32 8 | - 64 9 | - 128 10 | - 256 11 | - 512 12 | strides: 32 13 | backbone: 14 | activation: 15 | activation: relu 16 | backbone: CaffeResNet50 17 | dilation_rates: 18 | - 1 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | dropblock: null 24 | frozen_stages: 25 | - 1 26 | normalization: 27 | epsilon: 0.0001 28 | momentum: 0.997 29 | normalization: batch_norm 30 | trainable: false 31 | output_indices: 32 | - 5 33 | strides: 34 | - 2 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | data_format: channels_last 40 | detector: YOLOF 41 | dtype: float16 42 | excluding_weight_names: 43 | - predicted_box 44 | - predicted_class 45 | head: 46 | activation: 47 | activation: relu 48 | assigner: 49 | assigner: UniformAssigner 50 | match_times: 8 51 | neg_ignore_thresh: 0.15 52 | pos_ignore_thresh: 0.7 53 | bbox_decoder: 54 | decoder: Delta2Box 55 | weights: 56 | - 1.0 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | bbox_encoder: 61 | encoder: Box2Delta 62 | weights: 63 | - 1.0 64 | - 1.0 65 | - 1.0 66
| - 1.0 67 | bbox_loss: 68 | loss: GIoULoss 69 | reduction: sum 70 | weight: 2.0 71 | cls_num_convs: 2 72 | feat_dims: 512 73 | head: YOLOFHead 74 | kernel_initializer: he_normal 75 | label_loss: 76 | alpha: 0.25 77 | from_logits: true 78 | gamma: 2.0 79 | loss: FocalLoss 80 | reduction: sum 81 | weight: 1.0 82 | normalization: 83 | axis: -1 84 | epsilon: 0.001 85 | momentum: 0.9 86 | normalization: batch_norm 87 | trainable: true 88 | prior: 0.01 89 | reg_num_convs: 4 90 | sampler: 91 | sampler: PseudoSampler 92 | use_sigmoid: true 93 | input_shape: !!python/tuple 94 | - 1024 95 | - 1024 96 | - 3 97 | neck: 98 | activation: 99 | activation: relu 100 | data_format: channels_last 101 | dilation_rates: 102 | - 2 103 | - 4 104 | - 6 105 | - 8 106 | filters: 512 107 | kernel_initializer: he_normal 108 | midfilters: 128 109 | neck: DilatedEncoder 110 | normalization: 111 | axis: -1 112 | epsilon: 0.001 113 | momentum: 0.9 114 | normalization: batch_norm 115 | trainable: true 116 | num_classes: 80 117 | test: 118 | iou_threshold: 0.5 119 | nms: CombinedNonMaxSuppression 120 | post_nms_size: 100 121 | pre_nms_size: 2000 122 | score_threshold: 0.35 123 | train: 124 | checkpoint_dir: checkpoints/yolof 125 | dataset: 126 | augmentations: 127 | - augmentation: FlipLeftToRight 128 | probability: 0.5 129 | - augmentation: RandomDistortColor 130 | - augmentation: Resize 131 | img_scale: 132 | - &id001 !!python/tuple 133 | - 1333 134 | - 800 135 | keep_ratio: true 136 | - augmentation: Pad 137 | size_divisor: 32 138 | batch_size: 4 139 | dataset: COCODataset 140 | dataset_dir: /data/bail/COCO 141 | num_samples: 118287 142 | training: true 143 | gradient_clip_norm: 10.0 144 | log_every_n_steps: 100 145 | mixed_precision: 146 | loss_scale: null 147 | optimizer: 148 | momentum: 0.9 149 | optimizer: SGD 150 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 151 | save_ckpt_steps: 5000 152 | scheduler: 153 | learning_rate_scheduler: 154 | boundaries: 155 | - 16 156 | - 22 157 | scheduler: PiecewiseConstantDecay 158 | values: 159 | - 0.02 160 | - 0.002 161 | - 0.0002 162 | train_epochs: 24 163 | warmup: 164 | steps: 800 165 | warmup_learning_rate: 0.001 166 | summary_dir: logs/yolof 167 | val: 168 | dataset: 169 | augmentations: 170 | - augmentation: Resize 171 | img_scale: 172 | - *id001 173 | keep_ratio: true 174 | - augmentation: Pad 175 | size_divisor: 32 176 | batch_size: 4 177 | dataset: COCODataset 178 | dataset_dir: /data/bail/COCO 179 | training: false 180 | samples: 5000 181 | weight_decay: 0.0001 182 | -------------------------------------------------------------------------------- /yamls/YOLOF_R101_C5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 5 6 | scales: 7 | - 32 8 | - 64 9 | - 128 10 | - 256 11 | - 512 12 | strides: 32 13 | backbone: 14 | activation: 15 | activation: relu 16 | backbone: CaffeResNet101 17 | dilation_rates: 18 | - 1 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | dropblock: null 24 | frozen_stages: 25 | - 1 26 | normalization: 27 | epsilon: 0.0001 28 | momentum: 0.997 29 | normalization: batch_norm 30 | trainable: false 31 | output_indices: 32 | - 5 33 | strides: 34 | - 2 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | data_format: channels_last 40 | detector: YOLOF 41 | dtype: float16 42 | excluding_weight_names: 43 | - predicted_box 44 | - predicted_class 45 | head: 46 | activation: 47 | activation: relu 48 | assigner: 49 | 
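# Uniform matching (YOLOF): every gt box takes its match_times nearest anchors
# as positives; the ignore thresholds then filter low-quality positives and
# high-IoU negatives so the single-level anchors stay balanced across gt sizes.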
assigner: UniformAssigner 50 | match_times: 8 51 | neg_ignore_thresh: 0.15 52 | pos_ignore_thresh: 0.7 53 | bbox_decoder: 54 | decoder: Delta2Box 55 | weights: 56 | - 1.0 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | bbox_encoder: 61 | encoder: Box2Delta 62 | weights: 63 | - 1.0 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | bbox_loss: 68 | loss: GIoULoss 69 | reduction: sum 70 | weight: 2.0 71 | cls_num_convs: 2 72 | feat_dims: 512 73 | head: YOLOFHead 74 | kernel_initializer: he_normal 75 | label_loss: 76 | alpha: 0.25 77 | from_logits: true 78 | gamma: 2.0 79 | loss: FocalLoss 80 | reduction: sum 81 | weight: 1.0 82 | normalization: 83 | axis: -1 84 | epsilon: 0.001 85 | momentum: 0.9 86 | normalization: batch_norm 87 | trainable: true 88 | prior: 0.01 89 | reg_num_convs: 4 90 | sampler: 91 | sampler: PseudoSampler 92 | use_sigmoid: true 93 | input_shape: !!python/tuple 94 | - 1024 95 | - 1024 96 | - 3 97 | neck: 98 | activation: 99 | activation: relu 100 | data_format: channels_last 101 | dilation_rates: 102 | - 2 103 | - 4 104 | - 6 105 | - 8 106 | filters: 512 107 | kernel_initializer: he_normal 108 | midfilters: 128 109 | neck: DilatedEncoder 110 | normalization: 111 | axis: -1 112 | epsilon: 0.001 113 | momentum: 0.9 114 | normalization: batch_norm 115 | trainable: true 116 | num_classes: 80 117 | test: 118 | iou_threshold: 0.5 119 | nms: CombinedNonMaxSuppression 120 | post_nms_size: 100 121 | pre_nms_size: 2000 122 | score_threshold: 0.35 123 | train: 124 | checkpoint_dir: checkpoints/yolof 125 | dataset: 126 | augmentations: 127 | - augmentation: FlipLeftToRight 128 | probability: 0.5 129 | - augmentation: RandomDistortColor 130 | - augmentation: Resize 131 | img_scale: 132 | - &id001 !!python/tuple 133 | - 1333 134 | - 800 135 | keep_ratio: true 136 | - augmentation: Pad 137 | size_divisor: 32 138 | batch_size: 4 139 | dataset: COCODataset 140 | dataset_dir: /data/bail/COCO 141 | num_samples: 118287 142 | training: true 143 | gradient_clip_norm: 10.0 144 | log_every_n_steps: 100 145 | mixed_precision: 146 | loss_scale: null 147 | optimizer: 148 | momentum: 0.9 149 | optimizer: SGD 150 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 151 | save_ckpt_steps: 5000 152 | scheduler: 153 | learning_rate_scheduler: 154 | boundaries: 155 | - 16 156 | - 22 157 | scheduler: PiecewiseConstantDecay 158 | values: 159 | - 0.02 160 | - 0.002 161 | - 0.0002 162 | train_epochs: 24 163 | warmup: 164 | steps: 800 165 | warmup_learning_rate: 0.001 166 | summary_dir: logs/yolof 167 | val: 168 | dataset: 169 | augmentations: 170 | - augmentation: Resize 171 | img_scale: 172 | - *id001 173 | keep_ratio: true 174 | - augmentation: Pad 175 | size_divisor: 32 176 | batch_size: 4 177 | dataset: COCODataset 178 | dataset_dir: /data/bail/COCO 179 | training: false 180 | samples: 5000 181 | weight_decay: 0.0001 182 | -------------------------------------------------------------------------------- /yamls/YOLOF_X_101_64x4d_C5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 5 6 | scales: 7 | - 32 8 | - 64 9 | - 128 10 | - 256 11 | - 512 12 | strides: 32 13 | backbone: 14 | activation: 15 | activation: relu 16 | backbone: ResNeXt101_64X4D 17 | dilation_rates: 18 | - 1 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | dropblock: null 24 | frozen_stages: 25 | - 1 26 | normalization: 27 | epsilon: 0.0001 28 | momentum: 0.997 29 | normalization: batch_norm 30 | trainable: false 31 
| output_indices: 32 | - 5 33 | strides: 34 | - 2 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | data_format: channels_last 40 | detector: YOLOF 41 | dtype: float16 42 | excluding_weight_names: 43 | - predicted_box 44 | - predicted_class 45 | head: 46 | activation: 47 | activation: relu 48 | assigner: 49 | assigner: UniformAssigner 50 | match_times: 8 51 | neg_ignore_thresh: 0.15 52 | pos_ignore_thresh: 0.7 53 | bbox_decoder: 54 | decoder: Delta2Box 55 | weights: 56 | - 1.0 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | bbox_encoder: 61 | encoder: Box2Delta 62 | weights: 63 | - 1.0 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | bbox_loss: 68 | loss: GIoULoss 69 | reduction: sum 70 | weight: 2.0 71 | cls_num_convs: 2 72 | feat_dims: 512 73 | head: YOLOFHead 74 | kernel_initializer: he_normal 75 | label_loss: 76 | alpha: 0.25 77 | from_logits: true 78 | gamma: 2.0 79 | loss: FocalLoss 80 | reduction: sum 81 | weight: 1.0 82 | normalization: 83 | axis: -1 84 | epsilon: 0.001 85 | momentum: 0.9 86 | normalization: batch_norm 87 | trainable: true 88 | prior: 0.01 89 | reg_num_convs: 4 90 | sampler: 91 | sampler: PseudoSampler 92 | use_sigmoid: true 93 | input_shape: !!python/tuple 94 | - 1024 95 | - 1024 96 | - 3 97 | neck: 98 | activation: 99 | activation: relu 100 | data_format: channels_last 101 | dilation_rates: 102 | - 2 103 | - 4 104 | - 6 105 | - 8 106 | filters: 512 107 | kernel_initializer: he_normal 108 | midfilters: 128 109 | neck: DilatedEncoder 110 | normalization: 111 | axis: -1 112 | epsilon: 0.001 113 | momentum: 0.9 114 | normalization: batch_norm 115 | trainable: true 116 | num_classes: 80 117 | test: 118 | iou_threshold: 0.5 119 | nms: CombinedNonMaxSuppression 120 | post_nms_size: 100 121 | pre_nms_size: 2000 122 | score_threshold: 0.35 123 | train: 124 | checkpoint_dir: checkpoints/yolof 125 | dataset: 126 | augmentations: 127 | - augmentation: FlipLeftToRight 128 | probability: 0.5 129 | - augmentation: RandomDistortColor 130 | - augmentation: Resize 131 | img_scale: 132 | - &id001 !!python/tuple 133 | - 1333 134 | - 800 135 | keep_ratio: true 136 | - augmentation: Pad 137 | size_divisor: 32 138 | batch_size: 4 139 | dataset: COCODataset 140 | dataset_dir: /data/bail/COCO 141 | num_samples: 118287 142 | training: true 143 | gradient_clip_norm: 10.0 144 | log_every_n_steps: 100 145 | mixed_precision: 146 | loss_scale: null 147 | optimizer: 148 | momentum: 0.9 149 | optimizer: SGD 150 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 151 | save_ckpt_steps: 5000 152 | scheduler: 153 | learning_rate_scheduler: 154 | boundaries: 155 | - 16 156 | - 22 157 | scheduler: PiecewiseConstantDecay 158 | values: 159 | - 0.02 160 | - 0.002 161 | - 0.0002 162 | train_epochs: 24 163 | warmup: 164 | steps: 800 165 | warmup_learning_rate: 0.001 166 | summary_dir: logs/yolof 167 | val: 168 | dataset: 169 | augmentations: 170 | - augmentation: Resize 171 | img_scale: 172 | - *id001 173 | keep_ratio: true 174 | - augmentation: Pad 175 | size_divisor: 32 176 | batch_size: 4 177 | dataset: COCODataset 178 | dataset_dir: /data/bail/COCO 179 | training: false 180 | samples: 5000 181 | weight_decay: 0.0001 182 | -------------------------------------------------------------------------------- /yamls/YOLOF_R101_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 6 6 | scales: 7 | - 16 8 | - 32 9 | - 64 10 | - 128 11 | - 256 12 | - 512 13 | strides: 16 14 | backbone: 
15 | activation: 16 | activation: relu 17 | backbone: CaffeResNet101 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 2 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 5 34 | strides: 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | - 1 40 | data_format: channels_last 41 | detector: YOLOF 42 | dtype: float16 43 | excluding_weight_names: 44 | - predicted_box 45 | - predicted_class 46 | head: 47 | activation: 48 | activation: relu 49 | assigner: 50 | assigner: UniformAssigner 51 | match_times: 8 52 | neg_ignore_thresh: 0.15 53 | pos_ignore_thresh: 0.7 54 | bbox_decoder: 55 | decoder: Delta2Box 56 | weights: 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | bbox_encoder: 62 | encoder: Box2Delta 63 | weights: 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | - 1.0 68 | bbox_loss: 69 | loss: GIoULoss 70 | reduction: sum 71 | weight: 2.0 72 | cls_num_convs: 2 73 | feat_dims: 512 74 | head: YOLOFHead 75 | kernel_initializer: he_normal 76 | label_loss: 77 | alpha: 0.25 78 | from_logits: true 79 | gamma: 2.0 80 | loss: FocalLoss 81 | reduction: sum 82 | weight: 1.0 83 | normalization: 84 | axis: -1 85 | epsilon: 0.001 86 | momentum: 0.9 87 | normalization: batch_norm 88 | trainable: true 89 | prior: 0.01 90 | reg_num_convs: 4 91 | sampler: 92 | sampler: PseudoSampler 93 | use_sigmoid: true 94 | input_shape: !!python/tuple 95 | - 1024 96 | - 1024 97 | - 3 98 | neck: 99 | activation: 100 | activation: relu 101 | data_format: channels_last 102 | dilation_rates: 103 | - 4 104 | - 8 105 | - 12 106 | - 16 107 | filters: 512 108 | kernel_initializer: he_normal 109 | midfilters: 128 110 | neck: DilatedEncoder 111 | normalization: 112 | axis: -1 113 | epsilon: 0.001 114 | momentum: 0.9 115 | normalization: batch_norm 116 | trainable: true 117 | num_classes: 80 118 | test: 119 | iou_threshold: 0.6 120 | nms: CombinedNonMaxSuppression 121 | post_nms_size: 100 122 | pre_nms_size: 2000 123 | score_threshold: 0.25 124 | train: 125 | checkpoint_dir: checkpoints/yolof 126 | dataset: 127 | augmentations: 128 | - augmentation: FlipLeftToRight 129 | probability: 0.5 130 | - augmentation: RandomDistortColor 131 | - augmentation: Resize 132 | img_scale: 133 | - &id001 !!python/tuple 134 | - 1333 135 | - 800 136 | keep_ratio: true 137 | - augmentation: Pad 138 | size_divisor: 32 139 | batch_size: 4 140 | dataset: COCODataset 141 | dataset_dir: /data/bail/COCO 142 | num_samples: 118287 143 | training: true 144 | gradient_clip_norm: 10.0 145 | log_every_n_steps: 100 146 | mixed_precision: 147 | loss_scale: null 148 | optimizer: 149 | momentum: 0.9 150 | optimizer: SGD 151 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 152 | save_ckpt_steps: 5000 153 | scheduler: 154 | learning_rate_scheduler: 155 | boundaries: 156 | - 16 157 | - 22 158 | scheduler: PiecewiseConstantDecay 159 | values: 160 | - 0.02 161 | - 0.002 162 | - 0.0002 163 | train_epochs: 24 164 | warmup: 165 | steps: 800 166 | warmup_learning_rate: 0.001 167 | summary_dir: logs/yolof 168 | val: 169 | dataset: 170 | augmentations: 171 | - augmentation: Resize 172 | img_scale: 173 | - *id001 174 | keep_ratio: true 175 | - augmentation: Pad 176 | size_divisor: 32 177 | batch_size: 4 178 | dataset: COCODataset 179 | dataset_dir: /data/bail/COCO 180 | training: false 181 | samples: 5000 182 | weight_decay: 0.0001 183 | -------------------------------------------------------------------------------- 
/yamls/YOLOF_R50_DC5_1x.yaml: -------------------------------------------------------------------------------- 1 | anchors: 2 | aspect_ratios: 3 | - 1.0 4 | generator: AnchorGenerator 5 | num_anchors: 6 6 | scales: 7 | - 16 8 | - 32 9 | - 64 10 | - 128 11 | - 256 12 | - 512 13 | strides: 16 14 | backbone: 15 | activation: 16 | activation: relu 17 | backbone: CaffeResNet50 18 | dilation_rates: 19 | - 1 20 | - 1 21 | - 1 22 | - 1 23 | - 2 24 | dropblock: null 25 | frozen_stages: 26 | - 1 27 | normalization: 28 | epsilon: 0.0001 29 | momentum: 0.997 30 | normalization: batch_norm 31 | trainable: false 32 | output_indices: 33 | - 5 34 | strides: 35 | - 2 36 | - 2 37 | - 2 38 | - 2 39 | - 1 40 | data_format: channels_last 41 | detector: YOLOF 42 | dtype: float16 43 | excluding_weight_names: 44 | - predicted_box 45 | - predicted_class 46 | head: 47 | activation: 48 | activation: relu 49 | assigner: 50 | assigner: UniformAssigner 51 | match_times: 8 52 | neg_ignore_thresh: 0.15 53 | pos_ignore_thresh: 0.7 54 | bbox_decoder: 55 | decoder: Delta2Box 56 | weights: 57 | - 1.0 58 | - 1.0 59 | - 1.0 60 | - 1.0 61 | bbox_encoder: 62 | encoder: Box2Delta 63 | weights: 64 | - 1.0 65 | - 1.0 66 | - 1.0 67 | - 1.0 68 | bbox_loss: 69 | loss: GIoULoss 70 | reduction: sum 71 | weight: 2.0 72 | cls_num_convs: 2 73 | feat_dims: 512 74 | head: YOLOFHead 75 | kernel_initializer: he_normal 76 | label_loss: 77 | alpha: 0.25 78 | from_logits: true 79 | gamma: 2.0 80 | loss: FocalLoss 81 | reduction: sum 82 | weight: 1.0 83 | normalization: 84 | axis: -1 85 | epsilon: 0.001 86 | momentum: 0.9 87 | normalization: batch_norm 88 | trainable: true 89 | prior: 0.01 90 | reg_num_convs: 4 91 | sampler: 92 | sampler: PseudoSampler 93 | use_sigmoid: true 94 | input_shape: !!python/tuple 95 | - 1024 96 | - 1024 97 | - 3 98 | neck: 99 | activation: 100 | activation: relu 101 | data_format: channels_last 102 | dilation_rates: 103 | - 4 104 | - 8 105 | - 12 106 | - 16 107 | filters: 512 108 | kernel_initializer: he_normal 109 | midfilters: 128 110 | neck: DilatedEncoder 111 | normalization: 112 | axis: -1 113 | epsilon: 0.001 114 | momentum: 0.9 115 | normalization: batch_norm 116 | trainable: true 117 | num_classes: 80 118 | test: 119 | iou_threshold: 0.5 120 | nms: CombinedNonMaxSuppression 121 | post_nms_size: 100 122 | pre_nms_size: 2000 123 | score_threshold: 0.35 124 | train: 125 | checkpoint_dir: checkpoints/yolof 126 | dataset: 127 | augmentations: 128 | - augmentation: FlipLeftToRight 129 | probability: 0.5 130 | - augmentation: RandomDistortColor 131 | - augmentation: Resize 132 | img_scale: 133 | - &id001 !!python/tuple 134 | - 1333 135 | - 800 136 | keep_ratio: true 137 | - augmentation: Pad 138 | size_divisor: 32 139 | batch_size: 4 140 | dataset: COCODataset 141 | dataset_dir: /data/bail/COCO 142 | num_samples: 118287 143 | training: true 144 | gradient_clip_norm: 10.0 145 | log_every_n_steps: 100 146 | mixed_precision: 147 | loss_scale: null 148 | optimizer: 149 | momentum: 0.9 150 | optimizer: SGD 151 | pretrained_weights_path: /data/bail/pretrained_weights/resnet50/resnet50.ckpt 152 | save_ckpt_steps: 5000 153 | scheduler: 154 | learning_rate_scheduler: 155 | boundaries: 156 | - 16 157 | - 22 158 | scheduler: PiecewiseConstantDecay 159 | values: 160 | - 0.02 161 | - 0.002 162 | - 0.0002 163 | train_epochs: 24 164 | warmup: 165 | steps: 800 166 | warmup_learning_rate: 0.001 167 | summary_dir: logs/yolof 168 | val: 169 | dataset: 170 | augmentations: 171 | - augmentation: Resize 172 | img_scale: 173 | - *id001 174 | 
keep_ratio: true 175 | - augmentation: Pad 176 | size_divisor: 32 177 | batch_size: 4 178 | dataset: COCODataset 179 | dataset_dir: /data/bail/COCO 180 | training: false 181 | samples: 5000 182 | weight_decay: 0.0001 183 | --------------------------------------------------------------------------------
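The YAML files above are dumps of nested config dictionaries (note the `&id001`/`*id001` anchor reuse between the train and val `img_scale`). A minimal sketch of loading one for inspection, assuming PyYAML is installed; the `!!python/tuple` tags mean the files must be read with `yaml.UnsafeLoader`, so only load files you trust:

import yaml

with open("yamls/YOLOF_R50_DC5_1x.yaml") as f:
    cfg = yaml.load(f, Loader=yaml.UnsafeLoader)

print(cfg["detector"])                # YOLOF
print(cfg["neck"]["dilation_rates"])  # [4, 8, 12, 16]
print(cfg["train"]["scheduler"]["learning_rate_scheduler"]["values"])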
/core/losses/generalized_focal_loss.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from ..builder import LOSSES


@LOSSES.register
class QualityFocalLoss(tf.keras.losses.Loss):
    def __init__(self,
                 from_logits=True,
                 use_sigmoid=True,
                 beta=2.0,
                 reduction=tf.keras.losses.Reduction.SUM,
                 weight=1.,
                 name="QualityFocalLoss"):
        super(QualityFocalLoss, self).__init__(reduction=reduction, name=name)

        assert use_sigmoid, "Only support sigmoid."
        self.use_sigmoid = use_sigmoid
        self.from_logits = from_logits
        self.weight = weight
        self.beta = beta

    def _quality_focal_loss(self, labels, scores, y_pred, beta=2.0, from_logits=True):
        """Quality focal loss.

        Args:
            labels (Tensor): Target category label (one-hot) with shape (B, N, C).
            scores (Tensor): Target quality label with shape (B, N).
            y_pred (Tensor): Predicted joint representation of classification
                and quality (IoU) estimation with shape (B, N, C), where C is
                the number of classes.
            beta (float): The beta parameter for calculating the modulating
                factor. Defaults to 2.0.
            from_logits (bool): Whether `y_pred` is given as logits. Defaults to True.
        """
        with tf.name_scope("quality_focal_loss"):
            # Replace the one-hot targets of positive samples with their quality
            # (IoU) scores, so classification and quality are learned jointly.
            pos_mask = labels == 1.
            pos_scores = tf.boolean_mask(scores, tf.reduce_any(pos_mask, -1))
            labels = tf.tensor_scatter_nd_update(labels, tf.where(pos_mask), pos_scores)

            if from_logits:
                loss = tf.nn.sigmoid_cross_entropy_with_logits(labels, y_pred)
            else:
                loss = tf.keras.losses.binary_crossentropy(labels, y_pred, False)

            # The modulating factor |score - sigmoid(pred)| ** beta down-weights
            # easy examples, as in focal loss.
            scale_factor = tf.nn.sigmoid(y_pred)
            num_classes = tf.shape(y_pred)[-1]
            pos_scale_factor = tf.abs(tf.tile(tf.expand_dims(scores, -1), [1, 1, num_classes]) - scale_factor)
            scale_factor = tf.where(pos_mask, pos_scale_factor, scale_factor)
            scale_factor = tf.pow(scale_factor, beta)

            weighted_loss = scale_factor * loss * self.weight

            return weighted_loss

    def call(self, y_true, y_pred):
        y_true, quality_scores = y_true
        return self._quality_focal_loss(y_true, quality_scores, y_pred, self.beta, self.from_logits)


@LOSSES.register
class DistributionFocalLoss(tf.keras.losses.Loss):
    """Distribution Focal Loss (DFL) from `Generalized Focal Loss: Learning
    Qualified and Distributed Bounding Boxes for Dense Object Detection
    <https://arxiv.org/abs/2006.04388>`_.

    Returns:
        Tensor: Loss tensor with shape (N,).
    """
    def __init__(self,
                 from_logits=True,
                 reduction=tf.keras.losses.Reduction.SUM,
                 weight=1.,
                 name="DistributionFocalLoss"):
        super(DistributionFocalLoss, self).__init__(reduction=reduction, name=name)

        assert from_logits, "Only support logits."
        self.from_logits = from_logits
        self.weight = weight

    def _distribution_focal_loss(self, y_true, y_pred):
        with tf.name_scope("distribution_focal_loss"):
            # The continuous target y is represented by its two nearest integer
            # bins; each bin's cross entropy is weighted by its distance to y.
            dist_left = tf.cast(y_true, tf.int64)
            dist_right = dist_left + 1

            weight_left = tf.cast(dist_right, tf.float32) - y_true
            weight_right = y_true - tf.cast(dist_left, tf.float32)

            loss_left = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=dist_left, logits=y_pred) * weight_left
            loss_right = tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=dist_right, logits=y_pred) * weight_right

            return loss_left + loss_right

    def call(self, y_true, y_pred):
        return self._distribution_focal_loss(y_true, y_pred) * self.weight
--------------------------------------------------------------------------------
/models/detectors/detr.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class Encoder(tf.keras.Model):
    def __init__(self,
                 num_heads,
                 dim,
                 hidden_dim=2048,
                 dropout=0.1,
                 activation="relu",
                 **kwargs):
        super(Encoder, self).__init__(**kwargs)

        self.self_attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=dim,
            dropout=dropout,
            name="self_attn")
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout, name="dropout1")
        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, name="norm1")

        # Feed-forward network: expand to hidden_dim, then project back to dim
        # so the residual connection type-checks.
        self.linear1 = tf.keras.layers.Dense(units=hidden_dim, activation=activation, name="linear1")
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout, name="dropout2")
        self.linear2 = tf.keras.layers.Dense(units=dim, name="linear2")
        self.dropout3 = tf.keras.layers.Dropout(rate=dropout, name="dropout3")
        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, name="norm2")

    def call(self, src, src_mask=None, pos_embed=None, training=None):
        # Positional embeddings are added to queries and keys only.
        query = key = src if pos_embed is None else pos_embed + src
        src2 = self.self_attn(query=query, key=key, value=src, attention_mask=src_mask, training=training)
        src += self.dropout1(src2, training=training)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout2(self.linear1(src), training=training))
        src += self.dropout3(src2, training=training)
        src = self.norm2(src)

        return src


class Decoder(tf.keras.Model):
    def __init__(self,
                 dim,
                 num_heads,
                 hidden_dim=2048,
                 dropout=0.1,
                 activation="relu",
                 **kwargs):
        super(Decoder, self).__init__(**kwargs)

        self.self_attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=dim,
            dropout=dropout,
            name="self_attn")
        self.dropout1 = tf.keras.layers.Dropout(rate=dropout, name="dropout1")
        self.norm1 = tf.keras.layers.LayerNormalization(axis=-1, name="norm1")

        self.multihead_attn = tf.keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=dim,
            dropout=dropout,
            name="multihead_attn")
        self.dropout2 = tf.keras.layers.Dropout(rate=dropout, name="dropout2")
        self.norm2 = tf.keras.layers.LayerNormalization(axis=-1, name="norm2")

        self.linear1 = tf.keras.layers.Dense(units=hidden_dim, activation=activation, name="linear1")
        self.dropout3 = tf.keras.layers.Dropout(rate=dropout, name="dropout3")
        self.linear2 = tf.keras.layers.Dense(units=dim, name="linear2")
        self.dropout4 = tf.keras.layers.Dropout(rate=dropout, name="dropout4")
        self.norm3 = tf.keras.layers.LayerNormalization(axis=-1, name="norm3")

    def call(self, target, memory, target_mask=None, memory_mask=None, pos_embed=None, query_pos_embed=None, training=None):
        q = k = target if query_pos_embed is None else target + query_pos_embed

        # Self-attention over the object queries.
        target2 = self.self_attn(query=q, key=k, value=target, attention_mask=target_mask, training=training)
        target += self.dropout1(target2, training=training)
        target = self.norm1(target)

        # Cross-attention from the queries into the encoder memory.
        target2 = self.multihead_attn(
            query=target if query_pos_embed is None else target + query_pos_embed,
            key=memory if pos_embed is None else memory + pos_embed,
            value=memory,
            attention_mask=memory_mask,
            training=training)
        target += self.dropout2(target2, training=training)
        target = self.norm2(target)

        # Feed-forward network.
        target2 = self.linear2(self.dropout3(self.linear1(target), training=training))
        target += self.dropout4(target2, training=training)
        target = self.norm3(target)

        return target
--------------------------------------------------------------------------------
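A hypothetical smoke test for the Encoder and Decoder above (shapes are assumptions; the DETR paper uses dim=256 and 100 object queries):

import tensorflow as tf

encoder = Encoder(num_heads=8, dim=256)
decoder = Decoder(dim=256, num_heads=8)

src = tf.random.normal([2, 600, 256])        # [batch, flattened positions, dim]
queries = tf.zeros([2, 100, 256])            # [batch, num_queries, dim]
query_pos = tf.random.normal([2, 100, 256])  # learned query embeddings in DETR

memory = encoder(src, training=False)
out = decoder(queries, memory, query_pos_embed=query_pos, training=False)
print(out.shape)  # (2, 100, 256)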
/core/layers/position_sensitive_average_pooling.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class PSAvgPooling(tf.keras.layers.Layer):
    def __init__(self, num_boxes, crop_size=(9, 9), num_spatial_bins=(3, 3), **kwargs):
        super(PSAvgPooling, self).__init__(**kwargs)

        total_bins = 1
        bin_crop_size = []
        for num_bins, crop_dim in zip(num_spatial_bins, crop_size):
            if num_bins < 1:
                raise ValueError("num_spatial_bins should be >= 1.")

            if crop_dim % num_bins != 0:
                raise ValueError("crop_size should be divisible by num_spatial_bins.")

            total_bins *= num_bins
            bin_crop_size.append(crop_dim // num_bins)

        if bin_crop_size[0] != bin_crop_size[1]:
            raise ValueError("Only support square bin crop size for now.")

        self.crop_size = crop_size
        self.num_spatial_bins = num_spatial_bins
        self.bin_crop_size = bin_crop_size
        self.total_bins = total_bins
        self.num_boxes = num_boxes

    def build(self, input_shape):
        # `weights` is a reserved property on `tf.keras.layers.Layer`, so the
        # per-bin weights get their own attribute name. One scalar per bin.
        self.bin_weights = self.add_weight(name="weight",
                                           shape=[1, 1, 1, 1, self.total_bins],
                                           dtype=self.dtype,
                                           initializer=tf.keras.initializers.Ones())

    def _one_image_pooling(self, image, boxes):
        y1, x1, y2, x2 = tf.unstack(boxes, 4, -1)
        ps_boxes = tf.TensorArray(size=self.total_bins, dtype=boxes.dtype)

        # Split every RoI into a num_spatial_bins[0] x num_spatial_bins[1] grid
        # of sub-boxes, one per position-sensitive bin.
        step_y = (y2 - y1) / tf.cast(self.num_spatial_bins[0], dtype=boxes.dtype)
        step_x = (x2 - x1) / tf.cast(self.num_spatial_bins[1], dtype=boxes.dtype)
        i = tf.constant(0, tf.int32)
        for bin_y in tf.range(self.num_spatial_bins[0], dtype=boxes.dtype):
            for bin_x in tf.range(self.num_spatial_bins[1], dtype=boxes.dtype):
                box = tf.stack([y1 + bin_y * step_y,
                                x1 + bin_x * step_x,
                                y1 + (bin_y + 1.) * step_y,
                                x1 + (bin_x + 1.) * step_x], axis=-1)  # [num_boxes, 4]
                ps_boxes = ps_boxes.write(i, box)
                i += 1

        # Each bin reads its own slice of the channel dimension.
        step_split = tf.shape(image)[-1] // self.total_bins
        image_crops = tf.TensorArray(size=self.total_bins, dtype=image.dtype)
        for i in tf.range(self.total_bins):
            split = image[..., i * step_split: (i + 1) * step_split]
            split_crop = tf.image.crop_and_resize(image=tf.expand_dims(split, 0),
                                                  boxes=ps_boxes.read(i),
                                                  box_indices=tf.zeros(tf.shape(boxes)[0], dtype=tf.int32),
                                                  crop_size=self.bin_crop_size)
            image_crops = image_crops.write(i, split_crop)

        # [num_bins, num_boxes, crop_height, crop_width, depth]
        #   -> [num_boxes, crop_height, crop_width, depth, num_bins]
        features = tf.transpose(image_crops.stack(), [1, 2, 3, 4, 0])
        features = tf.reduce_mean(features, [1, 2], keepdims=True)   # [num_boxes, 1, 1, depth, num_bins]
        features = tf.reduce_mean(features * self.bin_weights, -1)   # [num_boxes, 1, 1, depth]

        return features

    def call(self, inputs, boxes):
        batch_size = tf.shape(inputs)[0]
        features = tf.TensorArray(size=batch_size, dtype=inputs.dtype)

        for i in tf.range(batch_size):
            feat = self._one_image_pooling(inputs[i], boxes[i])
            features = features.write(i, feat)

        return features.concat()

    def compute_output_shape(self, input_shape):
        return tf.TensorShape([self.num_boxes, 1, 1, input_shape[-1] // self.total_bins])

    def get_config(self):
        # Mirror the constructor arguments so the layer round-trips through
        # `from_config`.
        config = {"num_boxes": self.num_boxes,
                  "crop_size": self.crop_size,
                  "num_spatial_bins": self.num_spatial_bins}

        base_config = super(PSAvgPooling, self).get_config()

        return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------
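A hypothetical usage sketch for the layer above. With 3x3 bins, the channel dimension must be a multiple of total_bins (9 here) so each bin can pool its own channel slice, and boxes are normalized coordinates because `tf.image.crop_and_resize` expects them:

import tensorflow as tf

layer = PSAvgPooling(num_boxes=3, crop_size=(9, 9), num_spatial_bins=(3, 3))
features = tf.random.normal([1, 32, 32, 9 * 16])   # depth per bin = 16
boxes = tf.constant([[[0.1, 0.1, 0.5, 0.5],
                      [0.2, 0.3, 0.8, 0.9],
                      [0.0, 0.0, 1.0, 1.0]]])      # normalized y1, x1, y2, x2
pooled = layer(features, boxes)                    # [3, 1, 1, 16]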
/core/optimizers/lookahead_optimizer.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.distribute import distribution_strategy_context
from ..builder import OPTIMIZERS


@OPTIMIZERS.register
class LookaheadOptimizer(tf.keras.optimizers.Optimizer):
    """Wraps another optimizer: the wrapped ("fast") weights take k steps,
    then the slow weights move toward them by a factor of alpha and the fast
    weights are reset to the slow copies.
    """
    def __init__(self, optimizer, k=5, alpha=0.5, name=None, **kwargs):
        super(LookaheadOptimizer, self).__init__(name=name, **kwargs)

        self.k = tf.constant(k, dtype=tf.float32)
        self.alpha = tf.constant(alpha, dtype=tf.float32)
        self._optimizer = optimizer

        self._iterations = self._optimizer.iterations
        self.slow_weights = []

        self.add_slow_weights = True

        self.replica_context = tf.distribute.get_replica_context()

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, "slow")

    def _resource_apply_dense(self, grad, var, apply_state=None):
        pass

    def _resource_apply_sparse(self, grad, var, indices, apply_state=None):
        pass

    def _update_weights(self, fast_weights, slow_weights, alpha):
        def _update_slow_weight(slow_weight, fast_weight, a):
            # slow <- slow + alpha * (fast - slow)
            slow_weight.assign_add(a * (fast_weight - slow_weight))

        def _update_fast_weight(fast_weight, slow_weight):
            # fast <- slow
            fast_weight.assign(slow_weight)

        # Synchronize every k steps.
        if tf.equal(tf.cast(self._iterations, tf.float32) % self.k, 0):
            if distribution_strategy_context.has_strategy():
                distribution = distribution_strategy_context.get_replica_context()

                for fast, slow in zip(fast_weights, slow_weights):
                    distribution.extended.call_for_each_replica(_update_slow_weight,
                                                                args=(slow, fast.value(), alpha))
                    distribution.extended.call_for_each_replica(_update_fast_weight,
                                                                args=(fast, slow.value()))
            else:
                for fast, slow in zip(fast_weights, slow_weights):
                    _update_slow_weight(slow, fast.value(), alpha)
                    _update_fast_weight(fast, slow.value())

    def apply_gradients(self, grads_and_vars, name=None):
        # Materialize first: `grads_and_vars` is often a one-shot generator
        # (e.g. `zip(...)`) and is iterated twice below.
        grads_and_vars = list(grads_and_vars)
        fast_weights = [v for _, v in grads_and_vars]
        if self.add_slow_weights:
            # Lazily create one non-trainable copy of every fast weight.
            self.slow_weights = [
                tf.Variable(initial_value=w.value(),
                            trainable=False,
                            name=w.name.split(":")[0] + "/slow")
                for w in fast_weights
            ]
            self.add_slow_weights = False

        res = self._optimizer.apply_gradients(grads_and_vars, name=name)

        self._update_weights(fast_weights, self.slow_weights, self.alpha)

        return res

    def get_config(self):
        config = self._optimizer.get_config()

        return config

    @property
    def learning_rate(self):
        return self._optimizer.learning_rate

    @learning_rate.setter
    def learning_rate(self, value):
        self._optimizer.learning_rate = value

    @property
    def lr(self):
        return self._optimizer.lr

    @lr.setter
    def lr(self, lr):
        self._optimizer.lr = lr

    def get_weights(self):
        return self._optimizer.get_weights()

    def set_weights(self, weights):
        return self._optimizer.set_weights(weights)

    @property
    def iterations(self):
        return self._optimizer.iterations

    @iterations.setter
    def iterations(self, variable):
        self._optimizer.iterations = variable

    def get_slot_names(self):
        return self._optimizer.get_slot_names()

    def variables(self):
        return self._optimizer.variables()

    @property
    def weights(self):
        return self._optimizer.weights
--------------------------------------------------------------------------------
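A minimal eager-mode sketch of the wrapper above, bypassing the registry-based config flow (the explicit `name` is an assumption to satisfy the Keras optimizer base class):

import tensorflow as tf

base = tf.keras.optimizers.SGD(learning_rate=0.02, momentum=0.9)
opt = LookaheadOptimizer(base, k=5, alpha=0.5, name="Lookahead")

var = tf.Variable(1.0)
for _ in range(10):
    with tf.GradientTape() as tape:
        loss = (var - 3.0) ** 2
    grads = tape.gradient(loss, [var])
    # Every 5th step the slow copy moves halfway toward `var`,
    # and `var` is reset to the slow copy.
    opt.apply_gradients(zip(grads, [var]))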
/core/layers/drop_block.py:
--------------------------------------------------------------------------------
import tensorflow as tf


class DropBlock2D(tf.keras.layers.Layer):
    def __init__(self, block_size=7, drop_rate=0.1, data_format="channels_last", **kwargs):
        super(DropBlock2D, self).__init__(**kwargs)

        self.block_size = block_size
        self.keep_prob = 1. - drop_rate

        assert data_format in {"channels_first", "channels_last"}
        self.data_format = data_format

    def build(self, input_shape):
        super(DropBlock2D, self).build(input_shape)

    def _drop_block_nhwc(self, inputs):
        with tf.name_scope("drop_block_nhwc"):
            input_shape = tf.shape(inputs)
            top, left = input_shape[1] // 2, input_shape[2] // 2
            bottom, right = input_shape[1] - top, input_shape[2] - left
            padding = [[0, 0], [top, bottom], [left, right], [0, 0]]

            # gamma is the Bernoulli rate for seed positions, chosen so the
            # expected fraction of dropped activations equals drop_rate.
            feat_size = tf.cast(input_shape[1:3], self.dtype)
            gamma1 = (1. - self.keep_prob) / (self.block_size * self.block_size)
            gamma2 = (feat_size[0] * feat_size[1]) / (feat_size[0] - self.block_size + 1.) / \
                (feat_size[1] - self.block_size + 1.)

            gamma = gamma1 * gamma2

            # Sample seeds only where a full block fits, then grow each seed
            # into a block_size x block_size region with max pooling.
            mask_shape = [input_shape[0],
                          input_shape[1] - self.block_size + 1,
                          input_shape[2] - self.block_size + 1,
                          input_shape[3]]
            mask = tf.nn.relu(tf.sign(gamma - tf.random.uniform(mask_shape, 0, 1, dtype=self.dtype)))

            mask = tf.pad(mask, paddings=padding)
            mask = tf.nn.max_pool(mask, [1, self.block_size, self.block_size, 1], [1, 1, 1, 1], "SAME", "NHWC")

            mask = 1. - mask
            # Rescale so the expected activation magnitude is unchanged.
            mask = mask * tf.cast(tf.size(mask), mask.dtype) / tf.reduce_sum(mask)
            mask = tf.cast(mask, inputs.dtype)
            outputs = mask * inputs

            return outputs

    def _drop_block_nchw(self, inputs):
        with tf.name_scope("drop_block_nchw"):
            input_shape = tf.shape(inputs)
            top, left = input_shape[2] // 2, input_shape[3] // 2
            bottom, right = input_shape[2] - top, input_shape[3] - left
            padding = [[0, 0], [0, 0], [top, bottom], [left, right]]

            feat_size = tf.cast(input_shape[2:], self.dtype)
            gamma1 = (1. - self.keep_prob) / (self.block_size * self.block_size)
            gamma2 = (feat_size[0] * feat_size[1]) / (feat_size[0] - self.block_size + 1.) / \
                (feat_size[1] - self.block_size + 1.)

            gamma = gamma1 * gamma2

            mask_shape = [input_shape[0],
                          input_shape[1],
                          input_shape[2] - self.block_size + 1,
                          input_shape[3] - self.block_size + 1]
            mask = tf.nn.relu(tf.sign(gamma - tf.random.uniform(mask_shape, 0, 1, dtype=self.dtype)))

            mask = tf.pad(mask, paddings=padding)
            # The pooling window must follow the NCHW layout.
            mask = tf.nn.max_pool(mask, [1, 1, self.block_size, self.block_size], [1, 1, 1, 1], "SAME", "NCHW")

            mask = 1. - mask
            mask = mask * tf.cast(tf.size(mask), mask.dtype) / tf.reduce_sum(mask)
            mask = tf.cast(mask, inputs.dtype)
            outputs = mask * inputs

            return outputs

    def call(self, inputs, training=None):
        if training is None:
            training = tf.keras.backend.learning_phase()

        if training:
            if self.data_format == "channels_first":
                return self._drop_block_nchw(inputs)

            return self._drop_block_nhwc(inputs)

        return inputs

    def compute_output_shape(self, input_shape):
        return input_shape

    def get_config(self):
        # Mirror the constructor arguments so the layer round-trips through
        # `from_config`.
        config = {"block_size": self.block_size,
                  "drop_rate": 1. - self.keep_prob,
                  "data_format": self.data_format}

        base_config = super(DropBlock2D, self).get_config()

        return dict(list(base_config.items()) + list(config.items()))
--------------------------------------------------------------------------------
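A back-of-envelope check of the seed rate gamma used above (the feature-map size and rates here are assumed values for illustration):

keep_prob, block_size, h, w = 0.9, 7, 32, 32
gamma = (1. - keep_prob) / block_size ** 2 * (h * w) / ((h - block_size + 1) * (w - block_size + 1))
print(round(gamma, 5))  # ~0.00309: each seed grows into a 7x7 zeroed block,
                        # so the expected dropped fraction stays near drop_rate = 0.1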
/models/necks/dlaup.py:
--------------------------------------------------------------------------------
import numpy as np
import tensorflow as tf
from ..common import ConvNormActBlock
from ..builder import NECKS


def ida_up(inputs,
           kernel_size,
           infilters,
           outfilters,
           up_factors,
           data_format="channels_last",
           normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-5, axis=-1, trainable=True),
           activation=dict(activation="relu"),
           kernel_initializer="he_normal",
           name="ida_up"):
    assert len(inputs) == len(infilters), "{} vs {} inputs".format(len(infilters), len(inputs))

    # Project every input to `outfilters` channels and upsample it to the
    # resolution of the finest level.
    for i, inp in enumerate(inputs):
        if infilters[i] == outfilters:
            x = inp
        else:
            x = ConvNormActBlock(outfilters,
                                 kernel_size=1,
                                 strides=1,
                                 kernel_initializer=kernel_initializer,
                                 activation=activation,
                                 normalization=normalization,
                                 name=name + "/proj_%d" % i)(inp)
        if up_factors[i] != 1:
            # Use a local name here: overwriting `kernel_size` would corrupt
            # the kernel size of the node convolutions below.
            up_kernel_size = (up_factors[i] * 2, up_factors[i] * 2)
            strides = (up_factors[i], up_factors[i])
            x = tf.keras.layers.Conv2DTranspose(filters=outfilters,
                                                kernel_size=up_kernel_size,
                                                strides=strides,
                                                padding="same",
                                                output_padding=None,
                                                groups=outfilters,
                                                kernel_initializer=kernel_initializer,
                                                name=name + "/up_%d" % i,
                                                use_bias=False)(x)
        inputs[i] = x

    # Iteratively aggregate neighbouring levels.
    x = inputs[0]
    outputs = []
    channel_axis = -1 if data_format == "channels_last" else 1
    for i in range(1, len(inputs)):
        x = tf.keras.layers.Concatenate(axis=channel_axis, name=name + "/cat%d" % i)([x, inputs[i]])
        x = ConvNormActBlock(outfilters,
                             kernel_size=kernel_size,
                             strides=1,
                             kernel_initializer=kernel_initializer,
                             activation=activation,
                             normalization=normalization,
                             name=name + "/node_%d" % i)(x)

        outputs.append(x)

    return x, outputs


@NECKS.register("DLAUp")
def dla_up(filters=None,
           input_shapes=None,
           downsample_ratio=4,
           data_format="channels_last",
           normalization=dict(normalization="batch_norm", momentum=0.9, epsilon=1e-5, axis=-1, trainable=True),
           activation=dict(activation="relu"),
           kernel_initializer="he_normal",
           name="dla_up"):
    assert downsample_ratio in [2, 4, 8, 16]

    first_level = int(np.log2(downsample_ratio))
    scales = [2 ** i for i in range(len(filters[first_level:]))]

    inputs = [tf.keras.Input(shape=shape) for shape in input_shapes[first_level:]]

    layers = [i for i in inputs]
    # Copy so the caller's `filters` list is not mutated in the loop below.
    infilters = list(filters)
    scales = np.array(scales, dtype=int)
    for i in range(len(filters[first_level:]) - 1):
        j = -i - 2
        x, y = ida_up(layers[j:],
                      kernel_size=3,
                      infilters=infilters[j:],
                      outfilters=filters[j],
                      up_factors=scales[j:] // scales[j],
                      data_format=data_format,
                      normalization=normalization,
                      activation=activation,
                      kernel_initializer=kernel_initializer,
                      name=name + "/ida_%d" % i)
        scales[j + 1:] = scales[j]
        infilters[j + 1:] = [filters[j] for _ in filters[j + 1:]]
        layers[-i - 1:] = y

    return tf.keras.Model(inputs=inputs, outputs=x, name=name)
--------------------------------------------------------------------------------
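A hypothetical sketch of building the neck, assuming a DLA-style backbone whose feature maps halve in resolution and double in channels per level (with downsample_ratio=4, only levels from index 2 onward are aggregated):

neck = dla_up(filters=[16, 32, 64, 128, 256, 512],
              input_shapes=[(256, 256, 16), (128, 128, 32), (64, 64, 64),
                            (32, 32, 128), (16, 16, 256), (8, 8, 512)],
              downsample_ratio=4)
neck.summary()  # output is the aggregated stride-4 feature map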
/core/layers/weight_standardization_conv2d.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.python.keras.utils import conv_utils
from tensorflow.python.keras.engine.input_spec import InputSpec


class WSConv2D(tf.keras.layers.Conv2D):
    """Conv2D with weight standardization: the kernel is standardized to zero
    mean and unit variance on every forward pass, not just once at build time.
    """
    def __init__(self,
                 filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format=None,
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 **kwargs):
        super(WSConv2D, self).__init__(filters,
                                       kernel_size,
                                       strides=strides,
                                       padding=padding,
                                       data_format=data_format,
                                       dilation_rate=dilation_rate,
                                       activation=activation,
                                       use_bias=use_bias,
                                       kernel_initializer=kernel_initializer,
                                       bias_initializer=bias_initializer,
                                       kernel_regularizer=kernel_regularizer,
                                       bias_regularizer=bias_regularizer,
                                       activity_regularizer=activity_regularizer,
                                       kernel_constraint=kernel_constraint,
                                       bias_constraint=bias_constraint,
                                       **kwargs)

    def build(self, input_shape):
        input_shape = tf.TensorShape(input_shape)
        channel_axis = self._get_channel_axis()
        if input_shape.dims[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = int(input_shape[channel_axis])
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        self.kernel = self.add_weight(
            name='kernel',
            shape=kernel_shape,
            initializer=self.kernel_initializer,
            regularizer=self.kernel_regularizer,
            constraint=self.kernel_constraint,
            trainable=True,
            dtype=self.dtype)
        if self.use_bias:
            self.bias = self.add_weight(
                name='bias',
                shape=(self.filters,),
                initializer=self.bias_initializer,
                regularizer=self.bias_regularizer,
                constraint=self.bias_constraint,
                trainable=True,
                dtype=self.dtype)
        else:
            self.bias = None

        self.input_spec = InputSpec(ndim=self.rank + 2,
                                    axes={channel_axis: input_dim})
        self.built = True

    def call(self, inputs):
        # Standardize the kernel on every call; doing it once in `build`, as
        # an earlier version did, only rescales the initial weights and stops
        # standardizing as soon as training updates them.
        mean, variance = tf.nn.moments(self.kernel, axes=[0, 1, 2], keepdims=True)
        kernel = (self.kernel - mean) / (tf.sqrt(variance) + 1e-5)

        outputs = tf.nn.convolution(
            inputs,
            kernel,
            strides=list(self.strides),
            padding=self.padding.upper(),
            dilations=list(self.dilation_rate),
            data_format=conv_utils.convert_data_format(self.data_format, self.rank + 2))

        if self.use_bias:
            outputs = tf.nn.bias_add(
                outputs, self.bias,
                data_format=conv_utils.convert_data_format(self.data_format, 4))

        if self.activation is not None:
            return self.activation(outputs)

        return outputs

    def compute_output_shape(self, input_shape):
        return super(WSConv2D, self).compute_output_shape(input_shape)
--------------------------------------------------------------------------------
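A quick shape check for the layer above (input sizes are assumed values). Weight standardization is typically paired with GroupNorm rather than BatchNorm, which is why it is kept separate from the normalization configs:

import tensorflow as tf

conv = WSConv2D(filters=64, kernel_size=3, padding="same")
x = tf.random.normal([2, 56, 56, 32])
y = conv(x)
print(y.shape)  # (2, 56, 56, 64)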
42 | """ 43 | iou_type = iou_type.lower() 44 | assert iou_type in ["iou", "ciou", "diou", "giou"] 45 | t_y1, t_x1, t_y2, t_x2 = tf.unstack(target_boxes, num=4, axis=-1) 46 | p_y1, p_x1, p_y2, p_x2 = tf.unstack(predicted_boxes, num=4, axis=-1) 47 | 48 | zeros = tf.zeros_like(t_y1) 49 | p_width = tf.maximum(zeros, p_x2 - p_x1) 50 | p_height = tf.maximum(zeros, p_y2 - p_y1) 51 | t_width = tf.maximum(zeros, t_x2 - t_x1) 52 | t_height = tf.maximum(zeros, t_y2 - t_y1) 53 | p_area = p_width * p_height 54 | t_area = t_width * t_height 55 | 56 | # intersection 57 | i_x1 = tf.maximum(t_x1, p_x1) 58 | i_y1 = tf.maximum(t_y1, p_y1) 59 | i_x2 = tf.minimum(t_x2, p_x2) 60 | i_y2 = tf.minimum(t_y2, p_y2) 61 | i_width = tf.maximum(zeros, i_x2 - i_x1) 62 | i_height = tf.maximum(zeros, i_y2 - i_y1) 63 | i_area = i_width * i_height 64 | 65 | # union 66 | u_area = p_area + t_area - i_area 67 | iou_v = tf.math.divide_no_nan(i_area, u_area) 68 | if iou_type == "iou": 69 | return iou_v 70 | 71 | # enclose 72 | e_y1 = tf.minimum(p_y1, t_y1) 73 | e_x1 = tf.minimum(p_x1, t_x1) 74 | e_y2 = tf.maximum(p_y2, t_y2) 75 | e_x2 = tf.maximum(p_x2, t_x2) 76 | 77 | assert iou_type in ["diou", "ciou", "giou"] 78 | if iou_type == "giou": 79 | e_width = e_x2 - e_x1 80 | e_height = e_y2 - e_y1 81 | e_area = e_width * e_height 82 | giou_v = iou_v - tf.math.divide_no_nan(e_area - iou_v, e_area) 83 | 84 | return giou_v 85 | 86 | assert iou_type in ["diou", "ciou"] 87 | # box center 88 | p_center = tf.stack([(p_y1 + p_y2) / 2, (p_x1 + p_x2) / 2], axis=-1) 89 | t_center = tf.stack([(t_y1 + t_y2) / 2, (t_x1 + t_x2) / 2], axis=-1) 90 | 91 | center_dist = tf.linalg.norm(p_center - t_center, axis=-1) ** 2 92 | diag_dist = tf.linalg.norm(tf.stack([e_y2 - e_y1, e_x2 - e_x1], -1), axis=-1) ** 2 93 | diou_v = iou_v - tf.math.divide_no_nan(center_dist, diag_dist) 94 | 95 | if iou_type == "diou": 96 | return diou_v 97 | 98 | assert iou_type == "ciou" 99 | 100 | v = _get_v(p_height, p_width, t_height, t_width) 101 | alpha = tf.math.divide_no_nan(v, (1 - iou_v) + v) 102 | 103 | return diou_v - alpha * v 104 | 105 | 106 | def compute_unaligned_iou(target_boxes, predicted_boxes, iou_type="iou"): 107 | """Computing the IoU for unaligned boxes. 108 | 109 | Args: 110 | predicted_boxes: predicted boxes, with coordinate [y_min, x_min, y_max, x_max]. 111 | target_boxes: target boxes, with coordinate [y_min, x_min, y_max, x_max]. 112 | iou_type: one of ['iou', 'ciou', 'diou', 'giou']. 113 | Returns: 114 | IoU loss float `Tensor`. 115 | """ 116 | 117 | predicted_boxes = tf.expand_dims(predicted_boxes, 1) 118 | target_boxes = tf.expand_dims(target_boxes, 0) 119 | 120 | return compute_iou(target_boxes, predicted_boxes) 121 | --------------------------------------------------------------------------------